Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvdec/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/** \file     Buffer.cpp
44
 *  \brief    Low-overhead class describing 2D memory layout
45
 */
46
47
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
48
49
// unit needs to come first due to a forward declaration
50
51
#include "Unit.h"
52
#include "Buffer.h"
53
#include "InterpolationFilter.h"
54
#include "Picture.h"
55
#include "Slice.h"
56
57
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
58
#include "CommonDefX86.h"
59
#include <simde/x86/sse.h>
60
#endif
61
62
namespace vvdec
63
{
64
65
template< typename T >
66
void addAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
67
0
{
68
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
69
0
#define ADD_AVG_CORE_INC    \
70
0
  src1 += src1Stride;       \
71
0
  src2 += src2Stride;       \
72
0
  dest +=  dstStride;       \
73
0
74
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
75
76
0
#undef ADD_AVG_CORE_OP
77
0
#undef ADD_AVG_CORE_INC
78
0
}
79
80
template<typename T>
81
void reconstructCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
82
0
{
83
0
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
84
0
#define RECO_CORE_INC     \
85
0
  src1 += src1Stride;     \
86
0
  src2 += src2Stride;     \
87
0
  dest +=  dstStride;     \
88
0
89
0
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
90
91
0
#undef RECO_CORE_OP
92
0
#undef RECO_CORE_INC
93
0
}
94
95
96
template<typename T>
97
void linTfCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip )
98
0
{
99
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset )
100
0
#define LINTF_CORE_INC  \
101
0
  src += srcStride;     \
102
0
  dst += dstStride;     \
103
0
104
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
105
106
0
#undef LINTF_CORE_OP
107
0
#undef LINTF_CORE_INC
108
0
}
109
110
template<typename T>
111
void transpose4x4Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride )
112
0
{
113
0
  for( int i = 0; i < 4; i++ )
114
0
  {
115
0
    for( int j = 0; j < 4; j++ )
116
0
    {
117
0
      dst[j * dstStride] = src[j];
118
0
    }
119
120
0
    dst++;
121
0
    src += srcStride;
122
0
  }
123
0
}
124
125
template<typename T>
126
void transpose8x8Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride )
127
0
{
128
0
  for( int i = 0; i < 8; i++ )
129
0
  {
130
0
    for( int j = 0; j < 8; j++ )
131
0
    {
132
0
      dst[j * dstStride] = src[j];
133
0
    }
134
135
0
    dst++;
136
0
    src += srcStride;
137
0
  }
138
0
}
139
140
template<typename T>
141
void copyClipCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
142
{
143
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
144
#define RECO_INC      \
145
    src += srcStride; \
146
    dst += dstStride; \
147
148
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
149
150
#undef RECO_OP
151
#undef RECO_INC
152
}
153
154
template<typename T>
155
void addWeightedAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t destStride, int width, int height, int rshift, int offset, int w0, int w1, const ClpRng& clpRng )
156
0
{
157
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
158
0
#define ADD_WGHT_AVG_INC     \
159
0
    src1 += src1Stride; \
160
0
    src2 += src2Stride; \
161
0
    dest += destStride; \
162
0
163
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
164
165
0
#undef ADD_WGHT_AVG_OP
166
0
#undef ADD_WGHT_AVG_INC
167
0
}
168
169
/**
 * Copies a rectangle of raw bytes from src to dst.
 *
 * \param src, srcStride  source bytes and the byte distance between rows
 * \param dst, dstStride  destination bytes and the byte distance between rows
 * \param width           number of bytes copied per row
 * \param height          number of rows
 *
 * A non-positive width or height copies nothing. When both strides equal
 * the row width the rows are contiguous and are copied with one memcpy.
 */
void copyBufferCore( const char *src, ptrdiff_t srcStride, char *dst, ptrdiff_t dstStride, int width, int height )
{
  // A negative count would be converted to a huge size_t by memcpy.
  if( width <= 0 || height <= 0 )
  {
    return;
  }

#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
  _mm_prefetch( (const char *) ( src ),             _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst ),             _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
  const size_t rowBytes = static_cast<size_t>( width );

  if( width == srcStride && width == dstStride )
  {
    // Multiply in size_t: width * height in int can overflow for large buffers.
    memcpy( dst, src, rowBytes * static_cast<size_t>( height ) );
    return;
  }

  for( int i = 0; i < height; i++ )
  {
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
    // request the next row while the current one is being copied
    _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
    _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
    memcpy( dst, src, rowBytes );

    src += srcStride;
    dst += dstStride;
  }
}
197
198
/**
 * Replaces every sample in place by its look-up-table entry:
 *   ptr[i] = lut[ ptr[i] ].
 *
 * Iteration is delegated to SIZE_AWARE_PER_EL_OP (defined elsewhere).
 */
void applyLutCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut )
{
// per-sample kernel
#define LUT_SAMPLE_AT( POS ) ptr[POS] = lut[ptr[POS]]
// moves the pointer by one row
#define LUT_NEXT_ROW ptr += ptrStride;

  SIZE_AWARE_PER_EL_OP( LUT_SAMPLE_AT, LUT_NEXT_ROW )

#undef LUT_NEXT_ROW
#undef LUT_SAMPLE_AT
}
215
216
/**
 * Writes cuPtr into every entry of a width x height rectangle of
 * CodingUnit pointers whose rows are ptrStride entries apart.
 */
void fillN_CuCore( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr )
{
  if( width != ptrStride )
  {
    // rows are separated by padding: fill them one at a time
    CodingUnit** row = ptr;

    for( int line = 0; line < height; line++ )
    {
      std::fill_n( row, width, cuPtr );
      row += ptrStride;
    }

    return;
  }

  // rows are back to back: one fill covers the whole rectangle
  std::fill_n( ptr, width * height, cuPtr );
}
232
233
void sampleRateConvCore( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
234
                         const Pel* orgSrc, const ptrdiff_t orgStride, const int orgWidth, const int orgHeight,
235
                         const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
236
                         Pel* scaledSrc, const ptrdiff_t scaledStride, const int scaledWidth, const int scaledHeight,
237
                         const int afterScaleLeftOffset, const int afterScaleTopOffset,
238
                         const int bitDepth, const bool useLumaFilter,
239
                         const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag )
240
0
{
241
0
  if( orgWidth == scaledWidth && orgHeight == scaledHeight && scalingRatio == SCALE_1X && !beforeScaleLeftOffset && !beforeScaleTopOffset && !afterScaleLeftOffset && !afterScaleTopOffset )
242
0
  {
243
0
    g_pelBufOP.copyBuffer( ( const char * ) orgSrc, orgStride * sizeof( Pel ), ( char* ) scaledSrc, scaledStride * sizeof( Pel ), orgWidth * sizeof( Pel ), orgHeight );
244
245
0
    return;
246
0
  }
247
248
0
  const TFilterCoeff* filterHor = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
249
0
  const TFilterCoeff* filterVer = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
250
251
0
  const int numFracPositions  = useLumaFilter ? 15 : 31;
252
0
  const int numFracShift      = useLumaFilter ? 4 : 5;
253
0
  const int posShiftX         = SCALE_RATIO_BITS - numFracShift + compScale.first;
254
0
  const int posShiftY         = SCALE_RATIO_BITS - numFracShift + compScale.second;
255
0
  int addX  = (1 << (posShiftX - 1)) + (beforeScaleLeftOffset << SCALE_RATIO_BITS) + ((int( 1 - horCollocatedPositionFlag ) * 8 * (scalingRatio.first - SCALE_1X.first) + (1 << (2 + compScale.first))) >> (3 + compScale.first));
256
0
  int addY  = (1 << (posShiftY - 1)) + (beforeScaleTopOffset << SCALE_RATIO_BITS) + ((int( 1 - verCollocatedPositionFlag ) * 8 * (scalingRatio.second - SCALE_1X.second) + (1 << (2 + compScale.second))) >> (3 + compScale.second));
257
258
0
  const int filterLength = useLumaFilter ? NTAPS_LUMA : NTAPS_CHROMA;
259
0
  const int log2Norm     = 12;
260
261
0
  int* buf = new int[orgHeight * scaledWidth];
262
0
  int maxVal = (1 << bitDepth) - 1;
263
264
0
  CHECK( bitDepth > 17, "Overflow may happen!" );
265
266
0
  for( int i = 0; i < scaledWidth; i++ )
267
0
  {
268
0
    const Pel* org = orgSrc;
269
0
    int refPos = (((i << compScale.first) - afterScaleLeftOffset) * scalingRatio.first + addX) >> posShiftX;
270
0
    int integer = refPos >> numFracShift;
271
0
    int frac = refPos & numFracPositions;
272
0
    int* tmp = buf + i;
273
274
0
    for( int j = 0; j < orgHeight; j++ )
275
0
    {
276
0
      int sum = 0;
277
0
      const TFilterCoeff* f = filterHor + frac * filterLength;
278
279
0
      for( int k = 0; k < filterLength; k++ )
280
0
      {
281
0
        int xInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgWidth - 1 );
282
0
        sum += f[k] * org[xInt]; // postpone horizontal filtering gain removal after vertical filtering
283
0
      }
284
285
0
      *tmp = sum;
286
287
0
      tmp += scaledWidth;
288
0
      org += orgStride;
289
0
    }
290
0
  }
291
292
0
  Pel* dst = scaledSrc;
293
294
0
  for( int j = 0; j < scaledHeight; j++ )
295
0
  {
296
0
    int refPos = (((j << compScale.second) - afterScaleTopOffset) * scalingRatio.second + addY) >> posShiftY;
297
0
    int integer = refPos >> numFracShift;
298
0
    int frac = refPos & numFracPositions;
299
300
0
    for( int i = 0; i < scaledWidth; i++ )
301
0
    {
302
0
      int sum = 0;
303
0
      int* tmp = buf + i;
304
0
      const TFilterCoeff* f = filterVer + frac * filterLength;
305
306
0
      for( int k = 0; k < filterLength; k++ )
307
0
      {
308
0
        int yInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgHeight - 1 );
309
0
        sum += f[k] * tmp[yInt * scaledWidth];
310
0
      }
311
312
0
      dst[i] = std::min<int>( std::max( 0, (sum + (1 << (log2Norm - 1))) >> log2Norm ), maxVal );
313
0
    }
314
315
0
    dst += scaledStride;
316
0
  }
317
318
0
  delete[] buf;
319
0
}
320
321
/**
 * Applies a piecewise-linear mapping to every sample in place.
 *
 * For each sample the segment index is sample >> getLog2( OrgCW ); the new
 * value is
 *   LmcsPivot[idx] + ( ( ScaleCoeff[idx] * ( sample - InputPivot[idx] ) + ( 1 << 10 ) ) >> 11 ),
 * clipped with ClipBD<int> to bit depth bd.
 *
 * Iteration is delegated to SIZE_AWARE_PER_EL_OP (defined elsewhere).
 */
void rspFwdCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const int bd, const Pel OrgCW, const Pel* LmcsPivot, const Pel* ScaleCoeff, const Pel* InputPivot )
{
  const int segShift = getLog2( OrgCW );
  int seg;

// per-sample kernel: select the segment, then map within it
#define FWD_MAP_SAMPLE_AT( POS ) { seg = ( ptr[POS] >> segShift ); ptr[POS] = static_cast<Pel>( ClipBD<int>( LmcsPivot[seg] + ( ( ScaleCoeff[seg] * ( ptr[POS] - InputPivot[seg] ) + ( 1 << 10 ) ) >> 11 ), bd ) ); }
// moves the pointer by one row
#define FWD_MAP_NEXT_ROW ptr += ptrStride;

  SIZE_AWARE_PER_EL_OP( FWD_MAP_SAMPLE_AT, FWD_MAP_NEXT_ROW )

#undef FWD_MAP_NEXT_ROW
#undef FWD_MAP_SAMPLE_AT
}
339
340
/**
 * Fills the function table with the plain C++ kernels from this file.
 * Every width-specific slot of a family points at the same generic kernel;
 * rspBcw has no kernel here and is set to nullptr.
 */
PelBufferOps::PelBufferOps()
{
  // sample arithmetic
  addAvg16 = addAvgCore<Pel>;
  addAvg8  = addAvgCore<Pel>;
  addAvg4  = addAvgCore<Pel>;

  wghtAvg8 = addWeightedAvgCore<Pel>;
  wghtAvg4 = addWeightedAvgCore<Pel>;

  reco8 = reconstructCore<Pel>;
  reco4 = reconstructCore<Pel>;

  linTf8 = linTfCore<Pel>;
  linTf4 = linTfCore<Pel>;

  // reshaping
  applyLut = applyLutCore;
  rspFwd   = rspFwdCore;
  rspBcw   = nullptr;

  // block transposition
  transpose8x8 = transpose8x8Core<Pel>;
  transpose4x4 = transpose4x4Core<Pel>;

  // copying, filling and resampling
  copyBuffer     = copyBufferCore;
  fillN_CU       = fillN_CuCore;
  sampleRateConv = sampleRateConvCore;
}
368
369
// Global kernel table used by the buffer operations in this file;
// constructed with the default PelBufferOps constructor.
PelBufferOps g_pelBufOP = PelBufferOps();
370
371
template<>
372
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx)
373
0
{
374
0
  const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0);
375
0
  const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1);
376
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
377
0
  const Pel* src0 = other1.buf;
378
0
  const Pel* src2 = other2.buf;
379
0
  Pel* dest = buf;
380
381
0
  const ptrdiff_t src1Stride = other1.stride;
382
0
  const ptrdiff_t src2Stride = other2.stride;
383
0
  const ptrdiff_t destStride = stride;
384
0
  const int clipbd    = clpRng.bd;
385
0
  const int shiftNum  = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
386
0
  const int offset    = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
387
388
0
  if( ( width & 7 ) == 0 )
389
0
  {
390
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
391
0
  }
392
0
  else if( ( width & 3 ) == 0 )
393
0
  {
394
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
395
0
  }
396
0
  else
397
0
  {
398
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
399
0
#define ADD_AVG_INC     \
400
0
    src0 += src1Stride; \
401
0
    src2 += src2Stride; \
402
0
    dest += destStride; \
403
0
404
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
405
406
0
#undef ADD_AVG_OP
407
0
#undef ADD_AVG_INC
408
0
  }
409
0
}
410
411
template<>
412
void AreaBuf<Pel>::scaleSignal(const int scale, const ClpRng& clpRng)
413
0
{
414
0
  Pel* dst = buf;
415
0
  Pel* src = buf;
416
0
  int sign, absval;
417
0
  int maxAbsclipBD = ( 1 << clpRng.bd ) - 1;
418
419
0
  for (unsigned y = 0; y < height; y++)
420
0
  {
421
0
    for (unsigned x = 0; x < width; x++)
422
0
    {
423
0
      src[x] = Clip3<Pel>( -maxAbsclipBD - 1, maxAbsclipBD, src[x] );
424
0
      sign   = src[x] >= 0 ? 1 : -1;
425
0
      absval = sign * src[x];
426
427
0
      int val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
428
429
0
      if( sizeof( Pel ) == 2 ) // avoid overflow when storing data
430
0
      {
431
0
          val = Clip3<int>(-32768, 32767, val);
432
0
      }
433
0
      dst[x] = (Pel)val;
434
0
    }
435
0
    dst += stride;
436
0
    src += stride;
437
0
  }
438
0
}
439
440
template<>
441
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng)
442
0
{
443
0
  const Pel* src0 = other1.buf;
444
0
  const Pel* src2 = other2.buf;
445
0
        Pel* dest =        buf;
446
447
0
  const ptrdiff_t src1Stride = other1.stride;
448
0
  const ptrdiff_t src2Stride = other2.stride;
449
0
  const ptrdiff_t destStride =        stride;
450
0
  const int       clipbd     = clpRng.bd;
451
0
  const int       shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
452
0
  const int       offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
453
454
0
  if( ( width & 15 ) == 0 )
455
0
  {
456
0
    g_pelBufOP.addAvg16( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
457
0
  }
458
0
  else if( ( width & 7 ) == 0 )
459
0
  {
460
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
461
0
  }
462
0
  else if( ( width & 3 ) == 0 )
463
0
  {
464
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
465
0
  }
466
0
  else
467
0
  {
468
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
469
0
#define ADD_AVG_INC     \
470
0
    src0 += src1Stride; \
471
0
    src2 += src2Stride; \
472
0
    dest += destStride; \
473
0
474
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
475
476
0
#undef ADD_AVG_OP
477
0
#undef ADD_AVG_INC
478
0
  }
479
0
}
480
481
template<>
482
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel> &pred, const AreaBuf<const Pel> &resi, const ClpRng& clpRng )
483
0
{
484
0
  const Pel* src1 = pred.buf;
485
0
  const Pel* src2 = resi.buf;
486
0
        Pel* dest =      buf;
487
488
0
  const ptrdiff_t src1Stride = pred.stride;
489
0
  const ptrdiff_t src2Stride = resi.stride;
490
0
  const ptrdiff_t destStride =      stride;
491
492
0
  if( ( width & 7 ) == 0 )
493
0
  {
494
0
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
495
0
  }
496
0
  else if( ( width & 3 ) == 0 )
497
0
  {
498
0
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
499
0
  }
500
0
  else
501
0
  {
502
0
#define RECO_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
503
0
#define RECO_INC        \
504
0
    src1 += src1Stride; \
505
0
    src2 += src2Stride; \
506
0
    dest += destStride; \
507
0
508
0
    SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
509
510
0
#undef RECO_OP
511
0
#undef RECO_INC
512
0
  }
513
0
}
514
515
template<>
516
void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
517
0
{
518
0
  const Pel* src = buf;
519
0
        Pel* dst = buf;
520
521
0
  if( width == 1 )
522
0
  {
523
0
    THROW_FATAL( "Blocks of width = 1 not supported" );
524
0
  }
525
0
  else if( ( width & 7 ) == 0 )
526
0
  {
527
0
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
528
0
  }
529
0
  else if( ( width & 3 ) == 0 )
530
0
  {
531
0
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
532
0
  }
533
0
  else
534
0
  {
535
0
#define LINTF_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset )
536
0
#define LINTF_INC        \
537
0
    src += stride;       \
538
0
    dst += stride;       \
539
0
540
0
    SIZE_AWARE_PER_EL_OP( LINTF_OP, LINTF_INC );
541
542
0
#undef RECO_OP
543
0
#undef RECO_INC
544
0
  }
545
0
}
546
547
#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
548
template<>
549
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other )
550
0
{
551
0
  CHECK( width != other.height || height != other.width, "Incompatible size" );
552
553
0
  if( ( ( width | height ) & 7 ) == 0 )
554
0
  {
555
0
    const Pel* src = other.buf;
556
557
0
    for( unsigned y = 0; y < other.height; y += 8 )
558
0
    {
559
0
      Pel* dst = buf + y;
560
561
0
      for( unsigned x = 0; x < other.width; x += 8 )
562
0
      {
563
0
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
564
565
0
        dst += 8 * stride;
566
0
      }
567
568
0
      src += 8 * other.stride;
569
0
    }
570
0
  }
571
0
  else if( ( ( width | height ) & 3 ) == 0 )
572
0
  {
573
0
    const Pel* src = other.buf;
574
575
0
    for( unsigned y = 0; y < other.height; y += 4 )
576
0
    {
577
0
      Pel* dst = buf + y;
578
579
0
      for( unsigned x = 0; x < other.width; x += 4 )
580
0
      {
581
0
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );
582
583
0
        dst += 4 * stride;
584
0
      }
585
586
0
      src += 4 * other.stride;
587
0
    }
588
0
  }
589
0
  else
590
0
  {
591
0
          Pel* dst =       buf;
592
0
    const Pel* src = other.buf;
593
0
    width          = other.height;
594
0
    height         = other.width;
595
0
    stride         = stride < width ? width : stride;
596
597
0
    for( unsigned y = 0; y < other.height; y++ )
598
0
    {
599
0
      for( unsigned x = 0; x < other.width; x++ )
600
0
      {
601
0
        dst[y + x*stride] = src[x + y * other.stride];
602
0
      }
603
0
    }
604
0
  }
605
0
}
606
#endif
607
608
template<>
609
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
610
0
{
611
0
  if( width == stride )
612
0
  {
613
0
    std::fill_n( buf, width * height, val );
614
0
  }
615
0
  else
616
0
  {
617
0
    MotionInfo* dst = buf;
618
619
0
    for( int y = 0; y < height; y++, dst += stride )
620
0
    {
621
0
      std::fill_n( dst, width, val );
622
0
    }
623
0
  }
624
0
}
625
626
/**
 * Creates an empty storage: the origin and allocator slot of every
 * component are set to nullptr.
 */
PelStorage::PelStorage()
{
  for( uint32_t comp = 0; comp < MAX_NUM_COMPONENT; ++comp )
  {
    m_allocator[comp] = nullptr;
    m_origin[comp]    = nullptr;
  }
}
634
635
/**
 * Releases whatever the storage still holds by calling destroy().
 */
PelStorage::~PelStorage()
{
  this->destroy();
}
639
640
void PelStorage::create( const UnitArea &_UnitArea )
641
0
{
642
0
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
643
0
}
644
645
void PelStorage::create( const ChromaFormat _chromaFormat, const Size& _size, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignmentByte, const bool _scaleChromaMargin, const UserAllocator* userAlloc )
646
0
{
647
0
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
648
649
0
  chromaFormat = _chromaFormat;
650
651
0
  const uint32_t numCh = getNumberValidComponents( _chromaFormat );
652
653
0
  unsigned extHeight = _size.height;
654
0
  unsigned extWidth  = _size.width;
655
656
0
  if( _maxCUSize )
657
0
  {
658
0
    extHeight = ( ( _size.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
659
0
    extWidth  = ( ( _size.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
660
0
  }
661
662
0
  const unsigned _alignment = _alignmentByte / sizeof( Pel );
663
664
0
  for( uint32_t i = 0; i < numCh; i++ )
665
0
  {
666
0
    const ComponentID compID = ComponentID( i );
667
0
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
668
0
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
669
670
0
    unsigned scaledHeight = extHeight >> scaleY;
671
0
    unsigned scaledWidth  = extWidth  >> scaleX;
672
0
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
673
0
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
674
675
0
#if 1
676
0
    if( _alignment && xmargin )
677
0
    {
678
0
      xmargin = ( ( xmargin + _alignment - 1 ) / _alignment ) * _alignment;
679
0
    }
680
681
0
#endif
682
0
    SizeType totalWidth   = scaledWidth + 2 * xmargin;
683
0
    SizeType totalHeight  = scaledHeight +2 * ymargin;
684
685
0
    if( _alignment )
686
0
    {
687
      // make sure buffer lines are align
688
0
      CHECK( _alignmentByte != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
689
0
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
690
0
    }
691
692
0
#if ENABLE_SIMD_OPT_INTER
693
0
    uint32_t area = totalWidth * totalHeight + 1; // +1 for the extra Pel overread in prefetchPad_SSE, in case reading from the very bottom right of the picture
694
#else
695
    uint32_t area = totalWidth * totalHeight;
696
#endif
697
0
    CHECK( !area, "Trying to create a buffer with zero area" );
698
699
0
    m_origSi[i] = Size{ totalWidth, totalHeight };
700
0
    if( userAlloc && userAlloc->enabled )
701
0
    {
702
0
      m_origin[i] = ( Pel* ) userAlloc->create( userAlloc->opaque, (vvdecComponentType)i, sizeof(Pel)*area, MEMORY_ALIGN_DEF_SIZE, &m_allocator[i] );
703
0
      CHECK( m_origin[i] == nullptr, "external allocator callback failed (returned NULL)." );
704
0
      m_externAllocator = true;
705
0
      m_userAlloc       = userAlloc;
706
0
    }
707
0
    else
708
0
    {
709
0
      m_origin[i] = ( Pel* ) xMalloc( Pel, area );
710
0
    }
711
0
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
712
0
    bufs.push_back( PelBuf( topLeft, totalWidth, _size.width >> scaleX, _size.height >> scaleY ) );
713
0
  }
714
0
}
715
716
/**
 * Makes this storage refer to the planes of buf: takes over its chroma
 * format and registers, per valid component, a PelBuf with the same start
 * pointer, stride and size. No sample memory is allocated here.
 */
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  chromaFormat = buf.chromaFormat;

  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  bufs.resize(numComp);

  for( uint32_t comp = 0; comp < numComp; ++comp )
  {
    PelBuf plane = buf.get( ComponentID( comp ) );

    bufs[comp] = PelBuf( plane.bufAt( 0, 0 ), plane.stride, plane.width, plane.height );
  }
}
730
731
/**
 * Exchanges the sample memory of this storage with that of other.
 *
 * For every valid component the buffer pointer, stride, origin and
 * allocator handle are swapped; the external-allocator flag and the user
 * allocator pointer are swapped afterwards. Each component is first
 * checked for matching chroma format, buffer comparison and stride.
 */
void PelStorage::swap( PelStorage& other )
{
  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  for( uint32_t i = 0; i < numComp; i++ )
  {
    const ComponentID comp = ComponentID( i );

    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat       != other.chromaFormat      , "Incompatible formats" );
    CHECK( get( comp )        != other.get( comp )       , "Incompatible formats" );
    CHECK( get( comp ).stride != other.get( comp ).stride, "Incompatible formats" );

    auto& mine   = bufs[i];
    auto& theirs = other.bufs[i];

    std::swap( mine.buf,       theirs.buf );
    std::swap( mine.stride,    theirs.stride );
    std::swap( m_origin[i],    other.m_origin[i] );
    std::swap( m_allocator[i], other.m_allocator[i] );
  }

  std::swap( m_externAllocator, other.m_externAllocator );
  std::swap( m_userAlloc,       other.m_userAlloc );
}
750
751
void PelStorage::destroy()
752
0
{
753
0
  chromaFormat = NUM_CHROMA_FORMAT;
754
0
  for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ )
755
0
  {
756
0
    if( m_origin[i] )
757
0
    {
758
0
      if ( !m_externAllocator )
759
0
      {
760
0
        xFree( m_origin[i] );
761
0
      }
762
0
      else if( m_allocator[i])
763
0
      {
764
0
        CHECK( m_userAlloc->unref == nullptr, "vvdecUnrefBufferCallback not valid, cannot unref picture buffer" )
765
0
        m_userAlloc->unref( m_userAlloc->opaque, m_allocator[i] );
766
0
      }
767
0
      m_origin[i] = nullptr;
768
0
    }
769
0
  }
770
0
  bufs.clear();
771
0
}
772
773
/** Returns the buffer registered for component CompID. */
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  PelBuf& plane = bufs[CompID];

  return plane;
}
777
778
/** Returns a read-only view of the buffer registered for component CompID. */
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  const PelBuf& plane = bufs[CompID];

  return plane;
}
782
783
/**
 * Returns a view of the area blk inside the plane of blk's component.
 * The view shares the plane's stride. CHECKD verifies that the
 * bottom-right corner of blk lies inside the plane.
 */
PelBuf PelStorage::getBuf( const CompArea &blk )
{
  const PelBuf& plane = bufs[blk.compID()];

  CHECKD( rsAddr( blk.bottomRight(), plane.stride ) >= ( ( plane.height - 1 ) * plane.stride + plane.width ), "Trying to access a buf outside of bound!" );

  Pel* const areaStart = plane.buf + rsAddr( blk, plane.stride );

  return PelBuf( areaStart, plane.stride, blk );
}
791
792
/**
 * Returns a read-only view of the area blk inside the plane of blk's
 * component. The view shares the plane's stride.
 */
const CPelBuf PelStorage::getBuf( const CompArea &blk ) const
{
  const PelBuf& plane = bufs[blk.compID()];

  const Pel* const areaStart = plane.buf + rsAddr( blk, plane.stride );

  return CPelBuf( areaStart, plane.stride, blk );
}
797
798
/**
 * Returns views of all planes covered by unit: only Y for CHROMA_400,
 * otherwise Y, Cb and Cr.
 */
PelUnitBuf PelStorage::getBuf( const UnitArea &unit )
{
  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
802
803
/**
 * Returns read-only views of all planes covered by unit: only Y for
 * CHROMA_400, otherwise Y, Cb and Cr.
 */
const CPelUnitBuf PelStorage::getBuf( const UnitArea &unit ) const
{
  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
807
808
template<>
809
void UnitBuf<Pel>::colorSpaceConvert( const UnitBuf<Pel> &other, const ClpRng& clpRng )
810
0
{
811
0
  const Pel* pOrg0 = bufs[COMPONENT_Y ].buf;
812
0
  const Pel* pOrg1 = bufs[COMPONENT_Cb].buf;
813
0
  const Pel* pOrg2 = bufs[COMPONENT_Cr].buf;
814
0
  const ptrdiff_t strideOrg = bufs[COMPONENT_Y ].stride;
815
816
0
  Pel* pDst0 = other.bufs[COMPONENT_Y ].buf;
817
0
  Pel* pDst1 = other.bufs[COMPONENT_Cb].buf;
818
0
  Pel* pDst2 = other.bufs[COMPONENT_Cr].buf;
819
0
  const ptrdiff_t strideDst = other.bufs[COMPONENT_Y ].stride;
820
821
0
  int width  = bufs[COMPONENT_Y].width;
822
0
  int height = bufs[COMPONENT_Y].height;
823
0
  int maxAbsclipBD = (1 << (clpRng.bd + 1)) - 1;
824
0
  int y0, cg, co;
825
826
0
  CHECKD( bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cb].stride || bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
827
0
  CHECKD( other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
828
0
  CHECKD( bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size" );
829
830
0
  for( int y = 0; y < height; y++ )
831
0
  {
832
0
    for( int x = 0; x < width; x++ )
833
0
    {
834
0
      y0 = pOrg0[x];
835
0
      cg = pOrg1[x];
836
0
      co = pOrg2[x];
837
838
0
      y0 = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, y0);
839
0
      cg = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, cg);
840
0
      co = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, co);
841
842
0
      int t = y0 - (cg >> 1);
843
0
      pDst0[x] = cg + t;
844
0
      pDst1[x] = t - (co >> 1);
845
0
      pDst2[x] = co + pDst1[x];
846
0
    }
847
848
0
    pOrg0 += strideOrg;
849
0
    pOrg1 += strideOrg;
850
0
    pOrg2 += strideOrg;
851
0
    pDst0 += strideDst;
852
0
    pDst1 += strideDst;
853
0
    pDst2 += strideDst;
854
0
  }
855
0
}
856
857
template void UnitBuf<Pel>::writeToFile( std::string filename ) const;
858
859
template<typename T>
860
void UnitBuf<T>::writeToFile( std::string filename ) const
861
0
{
862
0
  FILE* f = fopen( filename.c_str(), "w" );
863
0
  CHECK_FATAL( f == nullptr, "writeToFile() cannot open file for writing" )
864
865
0
  for( auto& b: bufs )
866
0
  {
867
0
    for( unsigned y = 0; y < b.height; y++ )
868
0
    {
869
0
      fwrite( b.bufAt( 0, y ), sizeof( T ), b.width, f );
870
0
    }
871
0
  }
872
873
0
  fclose( f );
874
0
}
875
876
}