Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/openh264/codec/processing/src/downsample/downsamplefuncs.cpp
Line
Count
Source
1
/*!
2
 * \copy
3
 *     Copyright (c)  2008-2013, Cisco Systems
4
 *     All rights reserved.
5
 *
6
 *     Redistribution and use in source and binary forms, with or without
7
 *     modification, are permitted provided that the following conditions
8
 *     are met:
9
 *
10
 *        * Redistributions of source code must retain the above copyright
11
 *          notice, this list of conditions and the following disclaimer.
12
 *
13
 *        * Redistributions in binary form must reproduce the above copyright
14
 *          notice, this list of conditions and the following disclaimer in
15
 *          the documentation and/or other materials provided with the
16
 *          distribution.
17
 *
18
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
 *     POSSIBILITY OF SUCH DAMAGE.
30
 *
31
 *  downsamplefuncs.cpp
32
 *
33
 *  Abstract
34
 *      Implementation for source yuv data downsampling used before spatial encoding.
35
 *
36
 *  History
37
 *      10/24/2008 Created
38
 *
39
 *****************************************************************************/
40
41
#include "downsample.h"
42
43
44
WELSVP_NAMESPACE_BEGIN
45
46
47
/*!
 * \brief  2:1 bilinear downsample of one 8-bit plane.
 *
 * Every destination pixel is built from a 2x2 source block: the two pixels of
 * the upper row are averaged (rounding half up), likewise the two pixels of
 * the lower row, and the two row results are then averaged the same way.
 *
 * \param pDst         first byte of the destination plane
 * \param kiDstStride  offset in bytes from one destination row to the next
 * \param pSrc         first byte of the source plane
 * \param kiSrcStride  offset in bytes from one source row to the next
 * \param kiSrcWidth   source width; (kiSrcWidth >> 1) pixels are written per row
 * \param kiSrcHeight  source height; (kiSrcHeight >> 1) rows are written
 */
void DyadicBilinearDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
                                  uint8_t* pSrc, const int32_t kiSrcStride,
                                  const int32_t kiSrcWidth, const int32_t kiSrcHeight)
{
  // Multiply instead of left-shifting: shifting a negative stride is undefined,
  // and the one-third variant in this file already uses multiplication.
  const int32_t kiSrcStridex2   = kiSrcStride * 2;
  const int32_t kiDstWidth      = kiSrcWidth  >> 1;
  const int32_t kiDstHeight     = kiSrcHeight >> 1;

  uint8_t* pDstLine     = pDst;
  uint8_t* pSrcLine     = pSrc;

  for (int32_t j = 0; j < kiDstHeight; j ++) {
    const uint8_t* pUpper = pSrcLine;
    const uint8_t* pLower = pSrcLine + kiSrcStride;

    for (int32_t i = 0; i < kiDstWidth; i ++) {
      const int32_t kiSrcX = i << 1;
      // horizontal averages of the upper and lower pixel pairs
      const int32_t kiTempRow1 = (pUpper[kiSrcX] + pUpper[kiSrcX + 1] + 1) >> 1;
      const int32_t kiTempRow2 = (pLower[kiSrcX] + pLower[kiSrcX + 1] + 1) >> 1;

      // vertical average of the two row results
      pDstLine[i] = (uint8_t) ((kiTempRow1 + kiTempRow2 + 1) >> 1);
    }
    pDstLine    += kiDstStride;
    pSrcLine    += kiSrcStridex2;   // skip two source rows per destination row
  }
}
70
71
/*!
 * \brief  4:1 downsample of one 8-bit plane.
 *
 * Every destination pixel is computed from the top-left 2x2 pixels of a 4x4
 * source block: each horizontal pair is averaged (rounding half up) and the
 * two row results are then averaged the same way. The remaining pixels of the
 * 4x4 block are not read.
 *
 * \param pDst         first byte of the destination plane
 * \param kiDstStride  offset in bytes from one destination row to the next
 * \param pSrc         first byte of the source plane
 * \param kiSrcStride  offset in bytes from one source row to the next
 * \param kiSrcWidth   source width; (kiSrcWidth >> 2) pixels are written per row
 * \param kiSrcHeight  source height; (kiSrcHeight >> 2) rows are written
 */
void DyadicBilinearQuarterDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
    uint8_t* pSrc, const int32_t kiSrcStride,
    const int32_t kiSrcWidth, const int32_t kiSrcHeight)
{
  // Multiply instead of left-shifting: shifting a negative stride is undefined,
  // and the one-third variant in this file already uses multiplication.
  const int32_t kiSrcStridex4   = kiSrcStride * 4;
  const int32_t kiDstWidth      = kiSrcWidth  >> 2;
  const int32_t kiDstHeight     = kiSrcHeight >> 2;

  uint8_t* pDstLine     = pDst;
  uint8_t* pSrcLine     = pSrc;

  for (int32_t j = 0; j < kiDstHeight; j ++) {
    const uint8_t* pUpper = pSrcLine;
    const uint8_t* pLower = pSrcLine + kiSrcStride;

    for (int32_t i = 0; i < kiDstWidth; i ++) {
      const int32_t kiSrcX = i << 2;
      // horizontal averages of the first two pixels in rows 0 and 1 of the block
      const int32_t kiTempRow1 = (pUpper[kiSrcX] + pUpper[kiSrcX + 1] + 1) >> 1;
      const int32_t kiTempRow2 = (pLower[kiSrcX] + pLower[kiSrcX + 1] + 1) >> 1;

      // vertical average of the two row results
      pDstLine[i] = (uint8_t) ((kiTempRow1 + kiTempRow2 + 1) >> 1);
    }
    pDstLine    += kiDstStride;
    pSrcLine    += kiSrcStridex4;   // skip four source rows per destination row
  }
}
94
95
/*!
 * \brief  3:1 downsample of one 8-bit plane.
 *
 * Every destination pixel is computed from the top-left 2x2 pixels of a 3x3
 * source block: each horizontal pair is averaged (rounding half up) and the
 * two row results are then averaged the same way.
 *
 * Unlike the other dyadic variants in this file, the last argument is the
 * number of DESTINATION rows to produce, not the source height.
 *
 * \param pDst         first byte of the destination plane
 * \param kiDstStride  offset in bytes from one destination row to the next
 * \param pSrc         first byte of the source plane
 * \param kiSrcStride  offset in bytes from one source row to the next
 * \param kiSrcWidth   source width; (kiSrcWidth / 3) pixels are written per row
 * \param kiDstHeight  number of destination rows to write
 */
void DyadicBilinearOneThirdDownsampler_c (uint8_t* pDst, const int32_t kiDstStride,
    uint8_t* pSrc, const int32_t kiSrcStride,
    const int32_t kiSrcWidth, const int32_t kiDstHeight)
{
  const int32_t kiRowAdvance = kiSrcStride * 3;
  const int32_t kiOutWidth   = kiSrcWidth / 3;

  uint8_t* pOutRow = pDst;
  uint8_t* pInRow  = pSrc;

  for (int32_t iRow = 0; iRow < kiDstHeight; ++iRow, pOutRow += kiDstStride, pInRow += kiRowAdvance) {
    for (int32_t iCol = 0; iCol < kiOutWidth; ++iCol) {
      const int32_t kiLeft = iCol * 3;

      // horizontal averages of the first two pixels in rows 0 and 1 of the block
      const int32_t kiUpperAvg = (pInRow[kiLeft] + pInRow[kiLeft + 1] + 1) >> 1;
      const int32_t kiLowerAvg = (pInRow[kiLeft + kiSrcStride] + pInRow[kiLeft + kiSrcStride + 1] + 1) >> 1;

      // vertical average of the two row results
      pOutRow[iCol] = (uint8_t) ((kiUpperAvg + kiLowerAvg + 1) >> 1);
    }
  }
}
117
118
void GeneralBilinearFastDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
119
                                       const int32_t kiDstHeight,
120
0
                                       uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
121
0
  const uint32_t kuiScaleBitWidth = 16, kuiScaleBitHeight = 15;
122
0
  const uint32_t kuiScaleWidth = (1 << kuiScaleBitWidth), kuiScaleHeight = (1 << kuiScaleBitHeight);
123
0
  int32_t fScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
124
0
  int32_t fScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
125
0
  uint32_t x;
126
0
  int32_t iYInverse, iXInverse;
127
128
0
  uint8_t* pByDst = pDst;
129
0
  uint8_t* pByLineDst = pDst;
130
131
0
  iYInverse = 1 << (kuiScaleBitHeight - 1);
132
0
  for (int32_t i = 0; i < kiDstHeight - 1; i++) {
133
0
    int32_t iYy = iYInverse >> kuiScaleBitHeight;
134
0
    int32_t fv = iYInverse & (kuiScaleHeight - 1);
135
136
0
    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
137
138
0
    pByDst = pByLineDst;
139
0
    iXInverse = 1 << (kuiScaleBitWidth - 1);
140
0
    for (int32_t j = 0; j < kiDstWidth - 1; j++) {
141
0
      int32_t iXx = iXInverse >> kuiScaleBitWidth;
142
0
      int32_t iFu = iXInverse & (kuiScaleWidth - 1);
143
144
0
      uint8_t* pByCurrent = pBySrc + iXx;
145
0
      uint8_t a, b, c, d;
146
147
0
      a = *pByCurrent;
148
0
      b = * (pByCurrent + 1);
149
0
      c = * (pByCurrent + kiSrcStride);
150
0
      d = * (pByCurrent + kiSrcStride + 1);
151
152
0
      x  = (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * a;
153
0
      x += (((uint32_t) (iFu)) * (kuiScaleHeight - 1 - fv) >> kuiScaleBitWidth) * b;
154
0
      x += (((uint32_t) (kuiScaleWidth - 1 - iFu)) * (fv) >> kuiScaleBitWidth) * c;
155
0
      x += (((uint32_t) (iFu)) * (fv) >> kuiScaleBitWidth) * d;
156
0
      x >>= (kuiScaleBitHeight - 1);
157
0
      x += 1;
158
0
      x >>= 1;
159
      //x = (((__int64)(SCALE_BIG - 1 - iFu))*(SCALE_BIG - 1 - fv)*a + ((__int64)iFu)*(SCALE_BIG - 1 -fv)*b + ((__int64)(SCALE_BIG - 1 -iFu))*fv*c +
160
      // ((__int64)iFu)*fv*d + (1 << (2*SCALE_BIT_BIG-1)) ) >> (2*SCALE_BIT_BIG);
161
0
      x = WELS_CLAMP (x, 0, 255);
162
0
      *pByDst++ = (uint8_t)x;
163
164
0
      iXInverse += fScalex;
165
0
    }
166
0
    *pByDst = * (pBySrc + (iXInverse >> kuiScaleBitWidth));
167
0
    pByLineDst += kiDstStride;
168
0
    iYInverse += fScaley;
169
0
  }
170
171
  // last row special
172
0
  {
173
0
    int32_t iYy = iYInverse >> kuiScaleBitHeight;
174
0
    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
175
176
0
    pByDst = pByLineDst;
177
0
    iXInverse = 1 << (kuiScaleBitWidth - 1);
178
0
    for (int32_t j = 0; j < kiDstWidth; j++) {
179
0
      int32_t iXx = iXInverse >> kuiScaleBitWidth;
180
0
      *pByDst++ = * (pBySrc + iXx);
181
182
0
      iXInverse += fScalex;
183
0
    }
184
0
  }
185
0
}
186
187
void GeneralBilinearAccurateDownsampler_c (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
188
    const int32_t kiDstHeight,
189
0
    uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
190
0
  const int32_t kiScaleBit = 15;
191
0
  const int32_t kiScale = (1 << kiScaleBit);
192
0
  int32_t iScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kiScale);
193
0
  int32_t iScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kiScale);
194
0
  int64_t x;
195
0
  int32_t iYInverse, iXInverse;
196
197
0
  uint8_t* pByDst = pDst;
198
0
  uint8_t* pByLineDst = pDst;
199
200
0
  iYInverse = 1 << (kiScaleBit - 1);
201
0
  for (int32_t i = 0; i < kiDstHeight - 1; i++) {
202
0
    int32_t iYy = iYInverse >> kiScaleBit;
203
0
    int32_t iFv = iYInverse & (kiScale - 1);
204
205
0
    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
206
207
0
    pByDst = pByLineDst;
208
0
    iXInverse = 1 << (kiScaleBit - 1);
209
0
    for (int32_t j = 0; j < kiDstWidth - 1; j++) {
210
0
      int32_t iXx = iXInverse >> kiScaleBit;
211
0
      int32_t iFu = iXInverse & (kiScale - 1);
212
213
0
      uint8_t* pByCurrent = pBySrc + iXx;
214
0
      uint8_t a, b, c, d;
215
216
0
      a = *pByCurrent;
217
0
      b = * (pByCurrent + 1);
218
0
      c = * (pByCurrent + kiSrcStride);
219
0
      d = * (pByCurrent + kiSrcStride + 1);
220
221
0
      x = (((int64_t) (kiScale - 1 - iFu)) * (kiScale - 1 - iFv) * a + ((int64_t)iFu) * (kiScale - 1 - iFv) * b + ((int64_t) (
222
0
             kiScale - 1 - iFu)) * iFv * c +
223
0
           ((int64_t)iFu) * iFv * d + (int64_t) (1 << (2 * kiScaleBit - 1))) >> (2 * kiScaleBit);
224
0
      x = WELS_CLAMP (x, 0, 255);
225
0
      *pByDst++ = (uint8_t)x;
226
227
0
      iXInverse += iScalex;
228
0
    }
229
0
    *pByDst = * (pBySrc + (iXInverse >> kiScaleBit));
230
0
    pByLineDst += kiDstStride;
231
0
    iYInverse += iScaley;
232
0
  }
233
234
  // last row special
235
0
  {
236
0
    int32_t iYy = iYInverse >> kiScaleBit;
237
0
    uint8_t* pBySrc = pSrc + iYy * kiSrcStride;
238
239
0
    pByDst = pByLineDst;
240
0
    iXInverse = 1 << (kiScaleBit - 1);
241
0
    for (int32_t j = 0; j < kiDstWidth; j++) {
242
0
      int32_t iXx = iXInverse >> kiScaleBit;
243
0
      *pByDst++ = * (pBySrc + iXx);
244
245
0
      iXInverse += iScalex;
246
0
    }
247
0
  }
248
0
}
249
250
#if defined(X86_ASM) || defined(HAVE_NEON) || (defined(HAVE_NEON_AARCH64) && defined(__aarch64__))
251
static void GeneralBilinearDownsamplerWrap (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
252
    const int32_t kiDstHeight,
253
    uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight,
254
    const int32_t kiScaleBitWidth, const int32_t kiScaleBitHeight,
255
    void (*func) (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth, int32_t iDstHeight,
256
                  uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX, uint32_t uiScaleY)) {
257
  const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
258
259
  uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
260
  uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
261
262
  func (pDst, kiDstStride, kiDstWidth, kiDstHeight, pSrc, kiSrcStride, uiScalex, uiScaley);
263
}
264
265
// Defines GeneralBilinearFastDownsamplerWrap_<suffix>, which forwards to
// GeneralBilinearFastDownsampler_<suffix> through GeneralBilinearDownsamplerWrap
// using 16 horizontal and 15 vertical fractional bits for the scale factors.
#define DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP(suffix) \
  void GeneralBilinearFastDownsamplerWrap_ ## suffix ( \
      uint8_t* pDst, const int32_t kiDstStride, \
      const int32_t kiDstWidth, const int32_t kiDstHeight, \
      uint8_t* pSrc, const int32_t kiSrcStride, \
      const int32_t kiSrcWidth, const int32_t kiSrcHeight) { \
    GeneralBilinearDownsamplerWrap ( \
        pDst, kiDstStride, kiDstWidth, kiDstHeight, \
        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, \
        16, 15, GeneralBilinearFastDownsampler_ ## suffix); \
  }
272
273
// Defines GeneralBilinearAccurateDownsamplerWrap_<suffix>, which forwards to
// GeneralBilinearAccurateDownsampler_<suffix> through GeneralBilinearDownsamplerWrap
// using 15 fractional bits for both the horizontal and vertical scale factors.
#define DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP(suffix) \
  void GeneralBilinearAccurateDownsamplerWrap_ ## suffix ( \
      uint8_t* pDst, const int32_t kiDstStride, \
      const int32_t kiDstWidth, const int32_t kiDstHeight, \
      uint8_t* pSrc, const int32_t kiSrcStride, \
      const int32_t kiSrcWidth, const int32_t kiSrcHeight) { \
    GeneralBilinearDownsamplerWrap ( \
        pDst, kiDstStride, kiDstWidth, kiDstHeight, \
        pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, \
        15, 15, GeneralBilinearAccurateDownsampler_ ## suffix); \
  }
280
#endif
281
282
// x86: fast wrappers for sse2/ssse3, accurate wrappers for sse2/sse41,
// plus both flavours for avx2 when it is available.
#if defined(X86_ASM)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (sse2)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
#if defined(HAVE_AVX2)
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
#endif // HAVE_AVX2
#endif // X86_ASM

// 32-bit ARM NEON: accurate wrapper only.
#if defined(HAVE_NEON)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (neon)
#endif // HAVE_NEON

// AArch64 NEON: accurate wrapper only.
#if defined(HAVE_NEON_AARCH64) && defined(__aarch64__)
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (AArch64_neon)
#endif // HAVE_NEON_AARCH64 && __aarch64__
300
WELSVP_NAMESPACE_END