Coverage Report

Created: 2026-01-20 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openh264/codec/processing/src/downsample/downsample.cpp
Line
Count
Source
1
/*!
2
 * \copy
3
 *     Copyright (c)  2013, Cisco Systems
4
 *     All rights reserved.
5
 *
6
 *     Redistribution and use in source and binary forms, with or without
7
 *     modification, are permitted provided that the following conditions
8
 *     are met:
9
 *
10
 *        * Redistributions of source code must retain the above copyright
11
 *          notice, this list of conditions and the following disclaimer.
12
 *
13
 *        * Redistributions in binary form must reproduce the above copyright
14
 *          notice, this list of conditions and the following disclaimer in
15
 *          the documentation and/or other materials provided with the
16
 *          distribution.
17
 *
18
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
 *     POSSIBILITY OF SUCH DAMAGE.
30
 *
31
 */
32
33
#include "downsample.h"
34
#include "cpu.h"
35
#include <assert.h>
36
37
WELSVP_NAMESPACE_BEGIN
38
0
// Width bound for the intermediate sample buffers: AllocateSampleBuffer() sizes each luma plane as
// MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT bytes, and Process() bypasses the buffers when half the source width exceeds it.
#define MAX_SAMPLE_WIDTH 1920
39
0
// Height bound for the intermediate sample buffers: AllocateSampleBuffer() sizes each luma plane as
// MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT bytes, and Process() bypasses the buffers when half the source height exceeds it.
#define MAX_SAMPLE_HEIGHT 1088
40
41
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
42
43
0
/*!
 * \brief Construct the downsampling strategy.
 *
 * Records the CPU capability flags, fills the kernel dispatch table for those
 * flags and tries to allocate the intermediate sample buffers.
 *
 * \param iCpuFlag  bit mask of WELS_CPU_* capability flags
 */
CDownsampling::CDownsampling (int32_t iCpuFlag) {
  m_eMethod  = METHOD_DOWNSAMPLE;
  m_iCPUFlag = iCpuFlag;

  // Null every plane pointer before any allocation is attempted, so that
  // FreeSampleBuffer() is only ever handed NULL or a pointer from WelsMalloc.
  WelsMemset (m_pSampleBuffer, 0, sizeof (m_pSampleBuffer));

  // Start from an all-zero dispatch table, then install the kernels.
  WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample));
  InitDownsampleFuncs (m_pfDownsample, iCpuFlag);

  // AllocateSampleBuffer() returns true when allocation FAILED, which is
  // exactly the meaning of m_bNoSampleBuffer.
  m_bNoSampleBuffer = AllocateSampleBuffer();
}
51
52
0
/*!
 * \brief Destroy the strategy and release the intermediate sample buffers.
 */
CDownsampling::~CDownsampling() {
  // Every plane pointer is either NULL or owned by this object at this point.
  FreeSampleBuffer();
}
55
0
bool CDownsampling::AllocateSampleBuffer() {
56
0
  for (int32_t i = 0; i < 2; i++) {
57
0
    m_pSampleBuffer[i][0] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT);
58
0
    if (!m_pSampleBuffer[i][0])
59
0
      goto FREE_RET;
60
0
    m_pSampleBuffer[i][1] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
61
0
    if (!m_pSampleBuffer[i][1])
62
0
      goto FREE_RET;
63
0
    m_pSampleBuffer[i][2] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
64
0
    if (!m_pSampleBuffer[i][2])
65
0
      goto FREE_RET;
66
0
  }
67
0
  return false;
68
0
FREE_RET:
69
0
  FreeSampleBuffer();
70
0
  return true;
71
72
0
}
73
0
void CDownsampling::FreeSampleBuffer() {
74
0
  for (int32_t i = 0; i < 2; i++) {
75
0
    WelsFree (m_pSampleBuffer[i][0]);
76
0
    m_pSampleBuffer[i][0] = NULL;
77
0
    WelsFree (m_pSampleBuffer[i][1]);
78
0
    m_pSampleBuffer[i][1] = NULL;
79
0
    WelsFree (m_pSampleBuffer[i][2]);
80
0
    m_pSampleBuffer[i][2] = NULL;
81
0
  }
82
0
}
83
84
0
/*!
 * \brief Fill the downsampling kernel dispatch table.
 *
 * The portable C kernels are installed first. Each enabled SIMD tier whose
 * flag is set in iCpuFlag then overwrites the entries it provides, so the
 * last matching tier wins for every entry.
 *
 * \param sDownsampleFunc  dispatch table to fill
 * \param iCpuFlag         bit mask of WELS_CPU_* capability flags
 */
void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc,  int32_t iCpuFlag) {
  // Portable baseline: both half-average slots share the same C kernel.
  sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsampler_c;
  sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_c;
  sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_c;
  sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_c;
  sDownsampleFunc.pfGeneralRatioChroma  = GeneralBilinearAccurateDownsampler_c;
  sDownsampleFunc.pfGeneralRatioLuma    = GeneralBilinearFastDownsampler_c;

#if defined(X86_ASM)
  // x86 tiers are tested from oldest to newest so newer kernels override older ones.
  if ((iCpuFlag & WELS_CPU_SSE) != 0) {
    sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_sse;
    sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_sse;
    sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_sse;
  }
  if ((iCpuFlag & WELS_CPU_SSE2) != 0) {
    sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
    sDownsampleFunc.pfGeneralRatioLuma   = GeneralBilinearFastDownsamplerWrap_sse2;
  }
  if ((iCpuFlag & WELS_CPU_SSSE3) != 0) {
    sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_ssse3;
    sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_ssse3;
    sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_ssse3;
    sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_ssse3;
    sDownsampleFunc.pfGeneralRatioLuma    = GeneralBilinearFastDownsamplerWrap_ssse3;
  }
  if ((iCpuFlag & WELS_CPU_SSE41) != 0) {
    sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_sse4;
    sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_sse4;
    sDownsampleFunc.pfGeneralRatioChroma  = GeneralBilinearAccurateDownsamplerWrap_sse41;
  }
#ifdef HAVE_AVX2
  if ((iCpuFlag & WELS_CPU_AVX2) != 0) {
    sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2;
    sDownsampleFunc.pfGeneralRatioLuma   = GeneralBilinearFastDownsamplerWrap_avx2;
  }
#endif
#endif//X86_ASM

#if defined(HAVE_NEON)
  // 32-bit ARM NEON: both general-ratio slots use the "accurate" wrapper.
  if ((iCpuFlag & WELS_CPU_NEON) != 0) {
    sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_neon;
    sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_neon;
    sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_neon;
    sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_neon;
    sDownsampleFunc.pfGeneralRatioChroma  = GeneralBilinearAccurateDownsamplerWrap_neon;
    sDownsampleFunc.pfGeneralRatioLuma    = GeneralBilinearAccurateDownsamplerWrap_neon;
  }
#endif

#if defined(HAVE_NEON_AARCH64) && defined(__aarch64__)
  // AArch64 NEON: both general-ratio slots use the "accurate" wrapper.
  if ((iCpuFlag & WELS_CPU_NEON) != 0) {
    sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_AArch64_neon;
    sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_AArch64_neon;
    sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_AArch64_neon;
    sDownsampleFunc.pfQuarterDownsampler  = DyadicBilinearQuarterDownsampler_AArch64_neon;
    sDownsampleFunc.pfGeneralRatioChroma  = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon;
    sDownsampleFunc.pfGeneralRatioLuma    = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon;
  }
#endif
}
143
144
0
/*!
 * \brief Downsample a three-plane picture from pSrcPixMap into pDstPixMap.
 *
 * Plane 0 is processed at the rectangle size stored in each pix map; planes 1
 * and 2 are processed at half that width and height (>> 1).
 *
 * Two strategies exist:
 *  - Direct: used when half the source size exceeds MAX_SAMPLE_WIDTH /
 *    MAX_SAMPLE_HEIGHT or when the intermediate buffers could not be
 *    allocated. A dedicated 1/2, 1/4 or 1/3 kernel is chosen when the ratio
 *    matches exactly, otherwise the general-ratio kernels are used.
 *  - Cascaded: the source is halved repeatedly into the two ping-pong sample
 *    buffers while the halved size is still strictly larger than the
 *    destination in both dimensions; the last step writes to the destination
 *    with either the half-average kernel (exact 2:1) or the general-ratio
 *    kernels.
 *
 * \param iType       unused by this implementation
 * \param pSrcPixMap  source picture (planes, strides and rectangle size)
 * \param pDstPixMap  destination picture (planes, strides and rectangle size)
 * \return RET_SUCCESS on completion; RET_INVALIDPARAM when the destination
 *         size is not positive or the source is not strictly larger than the
 *         destination in both width and height.
 */
EResult CDownsampling::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pDstPixMap) {
  int32_t iSrcWidthY = pSrcPixMap->sRect.iRectWidth;
  int32_t iSrcHeightY = pSrcPixMap->sRect.iRectHeight;
  const int32_t iDstWidthY = pDstPixMap->sRect.iRectWidth;
  const int32_t iDstHeightY = pDstPixMap->sRect.iRectHeight;

  int32_t iSrcWidthUV = iSrcWidthY >> 1;
  int32_t iSrcHeightUV = iSrcHeightY >> 1;
  const int32_t iDstWidthUV = iDstWidthY >> 1;
  const int32_t iDstHeightUV = iDstHeightY >> 1;

  // A non-positive destination size is never a valid target. With a negative
  // size the cascaded loop below could not terminate, because a halved source
  // size of 0 would still compare greater than the destination size.
  if (iDstWidthY <= 0 || iDstHeightY <= 0) {
    return RET_INVALIDPARAM;
  }
  // Only genuine downsampling (smaller in both dimensions) is supported.
  if (iSrcWidthY <= iDstWidthY || iSrcHeightY <= iDstHeightY) {
    return RET_INVALIDPARAM;
  }

  if ((iSrcWidthY >> 1) > MAX_SAMPLE_WIDTH || (iSrcHeightY >> 1) > MAX_SAMPLE_HEIGHT || m_bNoSampleBuffer) {
    // Direct path: one pass from source to destination, no intermediate buffers.
    if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) {
      // exact 2:1 -> half average functions
      DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
                             (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
      DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
                             (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
      DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
                             (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
    } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) {
      // exact 4:1
      m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
                                           (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);

      m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
                                           (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);

      m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
                                           (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
    } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) {
      // exact 3:1. NOTE: unlike the other kernels, these calls pass the SOURCE
      // width together with the DESTINATION height.
      m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
                                            (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY);

      m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
                                            (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV);

      m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
                                            (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV);
    } else {
      // arbitrary ratio
      m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
                                         (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);

      m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
                                           (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);

      m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
                                           (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
    }
  } else {
    // Cascaded path: halve into the ping-pong sample buffers until one final
    // step can reach the destination size.
    int32_t iIdx = 0;  // sample buffer set that receives the next intermediate picture
    int32_t iHalfSrcWidth = iSrcWidthY >> 1;
    int32_t iHalfSrcHeight = iSrcHeightY >> 1;

    uint8_t* pSrcY = (uint8_t*)pSrcPixMap->pPixel[0];
    uint8_t* pSrcU = (uint8_t*)pSrcPixMap->pPixel[1];
    uint8_t* pSrcV = (uint8_t*)pSrcPixMap->pPixel[2];
    int32_t iSrcStrideY = pSrcPixMap->iStride[0];
    int32_t iSrcStrideU = pSrcPixMap->iStride[1];
    int32_t iSrcStrideV = pSrcPixMap->iStride[2];

    uint8_t* pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
    uint8_t* pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
    uint8_t* pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];

    // Terminates: the destination size is >= 1 and the halved size shrinks on
    // every intermediate step, so the "still larger" branch cannot repeat forever.
    do {
      if ((iHalfSrcWidth == iDstWidthY) && (iHalfSrcHeight == iDstHeightY)) {
        // Final step is an exact 2:1: write straight into the destination.
        DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
                               pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
        DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
                               pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
        DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
                               pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
        break;
      } else if ((iHalfSrcWidth > iDstWidthY) && (iHalfSrcHeight > iDstHeightY)) {
        // Intermediate 2:1 step into the current sample buffer set. The
        // intermediate strides are the halved widths rounded up to 32.
        const int32_t iTmpStrideY  = WELS_ALIGN (iHalfSrcWidth, 32);
        const int32_t iTmpStrideUV = WELS_ALIGN (iHalfSrcWidth >> 1, 32);

        DownsampleHalfAverage (pDstY, iTmpStrideY,
                               pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
        DownsampleHalfAverage (pDstU, iTmpStrideUV,
                               pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
        DownsampleHalfAverage (pDstV, iTmpStrideUV,
                               pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);

        // The picture just produced becomes the source of the next step.
        pSrcY = pDstY;
        pSrcU = pDstU;
        pSrcV = pDstV;

        iSrcWidthY = iHalfSrcWidth;
        iSrcWidthUV = iHalfSrcWidth >> 1;
        iSrcHeightY = iHalfSrcHeight;
        iSrcHeightUV = iHalfSrcHeight >> 1;

        iSrcStrideY = iTmpStrideY;
        iSrcStrideU = iTmpStrideUV;
        iSrcStrideV = iTmpStrideUV;

        iHalfSrcWidth >>= 1;
        iHalfSrcHeight >>= 1;

        // Switch to the other buffer set so the next step never writes over its own input.
        iIdx ^= 1;
        pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
        pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
        pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
      } else {
        // Remaining ratio is not an exact 2:1: finish with the general-ratio kernels.
        m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
                                           pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);

        m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
                                             pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);

        m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
                                             pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
        break;
      }
    } while (true);
  }
  return RET_SUCCESS;
}
278
279
void CDownsampling::DownsampleHalfAverage (uint8_t* pDst, int32_t iDstStride,
280
0
        uint8_t* pSrc, int32_t iSrcStride, int32_t iSrcWidth, int32_t iSrcHeight) {
281
0
  if ((iSrcStride & 31) == 0) {
282
0
    assert ((iDstStride & 15) == 0);
283
0
    m_pfDownsample.pfHalfAverageWidthx32 (pDst, iDstStride,
284
0
        pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 32), iSrcHeight);
285
0
  } else {
286
0
    assert ((iSrcStride & 15) == 0);
287
0
    assert ((iDstStride &  7) == 0);
288
0
    m_pfDownsample.pfHalfAverageWidthx16 (pDst, iDstStride,
289
0
        pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 16), iSrcHeight);
290
0
  }
291
0
}
292
293
294
WELSVP_NAMESPACE_END