/src/openh264/codec/processing/src/downsample/downsample.cpp
Line | Count | Source |
1 | | /*! |
2 | | * \copy |
3 | | * Copyright (c) 2013, Cisco Systems |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions |
8 | | * are met: |
9 | | * |
10 | | * * Redistributions of source code must retain the above copyright |
11 | | * notice, this list of conditions and the following disclaimer. |
12 | | * |
13 | | * * Redistributions in binary form must reproduce the above copyright |
14 | | * notice, this list of conditions and the following disclaimer in |
15 | | * the documentation and/or other materials provided with the |
16 | | * distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
21 | | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
22 | | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
23 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
24 | | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
25 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
26 | | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 | | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | | * POSSIBILITY OF SUCH DAMAGE. |
30 | | * |
31 | | */ |
32 | | |
33 | | #include "downsample.h" |
34 | | #include "cpu.h" |
35 | | #include <assert.h> |
36 | | |
37 | | WELSVP_NAMESPACE_BEGIN |
// Largest width/height (in luma samples) of an intermediate picture that the
// scratch sample buffers are sized for; Process() compares the halved source
// dimensions against these limits before using the scratch buffers.
#define MAX_SAMPLE_WIDTH  (1920)
#define MAX_SAMPLE_HEIGHT (1088)
40 | | |
41 | | /////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
42 | | |
43 | 0 | CDownsampling::CDownsampling (int32_t iCpuFlag) { |
44 | 0 | m_iCPUFlag = iCpuFlag; |
45 | 0 | m_eMethod = METHOD_DOWNSAMPLE; |
46 | 0 | WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample)); |
47 | 0 | InitDownsampleFuncs (m_pfDownsample, m_iCPUFlag); |
48 | 0 | WelsMemset(m_pSampleBuffer,0,sizeof(m_pSampleBuffer)); |
49 | 0 | m_bNoSampleBuffer = AllocateSampleBuffer(); |
50 | 0 | } |
51 | | |
52 | 0 | CDownsampling::~CDownsampling() { |
53 | 0 | FreeSampleBuffer(); |
54 | 0 | } |
55 | 0 | bool CDownsampling::AllocateSampleBuffer() { |
56 | 0 | for (int32_t i = 0; i < 2; i++) { |
57 | 0 | m_pSampleBuffer[i][0] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT); |
58 | 0 | if (!m_pSampleBuffer[i][0]) |
59 | 0 | goto FREE_RET; |
60 | 0 | m_pSampleBuffer[i][1] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4); |
61 | 0 | if (!m_pSampleBuffer[i][1]) |
62 | 0 | goto FREE_RET; |
63 | 0 | m_pSampleBuffer[i][2] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4); |
64 | 0 | if (!m_pSampleBuffer[i][2]) |
65 | 0 | goto FREE_RET; |
66 | 0 | } |
67 | 0 | return false; |
68 | 0 | FREE_RET: |
69 | 0 | FreeSampleBuffer(); |
70 | 0 | return true; |
71 | |
|
72 | 0 | } |
73 | 0 | void CDownsampling::FreeSampleBuffer() { |
74 | 0 | for (int32_t i = 0; i < 2; i++) { |
75 | 0 | WelsFree (m_pSampleBuffer[i][0]); |
76 | 0 | m_pSampleBuffer[i][0] = NULL; |
77 | 0 | WelsFree (m_pSampleBuffer[i][1]); |
78 | 0 | m_pSampleBuffer[i][1] = NULL; |
79 | 0 | WelsFree (m_pSampleBuffer[i][2]); |
80 | 0 | m_pSampleBuffer[i][2] = NULL; |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | 0 | void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int32_t iCpuFlag) { |
85 | 0 | sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsampler_c; |
86 | 0 | sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_c; |
87 | 0 | sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_c; |
88 | 0 | sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_c; |
89 | 0 | sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsampler_c; |
90 | 0 | sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsampler_c; |
91 | 0 | #if defined(X86_ASM) |
92 | 0 | if (iCpuFlag & WELS_CPU_SSE) { |
93 | 0 | sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_sse; |
94 | 0 | sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_sse; |
95 | 0 | sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse; |
96 | 0 | } |
97 | 0 | if (iCpuFlag & WELS_CPU_SSE2) { |
98 | 0 | sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2; |
99 | 0 | sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2; |
100 | 0 | } |
101 | 0 | if (iCpuFlag & WELS_CPU_SSSE3) { |
102 | 0 | sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_ssse3; |
103 | 0 | sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsamplerWidthx16_ssse3; |
104 | 0 | sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_ssse3; |
105 | 0 | sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_ssse3; |
106 | 0 | sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_ssse3; |
107 | 0 | } |
108 | 0 | if (iCpuFlag & WELS_CPU_SSE41) { |
109 | 0 | sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_sse4; |
110 | 0 | sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse4; |
111 | 0 | sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse41; |
112 | 0 | } |
113 | 0 | #ifdef HAVE_AVX2 |
114 | 0 | if (iCpuFlag & WELS_CPU_AVX2) { |
115 | 0 | sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2; |
116 | 0 | sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_avx2; |
117 | 0 | } |
118 | 0 | #endif |
119 | 0 | #endif//X86_ASM |
120 | |
|
121 | | #if defined(HAVE_NEON) |
122 | | if (iCpuFlag & WELS_CPU_NEON) { |
123 | | sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_neon; |
124 | | sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_neon; |
125 | | sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_neon; |
126 | | sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_neon; |
127 | | sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_neon; |
128 | | sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearAccurateDownsamplerWrap_neon; |
129 | | } |
130 | | #endif |
131 | |
|
132 | | #if defined(HAVE_NEON_AARCH64) && defined(__aarch64__) |
133 | | if (iCpuFlag & WELS_CPU_NEON) { |
134 | | sDownsampleFunc.pfHalfAverageWidthx32 = DyadicBilinearDownsamplerWidthx32_AArch64_neon; |
135 | | sDownsampleFunc.pfHalfAverageWidthx16 = DyadicBilinearDownsampler_AArch64_neon; |
136 | | sDownsampleFunc.pfOneThirdDownsampler = DyadicBilinearOneThirdDownsampler_AArch64_neon; |
137 | | sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_AArch64_neon; |
138 | | sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon; |
139 | | sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearAccurateDownsamplerWrap_AArch64_neon; |
140 | | } |
141 | | #endif |
142 | 0 | } |
143 | | |
144 | 0 | EResult CDownsampling::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pDstPixMap) { |
145 | 0 | int32_t iSrcWidthY = pSrcPixMap->sRect.iRectWidth; |
146 | 0 | int32_t iSrcHeightY = pSrcPixMap->sRect.iRectHeight; |
147 | 0 | int32_t iDstWidthY = pDstPixMap->sRect.iRectWidth; |
148 | 0 | int32_t iDstHeightY = pDstPixMap->sRect.iRectHeight; |
149 | |
|
150 | 0 | int32_t iSrcWidthUV = iSrcWidthY >> 1; |
151 | 0 | int32_t iSrcHeightUV = iSrcHeightY >> 1; |
152 | 0 | int32_t iDstWidthUV = iDstWidthY >> 1; |
153 | 0 | int32_t iDstHeightUV = iDstHeightY >> 1; |
154 | |
|
155 | 0 | if (iSrcWidthY <= iDstWidthY || iSrcHeightY <= iDstHeightY) { |
156 | 0 | return RET_INVALIDPARAM; |
157 | 0 | } |
158 | 0 | if ((iSrcWidthY >> 1) > MAX_SAMPLE_WIDTH || (iSrcHeightY >> 1) > MAX_SAMPLE_HEIGHT || m_bNoSampleBuffer) { |
159 | 0 | if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) { |
160 | | // use half average functions |
161 | 0 | DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], |
162 | 0 | (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY); |
163 | 0 | DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], |
164 | 0 | (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV); |
165 | 0 | DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], |
166 | 0 | (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV); |
167 | 0 | } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) { |
168 | |
|
169 | 0 | m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], |
170 | 0 | (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY); |
171 | |
|
172 | 0 | m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], |
173 | 0 | (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV); |
174 | |
|
175 | 0 | m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], |
176 | 0 | (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV); |
177 | |
|
178 | 0 | } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) { |
179 | |
|
180 | 0 | m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], |
181 | 0 | (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY); |
182 | |
|
183 | 0 | m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], |
184 | 0 | (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV); |
185 | |
|
186 | 0 | m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], |
187 | 0 | (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV); |
188 | |
|
189 | 0 | } else { |
190 | 0 | m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY, |
191 | 0 | (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY); |
192 | |
|
193 | 0 | m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV, |
194 | 0 | (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV); |
195 | |
|
196 | 0 | m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV, |
197 | 0 | (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV); |
198 | 0 | } |
199 | 0 | } else { |
200 | |
|
201 | 0 | int32_t iIdx = 0; |
202 | 0 | int32_t iHalfSrcWidth = iSrcWidthY >> 1; |
203 | 0 | int32_t iHalfSrcHeight = iSrcHeightY >> 1; |
204 | 0 | uint8_t* pSrcY = (uint8_t*)pSrcPixMap->pPixel[0]; |
205 | 0 | uint8_t* pSrcU = (uint8_t*)pSrcPixMap->pPixel[1]; |
206 | 0 | uint8_t* pSrcV = (uint8_t*)pSrcPixMap->pPixel[2]; |
207 | 0 | int32_t iSrcStrideY = pSrcPixMap->iStride[0]; |
208 | 0 | int32_t iSrcStrideU = pSrcPixMap->iStride[1]; |
209 | 0 | int32_t iSrcStrideV = pSrcPixMap->iStride[2]; |
210 | |
|
211 | 0 | int32_t iDstStrideY = pDstPixMap->iStride[0]; |
212 | 0 | int32_t iDstStrideU = pDstPixMap->iStride[1]; |
213 | 0 | int32_t iDstStrideV = pDstPixMap->iStride[2]; |
214 | |
|
215 | 0 | uint8_t* pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0]; |
216 | 0 | uint8_t* pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1]; |
217 | 0 | uint8_t* pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2]; |
218 | 0 | iIdx++; |
219 | 0 | do { |
220 | 0 | if ((iHalfSrcWidth == iDstWidthY) && (iHalfSrcHeight == iDstHeightY)) { //end |
221 | | // use half average functions |
222 | 0 | DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], |
223 | 0 | (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY); |
224 | 0 | DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], |
225 | 0 | (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV); |
226 | 0 | DownsampleHalfAverage ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], |
227 | 0 | (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV); |
228 | 0 | break; |
229 | 0 | } else if ((iHalfSrcWidth > iDstWidthY) && (iHalfSrcHeight > iDstHeightY)){ |
230 | | // use half average functions |
231 | 0 | iDstStrideY = WELS_ALIGN (iHalfSrcWidth, 32); |
232 | 0 | iDstStrideU = WELS_ALIGN (iHalfSrcWidth >> 1, 32); |
233 | 0 | iDstStrideV = WELS_ALIGN (iHalfSrcWidth >> 1, 32); |
234 | 0 | DownsampleHalfAverage ((uint8_t*)pDstY, iDstStrideY, |
235 | 0 | (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY); |
236 | 0 | DownsampleHalfAverage ((uint8_t*)pDstU, iDstStrideU, |
237 | 0 | (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV); |
238 | 0 | DownsampleHalfAverage ((uint8_t*)pDstV, iDstStrideV, |
239 | 0 | (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV); |
240 | |
|
241 | 0 | pSrcY = (uint8_t*)pDstY; |
242 | 0 | pSrcU = (uint8_t*)pDstU; |
243 | 0 | pSrcV = (uint8_t*)pDstV; |
244 | | |
245 | |
|
246 | 0 | iSrcWidthY = iHalfSrcWidth; |
247 | 0 | iSrcWidthUV = iHalfSrcWidth >> 1; |
248 | 0 | iSrcHeightY = iHalfSrcHeight; |
249 | 0 | iSrcHeightUV = iHalfSrcHeight >> 1; |
250 | |
|
251 | 0 | iSrcStrideY = iDstStrideY; |
252 | 0 | iSrcStrideU = iDstStrideU; |
253 | 0 | iSrcStrideV = iDstStrideV; |
254 | |
|
255 | 0 | iHalfSrcWidth >>= 1; |
256 | 0 | iHalfSrcHeight >>= 1; |
257 | |
|
258 | 0 | iIdx = iIdx % 2; |
259 | 0 | pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0]; |
260 | 0 | pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1]; |
261 | 0 | pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2]; |
262 | 0 | iIdx++; |
263 | 0 | } else { |
264 | 0 | m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY, |
265 | 0 | (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY); |
266 | |
|
267 | 0 | m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV, |
268 | 0 | (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV); |
269 | |
|
270 | 0 | m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV, |
271 | 0 | (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV); |
272 | 0 | break; |
273 | 0 | } |
274 | 0 | } while (true); |
275 | 0 | } |
276 | 0 | return RET_SUCCESS; |
277 | 0 | } |
278 | | |
279 | | void CDownsampling::DownsampleHalfAverage (uint8_t* pDst, int32_t iDstStride, |
280 | 0 | uint8_t* pSrc, int32_t iSrcStride, int32_t iSrcWidth, int32_t iSrcHeight) { |
281 | 0 | if ((iSrcStride & 31) == 0) { |
282 | 0 | assert ((iDstStride & 15) == 0); |
283 | 0 | m_pfDownsample.pfHalfAverageWidthx32 (pDst, iDstStride, |
284 | 0 | pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 32), iSrcHeight); |
285 | 0 | } else { |
286 | 0 | assert ((iSrcStride & 15) == 0); |
287 | 0 | assert ((iDstStride & 7) == 0); |
288 | 0 | m_pfDownsample.pfHalfAverageWidthx16 (pDst, iDstStride, |
289 | 0 | pSrc, iSrcStride, WELS_ALIGN (iSrcWidth & ~1, 16), iSrcHeight); |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | | |
294 | | WELSVP_NAMESPACE_END |