/src/libavif/ext/libyuv/source/scale.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "libyuv/scale.h" |
12 | | |
13 | | #include <assert.h> |
14 | | #include <string.h> |
15 | | |
16 | | #include "libyuv/cpu_id.h" |
17 | | #include "libyuv/planar_functions.h" // For CopyPlane |
18 | | #include "libyuv/row.h" |
19 | | #include "libyuv/scale_row.h" |
20 | | #include "libyuv/scale_uv.h" // For UVScale |
21 | | |
22 | | #ifdef __cplusplus |
23 | | namespace libyuv { |
24 | | extern "C" { |
25 | | #endif |
26 | | |
// Returns the magnitude of v. v must not be INT_MIN, because negating it
// overflows int.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30 | | |
// SUBSAMPLE(v, a, s): computes (|v| + a) >> s and restores the sign of v, so
// a negative v yields the negated result of the matching positive v.
// Every argument and the whole expansion are parenthesized so the macro can
// be given expressions and used inside larger expressions. v is evaluated
// more than once, so it must not have side effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
// CENTERSTART(dx, s): computes (|dx| >> 1) + s and negates the whole sum when
// dx is negative. dx is evaluated more than once, so it must not have side
// effects.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
33 | | |
34 | | // Scale plane, 1/2 |
35 | | // This is an optimized version for scaling down a plane to 1/2 of |
36 | | // its original size. |
37 | | |
38 | | static void ScalePlaneDown2(int src_width, |
39 | | int src_height, |
40 | | int dst_width, |
41 | | int dst_height, |
42 | | int src_stride, |
43 | | int dst_stride, |
44 | | const uint8_t* src_ptr, |
45 | | uint8_t* dst_ptr, |
46 | 65 | enum FilterMode filtering) { |
47 | 65 | int y; |
48 | 65 | void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
49 | 65 | uint8_t* dst_ptr, int dst_width) = |
50 | 65 | filtering == kFilterNone |
51 | 65 | ? ScaleRowDown2_C |
52 | 65 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_C |
53 | 65 | : ScaleRowDown2Box_C); |
54 | 65 | int row_stride = src_stride * 2; |
55 | 65 | (void)src_width; |
56 | 65 | (void)src_height; |
57 | 65 | if (!filtering) { |
58 | 0 | src_ptr += src_stride; // Point to odd rows. |
59 | 0 | src_stride = 0; |
60 | 0 | } |
61 | | |
62 | | #if defined(HAS_SCALEROWDOWN2_NEON) |
63 | | if (TestCpuFlag(kCpuHasNEON)) { |
64 | | ScaleRowDown2 = |
65 | | filtering == kFilterNone |
66 | | ? ScaleRowDown2_Any_NEON |
67 | | : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON |
68 | | : ScaleRowDown2Box_Any_NEON); |
69 | | if (IS_ALIGNED(dst_width, 16)) { |
70 | | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON |
71 | | : (filtering == kFilterLinear |
72 | | ? ScaleRowDown2Linear_NEON |
73 | | : ScaleRowDown2Box_NEON); |
74 | | } |
75 | | } |
76 | | #endif |
77 | | #if defined(HAS_SCALEROWDOWN2_SME) |
78 | | if (TestCpuFlag(kCpuHasSME)) { |
79 | | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SME |
80 | | : filtering == kFilterLinear ? ScaleRowDown2Linear_SME |
81 | | : ScaleRowDown2Box_SME; |
82 | | } |
83 | | #endif |
84 | 65 | #if defined(HAS_SCALEROWDOWN2_SSSE3) |
85 | 65 | if (TestCpuFlag(kCpuHasSSSE3)) { |
86 | 65 | ScaleRowDown2 = |
87 | 65 | filtering == kFilterNone |
88 | 65 | ? ScaleRowDown2_Any_SSSE3 |
89 | 65 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 |
90 | 65 | : ScaleRowDown2Box_Any_SSSE3); |
91 | 65 | if (IS_ALIGNED(dst_width, 16)) { |
92 | 0 | ScaleRowDown2 = |
93 | 0 | filtering == kFilterNone |
94 | 0 | ? ScaleRowDown2_SSSE3 |
95 | 0 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 |
96 | 0 | : ScaleRowDown2Box_SSSE3); |
97 | 0 | } |
98 | 65 | } |
99 | 65 | #endif |
100 | 65 | #if defined(HAS_SCALEROWDOWN2_AVX2) |
101 | 65 | if (TestCpuFlag(kCpuHasAVX2)) { |
102 | 65 | ScaleRowDown2 = |
103 | 65 | filtering == kFilterNone |
104 | 65 | ? ScaleRowDown2_Any_AVX2 |
105 | 65 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 |
106 | 65 | : ScaleRowDown2Box_Any_AVX2); |
107 | 65 | if (IS_ALIGNED(dst_width, 32)) { |
108 | 0 | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 |
109 | 0 | : (filtering == kFilterLinear |
110 | 0 | ? ScaleRowDown2Linear_AVX2 |
111 | 0 | : ScaleRowDown2Box_AVX2); |
112 | 0 | } |
113 | 65 | } |
114 | 65 | #endif |
115 | | #if defined(HAS_SCALEROWDOWN2_MSA) |
116 | | if (TestCpuFlag(kCpuHasMSA)) { |
117 | | ScaleRowDown2 = |
118 | | filtering == kFilterNone |
119 | | ? ScaleRowDown2_Any_MSA |
120 | | : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA |
121 | | : ScaleRowDown2Box_Any_MSA); |
122 | | if (IS_ALIGNED(dst_width, 32)) { |
123 | | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA |
124 | | : (filtering == kFilterLinear |
125 | | ? ScaleRowDown2Linear_MSA |
126 | | : ScaleRowDown2Box_MSA); |
127 | | } |
128 | | } |
129 | | #endif |
130 | | #if defined(HAS_SCALEROWDOWN2_LSX) |
131 | | if (TestCpuFlag(kCpuHasLSX)) { |
132 | | ScaleRowDown2 = |
133 | | filtering == kFilterNone |
134 | | ? ScaleRowDown2_Any_LSX |
135 | | : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_LSX |
136 | | : ScaleRowDown2Box_Any_LSX); |
137 | | if (IS_ALIGNED(dst_width, 32)) { |
138 | | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_LSX |
139 | | : (filtering == kFilterLinear |
140 | | ? ScaleRowDown2Linear_LSX |
141 | | : ScaleRowDown2Box_LSX); |
142 | | } |
143 | | } |
144 | | #endif |
145 | | #if defined(HAS_SCALEROWDOWN2_RVV) |
146 | | if (TestCpuFlag(kCpuHasRVV)) { |
147 | | ScaleRowDown2 = filtering == kFilterNone |
148 | | ? ScaleRowDown2_RVV |
149 | | : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV |
150 | | : ScaleRowDown2Box_RVV); |
151 | | } |
152 | | #endif |
153 | | |
154 | 65 | if (filtering == kFilterLinear) { |
155 | 0 | src_stride = 0; |
156 | 0 | } |
157 | | // TODO(fbarchard): Loop through source height to allow odd height. |
158 | 1.70k | for (y = 0; y < dst_height; ++y) { |
159 | 1.64k | ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); |
160 | 1.64k | src_ptr += row_stride; |
161 | 1.64k | dst_ptr += dst_stride; |
162 | 1.64k | } |
163 | 65 | } |
164 | | |
165 | | static void ScalePlaneDown2_16(int src_width, |
166 | | int src_height, |
167 | | int dst_width, |
168 | | int dst_height, |
169 | | int src_stride, |
170 | | int dst_stride, |
171 | | const uint16_t* src_ptr, |
172 | | uint16_t* dst_ptr, |
173 | 74 | enum FilterMode filtering) { |
174 | 74 | int y; |
175 | 74 | void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
176 | 74 | uint16_t* dst_ptr, int dst_width) = |
177 | 74 | filtering == kFilterNone |
178 | 74 | ? ScaleRowDown2_16_C |
179 | 74 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C |
180 | 74 | : ScaleRowDown2Box_16_C); |
181 | 74 | int row_stride = src_stride * 2; |
182 | 74 | (void)src_width; |
183 | 74 | (void)src_height; |
184 | 74 | if (!filtering) { |
185 | 0 | src_ptr += src_stride; // Point to odd rows. |
186 | 0 | src_stride = 0; |
187 | 0 | } |
188 | | |
189 | | #if defined(HAS_SCALEROWDOWN2_16_NEON) |
190 | | if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { |
191 | | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_NEON |
192 | | : filtering == kFilterLinear ? ScaleRowDown2Linear_16_NEON |
193 | | : ScaleRowDown2Box_16_NEON; |
194 | | } |
195 | | #endif |
196 | | #if defined(HAS_SCALEROWDOWN2_16_SME) |
197 | | if (TestCpuFlag(kCpuHasSME)) { |
198 | | ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SME |
199 | | : filtering == kFilterLinear ? ScaleRowDown2Linear_16_SME |
200 | | : ScaleRowDown2Box_16_SME; |
201 | | } |
202 | | #endif |
203 | | #if defined(HAS_SCALEROWDOWN2_16_SSE2) |
204 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { |
205 | | ScaleRowDown2 = |
206 | | filtering == kFilterNone |
207 | | ? ScaleRowDown2_16_SSE2 |
208 | | : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 |
209 | | : ScaleRowDown2Box_16_SSE2); |
210 | | } |
211 | | #endif |
212 | | |
213 | 74 | if (filtering == kFilterLinear) { |
214 | 0 | src_stride = 0; |
215 | 0 | } |
216 | | // TODO(fbarchard): Loop through source height to allow odd height. |
217 | 2.76k | for (y = 0; y < dst_height; ++y) { |
218 | 2.68k | ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); |
219 | 2.68k | src_ptr += row_stride; |
220 | 2.68k | dst_ptr += dst_stride; |
221 | 2.68k | } |
222 | 74 | } |
223 | | |
224 | | void ScalePlaneDown2_16To8(int src_width, |
225 | | int src_height, |
226 | | int dst_width, |
227 | | int dst_height, |
228 | | int src_stride, |
229 | | int dst_stride, |
230 | | const uint16_t* src_ptr, |
231 | | uint8_t* dst_ptr, |
232 | | int scale, |
233 | 0 | enum FilterMode filtering) { |
234 | 0 | int y; |
235 | 0 | void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
236 | 0 | uint8_t* dst_ptr, int dst_width, int scale) = |
237 | 0 | (src_width & 1) |
238 | 0 | ? (filtering == kFilterNone |
239 | 0 | ? ScaleRowDown2_16To8_Odd_C |
240 | 0 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C |
241 | 0 | : ScaleRowDown2Box_16To8_Odd_C)) |
242 | 0 | : (filtering == kFilterNone |
243 | 0 | ? ScaleRowDown2_16To8_C |
244 | 0 | : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C |
245 | 0 | : ScaleRowDown2Box_16To8_C)); |
246 | 0 | int row_stride = src_stride * 2; |
247 | 0 | (void)dst_height; |
248 | 0 | if (!filtering) { |
249 | 0 | src_ptr += src_stride; // Point to odd rows. |
250 | 0 | src_stride = 0; |
251 | 0 | } |
252 | |
|
253 | 0 | if (filtering == kFilterLinear) { |
254 | 0 | src_stride = 0; |
255 | 0 | } |
256 | 0 | for (y = 0; y < src_height / 2; ++y) { |
257 | 0 | ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale); |
258 | 0 | src_ptr += row_stride; |
259 | 0 | dst_ptr += dst_stride; |
260 | 0 | } |
261 | 0 | if (src_height & 1) { |
262 | 0 | if (!filtering) { |
263 | 0 | src_ptr -= src_stride; // Point to last row. |
264 | 0 | } |
265 | 0 | ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale); |
266 | 0 | } |
267 | 0 | } |
268 | | |
269 | | // Scale plane, 1/4 |
270 | | // This is an optimized version for scaling down a plane to 1/4 of |
271 | | // its original size. |
272 | | |
273 | | static void ScalePlaneDown4(int src_width, |
274 | | int src_height, |
275 | | int dst_width, |
276 | | int dst_height, |
277 | | int src_stride, |
278 | | int dst_stride, |
279 | | const uint8_t* src_ptr, |
280 | | uint8_t* dst_ptr, |
281 | 44 | enum FilterMode filtering) { |
282 | 44 | int y; |
283 | 44 | void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
284 | 44 | uint8_t* dst_ptr, int dst_width) = |
285 | 44 | filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; |
286 | 44 | int row_stride = src_stride * 4; |
287 | 44 | (void)src_width; |
288 | 44 | (void)src_height; |
289 | 44 | if (!filtering) { |
290 | 0 | src_ptr += src_stride * 2; // Point to row 2. |
291 | 0 | src_stride = 0; |
292 | 0 | } |
293 | | #if defined(HAS_SCALEROWDOWN4_NEON) |
294 | | if (TestCpuFlag(kCpuHasNEON)) { |
295 | | ScaleRowDown4 = |
296 | | filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; |
297 | | if (IS_ALIGNED(dst_width, 16)) { |
298 | | ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; |
299 | | } |
300 | | } |
301 | | #endif |
302 | 44 | #if defined(HAS_SCALEROWDOWN4_SSSE3) |
303 | 44 | if (TestCpuFlag(kCpuHasSSSE3)) { |
304 | 44 | ScaleRowDown4 = |
305 | 44 | filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3; |
306 | 44 | if (IS_ALIGNED(dst_width, 8)) { |
307 | 0 | ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3; |
308 | 0 | } |
309 | 44 | } |
310 | 44 | #endif |
311 | 44 | #if defined(HAS_SCALEROWDOWN4_AVX2) |
312 | 44 | if (TestCpuFlag(kCpuHasAVX2)) { |
313 | 44 | ScaleRowDown4 = |
314 | 44 | filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2; |
315 | 44 | if (IS_ALIGNED(dst_width, 16)) { |
316 | 0 | ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2; |
317 | 0 | } |
318 | 44 | } |
319 | 44 | #endif |
320 | | #if defined(HAS_SCALEROWDOWN4_MSA) |
321 | | if (TestCpuFlag(kCpuHasMSA)) { |
322 | | ScaleRowDown4 = |
323 | | filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA; |
324 | | if (IS_ALIGNED(dst_width, 16)) { |
325 | | ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA; |
326 | | } |
327 | | } |
328 | | #endif |
329 | | #if defined(HAS_SCALEROWDOWN4_LSX) |
330 | | if (TestCpuFlag(kCpuHasLSX)) { |
331 | | ScaleRowDown4 = |
332 | | filtering ? ScaleRowDown4Box_Any_LSX : ScaleRowDown4_Any_LSX; |
333 | | if (IS_ALIGNED(dst_width, 16)) { |
334 | | ScaleRowDown4 = filtering ? ScaleRowDown4Box_LSX : ScaleRowDown4_LSX; |
335 | | } |
336 | | } |
337 | | #endif |
338 | | #if defined(HAS_SCALEROWDOWN4_RVV) |
339 | | if (TestCpuFlag(kCpuHasRVV)) { |
340 | | ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV; |
341 | | } |
342 | | #endif |
343 | | |
344 | 44 | if (filtering == kFilterLinear) { |
345 | 0 | src_stride = 0; |
346 | 0 | } |
347 | 498 | for (y = 0; y < dst_height; ++y) { |
348 | 454 | ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); |
349 | 454 | src_ptr += row_stride; |
350 | 454 | dst_ptr += dst_stride; |
351 | 454 | } |
352 | 44 | } |
353 | | |
354 | | static void ScalePlaneDown4_16(int src_width, |
355 | | int src_height, |
356 | | int dst_width, |
357 | | int dst_height, |
358 | | int src_stride, |
359 | | int dst_stride, |
360 | | const uint16_t* src_ptr, |
361 | | uint16_t* dst_ptr, |
362 | 38 | enum FilterMode filtering) { |
363 | 38 | int y; |
364 | 38 | void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
365 | 38 | uint16_t* dst_ptr, int dst_width) = |
366 | 38 | filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; |
367 | 38 | int row_stride = src_stride * 4; |
368 | 38 | (void)src_width; |
369 | 38 | (void)src_height; |
370 | 38 | if (!filtering) { |
371 | 0 | src_ptr += src_stride * 2; // Point to row 2. |
372 | 0 | src_stride = 0; |
373 | 0 | } |
374 | | #if defined(HAS_SCALEROWDOWN4_16_NEON) |
375 | | if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { |
376 | | ScaleRowDown4 = |
377 | | filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON; |
378 | | } |
379 | | #endif |
380 | | #if defined(HAS_SCALEROWDOWN4_16_SSE2) |
381 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { |
382 | | ScaleRowDown4 = |
383 | | filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; |
384 | | } |
385 | | #endif |
386 | | |
387 | 38 | if (filtering == kFilterLinear) { |
388 | 0 | src_stride = 0; |
389 | 0 | } |
390 | 304 | for (y = 0; y < dst_height; ++y) { |
391 | 266 | ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); |
392 | 266 | src_ptr += row_stride; |
393 | 266 | dst_ptr += dst_stride; |
394 | 266 | } |
395 | 38 | } |
396 | | |
397 | | // Scale plane down, 3/4 |
398 | | static void ScalePlaneDown34(int src_width, |
399 | | int src_height, |
400 | | int dst_width, |
401 | | int dst_height, |
402 | | int src_stride, |
403 | | int dst_stride, |
404 | | const uint8_t* src_ptr, |
405 | | uint8_t* dst_ptr, |
406 | 27 | enum FilterMode filtering) { |
407 | 27 | int y; |
408 | 27 | void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
409 | 27 | uint8_t* dst_ptr, int dst_width); |
410 | 27 | void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
411 | 27 | uint8_t* dst_ptr, int dst_width); |
412 | 27 | const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; |
413 | 27 | (void)src_width; |
414 | 27 | (void)src_height; |
415 | 27 | assert(dst_width % 3 == 0); |
416 | 27 | if (!filtering) { |
417 | 0 | ScaleRowDown34_0 = ScaleRowDown34_C; |
418 | 0 | ScaleRowDown34_1 = ScaleRowDown34_C; |
419 | 27 | } else { |
420 | 27 | ScaleRowDown34_0 = ScaleRowDown34_0_Box_C; |
421 | 27 | ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; |
422 | 27 | } |
423 | | #if defined(HAS_SCALEROWDOWN34_NEON) |
424 | | if (TestCpuFlag(kCpuHasNEON)) { |
425 | | #if defined(__aarch64__) |
426 | | if (dst_width % 48 == 0) { |
427 | | #else |
428 | | if (dst_width % 24 == 0) { |
429 | | #endif |
430 | | if (!filtering) { |
431 | | ScaleRowDown34_0 = ScaleRowDown34_NEON; |
432 | | ScaleRowDown34_1 = ScaleRowDown34_NEON; |
433 | | } else { |
434 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; |
435 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; |
436 | | } |
437 | | } else { |
438 | | if (!filtering) { |
439 | | ScaleRowDown34_0 = ScaleRowDown34_Any_NEON; |
440 | | ScaleRowDown34_1 = ScaleRowDown34_Any_NEON; |
441 | | } else { |
442 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON; |
443 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON; |
444 | | } |
445 | | } |
446 | | } |
447 | | #endif |
448 | | #if defined(HAS_SCALEROWDOWN34_MSA) |
449 | | if (TestCpuFlag(kCpuHasMSA)) { |
450 | | if (dst_width % 48 == 0) { |
451 | | if (!filtering) { |
452 | | ScaleRowDown34_0 = ScaleRowDown34_MSA; |
453 | | ScaleRowDown34_1 = ScaleRowDown34_MSA; |
454 | | } else { |
455 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA; |
456 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA; |
457 | | } |
458 | | } else { |
459 | | if (!filtering) { |
460 | | ScaleRowDown34_0 = ScaleRowDown34_Any_MSA; |
461 | | ScaleRowDown34_1 = ScaleRowDown34_Any_MSA; |
462 | | } else { |
463 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA; |
464 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA; |
465 | | } |
466 | | } |
467 | | } |
468 | | #endif |
469 | | #if defined(HAS_SCALEROWDOWN34_LSX) |
470 | | if (TestCpuFlag(kCpuHasLSX)) { |
471 | | if (dst_width % 48 == 0) { |
472 | | if (!filtering) { |
473 | | ScaleRowDown34_0 = ScaleRowDown34_LSX; |
474 | | ScaleRowDown34_1 = ScaleRowDown34_LSX; |
475 | | } else { |
476 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_LSX; |
477 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_LSX; |
478 | | } |
479 | | } else { |
480 | | if (!filtering) { |
481 | | ScaleRowDown34_0 = ScaleRowDown34_Any_LSX; |
482 | | ScaleRowDown34_1 = ScaleRowDown34_Any_LSX; |
483 | | } else { |
484 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_LSX; |
485 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_LSX; |
486 | | } |
487 | | } |
488 | | } |
489 | | #endif |
490 | 27 | #if defined(HAS_SCALEROWDOWN34_SSSE3) |
491 | 27 | if (TestCpuFlag(kCpuHasSSSE3)) { |
492 | 27 | if (dst_width % 24 == 0) { |
493 | 0 | if (!filtering) { |
494 | 0 | ScaleRowDown34_0 = ScaleRowDown34_SSSE3; |
495 | 0 | ScaleRowDown34_1 = ScaleRowDown34_SSSE3; |
496 | 0 | } else { |
497 | 0 | ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; |
498 | 0 | ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; |
499 | 0 | } |
500 | 27 | } else { |
501 | 27 | if (!filtering) { |
502 | 0 | ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3; |
503 | 0 | ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3; |
504 | 27 | } else { |
505 | 27 | ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3; |
506 | 27 | ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3; |
507 | 27 | } |
508 | 27 | } |
509 | 27 | } |
510 | 27 | #endif |
511 | | #if defined(HAS_SCALEROWDOWN34_RVV) |
512 | | if (TestCpuFlag(kCpuHasRVV)) { |
513 | | if (!filtering) { |
514 | | ScaleRowDown34_0 = ScaleRowDown34_RVV; |
515 | | ScaleRowDown34_1 = ScaleRowDown34_RVV; |
516 | | } else { |
517 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV; |
518 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV; |
519 | | } |
520 | | } |
521 | | #endif |
522 | | |
523 | 618 | for (y = 0; y < dst_height - 2; y += 3) { |
524 | 591 | ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); |
525 | 591 | src_ptr += src_stride; |
526 | 591 | dst_ptr += dst_stride; |
527 | 591 | ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); |
528 | 591 | src_ptr += src_stride; |
529 | 591 | dst_ptr += dst_stride; |
530 | 591 | ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); |
531 | 591 | src_ptr += src_stride * 2; |
532 | 591 | dst_ptr += dst_stride; |
533 | 591 | } |
534 | | |
535 | | // Remainder 1 or 2 rows with last row vertically unfiltered |
536 | 27 | if ((dst_height % 3) == 2) { |
537 | 0 | ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); |
538 | 0 | src_ptr += src_stride; |
539 | 0 | dst_ptr += dst_stride; |
540 | 0 | ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); |
541 | 27 | } else if ((dst_height % 3) == 1) { |
542 | 0 | ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); |
543 | 0 | } |
544 | 27 | } |
545 | | |
546 | | static void ScalePlaneDown34_16(int src_width, |
547 | | int src_height, |
548 | | int dst_width, |
549 | | int dst_height, |
550 | | int src_stride, |
551 | | int dst_stride, |
552 | | const uint16_t* src_ptr, |
553 | | uint16_t* dst_ptr, |
554 | 30 | enum FilterMode filtering) { |
555 | 30 | int y; |
556 | 30 | void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
557 | 30 | uint16_t* dst_ptr, int dst_width); |
558 | 30 | void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
559 | 30 | uint16_t* dst_ptr, int dst_width); |
560 | 30 | const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; |
561 | 30 | (void)src_width; |
562 | 30 | (void)src_height; |
563 | 30 | assert(dst_width % 3 == 0); |
564 | 30 | if (!filtering) { |
565 | 0 | ScaleRowDown34_0 = ScaleRowDown34_16_C; |
566 | 0 | ScaleRowDown34_1 = ScaleRowDown34_16_C; |
567 | 30 | } else { |
568 | 30 | ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C; |
569 | 30 | ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C; |
570 | 30 | } |
571 | | #if defined(HAS_SCALEROWDOWN34_16_NEON) |
572 | | if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { |
573 | | if (!filtering) { |
574 | | ScaleRowDown34_0 = ScaleRowDown34_16_NEON; |
575 | | ScaleRowDown34_1 = ScaleRowDown34_16_NEON; |
576 | | } else { |
577 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON; |
578 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON; |
579 | | } |
580 | | } |
581 | | #endif |
582 | | #if defined(HAS_SCALEROWDOWN34_16_SSSE3) |
583 | | if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { |
584 | | if (!filtering) { |
585 | | ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; |
586 | | ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; |
587 | | } else { |
588 | | ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3; |
589 | | ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3; |
590 | | } |
591 | | } |
592 | | #endif |
593 | | |
594 | 627 | for (y = 0; y < dst_height - 2; y += 3) { |
595 | 597 | ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); |
596 | 597 | src_ptr += src_stride; |
597 | 597 | dst_ptr += dst_stride; |
598 | 597 | ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); |
599 | 597 | src_ptr += src_stride; |
600 | 597 | dst_ptr += dst_stride; |
601 | 597 | ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); |
602 | 597 | src_ptr += src_stride * 2; |
603 | 597 | dst_ptr += dst_stride; |
604 | 597 | } |
605 | | |
606 | | // Remainder 1 or 2 rows with last row vertically unfiltered |
607 | 30 | if ((dst_height % 3) == 2) { |
608 | 0 | ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); |
609 | 0 | src_ptr += src_stride; |
610 | 0 | dst_ptr += dst_stride; |
611 | 0 | ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); |
612 | 30 | } else if ((dst_height % 3) == 1) { |
613 | 0 | ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); |
614 | 0 | } |
615 | 30 | } |
616 | | |
617 | | // Scale plane, 3/8 |
618 | | // This is an optimized version for scaling down a plane to 3/8 |
619 | | // of its original size. |
620 | | // |
621 | | // Uses box filters arranged like this: |
622 | | // aaabbbcc -> abc |
623 | | // aaabbbcc def |
624 | | // aaabbbcc ghi |
625 | | // dddeeeff |
626 | | // dddeeeff |
627 | | // dddeeeff |
628 | | // ggghhhii |
629 | | // ggghhhii |
630 | | // Boxes are 3x3, 2x3, 3x2 and 2x2 |
631 | | |
632 | | static void ScalePlaneDown38(int src_width, |
633 | | int src_height, |
634 | | int dst_width, |
635 | | int dst_height, |
636 | | int src_stride, |
637 | | int dst_stride, |
638 | | const uint8_t* src_ptr, |
639 | | uint8_t* dst_ptr, |
640 | 22 | enum FilterMode filtering) { |
641 | 22 | int y; |
642 | 22 | void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
643 | 22 | uint8_t* dst_ptr, int dst_width); |
644 | 22 | void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
645 | 22 | uint8_t* dst_ptr, int dst_width); |
646 | 22 | const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; |
647 | 22 | assert(dst_width % 3 == 0); |
648 | 22 | (void)src_width; |
649 | 22 | (void)src_height; |
650 | 22 | if (!filtering) { |
651 | 0 | ScaleRowDown38_3 = ScaleRowDown38_C; |
652 | 0 | ScaleRowDown38_2 = ScaleRowDown38_C; |
653 | 22 | } else { |
654 | 22 | ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; |
655 | 22 | ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; |
656 | 22 | } |
657 | | |
658 | | #if defined(HAS_SCALEROWDOWN38_NEON) |
659 | | if (TestCpuFlag(kCpuHasNEON)) { |
660 | | if (!filtering) { |
661 | | ScaleRowDown38_3 = ScaleRowDown38_Any_NEON; |
662 | | ScaleRowDown38_2 = ScaleRowDown38_Any_NEON; |
663 | | } else { |
664 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON; |
665 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON; |
666 | | } |
667 | | if (dst_width % 12 == 0) { |
668 | | if (!filtering) { |
669 | | ScaleRowDown38_3 = ScaleRowDown38_NEON; |
670 | | ScaleRowDown38_2 = ScaleRowDown38_NEON; |
671 | | } else { |
672 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; |
673 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; |
674 | | } |
675 | | } |
676 | | } |
677 | | #endif |
678 | 22 | #if defined(HAS_SCALEROWDOWN38_SSSE3) |
679 | 22 | if (TestCpuFlag(kCpuHasSSSE3)) { |
680 | 22 | if (!filtering) { |
681 | 0 | ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3; |
682 | 0 | ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3; |
683 | 22 | } else { |
684 | 22 | ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3; |
685 | 22 | ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3; |
686 | 22 | } |
687 | 22 | if (dst_width % 12 == 0 && !filtering) { |
688 | 0 | ScaleRowDown38_3 = ScaleRowDown38_SSSE3; |
689 | 0 | ScaleRowDown38_2 = ScaleRowDown38_SSSE3; |
690 | 0 | } |
691 | 22 | if (dst_width % 6 == 0 && filtering) { |
692 | 0 | ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; |
693 | 0 | ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; |
694 | 0 | } |
695 | 22 | } |
696 | 22 | #endif |
697 | | #if defined(HAS_SCALEROWDOWN38_MSA) |
698 | | if (TestCpuFlag(kCpuHasMSA)) { |
699 | | if (!filtering) { |
700 | | ScaleRowDown38_3 = ScaleRowDown38_Any_MSA; |
701 | | ScaleRowDown38_2 = ScaleRowDown38_Any_MSA; |
702 | | } else { |
703 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA; |
704 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA; |
705 | | } |
706 | | if (dst_width % 12 == 0) { |
707 | | if (!filtering) { |
708 | | ScaleRowDown38_3 = ScaleRowDown38_MSA; |
709 | | ScaleRowDown38_2 = ScaleRowDown38_MSA; |
710 | | } else { |
711 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA; |
712 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA; |
713 | | } |
714 | | } |
715 | | } |
716 | | #endif |
717 | | #if defined(HAS_SCALEROWDOWN38_LSX) |
718 | | if (TestCpuFlag(kCpuHasLSX)) { |
719 | | if (!filtering) { |
720 | | ScaleRowDown38_3 = ScaleRowDown38_Any_LSX; |
721 | | ScaleRowDown38_2 = ScaleRowDown38_Any_LSX; |
722 | | } else { |
723 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_LSX; |
724 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_LSX; |
725 | | } |
726 | | if (dst_width % 12 == 0) { |
727 | | if (!filtering) { |
728 | | ScaleRowDown38_3 = ScaleRowDown38_LSX; |
729 | | ScaleRowDown38_2 = ScaleRowDown38_LSX; |
730 | | } else { |
731 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_LSX; |
732 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_LSX; |
733 | | } |
734 | | } |
735 | | } |
736 | | #endif |
737 | | #if defined(HAS_SCALEROWDOWN38_RVV) |
738 | | if (TestCpuFlag(kCpuHasRVV)) { |
739 | | if (!filtering) { |
740 | | ScaleRowDown38_3 = ScaleRowDown38_RVV; |
741 | | ScaleRowDown38_2 = ScaleRowDown38_RVV; |
742 | | } else { |
743 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV; |
744 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV; |
745 | | } |
746 | | } |
747 | | #endif |
748 | | |
749 | 257 | for (y = 0; y < dst_height - 2; y += 3) { |
750 | 235 | ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); |
751 | 235 | src_ptr += src_stride * 3; |
752 | 235 | dst_ptr += dst_stride; |
753 | 235 | ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); |
754 | 235 | src_ptr += src_stride * 3; |
755 | 235 | dst_ptr += dst_stride; |
756 | 235 | ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); |
757 | 235 | src_ptr += src_stride * 2; |
758 | 235 | dst_ptr += dst_stride; |
759 | 235 | } |
760 | | |
761 | | // Remainder 1 or 2 rows with last row vertically unfiltered |
762 | 22 | if ((dst_height % 3) == 2) { |
763 | 0 | ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); |
764 | 0 | src_ptr += src_stride * 3; |
765 | 0 | dst_ptr += dst_stride; |
766 | 0 | ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); |
767 | 22 | } else if ((dst_height % 3) == 1) { |
768 | 0 | ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); |
769 | 0 | } |
770 | 22 | } |
771 | | |
772 | | static void ScalePlaneDown38_16(int src_width, |
773 | | int src_height, |
774 | | int dst_width, |
775 | | int dst_height, |
776 | | int src_stride, |
777 | | int dst_stride, |
778 | | const uint16_t* src_ptr, |
779 | | uint16_t* dst_ptr, |
780 | 24 | enum FilterMode filtering) { |
781 | 24 | int y; |
782 | 24 | void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
783 | 24 | uint16_t* dst_ptr, int dst_width); |
784 | 24 | void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
785 | 24 | uint16_t* dst_ptr, int dst_width); |
786 | 24 | const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; |
787 | 24 | (void)src_width; |
788 | 24 | (void)src_height; |
789 | 24 | assert(dst_width % 3 == 0); |
790 | 24 | if (!filtering) { |
791 | 0 | ScaleRowDown38_3 = ScaleRowDown38_16_C; |
792 | 0 | ScaleRowDown38_2 = ScaleRowDown38_16_C; |
793 | 24 | } else { |
794 | 24 | ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C; |
795 | 24 | ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C; |
796 | 24 | } |
797 | | #if defined(HAS_SCALEROWDOWN38_16_NEON) |
798 | | if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { |
799 | | if (!filtering) { |
800 | | ScaleRowDown38_3 = ScaleRowDown38_16_NEON; |
801 | | ScaleRowDown38_2 = ScaleRowDown38_16_NEON; |
802 | | } else { |
803 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON; |
804 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON; |
805 | | } |
806 | | } |
807 | | #endif |
808 | | #if defined(HAS_SCALEROWDOWN38_16_SSSE3) |
809 | | if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { |
810 | | if (!filtering) { |
811 | | ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; |
812 | | ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; |
813 | | } else { |
814 | | ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3; |
815 | | ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3; |
816 | | } |
817 | | } |
818 | | #endif |
819 | | |
820 | 303 | for (y = 0; y < dst_height - 2; y += 3) { |
821 | 279 | ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); |
822 | 279 | src_ptr += src_stride * 3; |
823 | 279 | dst_ptr += dst_stride; |
824 | 279 | ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); |
825 | 279 | src_ptr += src_stride * 3; |
826 | 279 | dst_ptr += dst_stride; |
827 | 279 | ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); |
828 | 279 | src_ptr += src_stride * 2; |
829 | 279 | dst_ptr += dst_stride; |
830 | 279 | } |
831 | | |
832 | | // Remainder 1 or 2 rows with last row vertically unfiltered |
833 | 24 | if ((dst_height % 3) == 2) { |
834 | 0 | ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); |
835 | 0 | src_ptr += src_stride * 3; |
836 | 0 | dst_ptr += dst_stride; |
837 | 0 | ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); |
838 | 24 | } else if ((dst_height % 3) == 1) { |
839 | 0 | ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); |
840 | 0 | } |
841 | 24 | } |
842 | | |
// Clamps |x| to a floor of 1. The argument is expanded more than once, so it
// must be free of side effects.
#define MIN1(x) (((x) >= 1) ? (x) : 1)
844 | | |
// Returns the sum of the first |iboxwidth| uint16_t entries at |src_ptr|.
// |iboxwidth| is asserted to be positive; the sum is accumulated in 32 bits.
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
854 | | |
// Returns the sum of the first |iboxwidth| uint32_t entries at |src_ptr|.
// |iboxwidth| is asserted to be positive; the result wraps modulo 2^32.
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  uint32_t total = 0u;
  int i = 0;
  assert(iboxwidth > 0);
  while (i < iboxwidth) {
    total += src_ptr[i];
    ++i;
  }
  return total;
}
864 | | |
// Column pass of the box filter for a fractional horizontal step.
// |x| and |dx| are 16.16 fixed point. Each output pixel averages a run of
// accumulated column sums from |src_ptr|; the run is either (dx >> 16) or
// (dx >> 16) + 1 entries wide, so two reciprocals are precomputed and the
// matching one is picked per pixel.
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int base_width = dx >> 16;
  // 16.16 reciprocals of the box area for the narrow and the wide run.
  const int recip[2] = {65536 / (MIN1(base_width) * boxheight),
                        65536 / (MIN1(base_width + 1) * boxheight)};
  int col;
  for (col = 0; col < dst_width; ++col) {
    const int left = x >> 16;
    int width;
    int which;
    uint32_t sum;
    x += dx;
    // The run spans from this pixel's integer start to the next pixel's.
    width = MIN1((x >> 16) - left);
    which = width - base_width;
    assert((which == 0) || (which == 1));
    sum = SumPixels(width, src_ptr + left);
    dst_ptr[col] = (uint8_t)(sum * recip[which] >> 16);
  }
}
888 | | |
// 16-bit counterpart of the fractional-step column pass: reads uint32_t
// column sums and writes uint16_t pixels. |x| and |dx| are 16.16 fixed point.
// Each run is (dx >> 16) or (dx >> 16) + 1 entries wide; the reciprocal of
// the matching box area is chosen per output pixel.
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  const int narrow = dx >> 16;
  int recip[2];
  int remaining;
  recip[0] = 65536 / (MIN1(narrow) * boxheight);
  recip[1] = 65536 / (MIN1(narrow + 1) * boxheight);
  for (remaining = dst_width; remaining > 0; --remaining) {
    const int first = x >> 16;
    int span;
    int pick;
    x += dx;
    span = MIN1((x >> 16) - first);
    pick = span - narrow;
    assert((pick == 0) || (pick == 1));
    *dst_ptr = SumPixels_16(span, src_ptr + first) * recip[pick] >> 16;
    ++dst_ptr;
  }
}
911 | | |
// Column pass of the box filter when no horizontal averaging is performed:
// each output pixel is a single accumulated column sum divided by
// |boxheight|. |x| is 16.16 fixed point and selects the first column;
// |dx| is unused.
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  // 16.16 reciprocal of the box height.
  const int recip = 65536 / boxheight;
  const uint16_t* in = src_ptr + (x >> 16);
  int col = 0;
  (void)dx;
  while (col < dst_width) {
    dst_ptr[col] = (uint8_t)(in[col] * recip >> 16);
    ++col;
  }
}
926 | | |
// Column pass of the box filter for a whole-number horizontal step.
// Every output pixel averages a run of exactly MIN1(dx >> 16) accumulated
// column sums, starting at the integer part of the 16.16 value |x| and
// advancing by one run per pixel.
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int span = MIN1(dx >> 16);
  // 16.16 reciprocal of the box area.
  const int recip = 65536 / (span * boxheight);
  int start = x >> 16;
  int col;
  for (col = 0; col < dst_width; ++col, start += span) {
    const uint32_t sum = SumPixels(span, src_ptr + start);
    dst_ptr[col] = (uint8_t)(sum * recip >> 16);
  }
}
942 | | |
// Column pass of the 16-bit box filter for a whole-number horizontal step.
// Reads uint32_t column sums from |src_ptr| and writes uint16_t pixels.
//   dst_width  number of output pixels to produce.
//   boxheight  number of source rows already accumulated into |src_ptr|.
//   x          starting source position, 16.16 fixed point.
//   dx         horizontal step, 16.16 fixed point; its integer part (at
//              least 1) is the run width averaged for every pixel.
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  // 16.16 reciprocal of the box area.
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  // |x| arrives as 16.16 fixed point; keep only the integer column index so
  // it can be used to index |src_ptr| (as the 8-bit ScaleAddCols1_C does).
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
957 | | |
958 | | // Scale plane down to any dimensions, with interpolation. |
959 | | // (boxfilter). |
960 | | // |
961 | | // Same method as SimpleScale, which is fixed point, outputting |
962 | | // one pixel of destination using fixed point (16.16) to step |
963 | | // through source, sampling a box of pixel with simple |
964 | | // averaging. |
965 | | static int ScalePlaneBox(int src_width, |
966 | | int src_height, |
967 | | int dst_width, |
968 | | int dst_height, |
969 | | int src_stride, |
970 | | int dst_stride, |
971 | | const uint8_t* src_ptr, |
972 | 1.18k | uint8_t* dst_ptr) { |
973 | 1.18k | int j, k; |
974 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
975 | 1.18k | int x = 0; |
976 | 1.18k | int y = 0; |
977 | 1.18k | int dx = 0; |
978 | 1.18k | int dy = 0; |
979 | 1.18k | const int max_y = (src_height << 16); |
980 | 1.18k | ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, |
981 | 1.18k | &dx, &dy); |
982 | 1.18k | src_width = Abs(src_width); |
983 | 1.18k | { |
984 | | // Allocate a row buffer of uint16_t. |
985 | 1.18k | align_buffer_64(row16, src_width * 2); |
986 | 1.18k | if (!row16) |
987 | 0 | return 1; |
988 | 1.18k | void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, |
989 | 1.18k | const uint16_t* src_ptr, uint8_t* dst_ptr) = |
990 | 1.18k | (dx & 0xffff) ? ScaleAddCols2_C |
991 | 1.18k | : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); |
992 | 1.18k | void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr, |
993 | 1.18k | int src_width) = ScaleAddRow_C; |
994 | 1.18k | #if defined(HAS_SCALEADDROW_SSE2) |
995 | 1.18k | if (TestCpuFlag(kCpuHasSSE2)) { |
996 | 1.18k | ScaleAddRow = ScaleAddRow_Any_SSE2; |
997 | 1.18k | if (IS_ALIGNED(src_width, 16)) { |
998 | 158 | ScaleAddRow = ScaleAddRow_SSE2; |
999 | 158 | } |
1000 | 1.18k | } |
1001 | 1.18k | #endif |
1002 | 1.18k | #if defined(HAS_SCALEADDROW_AVX2) |
1003 | 1.18k | if (TestCpuFlag(kCpuHasAVX2)) { |
1004 | 1.18k | ScaleAddRow = ScaleAddRow_Any_AVX2; |
1005 | 1.18k | if (IS_ALIGNED(src_width, 32)) { |
1006 | 103 | ScaleAddRow = ScaleAddRow_AVX2; |
1007 | 103 | } |
1008 | 1.18k | } |
1009 | 1.18k | #endif |
1010 | | #if defined(HAS_SCALEADDROW_NEON) |
1011 | | if (TestCpuFlag(kCpuHasNEON)) { |
1012 | | ScaleAddRow = ScaleAddRow_Any_NEON; |
1013 | | if (IS_ALIGNED(src_width, 16)) { |
1014 | | ScaleAddRow = ScaleAddRow_NEON; |
1015 | | } |
1016 | | } |
1017 | | #endif |
1018 | | #if defined(HAS_SCALEADDROW_MSA) |
1019 | | if (TestCpuFlag(kCpuHasMSA)) { |
1020 | | ScaleAddRow = ScaleAddRow_Any_MSA; |
1021 | | if (IS_ALIGNED(src_width, 16)) { |
1022 | | ScaleAddRow = ScaleAddRow_MSA; |
1023 | | } |
1024 | | } |
1025 | | #endif |
1026 | | #if defined(HAS_SCALEADDROW_LSX) |
1027 | | if (TestCpuFlag(kCpuHasLSX)) { |
1028 | | ScaleAddRow = ScaleAddRow_Any_LSX; |
1029 | | if (IS_ALIGNED(src_width, 16)) { |
1030 | | ScaleAddRow = ScaleAddRow_LSX; |
1031 | | } |
1032 | | } |
1033 | | #endif |
1034 | | #if defined(HAS_SCALEADDROW_RVV) |
1035 | | if (TestCpuFlag(kCpuHasRVV)) { |
1036 | | ScaleAddRow = ScaleAddRow_RVV; |
1037 | | } |
1038 | | #endif |
1039 | | |
1040 | 91.1k | for (j = 0; j < dst_height; ++j) { |
1041 | 89.9k | int boxheight; |
1042 | 89.9k | int iy = y >> 16; |
1043 | 89.9k | const uint8_t* src = src_ptr + iy * (int64_t)src_stride; |
1044 | 89.9k | y += dy; |
1045 | 89.9k | if (y > max_y) { |
1046 | 0 | y = max_y; |
1047 | 0 | } |
1048 | 89.9k | boxheight = MIN1((y >> 16) - iy); |
1049 | 89.9k | memset(row16, 0, src_width * 2); |
1050 | 901k | for (k = 0; k < boxheight; ++k) { |
1051 | 811k | ScaleAddRow(src, (uint16_t*)(row16), src_width); |
1052 | 811k | src += src_stride; |
1053 | 811k | } |
1054 | 89.9k | ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr); |
1055 | 89.9k | dst_ptr += dst_stride; |
1056 | 89.9k | } |
1057 | 1.18k | free_aligned_buffer_64(row16); |
1058 | 1.18k | } |
1059 | 0 | return 0; |
1060 | 1.18k | } |
1061 | | |
1062 | | static int ScalePlaneBox_16(int src_width, |
1063 | | int src_height, |
1064 | | int dst_width, |
1065 | | int dst_height, |
1066 | | int src_stride, |
1067 | | int dst_stride, |
1068 | | const uint16_t* src_ptr, |
1069 | 1.25k | uint16_t* dst_ptr) { |
1070 | 1.25k | int j, k; |
1071 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
1072 | 1.25k | int x = 0; |
1073 | 1.25k | int y = 0; |
1074 | 1.25k | int dx = 0; |
1075 | 1.25k | int dy = 0; |
1076 | 1.25k | const int max_y = (src_height << 16); |
1077 | 1.25k | ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, |
1078 | 1.25k | &dx, &dy); |
1079 | 1.25k | src_width = Abs(src_width); |
1080 | 1.25k | { |
1081 | | // Allocate a row buffer of uint32_t. |
1082 | 1.25k | align_buffer_64(row32, src_width * 4); |
1083 | 1.25k | if (!row32) |
1084 | 0 | return 1; |
1085 | 1.25k | void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, |
1086 | 1.25k | const uint32_t* src_ptr, uint16_t* dst_ptr) = |
1087 | 1.25k | (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; |
1088 | 1.25k | void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr, |
1089 | 1.25k | int src_width) = ScaleAddRow_16_C; |
1090 | | |
1091 | | #if defined(HAS_SCALEADDROW_16_SSE2) |
1092 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) { |
1093 | | ScaleAddRow = ScaleAddRow_16_SSE2; |
1094 | | } |
1095 | | #endif |
1096 | | |
1097 | 78.7k | for (j = 0; j < dst_height; ++j) { |
1098 | 77.4k | int boxheight; |
1099 | 77.4k | int iy = y >> 16; |
1100 | 77.4k | const uint16_t* src = src_ptr + iy * (int64_t)src_stride; |
1101 | 77.4k | y += dy; |
1102 | 77.4k | if (y > max_y) { |
1103 | 0 | y = max_y; |
1104 | 0 | } |
1105 | 77.4k | boxheight = MIN1((y >> 16) - iy); |
1106 | 77.4k | memset(row32, 0, src_width * 4); |
1107 | 783k | for (k = 0; k < boxheight; ++k) { |
1108 | 705k | ScaleAddRow(src, (uint32_t*)(row32), src_width); |
1109 | 705k | src += src_stride; |
1110 | 705k | } |
1111 | 77.4k | ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr); |
1112 | 77.4k | dst_ptr += dst_stride; |
1113 | 77.4k | } |
1114 | 1.25k | free_aligned_buffer_64(row32); |
1115 | 1.25k | } |
1116 | 0 | return 0; |
1117 | 1.25k | } |
1118 | | |
1119 | | // Scale plane down with bilinear interpolation. |
1120 | | static int ScalePlaneBilinearDown(int src_width, |
1121 | | int src_height, |
1122 | | int dst_width, |
1123 | | int dst_height, |
1124 | | int src_stride, |
1125 | | int dst_stride, |
1126 | | const uint8_t* src_ptr, |
1127 | | uint8_t* dst_ptr, |
1128 | 3.75k | enum FilterMode filtering) { |
1129 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
1130 | 3.75k | int x = 0; |
1131 | 3.75k | int y = 0; |
1132 | 3.75k | int dx = 0; |
1133 | 3.75k | int dy = 0; |
1134 | | // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. |
1135 | | // Allocate a row buffer. |
1136 | 3.75k | align_buffer_64(row, src_width); |
1137 | 3.75k | if (!row) |
1138 | 0 | return 1; |
1139 | | |
1140 | 3.75k | const int max_y = (src_height - 1) << 16; |
1141 | 3.75k | int j; |
1142 | 3.75k | void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, |
1143 | 3.75k | int dst_width, int x, int dx) = |
1144 | 3.75k | (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; |
1145 | 3.75k | void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, |
1146 | 3.75k | ptrdiff_t src_stride, int dst_width, |
1147 | 3.75k | int source_y_fraction) = InterpolateRow_C; |
1148 | 3.75k | ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, |
1149 | 3.75k | &dx, &dy); |
1150 | 3.75k | src_width = Abs(src_width); |
1151 | | |
1152 | 3.75k | #if defined(HAS_INTERPOLATEROW_SSSE3) |
1153 | 3.75k | if (TestCpuFlag(kCpuHasSSSE3)) { |
1154 | 3.75k | InterpolateRow = InterpolateRow_Any_SSSE3; |
1155 | 3.75k | if (IS_ALIGNED(src_width, 16)) { |
1156 | 923 | InterpolateRow = InterpolateRow_SSSE3; |
1157 | 923 | } |
1158 | 3.75k | } |
1159 | 3.75k | #endif |
1160 | 3.75k | #if defined(HAS_INTERPOLATEROW_AVX2) |
1161 | 3.75k | if (TestCpuFlag(kCpuHasAVX2)) { |
1162 | 3.75k | InterpolateRow = InterpolateRow_Any_AVX2; |
1163 | 3.75k | if (IS_ALIGNED(src_width, 32)) { |
1164 | 603 | InterpolateRow = InterpolateRow_AVX2; |
1165 | 603 | } |
1166 | 3.75k | } |
1167 | 3.75k | #endif |
1168 | | #if defined(HAS_INTERPOLATEROW_NEON) |
1169 | | if (TestCpuFlag(kCpuHasNEON)) { |
1170 | | InterpolateRow = InterpolateRow_Any_NEON; |
1171 | | if (IS_ALIGNED(src_width, 16)) { |
1172 | | InterpolateRow = InterpolateRow_NEON; |
1173 | | } |
1174 | | } |
1175 | | #endif |
1176 | | #if defined(HAS_INTERPOLATEROW_SME) |
1177 | | if (TestCpuFlag(kCpuHasSME)) { |
1178 | | InterpolateRow = InterpolateRow_SME; |
1179 | | } |
1180 | | #endif |
1181 | | #if defined(HAS_INTERPOLATEROW_MSA) |
1182 | | if (TestCpuFlag(kCpuHasMSA)) { |
1183 | | InterpolateRow = InterpolateRow_Any_MSA; |
1184 | | if (IS_ALIGNED(src_width, 32)) { |
1185 | | InterpolateRow = InterpolateRow_MSA; |
1186 | | } |
1187 | | } |
1188 | | #endif |
1189 | | #if defined(HAS_INTERPOLATEROW_LSX) |
1190 | | if (TestCpuFlag(kCpuHasLSX)) { |
1191 | | InterpolateRow = InterpolateRow_Any_LSX; |
1192 | | if (IS_ALIGNED(src_width, 32)) { |
1193 | | InterpolateRow = InterpolateRow_LSX; |
1194 | | } |
1195 | | } |
1196 | | #endif |
1197 | | #if defined(HAS_INTERPOLATEROW_RVV) |
1198 | | if (TestCpuFlag(kCpuHasRVV)) { |
1199 | | InterpolateRow = InterpolateRow_RVV; |
1200 | | } |
1201 | | #endif |
1202 | | |
1203 | 3.75k | #if defined(HAS_SCALEFILTERCOLS_SSSE3) |
1204 | 3.75k | if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { |
1205 | 3.75k | ScaleFilterCols = ScaleFilterCols_SSSE3; |
1206 | 3.75k | } |
1207 | 3.75k | #endif |
1208 | | #if defined(HAS_SCALEFILTERCOLS_NEON) |
1209 | | if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) { |
1210 | | ScaleFilterCols = ScaleFilterCols_Any_NEON; |
1211 | | if (IS_ALIGNED(dst_width, 8)) { |
1212 | | ScaleFilterCols = ScaleFilterCols_NEON; |
1213 | | } |
1214 | | } |
1215 | | #endif |
1216 | | #if defined(HAS_SCALEFILTERCOLS_MSA) |
1217 | | if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) { |
1218 | | ScaleFilterCols = ScaleFilterCols_Any_MSA; |
1219 | | if (IS_ALIGNED(dst_width, 16)) { |
1220 | | ScaleFilterCols = ScaleFilterCols_MSA; |
1221 | | } |
1222 | | } |
1223 | | #endif |
1224 | | #if defined(HAS_SCALEFILTERCOLS_LSX) |
1225 | | if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) { |
1226 | | ScaleFilterCols = ScaleFilterCols_Any_LSX; |
1227 | | if (IS_ALIGNED(dst_width, 16)) { |
1228 | | ScaleFilterCols = ScaleFilterCols_LSX; |
1229 | | } |
1230 | | } |
1231 | | #endif |
1232 | 3.75k | if (y > max_y) { |
1233 | 43 | y = max_y; |
1234 | 43 | } |
1235 | | |
1236 | 264k | for (j = 0; j < dst_height; ++j) { |
1237 | 260k | int yi = y >> 16; |
1238 | 260k | const uint8_t* src = src_ptr + yi * (int64_t)src_stride; |
1239 | 260k | if (filtering == kFilterLinear) { |
1240 | 78.3k | ScaleFilterCols(dst_ptr, src, dst_width, x, dx); |
1241 | 182k | } else { |
1242 | 182k | int yf = (y >> 8) & 255; |
1243 | 182k | InterpolateRow(row, src, src_stride, src_width, yf); |
1244 | 182k | ScaleFilterCols(dst_ptr, row, dst_width, x, dx); |
1245 | 182k | } |
1246 | 260k | dst_ptr += dst_stride; |
1247 | 260k | y += dy; |
1248 | 260k | if (y > max_y) { |
1249 | 4.89k | y = max_y; |
1250 | 4.89k | } |
1251 | 260k | } |
1252 | 3.75k | free_aligned_buffer_64(row); |
1253 | 3.75k | return 0; |
1254 | 3.75k | } |
1255 | | |
1256 | | static int ScalePlaneBilinearDown_16(int src_width, |
1257 | | int src_height, |
1258 | | int dst_width, |
1259 | | int dst_height, |
1260 | | int src_stride, |
1261 | | int dst_stride, |
1262 | | const uint16_t* src_ptr, |
1263 | | uint16_t* dst_ptr, |
1264 | 5.89k | enum FilterMode filtering) { |
1265 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
1266 | 5.89k | int x = 0; |
1267 | 5.89k | int y = 0; |
1268 | 5.89k | int dx = 0; |
1269 | 5.89k | int dy = 0; |
1270 | | // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. |
1271 | | // Allocate a row buffer. |
1272 | 5.89k | align_buffer_64(row, src_width * 2); |
1273 | 5.89k | if (!row) |
1274 | 0 | return 1; |
1275 | | |
1276 | 5.89k | const int max_y = (src_height - 1) << 16; |
1277 | 5.89k | int j; |
1278 | 5.89k | void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, |
1279 | 5.89k | int dst_width, int x, int dx) = |
1280 | 5.89k | (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; |
1281 | 5.89k | void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr, |
1282 | 5.89k | ptrdiff_t src_stride, int dst_width, |
1283 | 5.89k | int source_y_fraction) = InterpolateRow_16_C; |
1284 | 5.89k | ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, |
1285 | 5.89k | &dx, &dy); |
1286 | 5.89k | src_width = Abs(src_width); |
1287 | | |
1288 | | #if defined(HAS_INTERPOLATEROW_16_SSE2) |
1289 | | if (TestCpuFlag(kCpuHasSSE2)) { |
1290 | | InterpolateRow = InterpolateRow_16_Any_SSE2; |
1291 | | if (IS_ALIGNED(src_width, 16)) { |
1292 | | InterpolateRow = InterpolateRow_16_SSE2; |
1293 | | } |
1294 | | } |
1295 | | #endif |
1296 | | #if defined(HAS_INTERPOLATEROW_16_SSSE3) |
1297 | | if (TestCpuFlag(kCpuHasSSSE3)) { |
1298 | | InterpolateRow = InterpolateRow_16_Any_SSSE3; |
1299 | | if (IS_ALIGNED(src_width, 16)) { |
1300 | | InterpolateRow = InterpolateRow_16_SSSE3; |
1301 | | } |
1302 | | } |
1303 | | #endif |
1304 | | #if defined(HAS_INTERPOLATEROW_16_AVX2) |
1305 | | if (TestCpuFlag(kCpuHasAVX2)) { |
1306 | | InterpolateRow = InterpolateRow_16_Any_AVX2; |
1307 | | if (IS_ALIGNED(src_width, 32)) { |
1308 | | InterpolateRow = InterpolateRow_16_AVX2; |
1309 | | } |
1310 | | } |
1311 | | #endif |
1312 | | #if defined(HAS_INTERPOLATEROW_16_NEON) |
1313 | | if (TestCpuFlag(kCpuHasNEON)) { |
1314 | | InterpolateRow = InterpolateRow_16_Any_NEON; |
1315 | | if (IS_ALIGNED(src_width, 16)) { |
1316 | | InterpolateRow = InterpolateRow_16_NEON; |
1317 | | } |
1318 | | } |
1319 | | #endif |
1320 | | #if defined(HAS_INTERPOLATEROW_16_SME) |
1321 | | if (TestCpuFlag(kCpuHasSME)) { |
1322 | | InterpolateRow = InterpolateRow_16_SME; |
1323 | | } |
1324 | | #endif |
1325 | | |
1326 | | #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) |
1327 | | if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { |
1328 | | ScaleFilterCols = ScaleFilterCols_16_SSSE3; |
1329 | | } |
1330 | | #endif |
1331 | 5.89k | if (y > max_y) { |
1332 | 107 | y = max_y; |
1333 | 107 | } |
1334 | | |
1335 | 380k | for (j = 0; j < dst_height; ++j) { |
1336 | 374k | int yi = y >> 16; |
1337 | 374k | const uint16_t* src = src_ptr + yi * (int64_t)src_stride; |
1338 | 374k | if (filtering == kFilterLinear) { |
1339 | 231k | ScaleFilterCols(dst_ptr, src, dst_width, x, dx); |
1340 | 231k | } else { |
1341 | 143k | int yf = (y >> 8) & 255; |
1342 | 143k | InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf); |
1343 | 143k | ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx); |
1344 | 143k | } |
1345 | 374k | dst_ptr += dst_stride; |
1346 | 374k | y += dy; |
1347 | 374k | if (y > max_y) { |
1348 | 8.27k | y = max_y; |
1349 | 8.27k | } |
1350 | 374k | } |
1351 | 5.89k | free_aligned_buffer_64(row); |
1352 | 5.89k | return 0; |
1353 | 5.89k | } |
1354 | | |
1355 | | // Scale up down with bilinear interpolation. |
1356 | | static int ScalePlaneBilinearUp(int src_width, |
1357 | | int src_height, |
1358 | | int dst_width, |
1359 | | int dst_height, |
1360 | | int src_stride, |
1361 | | int dst_stride, |
1362 | | const uint8_t* src_ptr, |
1363 | | uint8_t* dst_ptr, |
1364 | 4.75k | enum FilterMode filtering) { |
1365 | 4.75k | int j; |
1366 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
1367 | 4.75k | int x = 0; |
1368 | 4.75k | int y = 0; |
1369 | 4.75k | int dx = 0; |
1370 | 4.75k | int dy = 0; |
1371 | 4.75k | const int max_y = (src_height - 1) << 16; |
1372 | 4.75k | void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, |
1373 | 4.75k | ptrdiff_t src_stride, int dst_width, |
1374 | 4.75k | int source_y_fraction) = InterpolateRow_C; |
1375 | 4.75k | void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, |
1376 | 4.75k | int dst_width, int x, int dx) = |
1377 | 4.75k | filtering ? ScaleFilterCols_C : ScaleCols_C; |
1378 | 4.75k | ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, |
1379 | 4.75k | &dx, &dy); |
1380 | 4.75k | src_width = Abs(src_width); |
1381 | | |
1382 | 4.75k | #if defined(HAS_INTERPOLATEROW_SSSE3) |
1383 | 4.75k | if (TestCpuFlag(kCpuHasSSSE3)) { |
1384 | 4.75k | InterpolateRow = InterpolateRow_Any_SSSE3; |
1385 | 4.75k | if (IS_ALIGNED(dst_width, 16)) { |
1386 | 2.63k | InterpolateRow = InterpolateRow_SSSE3; |
1387 | 2.63k | } |
1388 | 4.75k | } |
1389 | 4.75k | #endif |
1390 | 4.75k | #if defined(HAS_INTERPOLATEROW_AVX2) |
1391 | 4.75k | if (TestCpuFlag(kCpuHasAVX2)) { |
1392 | 4.75k | InterpolateRow = InterpolateRow_Any_AVX2; |
1393 | 4.75k | if (IS_ALIGNED(dst_width, 32)) { |
1394 | 2.29k | InterpolateRow = InterpolateRow_AVX2; |
1395 | 2.29k | } |
1396 | 4.75k | } |
1397 | 4.75k | #endif |
1398 | | #if defined(HAS_INTERPOLATEROW_NEON) |
1399 | | if (TestCpuFlag(kCpuHasNEON)) { |
1400 | | InterpolateRow = InterpolateRow_Any_NEON; |
1401 | | if (IS_ALIGNED(dst_width, 16)) { |
1402 | | InterpolateRow = InterpolateRow_NEON; |
1403 | | } |
1404 | | } |
1405 | | #endif |
1406 | | #if defined(HAS_INTERPOLATEROW_SME) |
1407 | | if (TestCpuFlag(kCpuHasSME)) { |
1408 | | InterpolateRow = InterpolateRow_SME; |
1409 | | } |
1410 | | #endif |
1411 | | #if defined(HAS_INTERPOLATEROW_RVV) |
1412 | | if (TestCpuFlag(kCpuHasRVV)) { |
1413 | | InterpolateRow = InterpolateRow_RVV; |
1414 | | } |
1415 | | #endif |
1416 | | |
1417 | 4.75k | if (filtering && src_width >= 32768) { |
1418 | 0 | ScaleFilterCols = ScaleFilterCols64_C; |
1419 | 0 | } |
1420 | 4.75k | #if defined(HAS_SCALEFILTERCOLS_SSSE3) |
1421 | 4.75k | if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { |
1422 | 4.75k | ScaleFilterCols = ScaleFilterCols_SSSE3; |
1423 | 4.75k | } |
1424 | 4.75k | #endif |
1425 | | #if defined(HAS_SCALEFILTERCOLS_NEON) |
1426 | | if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) { |
1427 | | ScaleFilterCols = ScaleFilterCols_Any_NEON; |
1428 | | if (IS_ALIGNED(dst_width, 8)) { |
1429 | | ScaleFilterCols = ScaleFilterCols_NEON; |
1430 | | } |
1431 | | } |
1432 | | #endif |
1433 | | #if defined(HAS_SCALEFILTERCOLS_MSA) |
1434 | | if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) { |
1435 | | ScaleFilterCols = ScaleFilterCols_Any_MSA; |
1436 | | if (IS_ALIGNED(dst_width, 16)) { |
1437 | | ScaleFilterCols = ScaleFilterCols_MSA; |
1438 | | } |
1439 | | } |
1440 | | #endif |
1441 | | #if defined(HAS_SCALEFILTERCOLS_LSX) |
1442 | | if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) { |
1443 | | ScaleFilterCols = ScaleFilterCols_Any_LSX; |
1444 | | if (IS_ALIGNED(dst_width, 16)) { |
1445 | | ScaleFilterCols = ScaleFilterCols_LSX; |
1446 | | } |
1447 | | } |
1448 | | #endif |
1449 | 4.75k | if (!filtering && src_width * 2 == dst_width && x < 0x8000) { |
1450 | 0 | ScaleFilterCols = ScaleColsUp2_C; |
1451 | | #if defined(HAS_SCALECOLS_SSE2) |
1452 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { |
1453 | | ScaleFilterCols = ScaleColsUp2_SSE2; |
1454 | | } |
1455 | | #endif |
1456 | 0 | } |
1457 | | |
1458 | 4.75k | if (y > max_y) { |
1459 | 631 | y = max_y; |
1460 | 631 | } |
1461 | 4.75k | { |
1462 | 4.75k | int yi = y >> 16; |
1463 | 4.75k | const uint8_t* src = src_ptr + yi * (int64_t)src_stride; |
1464 | | |
1465 | | // Allocate 2 row buffers. |
1466 | 4.75k | const int row_size = (dst_width + 31) & ~31; |
1467 | 4.75k | align_buffer_64(row, row_size * 2); |
1468 | 4.75k | if (!row) |
1469 | 0 | return 1; |
1470 | | |
1471 | 4.75k | uint8_t* rowptr = row; |
1472 | 4.75k | int rowstride = row_size; |
1473 | 4.75k | int lasty = yi; |
1474 | | |
1475 | 4.75k | ScaleFilterCols(rowptr, src, dst_width, x, dx); |
1476 | 4.75k | if (src_height > 1) { |
1477 | 4.11k | src += src_stride; |
1478 | 4.11k | } |
1479 | 4.75k | ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); |
1480 | 4.75k | if (src_height > 2) { |
1481 | 3.65k | src += src_stride; |
1482 | 3.65k | } |
1483 | | |
1484 | 2.17M | for (j = 0; j < dst_height; ++j) { |
1485 | 2.17M | yi = y >> 16; |
1486 | 2.17M | if (yi != lasty) { |
1487 | 300k | if (y > max_y) { |
1488 | 0 | y = max_y; |
1489 | 0 | yi = y >> 16; |
1490 | 0 | src = src_ptr + yi * (int64_t)src_stride; |
1491 | 0 | } |
1492 | 300k | if (yi != lasty) { |
1493 | 300k | ScaleFilterCols(rowptr, src, dst_width, x, dx); |
1494 | 300k | rowptr += rowstride; |
1495 | 300k | rowstride = -rowstride; |
1496 | 300k | lasty = yi; |
1497 | 300k | if ((y + 65536) < max_y) { |
1498 | 296k | src += src_stride; |
1499 | 296k | } |
1500 | 300k | } |
1501 | 300k | } |
1502 | 2.17M | if (filtering == kFilterLinear) { |
1503 | 92.8k | InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); |
1504 | 2.07M | } else { |
1505 | 2.07M | int yf = (y >> 8) & 255; |
1506 | 2.07M | InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); |
1507 | 2.07M | } |
1508 | 2.17M | dst_ptr += dst_stride; |
1509 | 2.17M | y += dy; |
1510 | 2.17M | } |
1511 | 4.75k | free_aligned_buffer_64(row); |
1512 | 4.75k | } |
1513 | 0 | return 0; |
1514 | 4.75k | } |
1515 | | |
1516 | | // Scale plane, horizontally up by 2 times. |
1517 | | // Uses linear filter horizontally, nearest vertically. |
1518 | | // This is an optimized version for scaling up a plane to 2 times of |
1519 | | // its original width, using linear interpolation. |
1520 | | // This is used to scale U and V planes of I422 to I444. |
1521 | | static void ScalePlaneUp2_Linear(int src_width, |
1522 | | int src_height, |
1523 | | int dst_width, |
1524 | | int dst_height, |
1525 | | int src_stride, |
1526 | | int dst_stride, |
1527 | | const uint8_t* src_ptr, |
1528 | 225 | uint8_t* dst_ptr) { |
1529 | 225 | void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = |
1530 | 225 | ScaleRowUp2_Linear_Any_C; |
1531 | 225 | int i; |
1532 | 225 | int y; |
1533 | 225 | int dy; |
1534 | | |
1535 | 225 | (void)src_width; |
1536 | | // This function can only scale up by 2 times horizontally. |
1537 | 225 | assert(src_width == ((dst_width + 1) / 2)); |
1538 | | |
1539 | 225 | #ifdef HAS_SCALEROWUP2_LINEAR_SSE2 |
1540 | 225 | if (TestCpuFlag(kCpuHasSSE2)) { |
1541 | 225 | ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; |
1542 | 225 | } |
1543 | 225 | #endif |
1544 | | |
1545 | 225 | #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3 |
1546 | 225 | if (TestCpuFlag(kCpuHasSSSE3)) { |
1547 | 225 | ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; |
1548 | 225 | } |
1549 | 225 | #endif |
1550 | | |
1551 | 225 | #ifdef HAS_SCALEROWUP2_LINEAR_AVX2 |
1552 | 225 | if (TestCpuFlag(kCpuHasAVX2)) { |
1553 | 225 | ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; |
1554 | 225 | } |
1555 | 225 | #endif |
1556 | | |
1557 | | #ifdef HAS_SCALEROWUP2_LINEAR_NEON |
1558 | | if (TestCpuFlag(kCpuHasNEON)) { |
1559 | | ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; |
1560 | | } |
1561 | | #endif |
1562 | | #ifdef HAS_SCALEROWUP2_LINEAR_RVV |
1563 | | if (TestCpuFlag(kCpuHasRVV)) { |
1564 | | ScaleRowUp = ScaleRowUp2_Linear_RVV; |
1565 | | } |
1566 | | #endif |
1567 | | |
1568 | 225 | if (dst_height == 1) { |
1569 | 37 | ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, |
1570 | 37 | dst_width); |
1571 | 188 | } else { |
1572 | 188 | dy = FixedDiv(src_height - 1, dst_height - 1); |
1573 | 188 | y = (1 << 15) - 1; |
1574 | 122k | for (i = 0; i < dst_height; ++i) { |
1575 | 122k | ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); |
1576 | 122k | dst_ptr += dst_stride; |
1577 | 122k | y += dy; |
1578 | 122k | } |
1579 | 188 | } |
1580 | 225 | } |
1581 | | |
1582 | | // Scale plane, up by 2 times. |
1583 | | // This is an optimized version for scaling up a plane to 2 times of |
1584 | | // its original size, using bilinear interpolation. |
1585 | | // This is used to scale U and V planes of I420 to I444. |
1586 | | static void ScalePlaneUp2_Bilinear(int src_width, |
1587 | | int src_height, |
1588 | | int dst_width, |
1589 | | int dst_height, |
1590 | | int src_stride, |
1591 | | int dst_stride, |
1592 | | const uint8_t* src_ptr, |
1593 | 277 | uint8_t* dst_ptr) { |
1594 | 277 | void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, |
1595 | 277 | uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = |
1596 | 277 | ScaleRowUp2_Bilinear_Any_C; |
1597 | 277 | int x; |
1598 | | |
1599 | 277 | (void)src_width; |
1600 | | // This function can only scale up by 2 times. |
1601 | 277 | assert(src_width == ((dst_width + 1) / 2)); |
1602 | 277 | assert(src_height == ((dst_height + 1) / 2)); |
1603 | | |
1604 | 277 | #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2 |
1605 | 277 | if (TestCpuFlag(kCpuHasSSE2)) { |
1606 | 277 | Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; |
1607 | 277 | } |
1608 | 277 | #endif |
1609 | | |
1610 | 277 | #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3 |
1611 | 277 | if (TestCpuFlag(kCpuHasSSSE3)) { |
1612 | 277 | Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; |
1613 | 277 | } |
1614 | 277 | #endif |
1615 | | |
1616 | 277 | #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2 |
1617 | 277 | if (TestCpuFlag(kCpuHasAVX2)) { |
1618 | 277 | Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; |
1619 | 277 | } |
1620 | 277 | #endif |
1621 | | |
1622 | | #ifdef HAS_SCALEROWUP2_BILINEAR_NEON |
1623 | | if (TestCpuFlag(kCpuHasNEON)) { |
1624 | | Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; |
1625 | | } |
1626 | | #endif |
1627 | | #ifdef HAS_SCALEROWUP2_BILINEAR_RVV |
1628 | | if (TestCpuFlag(kCpuHasRVV)) { |
1629 | | Scale2RowUp = ScaleRowUp2_Bilinear_RVV; |
1630 | | } |
1631 | | #endif |
1632 | | |
1633 | 277 | Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); |
1634 | 277 | dst_ptr += dst_stride; |
1635 | 15.7k | for (x = 0; x < src_height - 1; ++x) { |
1636 | 15.4k | Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width); |
1637 | 15.4k | src_ptr += src_stride; |
1638 | | // TODO(fbarchard): Test performance of writing one row of destination at a |
1639 | | // time. |
1640 | 15.4k | dst_ptr += 2 * dst_stride; |
1641 | 15.4k | } |
1642 | 277 | if (!(dst_height & 1)) { |
1643 | 146 | Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); |
1644 | 146 | } |
1645 | 277 | } |
1646 | | |
1647 | | // Scale at most 14 bit plane, horizontally up by 2 times. |
1648 | | // This is an optimized version for scaling up a plane to 2 times of |
1649 | | // its original width, using linear interpolation. |
1650 | | // stride is in count of uint16_t. |
1651 | | // This is used to scale U and V planes of I210 to I410 and I212 to I412. |
1652 | | static void ScalePlaneUp2_12_Linear(int src_width, |
1653 | | int src_height, |
1654 | | int dst_width, |
1655 | | int dst_height, |
1656 | | int src_stride, |
1657 | | int dst_stride, |
1658 | | const uint16_t* src_ptr, |
1659 | 272 | uint16_t* dst_ptr) { |
1660 | 272 | void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, |
1661 | 272 | int dst_width) = ScaleRowUp2_Linear_16_Any_C; |
1662 | 272 | int i; |
1663 | 272 | int y; |
1664 | 272 | int dy; |
1665 | | |
1666 | 272 | (void)src_width; |
1667 | | // This function can only scale up by 2 times horizontally. |
1668 | 272 | assert(src_width == ((dst_width + 1) / 2)); |
1669 | | |
1670 | 272 | #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3 |
1671 | 272 | if (TestCpuFlag(kCpuHasSSSE3)) { |
1672 | 272 | ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; |
1673 | 272 | } |
1674 | 272 | #endif |
1675 | | |
1676 | 272 | #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2 |
1677 | 272 | if (TestCpuFlag(kCpuHasAVX2)) { |
1678 | 272 | ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; |
1679 | 272 | } |
1680 | 272 | #endif |
1681 | | |
1682 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON |
1683 | | if (TestCpuFlag(kCpuHasNEON)) { |
1684 | | ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; |
1685 | | } |
1686 | | #endif |
1687 | | |
1688 | 272 | if (dst_height == 1) { |
1689 | 19 | ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, |
1690 | 19 | dst_width); |
1691 | 253 | } else { |
1692 | 253 | dy = FixedDiv(src_height - 1, dst_height - 1); |
1693 | 253 | y = (1 << 15) - 1; |
1694 | 198k | for (i = 0; i < dst_height; ++i) { |
1695 | 198k | ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); |
1696 | 198k | dst_ptr += dst_stride; |
1697 | 198k | y += dy; |
1698 | 198k | } |
1699 | 253 | } |
1700 | 272 | } |
1701 | | |
1702 | | // Scale at most 12 bit plane, up by 2 times. |
1703 | | // This is an optimized version for scaling up a plane to 2 times of |
1704 | | // its original size, using bilinear interpolation. |
1705 | | // stride is in count of uint16_t. |
1706 | | // This is used to scale U and V planes of I010 to I410 and I012 to I412. |
1707 | | static void ScalePlaneUp2_12_Bilinear(int src_width, |
1708 | | int src_height, |
1709 | | int dst_width, |
1710 | | int dst_height, |
1711 | | int src_stride, |
1712 | | int dst_stride, |
1713 | | const uint16_t* src_ptr, |
1714 | 357 | uint16_t* dst_ptr) { |
1715 | 357 | void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
1716 | 357 | uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = |
1717 | 357 | ScaleRowUp2_Bilinear_16_Any_C; |
1718 | 357 | int x; |
1719 | | |
1720 | 357 | (void)src_width; |
1721 | | // This function can only scale up by 2 times. |
1722 | 357 | assert(src_width == ((dst_width + 1) / 2)); |
1723 | 357 | assert(src_height == ((dst_height + 1) / 2)); |
1724 | | |
1725 | 357 | #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3 |
1726 | 357 | if (TestCpuFlag(kCpuHasSSSE3)) { |
1727 | 357 | Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; |
1728 | 357 | } |
1729 | 357 | #endif |
1730 | | |
1731 | 357 | #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2 |
1732 | 357 | if (TestCpuFlag(kCpuHasAVX2)) { |
1733 | 357 | Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; |
1734 | 357 | } |
1735 | 357 | #endif |
1736 | | |
1737 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON |
1738 | | if (TestCpuFlag(kCpuHasNEON)) { |
1739 | | Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; |
1740 | | } |
1741 | | #endif |
1742 | | |
1743 | 357 | Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); |
1744 | 357 | dst_ptr += dst_stride; |
1745 | 16.1k | for (x = 0; x < src_height - 1; ++x) { |
1746 | 15.8k | Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width); |
1747 | 15.8k | src_ptr += src_stride; |
1748 | 15.8k | dst_ptr += 2 * dst_stride; |
1749 | 15.8k | } |
1750 | 357 | if (!(dst_height & 1)) { |
1751 | 115 | Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); |
1752 | 115 | } |
1753 | 357 | } |
1754 | | |
1755 | | static void ScalePlaneUp2_16_Linear(int src_width, |
1756 | | int src_height, |
1757 | | int dst_width, |
1758 | | int dst_height, |
1759 | | int src_stride, |
1760 | | int dst_stride, |
1761 | | const uint16_t* src_ptr, |
1762 | 0 | uint16_t* dst_ptr) { |
1763 | 0 | void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, |
1764 | 0 | int dst_width) = ScaleRowUp2_Linear_16_Any_C; |
1765 | 0 | int i; |
1766 | 0 | int y; |
1767 | 0 | int dy; |
1768 | |
|
1769 | 0 | (void)src_width; |
1770 | | // This function can only scale up by 2 times horizontally. |
1771 | 0 | assert(src_width == ((dst_width + 1) / 2)); |
1772 | |
|
1773 | 0 | #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2 |
1774 | 0 | if (TestCpuFlag(kCpuHasSSE2)) { |
1775 | 0 | ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2; |
1776 | 0 | } |
1777 | 0 | #endif |
1778 | |
|
1779 | 0 | #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2 |
1780 | 0 | if (TestCpuFlag(kCpuHasAVX2)) { |
1781 | 0 | ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2; |
1782 | 0 | } |
1783 | 0 | #endif |
1784 | |
|
1785 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON |
1786 | | if (TestCpuFlag(kCpuHasNEON)) { |
1787 | | ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON; |
1788 | | } |
1789 | | #endif |
1790 | |
|
1791 | 0 | if (dst_height == 1) { |
1792 | 0 | ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, |
1793 | 0 | dst_width); |
1794 | 0 | } else { |
1795 | 0 | dy = FixedDiv(src_height - 1, dst_height - 1); |
1796 | 0 | y = (1 << 15) - 1; |
1797 | 0 | for (i = 0; i < dst_height; ++i) { |
1798 | 0 | ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); |
1799 | 0 | dst_ptr += dst_stride; |
1800 | 0 | y += dy; |
1801 | 0 | } |
1802 | 0 | } |
1803 | 0 | } |
1804 | | |
1805 | | static void ScalePlaneUp2_16_Bilinear(int src_width, |
1806 | | int src_height, |
1807 | | int dst_width, |
1808 | | int dst_height, |
1809 | | int src_stride, |
1810 | | int dst_stride, |
1811 | | const uint16_t* src_ptr, |
1812 | 0 | uint16_t* dst_ptr) { |
1813 | 0 | void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, |
1814 | 0 | uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = |
1815 | 0 | ScaleRowUp2_Bilinear_16_Any_C; |
1816 | 0 | int x; |
1817 | |
|
1818 | 0 | (void)src_width; |
1819 | | // This function can only scale up by 2 times. |
1820 | 0 | assert(src_width == ((dst_width + 1) / 2)); |
1821 | 0 | assert(src_height == ((dst_height + 1) / 2)); |
1822 | |
|
1823 | 0 | #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2 |
1824 | 0 | if (TestCpuFlag(kCpuHasSSE2)) { |
1825 | 0 | Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2; |
1826 | 0 | } |
1827 | 0 | #endif |
1828 | |
|
1829 | 0 | #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2 |
1830 | 0 | if (TestCpuFlag(kCpuHasAVX2)) { |
1831 | 0 | Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2; |
1832 | 0 | } |
1833 | 0 | #endif |
1834 | |
|
1835 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON |
1836 | | if (TestCpuFlag(kCpuHasNEON)) { |
1837 | | Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON; |
1838 | | } |
1839 | | #endif |
1840 | |
|
1841 | 0 | Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); |
1842 | 0 | dst_ptr += dst_stride; |
1843 | 0 | for (x = 0; x < src_height - 1; ++x) { |
1844 | 0 | Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width); |
1845 | 0 | src_ptr += src_stride; |
1846 | 0 | dst_ptr += 2 * dst_stride; |
1847 | 0 | } |
1848 | 0 | if (!(dst_height & 1)) { |
1849 | 0 | Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); |
1850 | 0 | } |
1851 | 0 | } |
1852 | | |
1853 | | static int ScalePlaneBilinearUp_16(int src_width, |
1854 | | int src_height, |
1855 | | int dst_width, |
1856 | | int dst_height, |
1857 | | int src_stride, |
1858 | | int dst_stride, |
1859 | | const uint16_t* src_ptr, |
1860 | | uint16_t* dst_ptr, |
1861 | 4.43k | enum FilterMode filtering) { |
1862 | 4.43k | int j; |
1863 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
1864 | 4.43k | int x = 0; |
1865 | 4.43k | int y = 0; |
1866 | 4.43k | int dx = 0; |
1867 | 4.43k | int dy = 0; |
1868 | 4.43k | const int max_y = (src_height - 1) << 16; |
1869 | 4.43k | void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr, |
1870 | 4.43k | ptrdiff_t src_stride, int dst_width, |
1871 | 4.43k | int source_y_fraction) = InterpolateRow_16_C; |
1872 | 4.43k | void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, |
1873 | 4.43k | int dst_width, int x, int dx) = |
1874 | 4.43k | filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; |
1875 | 4.43k | ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, |
1876 | 4.43k | &dx, &dy); |
1877 | 4.43k | src_width = Abs(src_width); |
1878 | | |
1879 | | #if defined(HAS_INTERPOLATEROW_16_SSE2) |
1880 | | if (TestCpuFlag(kCpuHasSSE2)) { |
1881 | | InterpolateRow = InterpolateRow_16_Any_SSE2; |
1882 | | if (IS_ALIGNED(dst_width, 16)) { |
1883 | | InterpolateRow = InterpolateRow_16_SSE2; |
1884 | | } |
1885 | | } |
1886 | | #endif |
1887 | | #if defined(HAS_INTERPOLATEROW_16_SSSE3) |
1888 | | if (TestCpuFlag(kCpuHasSSSE3)) { |
1889 | | InterpolateRow = InterpolateRow_16_Any_SSSE3; |
1890 | | if (IS_ALIGNED(dst_width, 16)) { |
1891 | | InterpolateRow = InterpolateRow_16_SSSE3; |
1892 | | } |
1893 | | } |
1894 | | #endif |
1895 | | #if defined(HAS_INTERPOLATEROW_16_AVX2) |
1896 | | if (TestCpuFlag(kCpuHasAVX2)) { |
1897 | | InterpolateRow = InterpolateRow_16_Any_AVX2; |
1898 | | if (IS_ALIGNED(dst_width, 32)) { |
1899 | | InterpolateRow = InterpolateRow_16_AVX2; |
1900 | | } |
1901 | | } |
1902 | | #endif |
1903 | | #if defined(HAS_INTERPOLATEROW_16_NEON) |
1904 | | if (TestCpuFlag(kCpuHasNEON)) { |
1905 | | InterpolateRow = InterpolateRow_16_Any_NEON; |
1906 | | if (IS_ALIGNED(dst_width, 16)) { |
1907 | | InterpolateRow = InterpolateRow_16_NEON; |
1908 | | } |
1909 | | } |
1910 | | #endif |
1911 | | #if defined(HAS_INTERPOLATEROW_16_SME) |
1912 | | if (TestCpuFlag(kCpuHasSME)) { |
1913 | | InterpolateRow = InterpolateRow_16_SME; |
1914 | | } |
1915 | | #endif |
1916 | | |
1917 | 4.43k | if (filtering && src_width >= 32768) { |
1918 | 0 | ScaleFilterCols = ScaleFilterCols64_16_C; |
1919 | 0 | } |
1920 | | #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) |
1921 | | if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { |
1922 | | ScaleFilterCols = ScaleFilterCols_16_SSSE3; |
1923 | | } |
1924 | | #endif |
1925 | 4.43k | if (!filtering && src_width * 2 == dst_width && x < 0x8000) { |
1926 | 0 | ScaleFilterCols = ScaleColsUp2_16_C; |
1927 | | #if defined(HAS_SCALECOLS_16_SSE2) |
1928 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { |
1929 | | ScaleFilterCols = ScaleColsUp2_16_SSE2; |
1930 | | } |
1931 | | #endif |
1932 | 0 | } |
1933 | 4.43k | if (y > max_y) { |
1934 | 651 | y = max_y; |
1935 | 651 | } |
1936 | 4.43k | { |
1937 | 4.43k | int yi = y >> 16; |
1938 | 4.43k | const uint16_t* src = src_ptr + yi * (int64_t)src_stride; |
1939 | | |
1940 | | // Allocate 2 row buffers. |
1941 | 4.43k | const int row_size = (dst_width + 31) & ~31; |
1942 | 4.43k | align_buffer_64(row, row_size * 4); |
1943 | 4.43k | int rowstride = row_size; |
1944 | 4.43k | int lasty = yi; |
1945 | 4.43k | uint16_t* rowptr = (uint16_t*)row; |
1946 | 4.43k | if (!row) |
1947 | 0 | return 1; |
1948 | | |
1949 | 4.43k | ScaleFilterCols(rowptr, src, dst_width, x, dx); |
1950 | 4.43k | if (src_height > 1) { |
1951 | 3.78k | src += src_stride; |
1952 | 3.78k | } |
1953 | 4.43k | ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); |
1954 | 4.43k | if (src_height > 2) { |
1955 | 2.95k | src += src_stride; |
1956 | 2.95k | } |
1957 | | |
1958 | 2.45M | for (j = 0; j < dst_height; ++j) { |
1959 | 2.44M | yi = y >> 16; |
1960 | 2.44M | if (yi != lasty) { |
1961 | 193k | if (y > max_y) { |
1962 | 0 | y = max_y; |
1963 | 0 | yi = y >> 16; |
1964 | 0 | src = src_ptr + yi * (int64_t)src_stride; |
1965 | 0 | } |
1966 | 193k | if (yi != lasty) { |
1967 | 193k | ScaleFilterCols(rowptr, src, dst_width, x, dx); |
1968 | 193k | rowptr += rowstride; |
1969 | 193k | rowstride = -rowstride; |
1970 | 193k | lasty = yi; |
1971 | 193k | if ((y + 65536) < max_y) { |
1972 | 190k | src += src_stride; |
1973 | 190k | } |
1974 | 193k | } |
1975 | 193k | } |
1976 | 2.44M | if (filtering == kFilterLinear) { |
1977 | 249k | InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); |
1978 | 2.19M | } else { |
1979 | 2.19M | int yf = (y >> 8) & 255; |
1980 | 2.19M | InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); |
1981 | 2.19M | } |
1982 | 2.44M | dst_ptr += dst_stride; |
1983 | 2.44M | y += dy; |
1984 | 2.44M | } |
1985 | 4.43k | free_aligned_buffer_64(row); |
1986 | 4.43k | } |
1987 | 0 | return 0; |
1988 | 4.43k | } |
1989 | | |
1990 | | // Scale Plane to/from any dimensions, without interpolation. |
1991 | | // Fixed point math is used for performance: The upper 16 bits |
1992 | | // of x and dx is the integer part of the source position and |
1993 | | // the lower 16 bits are the fixed decimal part. |
1994 | | |
1995 | | static void ScalePlaneSimple(int src_width, |
1996 | | int src_height, |
1997 | | int dst_width, |
1998 | | int dst_height, |
1999 | | int src_stride, |
2000 | | int dst_stride, |
2001 | | const uint8_t* src_ptr, |
2002 | 988 | uint8_t* dst_ptr) { |
2003 | 988 | int i; |
2004 | 988 | void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, |
2005 | 988 | int x, int dx) = ScaleCols_C; |
2006 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
2007 | 988 | int x = 0; |
2008 | 988 | int y = 0; |
2009 | 988 | int dx = 0; |
2010 | 988 | int dy = 0; |
2011 | 988 | ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, |
2012 | 988 | &dx, &dy); |
2013 | 988 | src_width = Abs(src_width); |
2014 | | |
2015 | 988 | if (src_width * 2 == dst_width && x < 0x8000) { |
2016 | 62 | ScaleCols = ScaleColsUp2_C; |
2017 | | #if defined(HAS_SCALECOLS_SSE2) |
2018 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { |
2019 | | ScaleCols = ScaleColsUp2_SSE2; |
2020 | | } |
2021 | | #endif |
2022 | 62 | } |
2023 | | |
2024 | 648k | for (i = 0; i < dst_height; ++i) { |
2025 | 647k | ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, |
2026 | 647k | dx); |
2027 | 647k | dst_ptr += dst_stride; |
2028 | 647k | y += dy; |
2029 | 647k | } |
2030 | 988 | } |
2031 | | |
2032 | | static void ScalePlaneSimple_16(int src_width, |
2033 | | int src_height, |
2034 | | int dst_width, |
2035 | | int dst_height, |
2036 | | int src_stride, |
2037 | | int dst_stride, |
2038 | | const uint16_t* src_ptr, |
2039 | 844 | uint16_t* dst_ptr) { |
2040 | 844 | int i; |
2041 | 844 | void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, |
2042 | 844 | int x, int dx) = ScaleCols_16_C; |
2043 | | // Initial source x/y coordinate and step values as 16.16 fixed point. |
2044 | 844 | int x = 0; |
2045 | 844 | int y = 0; |
2046 | 844 | int dx = 0; |
2047 | 844 | int dy = 0; |
2048 | 844 | ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, |
2049 | 844 | &dx, &dy); |
2050 | 844 | src_width = Abs(src_width); |
2051 | | |
2052 | 844 | if (src_width * 2 == dst_width && x < 0x8000) { |
2053 | 66 | ScaleCols = ScaleColsUp2_16_C; |
2054 | | #if defined(HAS_SCALECOLS_16_SSE2) |
2055 | | if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { |
2056 | | ScaleCols = ScaleColsUp2_16_SSE2; |
2057 | | } |
2058 | | #endif |
2059 | 66 | } |
2060 | | |
2061 | 537k | for (i = 0; i < dst_height; ++i) { |
2062 | 536k | ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, |
2063 | 536k | dx); |
2064 | 536k | dst_ptr += dst_stride; |
2065 | 536k | y += dy; |
2066 | 536k | } |
2067 | 844 | } |
2068 | | |
2069 | | // Scale a plane. |
2070 | | // This function dispatches to a specialized scaler based on scale factor. |
2071 | | LIBYUV_API |
2072 | | int ScalePlane(const uint8_t* src, |
2073 | | int src_stride, |
2074 | | int src_width, |
2075 | | int src_height, |
2076 | | uint8_t* dst, |
2077 | | int dst_stride, |
2078 | | int dst_width, |
2079 | | int dst_height, |
2080 | 16.6k | enum FilterMode filtering) { |
2081 | | // Simplify filtering when possible. |
2082 | 16.6k | filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, |
2083 | 16.6k | filtering); |
2084 | | |
2085 | | // Negative height means invert the image. |
2086 | 16.6k | if (src_height < 0) { |
2087 | 0 | src_height = -src_height; |
2088 | 0 | src = src + (src_height - 1) * (int64_t)src_stride; |
2089 | 0 | src_stride = -src_stride; |
2090 | 0 | } |
2091 | | // Use specialized scales to improve performance for common resolutions. |
2092 | | // For example, all the 1/2 scalings will use ScalePlaneDown2() |
2093 | 16.6k | if (dst_width == src_width && dst_height == src_height) { |
2094 | | // Straight copy. |
2095 | 92 | CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); |
2096 | 92 | return 0; |
2097 | 92 | } |
2098 | 16.5k | if (dst_width == src_width && filtering != kFilterBox) { |
2099 | 5.25k | int dy = 0; |
2100 | 5.25k | int y = 0; |
2101 | | // When scaling down, use the center 2 rows to filter. |
2102 | | // When scaling up, last row of destination uses the last 2 source rows. |
2103 | 5.25k | if (dst_height <= src_height) { |
2104 | 634 | dy = FixedDiv(src_height, dst_height); |
2105 | 634 | y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. |
2106 | 4.61k | } else if (src_height > 1 && dst_height > 1) { |
2107 | 4.47k | dy = FixedDiv1(src_height, dst_height); |
2108 | 4.47k | } |
2109 | | // Arbitrary scale vertically, but unscaled horizontally. |
2110 | 5.25k | ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, |
2111 | 5.25k | dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); |
2112 | 5.25k | return 0; |
2113 | 5.25k | } |
2114 | 11.3k | if (dst_width <= Abs(src_width) && dst_height <= src_height) { |
2115 | | // Scale down. |
2116 | 2.55k | if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { |
2117 | | // optimized, 3/4 |
2118 | 27 | ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, |
2119 | 27 | dst_stride, src, dst, filtering); |
2120 | 27 | return 0; |
2121 | 27 | } |
2122 | 2.53k | if (2 * dst_width == src_width && 2 * dst_height == src_height) { |
2123 | | // optimized, 1/2 |
2124 | 65 | ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, |
2125 | 65 | dst_stride, src, dst, filtering); |
2126 | 65 | return 0; |
2127 | 65 | } |
2128 | | // 3/8 rounded up for odd sized chroma height. |
2129 | 2.46k | if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { |
2130 | | // optimized, 3/8 |
2131 | 22 | ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, |
2132 | 22 | dst_stride, src, dst, filtering); |
2133 | 22 | return 0; |
2134 | 22 | } |
2135 | 2.44k | if (4 * dst_width == src_width && 4 * dst_height == src_height && |
2136 | 2.44k | (filtering == kFilterBox || filtering == kFilterNone)) { |
2137 | | // optimized, 1/4 |
2138 | 44 | ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, |
2139 | 44 | dst_stride, src, dst, filtering); |
2140 | 44 | return 0; |
2141 | 44 | } |
2142 | 2.44k | } |
2143 | 11.1k | if (filtering == kFilterBox && dst_height * 2 < src_height) { |
2144 | 1.18k | return ScalePlaneBox(src_width, src_height, dst_width, dst_height, |
2145 | 1.18k | src_stride, dst_stride, src, dst); |
2146 | 1.18k | } |
2147 | 9.99k | if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { |
2148 | 225 | ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height, |
2149 | 225 | src_stride, dst_stride, src, dst); |
2150 | 225 | return 0; |
2151 | 225 | } |
2152 | 9.77k | if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && |
2153 | 9.77k | (filtering == kFilterBilinear || filtering == kFilterBox)) { |
2154 | 277 | ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height, |
2155 | 277 | src_stride, dst_stride, src, dst); |
2156 | 277 | return 0; |
2157 | 277 | } |
2158 | 9.49k | if (filtering && dst_height > src_height) { |
2159 | 4.75k | return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, |
2160 | 4.75k | src_stride, dst_stride, src, dst, filtering); |
2161 | 4.75k | } |
2162 | 4.74k | if (filtering) { |
2163 | 3.75k | return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, |
2164 | 3.75k | src_stride, dst_stride, src, dst, filtering); |
2165 | 3.75k | } |
2166 | 988 | ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, |
2167 | 988 | dst_stride, src, dst); |
2168 | 988 | return 0; |
2169 | 4.74k | } |
2170 | | |
2171 | | LIBYUV_API |
2172 | | int ScalePlane_16(const uint16_t* src, |
2173 | | int src_stride, |
2174 | | int src_width, |
2175 | | int src_height, |
2176 | | uint16_t* dst, |
2177 | | int dst_stride, |
2178 | | int dst_width, |
2179 | | int dst_height, |
2180 | 14.5k | enum FilterMode filtering) { |
2181 | | // Simplify filtering when possible. |
2182 | 14.5k | filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, |
2183 | 14.5k | filtering); |
2184 | | |
2185 | | // Negative height means invert the image. |
2186 | 14.5k | if (src_height < 0) { |
2187 | 0 | src_height = -src_height; |
2188 | 0 | src = src + (src_height - 1) * (int64_t)src_stride; |
2189 | 0 | src_stride = -src_stride; |
2190 | 0 | } |
2191 | | // Use specialized scales to improve performance for common resolutions. |
2192 | | // For example, all the 1/2 scalings will use ScalePlaneDown2() |
2193 | 14.5k | if (dst_width == src_width && dst_height == src_height) { |
2194 | | // Straight copy. |
2195 | 362 | CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); |
2196 | 362 | return 0; |
2197 | 362 | } |
2198 | 14.2k | if (dst_width == src_width && filtering != kFilterBox) { |
2199 | 1.63k | int dy = 0; |
2200 | 1.63k | int y = 0; |
2201 | | // When scaling down, use the center 2 rows to filter. |
2202 | | // When scaling up, last row of destination uses the last 2 source rows. |
2203 | 1.63k | if (dst_height <= src_height) { |
2204 | 616 | dy = FixedDiv(src_height, dst_height); |
2205 | 616 | y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. |
2206 | | // When scaling up, ensure the last row of destination uses the last |
2207 | | // source. Avoid divide by zero for dst_height but will do no scaling |
2208 | | // later. |
2209 | 1.01k | } else if (src_height > 1 && dst_height > 1) { |
2210 | 931 | dy = FixedDiv1(src_height, dst_height); |
2211 | 931 | } |
2212 | | // Arbitrary scale vertically, but unscaled horizontally. |
2213 | 1.63k | ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, |
2214 | 1.63k | dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); |
2215 | 1.63k | return 0; |
2216 | 1.63k | } |
2217 | 12.5k | if (dst_width <= Abs(src_width) && dst_height <= src_height) { |
2218 | | // Scale down. |
2219 | 4.42k | if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { |
2220 | | // optimized, 3/4 |
2221 | 30 | ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, |
2222 | 30 | src_stride, dst_stride, src, dst, filtering); |
2223 | 30 | return 0; |
2224 | 30 | } |
2225 | 4.39k | if (2 * dst_width == src_width && 2 * dst_height == src_height) { |
2226 | | // optimized, 1/2 |
2227 | 74 | ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, |
2228 | 74 | src_stride, dst_stride, src, dst, filtering); |
2229 | 74 | return 0; |
2230 | 74 | } |
2231 | | // 3/8 rounded up for odd sized chroma height. |
2232 | 4.32k | if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { |
2233 | | // optimized, 3/8 |
2234 | 24 | ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, |
2235 | 24 | src_stride, dst_stride, src, dst, filtering); |
2236 | 24 | return 0; |
2237 | 24 | } |
2238 | 4.30k | if (4 * dst_width == src_width && 4 * dst_height == src_height && |
2239 | 4.30k | (filtering == kFilterBox || filtering == kFilterNone)) { |
2240 | | // optimized, 1/4 |
2241 | 38 | ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, |
2242 | 38 | src_stride, dst_stride, src, dst, filtering); |
2243 | 38 | return 0; |
2244 | 38 | } |
2245 | 4.30k | } |
2246 | 12.4k | if (filtering == kFilterBox && dst_height * 2 < src_height) { |
2247 | 1.25k | return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, |
2248 | 1.25k | src_stride, dst_stride, src, dst); |
2249 | 1.25k | } |
2250 | 11.1k | if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { |
2251 | 0 | ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height, |
2252 | 0 | src_stride, dst_stride, src, dst); |
2253 | 0 | return 0; |
2254 | 0 | } |
2255 | 11.1k | if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && |
2256 | 11.1k | (filtering == kFilterBilinear || filtering == kFilterBox)) { |
2257 | 0 | ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height, |
2258 | 0 | src_stride, dst_stride, src, dst); |
2259 | 0 | return 0; |
2260 | 0 | } |
2261 | 11.1k | if (filtering && dst_height > src_height) { |
2262 | 4.43k | return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, |
2263 | 4.43k | src_stride, dst_stride, src, dst, filtering); |
2264 | 4.43k | } |
2265 | 6.74k | if (filtering) { |
2266 | 5.89k | return ScalePlaneBilinearDown_16(src_width, src_height, dst_width, |
2267 | 5.89k | dst_height, src_stride, dst_stride, src, |
2268 | 5.89k | dst, filtering); |
2269 | 5.89k | } |
2270 | 844 | ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, |
2271 | 844 | dst_stride, src, dst); |
2272 | 844 | return 0; |
2273 | 6.74k | } |
2274 | | |
2275 | | LIBYUV_API |
2276 | | int ScalePlane_12(const uint16_t* src, |
2277 | | int src_stride, |
2278 | | int src_width, |
2279 | | int src_height, |
2280 | | uint16_t* dst, |
2281 | | int dst_stride, |
2282 | | int dst_width, |
2283 | | int dst_height, |
2284 | 15.2k | enum FilterMode filtering) { |
2285 | | // Simplify filtering when possible. |
2286 | 15.2k | filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, |
2287 | 15.2k | filtering); |
2288 | | |
2289 | | // Negative height means invert the image. |
2290 | 15.2k | if (src_height < 0) { |
2291 | 0 | src_height = -src_height; |
2292 | 0 | src = src + (src_height - 1) * (int64_t)src_stride; |
2293 | 0 | src_stride = -src_stride; |
2294 | 0 | } |
2295 | | |
2296 | 15.2k | if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { |
2297 | 272 | ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height, |
2298 | 272 | src_stride, dst_stride, src, dst); |
2299 | 272 | return 0; |
2300 | 272 | } |
2301 | 14.9k | if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && |
2302 | 14.9k | (filtering == kFilterBilinear || filtering == kFilterBox)) { |
2303 | 357 | ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height, |
2304 | 357 | src_stride, dst_stride, src, dst); |
2305 | 357 | return 0; |
2306 | 357 | } |
2307 | | |
2308 | 14.5k | return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, |
2309 | 14.5k | dst_width, dst_height, filtering); |
2310 | 14.9k | } |
2311 | | |
2312 | | // Scale an I420 image. |
2313 | | // This function in turn calls a scaling function for each plane. |
2314 | | |
2315 | | LIBYUV_API |
2316 | | int I420Scale(const uint8_t* src_y, |
2317 | | int src_stride_y, |
2318 | | const uint8_t* src_u, |
2319 | | int src_stride_u, |
2320 | | const uint8_t* src_v, |
2321 | | int src_stride_v, |
2322 | | int src_width, |
2323 | | int src_height, |
2324 | | uint8_t* dst_y, |
2325 | | int dst_stride_y, |
2326 | | uint8_t* dst_u, |
2327 | | int dst_stride_u, |
2328 | | uint8_t* dst_v, |
2329 | | int dst_stride_v, |
2330 | | int dst_width, |
2331 | | int dst_height, |
2332 | 0 | enum FilterMode filtering) { |
2333 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2334 | 0 | int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
2335 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2336 | 0 | int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
2337 | 0 | int r; |
2338 | |
|
2339 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2340 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2341 | 0 | dst_width <= 0 || dst_height <= 0) { |
2342 | 0 | return -1; |
2343 | 0 | } |
2344 | | |
2345 | 0 | r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, |
2346 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2347 | 0 | if (r != 0) { |
2348 | 0 | return r; |
2349 | 0 | } |
2350 | 0 | r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, |
2351 | 0 | dst_stride_u, dst_halfwidth, dst_halfheight, filtering); |
2352 | 0 | if (r != 0) { |
2353 | 0 | return r; |
2354 | 0 | } |
2355 | 0 | r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, |
2356 | 0 | dst_stride_v, dst_halfwidth, dst_halfheight, filtering); |
2357 | 0 | return r; |
2358 | 0 | } |
2359 | | |
2360 | | LIBYUV_API |
2361 | | int I420Scale_16(const uint16_t* src_y, |
2362 | | int src_stride_y, |
2363 | | const uint16_t* src_u, |
2364 | | int src_stride_u, |
2365 | | const uint16_t* src_v, |
2366 | | int src_stride_v, |
2367 | | int src_width, |
2368 | | int src_height, |
2369 | | uint16_t* dst_y, |
2370 | | int dst_stride_y, |
2371 | | uint16_t* dst_u, |
2372 | | int dst_stride_u, |
2373 | | uint16_t* dst_v, |
2374 | | int dst_stride_v, |
2375 | | int dst_width, |
2376 | | int dst_height, |
2377 | 0 | enum FilterMode filtering) { |
2378 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2379 | 0 | int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
2380 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2381 | 0 | int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
2382 | 0 | int r; |
2383 | |
|
2384 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2385 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2386 | 0 | dst_width <= 0 || dst_height <= 0) { |
2387 | 0 | return -1; |
2388 | 0 | } |
2389 | | |
2390 | 0 | r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, |
2391 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2392 | 0 | if (r != 0) { |
2393 | 0 | return r; |
2394 | 0 | } |
2395 | 0 | r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, |
2396 | 0 | dst_stride_u, dst_halfwidth, dst_halfheight, filtering); |
2397 | 0 | if (r != 0) { |
2398 | 0 | return r; |
2399 | 0 | } |
2400 | 0 | r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, |
2401 | 0 | dst_stride_v, dst_halfwidth, dst_halfheight, filtering); |
2402 | 0 | return r; |
2403 | 0 | } |
2404 | | |
2405 | | LIBYUV_API |
2406 | | int I420Scale_12(const uint16_t* src_y, |
2407 | | int src_stride_y, |
2408 | | const uint16_t* src_u, |
2409 | | int src_stride_u, |
2410 | | const uint16_t* src_v, |
2411 | | int src_stride_v, |
2412 | | int src_width, |
2413 | | int src_height, |
2414 | | uint16_t* dst_y, |
2415 | | int dst_stride_y, |
2416 | | uint16_t* dst_u, |
2417 | | int dst_stride_u, |
2418 | | uint16_t* dst_v, |
2419 | | int dst_stride_v, |
2420 | | int dst_width, |
2421 | | int dst_height, |
2422 | 0 | enum FilterMode filtering) { |
2423 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2424 | 0 | int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
2425 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2426 | 0 | int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
2427 | 0 | int r; |
2428 | |
|
2429 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2430 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2431 | 0 | dst_width <= 0 || dst_height <= 0) { |
2432 | 0 | return -1; |
2433 | 0 | } |
2434 | | |
2435 | 0 | r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, |
2436 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2437 | 0 | if (r != 0) { |
2438 | 0 | return r; |
2439 | 0 | } |
2440 | 0 | r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, |
2441 | 0 | dst_stride_u, dst_halfwidth, dst_halfheight, filtering); |
2442 | 0 | if (r != 0) { |
2443 | 0 | return r; |
2444 | 0 | } |
2445 | 0 | r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, |
2446 | 0 | dst_stride_v, dst_halfwidth, dst_halfheight, filtering); |
2447 | 0 | return r; |
2448 | 0 | } |
2449 | | |
2450 | | // Scale an I444 image. |
2451 | | // This function in turn calls a scaling function for each plane. |
2452 | | |
2453 | | LIBYUV_API |
2454 | | int I444Scale(const uint8_t* src_y, |
2455 | | int src_stride_y, |
2456 | | const uint8_t* src_u, |
2457 | | int src_stride_u, |
2458 | | const uint8_t* src_v, |
2459 | | int src_stride_v, |
2460 | | int src_width, |
2461 | | int src_height, |
2462 | | uint8_t* dst_y, |
2463 | | int dst_stride_y, |
2464 | | uint8_t* dst_u, |
2465 | | int dst_stride_u, |
2466 | | uint8_t* dst_v, |
2467 | | int dst_stride_v, |
2468 | | int dst_width, |
2469 | | int dst_height, |
2470 | 0 | enum FilterMode filtering) { |
2471 | 0 | int r; |
2472 | |
|
2473 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2474 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2475 | 0 | dst_width <= 0 || dst_height <= 0) { |
2476 | 0 | return -1; |
2477 | 0 | } |
2478 | | |
2479 | 0 | r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, |
2480 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2481 | 0 | if (r != 0) { |
2482 | 0 | return r; |
2483 | 0 | } |
2484 | 0 | r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, |
2485 | 0 | dst_stride_u, dst_width, dst_height, filtering); |
2486 | 0 | if (r != 0) { |
2487 | 0 | return r; |
2488 | 0 | } |
2489 | 0 | r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, |
2490 | 0 | dst_stride_v, dst_width, dst_height, filtering); |
2491 | 0 | return r; |
2492 | 0 | } |
2493 | | |
2494 | | LIBYUV_API |
2495 | | int I444Scale_16(const uint16_t* src_y, |
2496 | | int src_stride_y, |
2497 | | const uint16_t* src_u, |
2498 | | int src_stride_u, |
2499 | | const uint16_t* src_v, |
2500 | | int src_stride_v, |
2501 | | int src_width, |
2502 | | int src_height, |
2503 | | uint16_t* dst_y, |
2504 | | int dst_stride_y, |
2505 | | uint16_t* dst_u, |
2506 | | int dst_stride_u, |
2507 | | uint16_t* dst_v, |
2508 | | int dst_stride_v, |
2509 | | int dst_width, |
2510 | | int dst_height, |
2511 | 0 | enum FilterMode filtering) { |
2512 | 0 | int r; |
2513 | |
|
2514 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2515 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2516 | 0 | dst_width <= 0 || dst_height <= 0) { |
2517 | 0 | return -1; |
2518 | 0 | } |
2519 | | |
2520 | 0 | r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, |
2521 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2522 | 0 | if (r != 0) { |
2523 | 0 | return r; |
2524 | 0 | } |
2525 | 0 | r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, |
2526 | 0 | dst_stride_u, dst_width, dst_height, filtering); |
2527 | 0 | if (r != 0) { |
2528 | 0 | return r; |
2529 | 0 | } |
2530 | 0 | r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, |
2531 | 0 | dst_stride_v, dst_width, dst_height, filtering); |
2532 | 0 | return r; |
2533 | 0 | } |
2534 | | |
2535 | | LIBYUV_API |
2536 | | int I444Scale_12(const uint16_t* src_y, |
2537 | | int src_stride_y, |
2538 | | const uint16_t* src_u, |
2539 | | int src_stride_u, |
2540 | | const uint16_t* src_v, |
2541 | | int src_stride_v, |
2542 | | int src_width, |
2543 | | int src_height, |
2544 | | uint16_t* dst_y, |
2545 | | int dst_stride_y, |
2546 | | uint16_t* dst_u, |
2547 | | int dst_stride_u, |
2548 | | uint16_t* dst_v, |
2549 | | int dst_stride_v, |
2550 | | int dst_width, |
2551 | | int dst_height, |
2552 | 0 | enum FilterMode filtering) { |
2553 | 0 | int r; |
2554 | |
|
2555 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2556 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2557 | 0 | dst_width <= 0 || dst_height <= 0) { |
2558 | 0 | return -1; |
2559 | 0 | } |
2560 | | |
2561 | 0 | r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, |
2562 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2563 | 0 | if (r != 0) { |
2564 | 0 | return r; |
2565 | 0 | } |
2566 | 0 | r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, |
2567 | 0 | dst_stride_u, dst_width, dst_height, filtering); |
2568 | 0 | if (r != 0) { |
2569 | 0 | return r; |
2570 | 0 | } |
2571 | 0 | r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, |
2572 | 0 | dst_stride_v, dst_width, dst_height, filtering); |
2573 | 0 | return r; |
2574 | 0 | } |
2575 | | |
2576 | | // Scale an I422 image. |
2577 | | // This function in turn calls a scaling function for each plane. |
2578 | | |
2579 | | LIBYUV_API |
2580 | | int I422Scale(const uint8_t* src_y, |
2581 | | int src_stride_y, |
2582 | | const uint8_t* src_u, |
2583 | | int src_stride_u, |
2584 | | const uint8_t* src_v, |
2585 | | int src_stride_v, |
2586 | | int src_width, |
2587 | | int src_height, |
2588 | | uint8_t* dst_y, |
2589 | | int dst_stride_y, |
2590 | | uint8_t* dst_u, |
2591 | | int dst_stride_u, |
2592 | | uint8_t* dst_v, |
2593 | | int dst_stride_v, |
2594 | | int dst_width, |
2595 | | int dst_height, |
2596 | 0 | enum FilterMode filtering) { |
2597 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2598 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2599 | 0 | int r; |
2600 | |
|
2601 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2602 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2603 | 0 | dst_width <= 0 || dst_height <= 0) { |
2604 | 0 | return -1; |
2605 | 0 | } |
2606 | | |
2607 | 0 | r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, |
2608 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2609 | 0 | if (r != 0) { |
2610 | 0 | return r; |
2611 | 0 | } |
2612 | 0 | r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, |
2613 | 0 | dst_stride_u, dst_halfwidth, dst_height, filtering); |
2614 | 0 | if (r != 0) { |
2615 | 0 | return r; |
2616 | 0 | } |
2617 | 0 | r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, |
2618 | 0 | dst_stride_v, dst_halfwidth, dst_height, filtering); |
2619 | 0 | return r; |
2620 | 0 | } |
2621 | | |
2622 | | LIBYUV_API |
2623 | | int I422Scale_16(const uint16_t* src_y, |
2624 | | int src_stride_y, |
2625 | | const uint16_t* src_u, |
2626 | | int src_stride_u, |
2627 | | const uint16_t* src_v, |
2628 | | int src_stride_v, |
2629 | | int src_width, |
2630 | | int src_height, |
2631 | | uint16_t* dst_y, |
2632 | | int dst_stride_y, |
2633 | | uint16_t* dst_u, |
2634 | | int dst_stride_u, |
2635 | | uint16_t* dst_v, |
2636 | | int dst_stride_v, |
2637 | | int dst_width, |
2638 | | int dst_height, |
2639 | 0 | enum FilterMode filtering) { |
2640 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2641 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2642 | 0 | int r; |
2643 | |
|
2644 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2645 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2646 | 0 | dst_width <= 0 || dst_height <= 0) { |
2647 | 0 | return -1; |
2648 | 0 | } |
2649 | | |
2650 | 0 | r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, |
2651 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2652 | 0 | if (r != 0) { |
2653 | 0 | return r; |
2654 | 0 | } |
2655 | 0 | r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, |
2656 | 0 | dst_stride_u, dst_halfwidth, dst_height, filtering); |
2657 | 0 | if (r != 0) { |
2658 | 0 | return r; |
2659 | 0 | } |
2660 | 0 | r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, |
2661 | 0 | dst_stride_v, dst_halfwidth, dst_height, filtering); |
2662 | 0 | return r; |
2663 | 0 | } |
2664 | | |
2665 | | LIBYUV_API |
2666 | | int I422Scale_12(const uint16_t* src_y, |
2667 | | int src_stride_y, |
2668 | | const uint16_t* src_u, |
2669 | | int src_stride_u, |
2670 | | const uint16_t* src_v, |
2671 | | int src_stride_v, |
2672 | | int src_width, |
2673 | | int src_height, |
2674 | | uint16_t* dst_y, |
2675 | | int dst_stride_y, |
2676 | | uint16_t* dst_u, |
2677 | | int dst_stride_u, |
2678 | | uint16_t* dst_v, |
2679 | | int dst_stride_v, |
2680 | | int dst_width, |
2681 | | int dst_height, |
2682 | 0 | enum FilterMode filtering) { |
2683 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2684 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2685 | 0 | int r; |
2686 | |
|
2687 | 0 | if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || |
2688 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || |
2689 | 0 | dst_width <= 0 || dst_height <= 0) { |
2690 | 0 | return -1; |
2691 | 0 | } |
2692 | | |
2693 | 0 | r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, |
2694 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2695 | 0 | if (r != 0) { |
2696 | 0 | return r; |
2697 | 0 | } |
2698 | 0 | r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, |
2699 | 0 | dst_stride_u, dst_halfwidth, dst_height, filtering); |
2700 | 0 | if (r != 0) { |
2701 | 0 | return r; |
2702 | 0 | } |
2703 | 0 | r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, |
2704 | 0 | dst_stride_v, dst_halfwidth, dst_height, filtering); |
2705 | 0 | return r; |
2706 | 0 | } |
2707 | | |
2708 | | // Scale an NV12 image. |
2709 | | // This function in turn calls a scaling function for each plane. |
2710 | | |
2711 | | LIBYUV_API |
2712 | | int NV12Scale(const uint8_t* src_y, |
2713 | | int src_stride_y, |
2714 | | const uint8_t* src_uv, |
2715 | | int src_stride_uv, |
2716 | | int src_width, |
2717 | | int src_height, |
2718 | | uint8_t* dst_y, |
2719 | | int dst_stride_y, |
2720 | | uint8_t* dst_uv, |
2721 | | int dst_stride_uv, |
2722 | | int dst_width, |
2723 | | int dst_height, |
2724 | 0 | enum FilterMode filtering) { |
2725 | 0 | int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
2726 | 0 | int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
2727 | 0 | int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
2728 | 0 | int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
2729 | 0 | int r; |
2730 | |
|
2731 | 0 | if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || |
2732 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || |
2733 | 0 | dst_width <= 0 || dst_height <= 0) { |
2734 | 0 | return -1; |
2735 | 0 | } |
2736 | | |
2737 | 0 | r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, |
2738 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2739 | 0 | if (r != 0) { |
2740 | 0 | return r; |
2741 | 0 | } |
2742 | 0 | r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, |
2743 | 0 | dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); |
2744 | 0 | return r; |
2745 | 0 | } |
2746 | | |
2747 | | LIBYUV_API |
2748 | | int NV24Scale(const uint8_t* src_y, |
2749 | | int src_stride_y, |
2750 | | const uint8_t* src_uv, |
2751 | | int src_stride_uv, |
2752 | | int src_width, |
2753 | | int src_height, |
2754 | | uint8_t* dst_y, |
2755 | | int dst_stride_y, |
2756 | | uint8_t* dst_uv, |
2757 | | int dst_stride_uv, |
2758 | | int dst_width, |
2759 | | int dst_height, |
2760 | 0 | enum FilterMode filtering) { |
2761 | 0 | int r; |
2762 | |
|
2763 | 0 | if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || |
2764 | 0 | src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || |
2765 | 0 | dst_width <= 0 || dst_height <= 0) { |
2766 | 0 | return -1; |
2767 | 0 | } |
2768 | | |
2769 | 0 | r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, |
2770 | 0 | dst_stride_y, dst_width, dst_height, filtering); |
2771 | 0 | if (r != 0) { |
2772 | 0 | return r; |
2773 | 0 | } |
2774 | 0 | r = UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv, |
2775 | 0 | dst_stride_uv, dst_width, dst_height, filtering); |
2776 | 0 | return r; |
2777 | 0 | } |
2778 | | |
2779 | | // Deprecated api |
2780 | | LIBYUV_API |
2781 | | int Scale(const uint8_t* src_y, |
2782 | | const uint8_t* src_u, |
2783 | | const uint8_t* src_v, |
2784 | | int src_stride_y, |
2785 | | int src_stride_u, |
2786 | | int src_stride_v, |
2787 | | int src_width, |
2788 | | int src_height, |
2789 | | uint8_t* dst_y, |
2790 | | uint8_t* dst_u, |
2791 | | uint8_t* dst_v, |
2792 | | int dst_stride_y, |
2793 | | int dst_stride_u, |
2794 | | int dst_stride_v, |
2795 | | int dst_width, |
2796 | | int dst_height, |
2797 | 0 | LIBYUV_BOOL interpolate) { |
2798 | 0 | return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v, |
2799 | 0 | src_stride_v, src_width, src_height, dst_y, dst_stride_y, |
2800 | 0 | dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width, |
2801 | 0 | dst_height, interpolate ? kFilterBox : kFilterNone); |
2802 | 0 | } |
2803 | | |
2804 | | #ifdef __cplusplus |
2805 | | } // extern "C" |
2806 | | } // namespace libyuv |
2807 | | #endif |