/src/libavif/ext/libyuv/source/scale_any.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2015 The LibYuv Project Authors. All rights reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <string.h> // For memset/memcpy |
12 | | |
13 | | #include "libyuv/scale.h" |
14 | | #include "libyuv/scale_row.h" |
15 | | |
16 | | #include "libyuv/basic_types.h" |
17 | | |
18 | | #ifdef __cplusplus |
19 | | namespace libyuv { |
20 | | extern "C" { |
21 | | #endif |
22 | | |
23 | | // Fixed scale down: wraps a SIMD row scaler so that any dst_width is accepted. The first n pixels go to SIMD (only when n > 0); the C scaler is always called for the last r, with the source advanced by (n * FACTOR) * BPP and the destination by n * BPP. FACTOR is pasted unparenthesized, so 4 / 3 expands to (n * 4 / 3). |
24 | | // Mask may be non-power of 2, so use MOD: r = dst_width % (MASK + 1) rather than dst_width & MASK. |
25 | | #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ |
26 | | void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ |
27 | 8 | int dst_width) { \ |
28 | 8 | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ |
29 | 8 | int n = dst_width - r; \ |
30 | 8 | if (n > 0) { \ |
31 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ |
32 | 0 | } \ |
33 | 8 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ |
34 | 8 | dst_ptr + n * BPP, r); \ |
35 | 8 | } Unexecuted instantiation: ScaleRowDown2_Any_SSSE3 Unexecuted instantiation: ScaleRowDown2Linear_Any_SSSE3 Unexecuted instantiation: ScaleRowDown2Box_Any_SSSE3 Unexecuted instantiation: ScaleUVRowDown2Box_Any_SSSE3 Unexecuted instantiation: ScaleUVRowDown2Box_Any_AVX2 Unexecuted instantiation: ScaleRowDown2_Any_AVX2 Unexecuted instantiation: ScaleRowDown2Linear_Any_AVX2 ScaleRowDown2Box_Any_AVX2 Line | Count | Source | 27 | 8 | int dst_width) { \ | 28 | 8 | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 8 | int n = dst_width - r; \ | 30 | 8 | if (n > 0) { \ | 31 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 0 | } \ | 33 | 8 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 8 | dst_ptr + n * BPP, r); \ | 35 | 8 | } |
Unexecuted instantiation: ScaleRowDown4_Any_SSSE3 Unexecuted instantiation: ScaleRowDown4Box_Any_SSSE3 Unexecuted instantiation: ScaleRowDown4_Any_AVX2 Unexecuted instantiation: ScaleRowDown4Box_Any_AVX2 Unexecuted instantiation: ScaleRowDown34_Any_SSSE3 Unexecuted instantiation: ScaleRowDown34_0_Box_Any_SSSE3 Unexecuted instantiation: ScaleRowDown34_1_Box_Any_SSSE3 Unexecuted instantiation: ScaleRowDown38_Any_SSSE3 Unexecuted instantiation: ScaleRowDown38_3_Box_Any_SSSE3 Unexecuted instantiation: ScaleRowDown38_2_Box_Any_SSSE3 Unexecuted instantiation: ScaleARGBRowDown2_Any_SSE2 Unexecuted instantiation: ScaleARGBRowDown2Linear_Any_SSE2 Unexecuted instantiation: ScaleARGBRowDown2Box_Any_SSE2 |
36 | | |
37 | | // Fixed scale down for odd source width. Used by I420Blend subsampling. |
38 | | // Since dst_width is (width + 1) / 2, the SIMD/C split is computed on dst_width - 1 pixels: SIMD scales the first n of them (when n > 0) |
39 | | // and the C scaler is handed the remaining r + 1, i.e. including the last pixel (presumably the _Odd_C variant copes with its missing source neighbour -- confirm against that implementation). |
40 | | #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ |
41 | | void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ |
42 | 0 | int dst_width) { \ |
43 | 0 | int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ |
44 | 0 | int n = (dst_width - 1) - r; \ |
45 | 0 | if (n > 0) { \ |
46 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ |
47 | 0 | } \ |
48 | 0 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ |
49 | 0 | dst_ptr + n * BPP, r + 1); \ |
50 | 0 | } Unexecuted instantiation: ScaleRowDown2Box_Odd_SSSE3 Unexecuted instantiation: ScaleRowDown2Box_Odd_AVX2 |
51 | | |
52 | | #ifdef HAS_SCALEROWDOWN2_SSSE3 /* Any-width wrappers for the FACTOR 2 row scalers; each group is compiled only when its HAS_ macro is defined. */ |
53 | | SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) |
54 | | SDANY(ScaleRowDown2Linear_Any_SSSE3, |
55 | | ScaleRowDown2Linear_SSSE3, |
56 | | ScaleRowDown2Linear_C, |
57 | | 2, |
58 | | 1, |
59 | | 15) |
60 | | SDANY(ScaleRowDown2Box_Any_SSSE3, |
61 | | ScaleRowDown2Box_SSSE3, |
62 | | ScaleRowDown2Box_C, |
63 | | 2, |
64 | | 1, |
65 | | 15) |
66 | | SDODD(ScaleRowDown2Box_Odd_SSSE3, |
67 | | ScaleRowDown2Box_SSSE3, |
68 | | ScaleRowDown2Box_Odd_C, |
69 | | 2, |
70 | | 1, |
71 | | 15) |
72 | | #endif /* HAS_SCALEROWDOWN2_SSSE3 */ |
73 | | #ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3 |
74 | | SDANY(ScaleUVRowDown2Box_Any_SSSE3, |
75 | | ScaleUVRowDown2Box_SSSE3, |
76 | | ScaleUVRowDown2Box_C, |
77 | | 2, |
78 | | 2, |
79 | | 3) |
80 | | #endif /* HAS_SCALEUVROWDOWN2BOX_SSSE3 */ |
81 | | #ifdef HAS_SCALEUVROWDOWN2BOX_AVX2 |
82 | | SDANY(ScaleUVRowDown2Box_Any_AVX2, |
83 | | ScaleUVRowDown2Box_AVX2, |
84 | | ScaleUVRowDown2Box_C, |
85 | | 2, |
86 | | 2, |
87 | | 7) |
88 | | #endif /* HAS_SCALEUVROWDOWN2BOX_AVX2 */ |
89 | | #ifdef HAS_SCALEROWDOWN2_AVX2 |
90 | | SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) |
91 | | SDANY(ScaleRowDown2Linear_Any_AVX2, |
92 | | ScaleRowDown2Linear_AVX2, |
93 | | ScaleRowDown2Linear_C, |
94 | | 2, |
95 | | 1, |
96 | | 31) |
97 | | SDANY(ScaleRowDown2Box_Any_AVX2, |
98 | | ScaleRowDown2Box_AVX2, |
99 | | ScaleRowDown2Box_C, |
100 | | 2, |
101 | | 1, |
102 | | 31) |
103 | | SDODD(ScaleRowDown2Box_Odd_AVX2, |
104 | | ScaleRowDown2Box_AVX2, |
105 | | ScaleRowDown2Box_Odd_C, |
106 | | 2, |
107 | | 1, |
108 | | 31) |
109 | | #endif /* HAS_SCALEROWDOWN2_AVX2 */ |
110 | | #ifdef HAS_SCALEROWDOWN2_NEON |
111 | | SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) |
112 | | SDANY(ScaleRowDown2Linear_Any_NEON, |
113 | | ScaleRowDown2Linear_NEON, |
114 | | ScaleRowDown2Linear_C, |
115 | | 2, |
116 | | 1, |
117 | | 15) |
118 | | SDANY(ScaleRowDown2Box_Any_NEON, |
119 | | ScaleRowDown2Box_NEON, |
120 | | ScaleRowDown2Box_C, |
121 | | 2, |
122 | | 1, |
123 | | 15) |
124 | | SDODD(ScaleRowDown2Box_Odd_NEON, |
125 | | ScaleRowDown2Box_NEON, |
126 | | ScaleRowDown2Box_Odd_C, |
127 | | 2, |
128 | | 1, |
129 | | 15) |
130 | | #endif /* HAS_SCALEROWDOWN2_NEON */ |
131 | | #ifdef HAS_SCALEUVROWDOWN2_NEON |
132 | | SDANY(ScaleUVRowDown2_Any_NEON, |
133 | | ScaleUVRowDown2_NEON, |
134 | | ScaleUVRowDown2_C, |
135 | | 2, |
136 | | 2, |
137 | | 7) |
138 | | #endif /* HAS_SCALEUVROWDOWN2_NEON */ |
139 | | #ifdef HAS_SCALEUVROWDOWN2LINEAR_NEON |
140 | | SDANY(ScaleUVRowDown2Linear_Any_NEON, |
141 | | ScaleUVRowDown2Linear_NEON, |
142 | | ScaleUVRowDown2Linear_C, |
143 | | 2, |
144 | | 2, |
145 | | 7) |
146 | | #endif /* HAS_SCALEUVROWDOWN2LINEAR_NEON */ |
147 | | #ifdef HAS_SCALEUVROWDOWN2BOX_NEON |
148 | | SDANY(ScaleUVRowDown2Box_Any_NEON, |
149 | | ScaleUVRowDown2Box_NEON, |
150 | | ScaleUVRowDown2Box_C, |
151 | | 2, |
152 | | 2, |
153 | | 7) |
154 | | #endif /* HAS_SCALEUVROWDOWN2BOX_NEON */ |
155 | | #ifdef HAS_SCALEROWDOWN2_LSX |
156 | | SDANY(ScaleRowDown2_Any_LSX, ScaleRowDown2_LSX, ScaleRowDown2_C, 2, 1, 31) |
157 | | SDANY(ScaleRowDown2Linear_Any_LSX, |
158 | | ScaleRowDown2Linear_LSX, |
159 | | ScaleRowDown2Linear_C, |
160 | | 2, |
161 | | 1, |
162 | | 31) |
163 | | SDANY(ScaleRowDown2Box_Any_LSX, |
164 | | ScaleRowDown2Box_LSX, |
165 | | ScaleRowDown2Box_C, |
166 | | 2, |
167 | | 1, |
168 | | 31) |
169 | | #endif /* HAS_SCALEROWDOWN2_LSX */ |
170 | | #ifdef HAS_SCALEROWDOWN4_SSSE3 /* Any-width wrappers for the FACTOR 4 row scalers. */ |
171 | | SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) |
172 | | SDANY(ScaleRowDown4Box_Any_SSSE3, |
173 | | ScaleRowDown4Box_SSSE3, |
174 | | ScaleRowDown4Box_C, |
175 | | 4, |
176 | | 1, |
177 | | 7) |
178 | | #endif /* HAS_SCALEROWDOWN4_SSSE3 */ |
179 | | #ifdef HAS_SCALEROWDOWN4_AVX2 |
180 | | SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) |
181 | | SDANY(ScaleRowDown4Box_Any_AVX2, |
182 | | ScaleRowDown4Box_AVX2, |
183 | | ScaleRowDown4Box_C, |
184 | | 4, |
185 | | 1, |
186 | | 15) |
187 | | #endif /* HAS_SCALEROWDOWN4_AVX2 */ |
188 | | #ifdef HAS_SCALEROWDOWN4_NEON |
189 | | SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 15) |
190 | | SDANY(ScaleRowDown4Box_Any_NEON, |
191 | | ScaleRowDown4Box_NEON, |
192 | | ScaleRowDown4Box_C, |
193 | | 4, |
194 | | 1, |
195 | | 7) |
196 | | #endif /* HAS_SCALEROWDOWN4_NEON */ |
197 | | #ifdef HAS_SCALEROWDOWN4_LSX |
198 | | SDANY(ScaleRowDown4_Any_LSX, ScaleRowDown4_LSX, ScaleRowDown4_C, 4, 1, 15) |
199 | | SDANY(ScaleRowDown4Box_Any_LSX, |
200 | | ScaleRowDown4Box_LSX, |
201 | | ScaleRowDown4Box_C, |
202 | | 4, |
203 | | 1, |
204 | | 15) |
205 | | #endif /* HAS_SCALEROWDOWN4_LSX */ |
206 | | #ifdef HAS_SCALEROWDOWN34_SSSE3 /* Any-width wrappers for the ScaleRowDown34 row scalers; FACTOR 4 / 3 expands inside SDANY to (n * 4 / 3). */ |
207 | | SDANY(ScaleRowDown34_Any_SSSE3, |
208 | | ScaleRowDown34_SSSE3, |
209 | | ScaleRowDown34_C, |
210 | | 4 / 3, |
211 | | 1, |
212 | | 23) |
213 | | SDANY(ScaleRowDown34_0_Box_Any_SSSE3, |
214 | | ScaleRowDown34_0_Box_SSSE3, |
215 | | ScaleRowDown34_0_Box_C, |
216 | | 4 / 3, |
217 | | 1, |
218 | | 23) |
219 | | SDANY(ScaleRowDown34_1_Box_Any_SSSE3, |
220 | | ScaleRowDown34_1_Box_SSSE3, |
221 | | ScaleRowDown34_1_Box_C, |
222 | | 4 / 3, |
223 | | 1, |
224 | | 23) |
225 | | #endif /* HAS_SCALEROWDOWN34_SSSE3 */ |
226 | | #ifdef HAS_SCALEROWDOWN34_NEON |
227 | | #ifdef __aarch64__ /* only the MASK differs between the two branches: 47 here, 23 in the #else */ |
228 | | SDANY(ScaleRowDown34_Any_NEON, |
229 | | ScaleRowDown34_NEON, |
230 | | ScaleRowDown34_C, |
231 | | 4 / 3, |
232 | | 1, |
233 | | 47) |
234 | | SDANY(ScaleRowDown34_0_Box_Any_NEON, |
235 | | ScaleRowDown34_0_Box_NEON, |
236 | | ScaleRowDown34_0_Box_C, |
237 | | 4 / 3, |
238 | | 1, |
239 | | 47) |
240 | | SDANY(ScaleRowDown34_1_Box_Any_NEON, |
241 | | ScaleRowDown34_1_Box_NEON, |
242 | | ScaleRowDown34_1_Box_C, |
243 | | 4 / 3, |
244 | | 1, |
245 | | 47) |
246 | | #else /* !__aarch64__ */ |
247 | | SDANY(ScaleRowDown34_Any_NEON, |
248 | | ScaleRowDown34_NEON, |
249 | | ScaleRowDown34_C, |
250 | | 4 / 3, |
251 | | 1, |
252 | | 23) |
253 | | SDANY(ScaleRowDown34_0_Box_Any_NEON, |
254 | | ScaleRowDown34_0_Box_NEON, |
255 | | ScaleRowDown34_0_Box_C, |
256 | | 4 / 3, |
257 | | 1, |
258 | | 23) |
259 | | SDANY(ScaleRowDown34_1_Box_Any_NEON, |
260 | | ScaleRowDown34_1_Box_NEON, |
261 | | ScaleRowDown34_1_Box_C, |
262 | | 4 / 3, |
263 | | 1, |
264 | | 23) |
265 | | #endif /* __aarch64__ */ |
266 | | #endif /* HAS_SCALEROWDOWN34_NEON */ |
267 | | #ifdef HAS_SCALEROWDOWN34_LSX |
268 | | SDANY(ScaleRowDown34_Any_LSX, |
269 | | ScaleRowDown34_LSX, |
270 | | ScaleRowDown34_C, |
271 | | 4 / 3, |
272 | | 1, |
273 | | 47) |
274 | | SDANY(ScaleRowDown34_0_Box_Any_LSX, |
275 | | ScaleRowDown34_0_Box_LSX, |
276 | | ScaleRowDown34_0_Box_C, |
277 | | 4 / 3, |
278 | | 1, |
279 | | 47) |
280 | | SDANY(ScaleRowDown34_1_Box_Any_LSX, |
281 | | ScaleRowDown34_1_Box_LSX, |
282 | | ScaleRowDown34_1_Box_C, |
283 | | 4 / 3, |
284 | | 1, |
285 | | 47) |
286 | | #endif /* HAS_SCALEROWDOWN34_LSX */ |
287 | | #ifdef HAS_SCALEROWDOWN38_SSSE3 /* Any-width wrappers for the ScaleRowDown38 row scalers; FACTOR 8 / 3 expands inside SDANY to (n * 8 / 3). */ |
288 | | SDANY(ScaleRowDown38_Any_SSSE3, |
289 | | ScaleRowDown38_SSSE3, |
290 | | ScaleRowDown38_C, |
291 | | 8 / 3, |
292 | | 1, |
293 | | 11) |
294 | | SDANY(ScaleRowDown38_3_Box_Any_SSSE3, |
295 | | ScaleRowDown38_3_Box_SSSE3, |
296 | | ScaleRowDown38_3_Box_C, |
297 | | 8 / 3, |
298 | | 1, |
299 | | 5) |
300 | | SDANY(ScaleRowDown38_2_Box_Any_SSSE3, |
301 | | ScaleRowDown38_2_Box_SSSE3, |
302 | | ScaleRowDown38_2_Box_C, |
303 | | 8 / 3, |
304 | | 1, |
305 | | 5) |
306 | | #endif /* HAS_SCALEROWDOWN38_SSSE3 */ |
307 | | #ifdef HAS_SCALEROWDOWN38_NEON |
308 | | SDANY(ScaleRowDown38_Any_NEON, |
309 | | ScaleRowDown38_NEON, |
310 | | ScaleRowDown38_C, |
311 | | 8 / 3, |
312 | | 1, |
313 | | 11) |
314 | | SDANY(ScaleRowDown38_3_Box_Any_NEON, |
315 | | ScaleRowDown38_3_Box_NEON, |
316 | | ScaleRowDown38_3_Box_C, |
317 | | 8 / 3, |
318 | | 1, |
319 | | 11) |
320 | | SDANY(ScaleRowDown38_2_Box_Any_NEON, |
321 | | ScaleRowDown38_2_Box_NEON, |
322 | | ScaleRowDown38_2_Box_C, |
323 | | 8 / 3, |
324 | | 1, |
325 | | 11) |
326 | | #endif /* HAS_SCALEROWDOWN38_NEON */ |
327 | | #ifdef HAS_SCALEROWDOWN38_LSX |
328 | | SDANY(ScaleRowDown38_Any_LSX, |
329 | | ScaleRowDown38_LSX, |
330 | | ScaleRowDown38_C, |
331 | | 8 / 3, |
332 | | 1, |
333 | | 11) |
334 | | SDANY(ScaleRowDown38_3_Box_Any_LSX, |
335 | | ScaleRowDown38_3_Box_LSX, |
336 | | ScaleRowDown38_3_Box_C, |
337 | | 8 / 3, |
338 | | 1, |
339 | | 11) |
340 | | SDANY(ScaleRowDown38_2_Box_Any_LSX, |
341 | | ScaleRowDown38_2_Box_LSX, |
342 | | ScaleRowDown38_2_Box_C, |
343 | | 8 / 3, |
344 | | 1, |
345 | | 11) |
346 | | #endif /* HAS_SCALEROWDOWN38_LSX */ |
347 | | |
348 | | #ifdef HAS_SCALEARGBROWDOWN2_SSE2 /* Any-width wrappers for the ARGB FACTOR 2 row scalers (BPP 4). */ |
349 | | SDANY(ScaleARGBRowDown2_Any_SSE2, |
350 | | ScaleARGBRowDown2_SSE2, |
351 | | ScaleARGBRowDown2_C, |
352 | | 2, |
353 | | 4, |
354 | | 3) |
355 | | SDANY(ScaleARGBRowDown2Linear_Any_SSE2, |
356 | | ScaleARGBRowDown2Linear_SSE2, |
357 | | ScaleARGBRowDown2Linear_C, |
358 | | 2, |
359 | | 4, |
360 | | 3) |
361 | | SDANY(ScaleARGBRowDown2Box_Any_SSE2, |
362 | | ScaleARGBRowDown2Box_SSE2, |
363 | | ScaleARGBRowDown2Box_C, |
364 | | 2, |
365 | | 4, |
366 | | 3) |
367 | | #endif /* HAS_SCALEARGBROWDOWN2_SSE2 */ |
368 | | #ifdef HAS_SCALEARGBROWDOWN2_NEON |
369 | | SDANY(ScaleARGBRowDown2_Any_NEON, |
370 | | ScaleARGBRowDown2_NEON, |
371 | | ScaleARGBRowDown2_C, |
372 | | 2, |
373 | | 4, |
374 | | 7) |
375 | | SDANY(ScaleARGBRowDown2Linear_Any_NEON, |
376 | | ScaleARGBRowDown2Linear_NEON, |
377 | | ScaleARGBRowDown2Linear_C, |
378 | | 2, |
379 | | 4, |
380 | | 7) |
381 | | SDANY(ScaleARGBRowDown2Box_Any_NEON, |
382 | | ScaleARGBRowDown2Box_NEON, |
383 | | ScaleARGBRowDown2Box_C, |
384 | | 2, |
385 | | 4, |
386 | | 7) |
387 | | #endif /* HAS_SCALEARGBROWDOWN2_NEON */ |
388 | | #ifdef HAS_SCALEARGBROWDOWN2_LSX |
389 | | SDANY(ScaleARGBRowDown2_Any_LSX, |
390 | | ScaleARGBRowDown2_LSX, |
391 | | ScaleARGBRowDown2_C, |
392 | | 2, |
393 | | 4, |
394 | | 3) |
395 | | SDANY(ScaleARGBRowDown2Linear_Any_LSX, |
396 | | ScaleARGBRowDown2Linear_LSX, |
397 | | ScaleARGBRowDown2Linear_C, |
398 | | 2, |
399 | | 4, |
400 | | 3) |
401 | | SDANY(ScaleARGBRowDown2Box_Any_LSX, |
402 | | ScaleARGBRowDown2Box_LSX, |
403 | | ScaleARGBRowDown2Box_C, |
404 | | 2, |
405 | | 4, |
406 | | 3) |
407 | | #endif /* HAS_SCALEARGBROWDOWN2_LSX */ |
408 | | #undef SDANY |
409 | | |
410 | | // Scale down by even scale factor: src_stepx is forwarded to both scalers. SIMD handles the first n = dst_width & ~MASK pixels (when n > 0); the C scaler is always called for the remaining r = dst_width & MASK, reading from src_ptr + (n * src_stepx) * BPP. The split uses bit masks, so unlike SDANY it relies on MASK + 1 being a power of 2. |
411 | | #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ |
412 | | void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ |
413 | 0 | uint8_t* dst_ptr, int dst_width) { \ |
414 | 0 | int r = dst_width & MASK; \ |
415 | 0 | int n = dst_width & ~MASK; \ |
416 | 0 | if (n > 0) { \ |
417 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ |
418 | 0 | } \ |
419 | 0 | SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ |
420 | 0 | dst_ptr + n * BPP, r); \ |
421 | 0 | } Unexecuted instantiation: ScaleARGBRowDownEven_Any_SSE2 Unexecuted instantiation: ScaleARGBRowDownEvenBox_Any_SSE2 |
422 | | |
423 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 /* Any-width wrappers for the stepped (src_stepx) row scalers. */ |
424 | | SDAANY(ScaleARGBRowDownEven_Any_SSE2, |
425 | | ScaleARGBRowDownEven_SSE2, |
426 | | ScaleARGBRowDownEven_C, |
427 | | 4, |
428 | | 3) |
429 | | SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, |
430 | | ScaleARGBRowDownEvenBox_SSE2, |
431 | | ScaleARGBRowDownEvenBox_C, |
432 | | 4, |
433 | | 3) |
434 | | #endif /* HAS_SCALEARGBROWDOWNEVEN_SSE2 */ |
435 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON |
436 | | SDAANY(ScaleARGBRowDownEven_Any_NEON, |
437 | | ScaleARGBRowDownEven_NEON, |
438 | | ScaleARGBRowDownEven_C, |
439 | | 4, |
440 | | 3) |
441 | | SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, |
442 | | ScaleARGBRowDownEvenBox_NEON, |
443 | | ScaleARGBRowDownEvenBox_C, |
444 | | 4, |
445 | | 3) |
446 | | #endif /* HAS_SCALEARGBROWDOWNEVEN_NEON */ |
447 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_LSX |
448 | | SDAANY(ScaleARGBRowDownEven_Any_LSX, |
449 | | ScaleARGBRowDownEven_LSX, |
450 | | ScaleARGBRowDownEven_C, |
451 | | 4, |
452 | | 3) |
453 | | SDAANY(ScaleARGBRowDownEvenBox_Any_LSX, |
454 | | ScaleARGBRowDownEvenBox_LSX, |
455 | | ScaleARGBRowDownEvenBox_C, |
456 | | 4, |
457 | | 3) |
458 | | #endif /* HAS_SCALEARGBROWDOWNEVEN_LSX */ |
459 | | #ifdef HAS_SCALEUVROWDOWNEVEN_NEON |
460 | | SDAANY(ScaleUVRowDownEven_Any_NEON, |
461 | | ScaleUVRowDownEven_NEON, |
462 | | ScaleUVRowDownEven_C, |
463 | | 2, |
464 | | 3) |
465 | | #endif /* HAS_SCALEUVROWDOWNEVEN_NEON */ |
466 | | |
467 | | #ifdef SASIMDONLY |
468 | | // This also works and uses memcpy and SIMD instead of C, but is slower on ARM |
469 | | |
470 | | // Add rows box filter scale down. Using macro from row_any: SIMD handles the first n = width & ~MASK pixels directly; the last r = width & MASK are staged through src_temp/dst_temp so SIMD can run one full (MASK + 1)-wide pass, and only r results are copied back. dst_ptr is uint16_t*, so it is advanced by n elements; BPP is only the byte size of one destination element for memcpy. |
471 | | #define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ |
472 | | void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \ |
473 | | SIMD_ALIGNED(uint16_t dst_temp[32]); \ |
474 | | SIMD_ALIGNED(uint8_t src_temp[32]); \ |
475 | | memset(dst_temp, 0, 32 * 2); /* for msan */ \ |
476 | | int r = width & MASK; \ |
477 | | int n = width & ~MASK; \ |
478 | | if (n > 0) { \ |
479 | | ANY_SIMD(src_ptr, dst_ptr, n); \ |
480 | | } \ |
481 | | memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \ |
482 | | memcpy(dst_temp, dst_ptr + n, r * BPP); /* element offset n, not n * BPP */ \ |
483 | | ANY_SIMD(src_temp, dst_temp, MASK + 1); \ |
484 | | memcpy(dst_ptr + n, dst_temp, r * BPP); /* element offset n, not n * BPP */ \ |
485 | | } |
486 | | |
487 | | #ifdef HAS_SCALEADDROW_SSE2 |
488 | | SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15) |
489 | | #endif |
490 | | #ifdef HAS_SCALEADDROW_AVX2 |
491 | | SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31) |
492 | | #endif |
493 | | #ifdef HAS_SCALEADDROW_NEON |
494 | | SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15) |
495 | | #endif |
496 | | #ifdef HAS_SCALEADDROW_LSX |
497 | | SAROW(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, 1, 2, 15) |
498 | | #endif |
499 | | #undef SAROW |
500 | | |
501 | | #else |
502 | | |
503 | | // Add rows box filter scale down: SIMD handles the first n = src_width & ~MASK pixels (when n > 0); the C version is always called for the trailing src_width & MASK, with src_ptr and dst_ptr both advanced by n elements. |
504 | | #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ |
505 | 1.28M | void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ |
506 | 1.28M | int n = src_width & ~MASK; \ |
507 | 1.28M | if (n > 0) { \ |
508 | 243k | SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ |
509 | 243k | } \ |
510 | 1.28M | SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ |
511 | 1.28M | } Unexecuted instantiation: ScaleAddRow_Any_SSE2 Line | Count | Source | 505 | 1.28M | void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ | 506 | 1.28M | int n = src_width & ~MASK; \ | 507 | 1.28M | if (n > 0) { \ | 508 | 243k | SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ | 509 | 243k | } \ | 510 | 1.28M | SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ | 511 | 1.28M | } |
|
512 | | |
513 | | #ifdef HAS_SCALEADDROW_SSE2 /* Any-width wrappers for ScaleAddRow; all share ScaleAddRow_C for the remainder. */ |
514 | | SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) |
515 | | #endif /* HAS_SCALEADDROW_SSE2 */ |
516 | | #ifdef HAS_SCALEADDROW_AVX2 |
517 | | SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) |
518 | | #endif /* HAS_SCALEADDROW_AVX2 */ |
519 | | #ifdef HAS_SCALEADDROW_NEON |
520 | | SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) |
521 | | #endif /* HAS_SCALEADDROW_NEON */ |
522 | | #ifdef HAS_SCALEADDROW_LSX |
523 | | SAANY(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, ScaleAddRow_C, 15) |
524 | | #endif /* HAS_SCALEADDROW_LSX */ |
525 | | #undef SAANY |
526 | | |
527 | | #endif // SASIMDONLY |
528 | | |
529 | | // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols: SIMD produces the first n = dst_width & ~MASK pixels (when n > 0); the C version is always called for the last r = dst_width & MASK, writing at dst_ptr + n * BPP with x advanced to x + n * dx. src_ptr itself is passed through unchanged. |
530 | | #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ |
531 | | void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ |
532 | | int dx) { \ |
533 | | int r = dst_width & MASK; \ |
534 | | int n = dst_width & ~MASK; \ |
535 | | if (n > 0) { \ |
536 | | TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ |
537 | | } \ |
538 | | TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ |
539 | | } |
540 | | |
541 | | #ifdef HAS_SCALEFILTERCOLS_NEON /* Any-width wrappers for the column scalers; BPP is 1 for the plain variants and 4 for the ARGB ones. */ |
542 | | CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) |
543 | | #endif /* HAS_SCALEFILTERCOLS_NEON */ |
544 | | #ifdef HAS_SCALEFILTERCOLS_LSX |
545 | | CANY(ScaleFilterCols_Any_LSX, ScaleFilterCols_LSX, ScaleFilterCols_C, 1, 15) |
546 | | #endif /* HAS_SCALEFILTERCOLS_LSX */ |
547 | | #ifdef HAS_SCALEARGBCOLS_NEON |
548 | | CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) |
549 | | #endif /* HAS_SCALEARGBCOLS_NEON */ |
550 | | #ifdef HAS_SCALEARGBCOLS_LSX |
551 | | CANY(ScaleARGBCols_Any_LSX, ScaleARGBCols_LSX, ScaleARGBCols_C, 4, 3) |
552 | | #endif /* HAS_SCALEARGBCOLS_LSX */ |
553 | | #ifdef HAS_SCALEARGBFILTERCOLS_NEON |
554 | | CANY(ScaleARGBFilterCols_Any_NEON, |
555 | | ScaleARGBFilterCols_NEON, |
556 | | ScaleARGBFilterCols_C, |
557 | | 4, |
558 | | 3) |
559 | | #endif /* HAS_SCALEARGBFILTERCOLS_NEON */ |
560 | | #ifdef HAS_SCALEARGBFILTERCOLS_LSX |
561 | | CANY(ScaleARGBFilterCols_Any_LSX, |
562 | | ScaleARGBFilterCols_LSX, |
563 | | ScaleARGBFilterCols_C, |
564 | | 4, |
565 | | 7) |
566 | | #endif /* HAS_SCALEARGBFILTERCOLS_LSX */ |
567 | | #undef CANY |
568 | | |
569 | | // Scale up horizontally 2 times using linear filter. The first and last output pixels are copied straight from the source (src_ptr[0] and src_ptr[(dst_width - 1) / 2]); the interior work_width = (dst_width - 1) & ~1 pixels are written from dst_ptr + 1, n of them by SIMD and the remaining r by C starting at src_ptr + n / 2. |
570 | | #define SUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ |
571 | 81.6k | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ |
572 | 81.6k | int work_width = (dst_width - 1) & ~1; \ |
573 | 81.6k | int r = work_width & MASK; \ |
574 | 81.6k | int n = work_width & ~MASK; \ |
575 | 81.6k | dst_ptr[0] = src_ptr[0]; \ |
576 | 81.6k | if (work_width > 0) { \ |
577 | 63.7k | if (n != 0) { \ |
578 | 15.8k | SIMD(src_ptr, dst_ptr + 1, n); \ |
579 | 15.8k | } \ |
580 | 63.7k | C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ |
581 | 63.7k | } \ |
582 | 81.6k | dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ |
583 | 81.6k | } Unexecuted instantiation: ScaleRowUp2_Linear_Any_C Unexecuted instantiation: ScaleRowUp2_Linear_16_Any_C Unexecuted instantiation: ScaleRowUp2_Linear_Any_SSE2 Unexecuted instantiation: ScaleRowUp2_Linear_Any_SSSE3 Unexecuted instantiation: ScaleRowUp2_Linear_12_Any_SSSE3 Unexecuted instantiation: ScaleRowUp2_Linear_16_Any_SSE2 ScaleRowUp2_Linear_Any_AVX2 Line | Count | Source | 571 | 60.8k | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ | 572 | 60.8k | int work_width = (dst_width - 1) & ~1; \ | 573 | 60.8k | int r = work_width & MASK; \ | 574 | 60.8k | int n = work_width & ~MASK; \ | 575 | 60.8k | dst_ptr[0] = src_ptr[0]; \ | 576 | 60.8k | if (work_width > 0) { \ | 577 | 49.9k | if (n != 0) { \ | 578 | 11.1k | SIMD(src_ptr, dst_ptr + 1, n); \ | 579 | 11.1k | } \ | 580 | 49.9k | C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ | 581 | 49.9k | } \ | 582 | 60.8k | dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ | 583 | 60.8k | } |
ScaleRowUp2_Linear_12_Any_AVX2 Line | Count | Source | 571 | 20.7k | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ | 572 | 20.7k | int work_width = (dst_width - 1) & ~1; \ | 573 | 20.7k | int r = work_width & MASK; \ | 574 | 20.7k | int n = work_width & ~MASK; \ | 575 | 20.7k | dst_ptr[0] = src_ptr[0]; \ | 576 | 20.7k | if (work_width > 0) { \ | 577 | 13.7k | if (n != 0) { \ | 578 | 4.71k | SIMD(src_ptr, dst_ptr + 1, n); \ | 579 | 4.71k | } \ | 580 | 13.7k | C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ | 581 | 13.7k | } \ | 582 | 20.7k | dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ | 583 | 20.7k | } |
Unexecuted instantiation: ScaleRowUp2_Linear_16_Any_AVX2 |
584 | | |
585 | | // Even the C versions need to be wrapped, because boundary pixels have to |
586 | | // be handled differently: with MASK 0, n equals work_width and r is 0, so the first call does every interior pixel and the wrapper only adds the two edge pixels. |
587 | | |
588 | | SUH2LANY(ScaleRowUp2_Linear_Any_C, |
589 | | ScaleRowUp2_Linear_C, |
590 | | ScaleRowUp2_Linear_C, |
591 | | 0, |
592 | | uint8_t) |
593 | | |
594 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_C, |
595 | | ScaleRowUp2_Linear_16_C, |
596 | | ScaleRowUp2_Linear_16_C, |
597 | | 0, |
598 | | uint16_t) |
599 | | |
600 | | #ifdef HAS_SCALEROWUP2_LINEAR_SSE2 |
601 | | SUH2LANY(ScaleRowUp2_Linear_Any_SSE2, |
602 | | ScaleRowUp2_Linear_SSE2, |
603 | | ScaleRowUp2_Linear_C, |
604 | | 15, |
605 | | uint8_t) |
606 | | #endif /* HAS_SCALEROWUP2_LINEAR_SSE2 */ |
607 | | |
608 | | #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3 |
609 | | SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3, |
610 | | ScaleRowUp2_Linear_SSSE3, |
611 | | ScaleRowUp2_Linear_C, |
612 | | 15, |
613 | | uint8_t) |
614 | | #endif /* HAS_SCALEROWUP2_LINEAR_SSSE3 */ |
615 | | |
616 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3 |
617 | | SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3, |
618 | | ScaleRowUp2_Linear_12_SSSE3, |
619 | | ScaleRowUp2_Linear_16_C, |
620 | | 15, |
621 | | uint16_t) |
622 | | #endif /* HAS_SCALEROWUP2_LINEAR_12_SSSE3 */ |
623 | | |
624 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2 |
625 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2, |
626 | | ScaleRowUp2_Linear_16_SSE2, |
627 | | ScaleRowUp2_Linear_16_C, |
628 | | 7, |
629 | | uint16_t) |
630 | | #endif /* HAS_SCALEROWUP2_LINEAR_16_SSE2 */ |
631 | | |
632 | | #ifdef HAS_SCALEROWUP2_LINEAR_AVX2 |
633 | | SUH2LANY(ScaleRowUp2_Linear_Any_AVX2, |
634 | | ScaleRowUp2_Linear_AVX2, |
635 | | ScaleRowUp2_Linear_C, |
636 | | 31, |
637 | | uint8_t) |
638 | | #endif /* HAS_SCALEROWUP2_LINEAR_AVX2 */ |
639 | | |
640 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2 |
641 | | SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2, |
642 | | ScaleRowUp2_Linear_12_AVX2, |
643 | | ScaleRowUp2_Linear_16_C, |
644 | | 31, |
645 | | uint16_t) |
646 | | #endif /* HAS_SCALEROWUP2_LINEAR_12_AVX2 */ |
647 | | |
648 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2 |
649 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2, |
650 | | ScaleRowUp2_Linear_16_AVX2, |
651 | | ScaleRowUp2_Linear_16_C, |
652 | | 15, |
653 | | uint16_t) |
654 | | #endif /* HAS_SCALEROWUP2_LINEAR_16_AVX2 */ |
655 | | |
656 | | #ifdef HAS_SCALEROWUP2_LINEAR_NEON |
657 | | #ifdef __aarch64__ /* only the MASK differs between the two branches: 31 here, 15 in the #else */ |
658 | | SUH2LANY(ScaleRowUp2_Linear_Any_NEON, |
659 | | ScaleRowUp2_Linear_NEON, |
660 | | ScaleRowUp2_Linear_C, |
661 | | 31, |
662 | | uint8_t) |
663 | | #else /* !__aarch64__ */ |
664 | | SUH2LANY(ScaleRowUp2_Linear_Any_NEON, |
665 | | ScaleRowUp2_Linear_NEON, |
666 | | ScaleRowUp2_Linear_C, |
667 | | 15, |
668 | | uint8_t) |
669 | | #endif /* __aarch64__ */ |
670 | | #endif /* HAS_SCALEROWUP2_LINEAR_NEON */ |
671 | | |
672 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON |
673 | | SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON, |
674 | | ScaleRowUp2_Linear_12_NEON, |
675 | | ScaleRowUp2_Linear_16_C, |
676 | | 15, |
677 | | uint16_t) |
678 | | #endif /* HAS_SCALEROWUP2_LINEAR_12_NEON */ |
679 | | |
680 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON |
681 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON, |
682 | | ScaleRowUp2_Linear_16_NEON, |
683 | | ScaleRowUp2_Linear_16_C, |
684 | | 15, |
685 | | uint16_t) |
686 | | #endif /* HAS_SCALEROWUP2_LINEAR_16_NEON */ |
687 | | |
688 | | #undef SUH2LANY |
689 | | |
690 | | // Scale up 2 times using bilinear filter. |
691 | | // This function produces 2 rows at a time. The first and last columns of both output rows are blended here from the two source rows with 3:1 and 1:3 weights (+2, >> 2); interior pixels are written from da + 1, n by SIMD and the remaining r by C, and both kernels receive the row strides as the element differences sb - sa and db - da. |
692 | | #define SU2BLANY(NAME, SIMD, C, MASK, PTYPE) \ |
693 | | void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ |
694 | 16.1k | ptrdiff_t dst_stride, int dst_width) { \ |
695 | 16.1k | int work_width = (dst_width - 1) & ~1; \ |
696 | 16.1k | int r = work_width & MASK; \ |
697 | 16.1k | int n = work_width & ~MASK; \ |
698 | 16.1k | const PTYPE* sa = src_ptr; \ |
699 | 16.1k | const PTYPE* sb = src_ptr + src_stride; \ |
700 | 16.1k | PTYPE* da = dst_ptr; \ |
701 | 16.1k | PTYPE* db = dst_ptr + dst_stride; \ |
702 | 16.1k | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ |
703 | 16.1k | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ |
704 | 16.1k | if (work_width > 0) { \ |
705 | 6.20k | if (n != 0) { \ |
706 | 2.59k | SIMD(sa, sb - sa, da + 1, db - da, n); \ |
707 | 2.59k | } \ |
708 | 6.20k | C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ |
709 | 6.20k | } \ |
710 | 16.1k | da[dst_width - 1] = \ |
711 | 16.1k | (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ |
712 | 16.1k | db[dst_width - 1] = \ |
713 | 16.1k | (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ |
714 | 16.1k | } Unexecuted instantiation: ScaleRowUp2_Bilinear_Any_C Unexecuted instantiation: ScaleRowUp2_Bilinear_16_Any_C Unexecuted instantiation: ScaleRowUp2_Bilinear_Any_SSE2 Unexecuted instantiation: ScaleRowUp2_Bilinear_12_Any_SSSE3 Unexecuted instantiation: ScaleRowUp2_Bilinear_16_Any_SSE2 Unexecuted instantiation: ScaleRowUp2_Bilinear_Any_SSSE3 ScaleRowUp2_Bilinear_Any_AVX2 Line | Count | Source | 694 | 11.5k | ptrdiff_t dst_stride, int dst_width) { \ | 695 | 11.5k | int work_width = (dst_width - 1) & ~1; \ | 696 | 11.5k | int r = work_width & MASK; \ | 697 | 11.5k | int n = work_width & ~MASK; \ | 698 | 11.5k | const PTYPE* sa = src_ptr; \ | 699 | 11.5k | const PTYPE* sb = src_ptr + src_stride; \ | 700 | 11.5k | PTYPE* da = dst_ptr; \ | 701 | 11.5k | PTYPE* db = dst_ptr + dst_stride; \ | 702 | 11.5k | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ | 703 | 11.5k | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ | 704 | 11.5k | if (work_width > 0) { \ | 705 | 3.89k | if (n != 0) { \ | 706 | 1.26k | SIMD(sa, sb - sa, da + 1, db - da, n); \ | 707 | 1.26k | } \ | 708 | 3.89k | C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ | 709 | 3.89k | } \ | 710 | 11.5k | da[dst_width - 1] = \ | 711 | 11.5k | (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ | 712 | 11.5k | db[dst_width - 1] = \ | 713 | 11.5k | (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ | 714 | 11.5k | } |
ScaleRowUp2_Bilinear_12_Any_AVX2 Line | Count | Source | 694 | 4.51k | ptrdiff_t dst_stride, int dst_width) { \ | 695 | 4.51k | int work_width = (dst_width - 1) & ~1; \ | 696 | 4.51k | int r = work_width & MASK; \ | 697 | 4.51k | int n = work_width & ~MASK; \ | 698 | 4.51k | const PTYPE* sa = src_ptr; \ | 699 | 4.51k | const PTYPE* sb = src_ptr + src_stride; \ | 700 | 4.51k | PTYPE* da = dst_ptr; \ | 701 | 4.51k | PTYPE* db = dst_ptr + dst_stride; \ | 702 | 4.51k | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ | 703 | 4.51k | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ | 704 | 4.51k | if (work_width > 0) { \ | 705 | 2.30k | if (n != 0) { \ | 706 | 1.33k | SIMD(sa, sb - sa, da + 1, db - da, n); \ | 707 | 1.33k | } \ | 708 | 2.30k | C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ | 709 | 2.30k | } \ | 710 | 4.51k | da[dst_width - 1] = \ | 711 | 4.51k | (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ | 712 | 4.51k | db[dst_width - 1] = \ | 713 | 4.51k | (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ | 714 | 4.51k | } |
Unexecuted instantiation: ScaleRowUp2_Bilinear_16_Any_AVX2 |
715 | | |
716 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_C, |
717 | | ScaleRowUp2_Bilinear_C, |
718 | | ScaleRowUp2_Bilinear_C, |
719 | | 0, |
720 | | uint8_t) /* MASK 0: n equals work_width and r is 0, so the first call does every interior pixel */ |
721 | | |
722 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C, |
723 | | ScaleRowUp2_Bilinear_16_C, |
724 | | ScaleRowUp2_Bilinear_16_C, |
725 | | 0, |
726 | | uint16_t) |
727 | | |
728 | | #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2 |
729 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2, |
730 | | ScaleRowUp2_Bilinear_SSE2, |
731 | | ScaleRowUp2_Bilinear_C, |
732 | | 15, |
733 | | uint8_t) |
734 | | #endif /* HAS_SCALEROWUP2_BILINEAR_SSE2 */ |
735 | | |
736 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3 |
737 | | SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3, |
738 | | ScaleRowUp2_Bilinear_12_SSSE3, |
739 | | ScaleRowUp2_Bilinear_16_C, |
740 | | 15, |
741 | | uint16_t) |
742 | | #endif /* HAS_SCALEROWUP2_BILINEAR_12_SSSE3 */ |
743 | | |
744 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2 |
745 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2, |
746 | | ScaleRowUp2_Bilinear_16_SSE2, |
747 | | ScaleRowUp2_Bilinear_16_C, |
748 | | 7, |
749 | | uint16_t) |
750 | | #endif /* HAS_SCALEROWUP2_BILINEAR_16_SSE2 */ |
751 | | |
752 | | #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3 |
753 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3, |
754 | | ScaleRowUp2_Bilinear_SSSE3, |
755 | | ScaleRowUp2_Bilinear_C, |
756 | | 15, |
757 | | uint8_t) |
758 | | #endif /* HAS_SCALEROWUP2_BILINEAR_SSSE3 */ |
759 | | |
760 | | #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2 |
761 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2, |
762 | | ScaleRowUp2_Bilinear_AVX2, |
763 | | ScaleRowUp2_Bilinear_C, |
764 | | 31, |
765 | | uint8_t) |
766 | | #endif /* HAS_SCALEROWUP2_BILINEAR_AVX2 */ |
767 | | |
768 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2 |
769 | | SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2, |
770 | | ScaleRowUp2_Bilinear_12_AVX2, |
771 | | ScaleRowUp2_Bilinear_16_C, |
772 | | 15, |
773 | | uint16_t) |
774 | | #endif /* HAS_SCALEROWUP2_BILINEAR_12_AVX2 */ |
775 | | |
776 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2 |
777 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2, |
778 | | ScaleRowUp2_Bilinear_16_AVX2, |
779 | | ScaleRowUp2_Bilinear_16_C, |
780 | | 15, |
781 | | uint16_t) |
782 | | #endif /* HAS_SCALEROWUP2_BILINEAR_16_AVX2 */ |
783 | | |
784 | | #ifdef HAS_SCALEROWUP2_BILINEAR_NEON |
785 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON, |
786 | | ScaleRowUp2_Bilinear_NEON, |
787 | | ScaleRowUp2_Bilinear_C, |
788 | | 15, |
789 | | uint8_t) |
790 | | #endif /* HAS_SCALEROWUP2_BILINEAR_NEON */ |
791 | | |
792 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON |
793 | | SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON, |
794 | | ScaleRowUp2_Bilinear_12_NEON, |
795 | | ScaleRowUp2_Bilinear_16_C, |
796 | | 15, |
797 | | uint16_t) |
798 | | #endif /* HAS_SCALEROWUP2_BILINEAR_12_NEON */ |
799 | | |
800 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON |
801 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON, |
802 | | ScaleRowUp2_Bilinear_16_NEON, |
803 | | ScaleRowUp2_Bilinear_16_C, |
804 | | 7, |
805 | | uint16_t) |
806 | | #endif /* HAS_SCALEROWUP2_BILINEAR_16_NEON */ |
807 | | |
808 | | #undef SU2BLANY |
809 | | |
810 | | // Scale bi-planar plane up horizontally 2 times using linear filter. Pixels are 2-element pairs: the first and last output pairs are copied straight from the source, and the interior work_width = (dst_width - 1) & ~1 pixels are written from dst_ptr + 2, n of them by SIMD and the remaining r by C at src_ptr + n and dst_ptr + 2 * n + 2. |
811 | | #define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ |
812 | 0 | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ |
813 | 0 | int work_width = (dst_width - 1) & ~1; \ |
814 | 0 | int r = work_width & MASK; \ |
815 | 0 | int n = work_width & ~MASK; \ |
816 | 0 | dst_ptr[0] = src_ptr[0]; \ |
817 | 0 | dst_ptr[1] = src_ptr[1]; \ |
818 | 0 | if (work_width > 0) { \ |
819 | 0 | if (n != 0) { \ |
820 | 0 | SIMD(src_ptr, dst_ptr + 2, n); \ |
821 | 0 | } \ |
822 | 0 | C(src_ptr + n, dst_ptr + 2 * n + 2, r); \ |
823 | 0 | } \ |
824 | 0 | dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; \ |
825 | 0 | dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \ |
826 | 0 | } Unexecuted instantiation: ScaleUVRowUp2_Linear_Any_C Unexecuted instantiation: ScaleUVRowUp2_Linear_16_Any_C Unexecuted instantiation: ScaleUVRowUp2_Linear_Any_SSSE3 Unexecuted instantiation: ScaleUVRowUp2_Linear_Any_AVX2 Unexecuted instantiation: ScaleUVRowUp2_Linear_16_Any_SSE41 Unexecuted instantiation: ScaleUVRowUp2_Linear_16_Any_AVX2 |
827 | | |
// Instantiations of SBUH2LANY. Each expands to one function named by the
// first argument; the remaining arguments are the SIMD-slot row function, the
// C-slot row function, the remainder mask and the element type.
//
// The two *_Any_C variants pass the same C row function in both slots with a
// mask of 0, so the whole interior width goes through the first call and the
// second call receives a width of 0. They are not guarded by a HAS_* macro.
828 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_C, |
829 | | ScaleUVRowUp2_Linear_C, |
830 | | ScaleUVRowUp2_Linear_C, |
831 | | 0, |
832 | | uint8_t) |
833 | | |
834 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C, |
835 | | ScaleUVRowUp2_Linear_16_C, |
836 | | ScaleUVRowUp2_Linear_16_C, |
837 | | 0, |
838 | | uint16_t) |
839 | | |
// The variants below are compiled only when their HAS_* feature macro is
// defined. The mask decides how much of the interior width is left for the C
// row function (work_width & MASK in the macro body).
840 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3 |
841 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3, |
842 | | ScaleUVRowUp2_Linear_SSSE3, |
843 | | ScaleUVRowUp2_Linear_C, |
844 | | 7, |
845 | | uint8_t) |
846 | | #endif |
847 | | |
848 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2 |
849 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2, |
850 | | ScaleUVRowUp2_Linear_AVX2, |
851 | | ScaleUVRowUp2_Linear_C, |
852 | | 15, |
853 | | uint8_t) |
854 | | #endif |
855 | | |
856 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 |
857 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41, |
858 | | ScaleUVRowUp2_Linear_16_SSE41, |
859 | | ScaleUVRowUp2_Linear_16_C, |
860 | | 3, |
861 | | uint16_t) |
862 | | #endif |
863 | | |
864 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 |
865 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2, |
866 | | ScaleUVRowUp2_Linear_16_AVX2, |
867 | | ScaleUVRowUp2_Linear_16_C, |
868 | | 7, |
869 | | uint16_t) |
870 | | #endif |
871 | | |
872 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_NEON |
873 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON, |
874 | | ScaleUVRowUp2_Linear_NEON, |
875 | | ScaleUVRowUp2_Linear_C, |
876 | | 15, |
877 | | uint8_t) |
878 | | #endif |
879 | | |
880 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON |
881 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON, |
882 | | ScaleUVRowUp2_Linear_16_NEON, |
883 | | ScaleUVRowUp2_Linear_16_C, |
884 | | 15, |
885 | | uint16_t) |
886 | | #endif |
887 | | |
// Retire the helper macro so it cannot be expanded further down the file.
888 | | #undef SBUH2LANY |
889 | | |
// Scale bi-planar plane up 2 times using bilinear filter.
// This function produces 2 rows at a time.
//
// SBU2BLANY(NAME, SIMD, C, MASK, PTYPE) defines
//   void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr,
//             ptrdiff_t dst_stride, int dst_width)
// which lets a row kernel that only handles certain widths serve any width.
//   NAME  - name of the generated function.
//   SIMD  - two-row kernel called for the leading part of the interior, i.e.
//           the interior width with the MASK bits cleared (skipped when 0).
//   C     - two-row kernel called for what is left of the interior (the MASK
//           bits of the interior width; may be called with a width of 0).
//   MASK  - integer expression selecting the remainder bits (e.g. 7 or 15).
//           0 routes the whole interior to SIMD. It is parenthesized wherever
//           it is used, so an expression such as (LANES - 1) is safe.
//   PTYPE - element type of all rows.
//
// Two source rows are read (src_ptr and src_ptr + src_stride) and two
// destination rows are written (dst_ptr and dst_ptr + dst_stride); strides
// are in elements. Indices 0 .. 2 * dst_width - 1 of each destination row are
// addressed. The first and last element pair of each destination row is a
// vertical-only blend computed here: (3 * near + far + 2) >> 2, where "near"
// is the upper source row for the upper destination row and the lower source
// row for the lower one. The interior, whose width is (dst_width - 1) rounded
// down to even, is produced by the two kernels.
// A dst_width <= 0 is a no-op: the trailing-pair indices would be negative.
#define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE)                                \
  void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr,      \
            ptrdiff_t dst_stride, int dst_width) {                           \
    if (dst_width <= 0) {                                                    \
      return; /* nothing to emit; edge indices below would underflow */      \
    }                                                                        \
    /* Interior width handled by the kernels (edges are blended here). */    \
    const int work_width = (dst_width - 1) & ~1;                             \
    const int r = work_width & (MASK);  /* remainder for the C kernel */     \
    const int n = work_width & ~(MASK); /* part for the SIMD kernel */       \
    /* Index of the first element of the last source pair. */                \
    const int se = ((dst_width + 1) & ~1) - 2;                               \
    /* Index of the first element of the last destination pair. */           \
    const int de = 2 * dst_width - 2;                                        \
    const PTYPE* sa = src_ptr;                                               \
    const PTYPE* sb = src_ptr + src_stride;                                  \
    PTYPE* da = dst_ptr;                                                     \
    PTYPE* db = dst_ptr + dst_stride;                                        \
    /* Leading pair: vertical 3:1 blend only. */                             \
    da[0] = (PTYPE)((3 * sa[0] + sb[0] + 2) >> 2);                           \
    db[0] = (PTYPE)((sa[0] + 3 * sb[0] + 2) >> 2);                           \
    da[1] = (PTYPE)((3 * sa[1] + sb[1] + 2) >> 2);                           \
    db[1] = (PTYPE)((sa[1] + 3 * sb[1] + 2) >> 2);                           \
    if (work_width > 0) {                                                    \
      if (n != 0) {                                                          \
        SIMD(sa, src_stride, da + 2, dst_stride, n);                         \
      }                                                                      \
      /* n interior columns consume n source and 2 * n dest elements. */     \
      C(sa + n, src_stride, da + 2 * n + 2, dst_stride, r);                  \
    }                                                                        \
    /* Trailing pair: vertical 3:1 blend only. */                            \
    da[de] = (PTYPE)((3 * sa[se] + sb[se] + 2) >> 2);                        \
    db[de] = (PTYPE)((sa[se] + 3 * sb[se] + 2) >> 2);                        \
    da[de + 1] = (PTYPE)((3 * sa[se + 1] + sb[se + 1] + 2) >> 2);            \
    db[de + 1] = (PTYPE)((sa[se + 1] + 3 * sb[se + 1] + 2) >> 2);            \
  }
925 | | |
// Instantiations of SBU2BLANY. Each expands to one function named by the
// first argument; the remaining arguments are the SIMD-slot row function, the
// C-slot row function, the remainder mask and the element type.
//
// The two *_Any_C variants pass the same C row function in both slots with a
// mask of 0, so the whole interior width goes through the first call and the
// second call receives a width of 0. They are not guarded by a HAS_* macro.
926 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C, |
927 | | ScaleUVRowUp2_Bilinear_C, |
928 | | ScaleUVRowUp2_Bilinear_C, |
929 | | 0, |
930 | | uint8_t) |
931 | | |
932 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C, |
933 | | ScaleUVRowUp2_Bilinear_16_C, |
934 | | ScaleUVRowUp2_Bilinear_16_C, |
935 | | 0, |
936 | | uint16_t) |
937 | | |
// The variants below are compiled only when their HAS_* feature macro is
// defined. The mask decides how much of the interior width is left for the C
// row function (work_width & MASK in the macro body).
938 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3 |
939 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3, |
940 | | ScaleUVRowUp2_Bilinear_SSSE3, |
941 | | ScaleUVRowUp2_Bilinear_C, |
942 | | 7, |
943 | | uint8_t) |
944 | | #endif |
945 | | |
946 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2 |
947 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2, |
948 | | ScaleUVRowUp2_Bilinear_AVX2, |
949 | | ScaleUVRowUp2_Bilinear_C, |
950 | | 15, |
951 | | uint8_t) |
952 | | #endif |
953 | | |
954 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 |
955 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41, |
956 | | ScaleUVRowUp2_Bilinear_16_SSE41, |
957 | | ScaleUVRowUp2_Bilinear_16_C, |
958 | | 7, |
959 | | uint16_t) |
960 | | #endif |
961 | | |
962 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 |
963 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2, |
964 | | ScaleUVRowUp2_Bilinear_16_AVX2, |
965 | | ScaleUVRowUp2_Bilinear_16_C, |
966 | | 7, |
967 | | uint16_t) |
968 | | #endif |
969 | | |
970 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON |
971 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON, |
972 | | ScaleUVRowUp2_Bilinear_NEON, |
973 | | ScaleUVRowUp2_Bilinear_C, |
974 | | 7, |
975 | | uint8_t) |
976 | | #endif |
977 | | |
978 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON |
979 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON, |
980 | | ScaleUVRowUp2_Bilinear_16_NEON, |
981 | | ScaleUVRowUp2_Bilinear_16_C, |
982 | | 7, |
983 | | uint16_t) |
984 | | #endif |
985 | | |
// Retire the helper macro so it cannot be expanded further down the file.
986 | | #undef SBU2BLANY |
987 | | |
988 | | #ifdef __cplusplus |
989 | | } // extern "C" |
990 | | } // namespace libyuv |
991 | | #endif |