/src/libavif/ext/libyuv/source/scale_any.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2015 The LibYuv Project Authors. All rights reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <string.h> // For memset/memcpy |
12 | | |
13 | | #include "libyuv/scale.h" |
14 | | #include "libyuv/scale_row.h" |
15 | | |
16 | | #include "libyuv/basic_types.h" |
17 | | |
18 | | #ifdef __cplusplus |
19 | | namespace libyuv { |
20 | | extern "C" { |
21 | | #endif |
22 | | |
23 | | // Fixed scale down. |
24 | | // Mask may be non-power of 2, so use MOD |
// SDANY defines NAMEANY(src_ptr, src_stride, dst_ptr, dst_width), a wrapper
// that accepts any dst_width:
//   r = dst_width % (MASK + 1)  pixels left over for SCALEROWDOWN_C
//   n = dst_width - r           pixels handed to SCALEROWDOWN_SIMD
// The SIMD routine is skipped when n is 0; the C routine is always called,
// even when r is 0. For the C call the source pointer is advanced by
// (n * FACTOR) * BPP and the destination pointer by n * BPP. Both pointers
// are uint8_t*, so these are byte offsets.
// FACTOR is substituted without parentheses, so an argument written as 4 / 3
// expands to (n * 4 / 3): multiply first, then integer divide.
25 | | #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ |
26 | | void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ |
27 | 4.57k | int dst_width) { \ |
28 | 4.57k | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ |
29 | 4.57k | int n = dst_width - r; \ |
30 | 4.57k | if (n > 0) { \ |
31 | 456 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ |
32 | 456 | } \ |
33 | 4.57k | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ |
34 | 4.57k | dst_ptr + n * BPP, r); \ |
35 | 4.57k | } Unexecuted instantiation: ScaleRowDown2_Any_SSSE3 Unexecuted instantiation: ScaleRowDown2Linear_Any_SSSE3 Unexecuted instantiation: ScaleRowDown2Box_Any_SSSE3 Unexecuted instantiation: ScaleUVRowDown2Box_Any_SSSE3 Unexecuted instantiation: ScaleUVRowDown2Box_Any_AVX2 Unexecuted instantiation: ScaleRowDown2_Any_AVX2 Unexecuted instantiation: ScaleRowDown2Linear_Any_AVX2 ScaleRowDown2Box_Any_AVX2 Line | Count | Source | 27 | 1.64k | int dst_width) { \ | 28 | 1.64k | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 1.64k | int n = dst_width - r; \ | 30 | 1.64k | if (n > 0) { \ | 31 | 404 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 404 | } \ | 33 | 1.64k | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 1.64k | dst_ptr + n * BPP, r); \ | 35 | 1.64k | } |
Unexecuted instantiation: ScaleRowDown4_Any_SSSE3 Unexecuted instantiation: ScaleRowDown4Box_Any_SSSE3 Unexecuted instantiation: ScaleRowDown4_Any_AVX2 ScaleRowDown4Box_Any_AVX2 Line | Count | Source | 27 | 454 | int dst_width) { \ | 28 | 454 | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 454 | int n = dst_width - r; \ | 30 | 454 | if (n > 0) { \ | 31 | 52 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 52 | } \ | 33 | 454 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 454 | dst_ptr + n * BPP, r); \ | 35 | 454 | } |
Unexecuted instantiation: ScaleRowDown34_Any_SSSE3 ScaleRowDown34_0_Box_Any_SSSE3 Line | Count | Source | 27 | 1.18k | int dst_width) { \ | 28 | 1.18k | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 1.18k | int n = dst_width - r; \ | 30 | 1.18k | if (n > 0) { \ | 31 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 0 | } \ | 33 | 1.18k | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 1.18k | dst_ptr + n * BPP, r); \ | 35 | 1.18k | } |
ScaleRowDown34_1_Box_Any_SSSE3 Line | Count | Source | 27 | 591 | int dst_width) { \ | 28 | 591 | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 591 | int n = dst_width - r; \ | 30 | 591 | if (n > 0) { \ | 31 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 0 | } \ | 33 | 591 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 591 | dst_ptr + n * BPP, r); \ | 35 | 591 | } |
Unexecuted instantiation: ScaleRowDown38_Any_SSSE3 ScaleRowDown38_3_Box_Any_SSSE3 Line | Count | Source | 27 | 470 | int dst_width) { \ | 28 | 470 | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 470 | int n = dst_width - r; \ | 30 | 470 | if (n > 0) { \ | 31 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 0 | } \ | 33 | 470 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 470 | dst_ptr + n * BPP, r); \ | 35 | 470 | } |
ScaleRowDown38_2_Box_Any_SSSE3 Line | Count | Source | 27 | 235 | int dst_width) { \ | 28 | 235 | int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ | 29 | 235 | int n = dst_width - r; \ | 30 | 235 | if (n > 0) { \ | 31 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ | 32 | 0 | } \ | 33 | 235 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ | 34 | 235 | dst_ptr + n * BPP, r); \ | 35 | 235 | } |
Unexecuted instantiation: ScaleARGBRowDown2_Any_SSE2 Unexecuted instantiation: ScaleARGBRowDown2Linear_Any_SSE2 Unexecuted instantiation: ScaleARGBRowDown2Box_Any_SSE2 |
36 | | |
37 | | // Fixed scale down for odd source width. Used by I420Blend subsampling. |
38 | | // Since dst_width is (width + 1) / 2, this function scales one less pixel |
39 | | // and copies the last pixel. |
// SDODD has the same shape as SDANY, but the split is computed on
// dst_width - 1:
//   r = (dst_width - 1) % (MASK + 1)
//   n = (dst_width - 1) - r     pixels handed to SCALEROWDOWN_SIMD
// SCALEROWDOWN_C then receives r + 1 pixels, so the final output pixel is
// always produced by the C routine and never by the SIMD routine.
// NOTE(review): the cast to unsigned means dst_width == 0 would give a very
// large remainder; callers presumably pass dst_width >= 1 - confirm.
40 | | #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ |
41 | | void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ |
42 | 0 | int dst_width) { \ |
43 | 0 | int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ |
44 | 0 | int n = (dst_width - 1) - r; \ |
45 | 0 | if (n > 0) { \ |
46 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ |
47 | 0 | } \ |
48 | 0 | SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ |
49 | 0 | dst_ptr + n * BPP, r + 1); \ |
50 | 0 | } Unexecuted instantiation: ScaleRowDown2Box_Odd_SSSE3 Unexecuted instantiation: ScaleRowDown2Box_Odd_AVX2 |
51 | | |
// 1/2 scale-down wrappers built from SDANY and SDODD. Arguments are
// (wrapper name, SIMD routine, C routine, FACTOR, BPP, MASK). FACTOR is 2
// throughout; BPP is 1 for the ScaleRowDown2 rows and 2 for the ScaleUV rows.
// Each group is compiled only when its HAS_ macro is defined. The SDODD
// wrappers pair the Box SIMD routine with ScaleRowDown2Box_Odd_C.
52 | | #ifdef HAS_SCALEROWDOWN2_SSSE3 |
53 | | SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) |
54 | | SDANY(ScaleRowDown2Linear_Any_SSSE3, |
55 | | ScaleRowDown2Linear_SSSE3, |
56 | | ScaleRowDown2Linear_C, |
57 | | 2, |
58 | | 1, |
59 | | 15) |
60 | | SDANY(ScaleRowDown2Box_Any_SSSE3, |
61 | | ScaleRowDown2Box_SSSE3, |
62 | | ScaleRowDown2Box_C, |
63 | | 2, |
64 | | 1, |
65 | | 15) |
66 | | SDODD(ScaleRowDown2Box_Odd_SSSE3, |
67 | | ScaleRowDown2Box_SSSE3, |
68 | | ScaleRowDown2Box_Odd_C, |
69 | | 2, |
70 | | 1, |
71 | | 15) |
72 | | #endif |
73 | | #ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3 |
74 | | SDANY(ScaleUVRowDown2Box_Any_SSSE3, |
75 | | ScaleUVRowDown2Box_SSSE3, |
76 | | ScaleUVRowDown2Box_C, |
77 | | 2, |
78 | | 2, |
79 | | 3) |
80 | | #endif |
81 | | #ifdef HAS_SCALEUVROWDOWN2BOX_AVX2 |
82 | | SDANY(ScaleUVRowDown2Box_Any_AVX2, |
83 | | ScaleUVRowDown2Box_AVX2, |
84 | | ScaleUVRowDown2Box_C, |
85 | | 2, |
86 | | 2, |
87 | | 7) |
88 | | #endif |
89 | | #ifdef HAS_SCALEROWDOWN2_AVX2 |
90 | | SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) |
91 | | SDANY(ScaleRowDown2Linear_Any_AVX2, |
92 | | ScaleRowDown2Linear_AVX2, |
93 | | ScaleRowDown2Linear_C, |
94 | | 2, |
95 | | 1, |
96 | | 31) |
97 | | SDANY(ScaleRowDown2Box_Any_AVX2, |
98 | | ScaleRowDown2Box_AVX2, |
99 | | ScaleRowDown2Box_C, |
100 | | 2, |
101 | | 1, |
102 | | 31) |
103 | | SDODD(ScaleRowDown2Box_Odd_AVX2, |
104 | | ScaleRowDown2Box_AVX2, |
105 | | ScaleRowDown2Box_Odd_C, |
106 | | 2, |
107 | | 1, |
108 | | 31) |
109 | | #endif |
110 | | #ifdef HAS_SCALEROWDOWN2_NEON |
111 | | SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) |
112 | | SDANY(ScaleRowDown2Linear_Any_NEON, |
113 | | ScaleRowDown2Linear_NEON, |
114 | | ScaleRowDown2Linear_C, |
115 | | 2, |
116 | | 1, |
117 | | 15) |
118 | | SDANY(ScaleRowDown2Box_Any_NEON, |
119 | | ScaleRowDown2Box_NEON, |
120 | | ScaleRowDown2Box_C, |
121 | | 2, |
122 | | 1, |
123 | | 15) |
124 | | SDODD(ScaleRowDown2Box_Odd_NEON, |
125 | | ScaleRowDown2Box_NEON, |
126 | | ScaleRowDown2Box_Odd_C, |
127 | | 2, |
128 | | 1, |
129 | | 15) |
130 | | #endif |
131 | | #ifdef HAS_SCALEUVROWDOWN2_NEON |
132 | | SDANY(ScaleUVRowDown2_Any_NEON, |
133 | | ScaleUVRowDown2_NEON, |
134 | | ScaleUVRowDown2_C, |
135 | | 2, |
136 | | 2, |
137 | | 7) |
138 | | #endif |
139 | | #ifdef HAS_SCALEUVROWDOWN2LINEAR_NEON |
140 | | SDANY(ScaleUVRowDown2Linear_Any_NEON, |
141 | | ScaleUVRowDown2Linear_NEON, |
142 | | ScaleUVRowDown2Linear_C, |
143 | | 2, |
144 | | 2, |
145 | | 7) |
146 | | #endif |
147 | | #ifdef HAS_SCALEUVROWDOWN2BOX_NEON |
148 | | SDANY(ScaleUVRowDown2Box_Any_NEON, |
149 | | ScaleUVRowDown2Box_NEON, |
150 | | ScaleUVRowDown2Box_C, |
151 | | 2, |
152 | | 2, |
153 | | 7) |
154 | | #endif |
155 | | |
156 | | #ifdef HAS_SCALEROWDOWN2_MSA |
157 | | SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) |
158 | | SDANY(ScaleRowDown2Linear_Any_MSA, |
159 | | ScaleRowDown2Linear_MSA, |
160 | | ScaleRowDown2Linear_C, |
161 | | 2, |
162 | | 1, |
163 | | 31) |
164 | | SDANY(ScaleRowDown2Box_Any_MSA, |
165 | | ScaleRowDown2Box_MSA, |
166 | | ScaleRowDown2Box_C, |
167 | | 2, |
168 | | 1, |
169 | | 31) |
170 | | #endif |
171 | | #ifdef HAS_SCALEROWDOWN2_LSX |
172 | | SDANY(ScaleRowDown2_Any_LSX, ScaleRowDown2_LSX, ScaleRowDown2_C, 2, 1, 31) |
173 | | SDANY(ScaleRowDown2Linear_Any_LSX, |
174 | | ScaleRowDown2Linear_LSX, |
175 | | ScaleRowDown2Linear_C, |
176 | | 2, |
177 | | 1, |
178 | | 31) |
179 | | SDANY(ScaleRowDown2Box_Any_LSX, |
180 | | ScaleRowDown2Box_LSX, |
181 | | ScaleRowDown2Box_C, |
182 | | 2, |
183 | | 1, |
184 | | 31) |
185 | | #endif |
// 1/4 scale-down wrappers built from SDANY (FACTOR 4, BPP 1), one group per
// HAS_ macro.
// NOTE(review): ScaleRowDown4Box_Any_NEON passes MASK 7 while
// ScaleRowDown4_Any_NEON passes 15; presumably the two NEON routines handle
// different widths per pass - confirm against the kernels.
186 | | #ifdef HAS_SCALEROWDOWN4_SSSE3 |
187 | | SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) |
188 | | SDANY(ScaleRowDown4Box_Any_SSSE3, |
189 | | ScaleRowDown4Box_SSSE3, |
190 | | ScaleRowDown4Box_C, |
191 | | 4, |
192 | | 1, |
193 | | 7) |
194 | | #endif |
195 | | #ifdef HAS_SCALEROWDOWN4_AVX2 |
196 | | SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) |
197 | | SDANY(ScaleRowDown4Box_Any_AVX2, |
198 | | ScaleRowDown4Box_AVX2, |
199 | | ScaleRowDown4Box_C, |
200 | | 4, |
201 | | 1, |
202 | | 15) |
203 | | #endif |
204 | | #ifdef HAS_SCALEROWDOWN4_NEON |
205 | | SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 15) |
206 | | SDANY(ScaleRowDown4Box_Any_NEON, |
207 | | ScaleRowDown4Box_NEON, |
208 | | ScaleRowDown4Box_C, |
209 | | 4, |
210 | | 1, |
211 | | 7) |
212 | | #endif |
213 | | #ifdef HAS_SCALEROWDOWN4_MSA |
214 | | SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) |
215 | | SDANY(ScaleRowDown4Box_Any_MSA, |
216 | | ScaleRowDown4Box_MSA, |
217 | | ScaleRowDown4Box_C, |
218 | | 4, |
219 | | 1, |
220 | | 15) |
221 | | #endif |
222 | | #ifdef HAS_SCALEROWDOWN4_LSX |
223 | | SDANY(ScaleRowDown4_Any_LSX, ScaleRowDown4_LSX, ScaleRowDown4_C, 4, 1, 15) |
224 | | SDANY(ScaleRowDown4Box_Any_LSX, |
225 | | ScaleRowDown4Box_LSX, |
226 | | ScaleRowDown4Box_C, |
227 | | 4, |
228 | | 1, |
229 | | 15) |
230 | | #endif |
// 3/4 scale-down wrappers built from SDANY (BPP 1). FACTOR is written as
// 4 / 3 and is pasted unparenthesized into SDANY, so the source offset of
// the C tail is n * 4 / 3. MASK + 1 is 24 or 48 here, neither a power of
// two, which is why SDANY takes the remainder with % instead of a bit mask.
// NEON uses MASK 47 when __aarch64__ is defined and 23 otherwise.
231 | | #ifdef HAS_SCALEROWDOWN34_SSSE3 |
232 | | SDANY(ScaleRowDown34_Any_SSSE3, |
233 | | ScaleRowDown34_SSSE3, |
234 | | ScaleRowDown34_C, |
235 | | 4 / 3, |
236 | | 1, |
237 | | 23) |
238 | | SDANY(ScaleRowDown34_0_Box_Any_SSSE3, |
239 | | ScaleRowDown34_0_Box_SSSE3, |
240 | | ScaleRowDown34_0_Box_C, |
241 | | 4 / 3, |
242 | | 1, |
243 | | 23) |
244 | | SDANY(ScaleRowDown34_1_Box_Any_SSSE3, |
245 | | ScaleRowDown34_1_Box_SSSE3, |
246 | | ScaleRowDown34_1_Box_C, |
247 | | 4 / 3, |
248 | | 1, |
249 | | 23) |
250 | | #endif |
251 | | #ifdef HAS_SCALEROWDOWN34_NEON |
252 | | #ifdef __aarch64__ |
253 | | SDANY(ScaleRowDown34_Any_NEON, |
254 | | ScaleRowDown34_NEON, |
255 | | ScaleRowDown34_C, |
256 | | 4 / 3, |
257 | | 1, |
258 | | 47) |
259 | | SDANY(ScaleRowDown34_0_Box_Any_NEON, |
260 | | ScaleRowDown34_0_Box_NEON, |
261 | | ScaleRowDown34_0_Box_C, |
262 | | 4 / 3, |
263 | | 1, |
264 | | 47) |
265 | | SDANY(ScaleRowDown34_1_Box_Any_NEON, |
266 | | ScaleRowDown34_1_Box_NEON, |
267 | | ScaleRowDown34_1_Box_C, |
268 | | 4 / 3, |
269 | | 1, |
270 | | 47) |
271 | | #else |
272 | | SDANY(ScaleRowDown34_Any_NEON, |
273 | | ScaleRowDown34_NEON, |
274 | | ScaleRowDown34_C, |
275 | | 4 / 3, |
276 | | 1, |
277 | | 23) |
278 | | SDANY(ScaleRowDown34_0_Box_Any_NEON, |
279 | | ScaleRowDown34_0_Box_NEON, |
280 | | ScaleRowDown34_0_Box_C, |
281 | | 4 / 3, |
282 | | 1, |
283 | | 23) |
284 | | SDANY(ScaleRowDown34_1_Box_Any_NEON, |
285 | | ScaleRowDown34_1_Box_NEON, |
286 | | ScaleRowDown34_1_Box_C, |
287 | | 4 / 3, |
288 | | 1, |
289 | | 23) |
290 | | #endif |
291 | | #endif |
292 | | #ifdef HAS_SCALEROWDOWN34_MSA |
293 | | SDANY(ScaleRowDown34_Any_MSA, |
294 | | ScaleRowDown34_MSA, |
295 | | ScaleRowDown34_C, |
296 | | 4 / 3, |
297 | | 1, |
298 | | 47) |
299 | | SDANY(ScaleRowDown34_0_Box_Any_MSA, |
300 | | ScaleRowDown34_0_Box_MSA, |
301 | | ScaleRowDown34_0_Box_C, |
302 | | 4 / 3, |
303 | | 1, |
304 | | 47) |
305 | | SDANY(ScaleRowDown34_1_Box_Any_MSA, |
306 | | ScaleRowDown34_1_Box_MSA, |
307 | | ScaleRowDown34_1_Box_C, |
308 | | 4 / 3, |
309 | | 1, |
310 | | 47) |
311 | | #endif |
312 | | #ifdef HAS_SCALEROWDOWN34_LSX |
313 | | SDANY(ScaleRowDown34_Any_LSX, |
314 | | ScaleRowDown34_LSX, |
315 | | ScaleRowDown34_C, |
316 | | 4 / 3, |
317 | | 1, |
318 | | 47) |
319 | | SDANY(ScaleRowDown34_0_Box_Any_LSX, |
320 | | ScaleRowDown34_0_Box_LSX, |
321 | | ScaleRowDown34_0_Box_C, |
322 | | 4 / 3, |
323 | | 1, |
324 | | 47) |
325 | | SDANY(ScaleRowDown34_1_Box_Any_LSX, |
326 | | ScaleRowDown34_1_Box_LSX, |
327 | | ScaleRowDown34_1_Box_C, |
328 | | 4 / 3, |
329 | | 1, |
330 | | 47) |
331 | | #endif |
// 3/8 scale-down wrappers built from SDANY (BPP 1). FACTOR is written as
// 8 / 3, giving a C tail source offset of n * 8 / 3. MASK is 11
// (MASK + 1 = 12) everywhere except the two SSSE3 Box wrappers, which pass 5
// (MASK + 1 = 6).
332 | | #ifdef HAS_SCALEROWDOWN38_SSSE3 |
333 | | SDANY(ScaleRowDown38_Any_SSSE3, |
334 | | ScaleRowDown38_SSSE3, |
335 | | ScaleRowDown38_C, |
336 | | 8 / 3, |
337 | | 1, |
338 | | 11) |
339 | | SDANY(ScaleRowDown38_3_Box_Any_SSSE3, |
340 | | ScaleRowDown38_3_Box_SSSE3, |
341 | | ScaleRowDown38_3_Box_C, |
342 | | 8 / 3, |
343 | | 1, |
344 | | 5) |
345 | | SDANY(ScaleRowDown38_2_Box_Any_SSSE3, |
346 | | ScaleRowDown38_2_Box_SSSE3, |
347 | | ScaleRowDown38_2_Box_C, |
348 | | 8 / 3, |
349 | | 1, |
350 | | 5) |
351 | | #endif |
352 | | #ifdef HAS_SCALEROWDOWN38_NEON |
353 | | SDANY(ScaleRowDown38_Any_NEON, |
354 | | ScaleRowDown38_NEON, |
355 | | ScaleRowDown38_C, |
356 | | 8 / 3, |
357 | | 1, |
358 | | 11) |
359 | | SDANY(ScaleRowDown38_3_Box_Any_NEON, |
360 | | ScaleRowDown38_3_Box_NEON, |
361 | | ScaleRowDown38_3_Box_C, |
362 | | 8 / 3, |
363 | | 1, |
364 | | 11) |
365 | | SDANY(ScaleRowDown38_2_Box_Any_NEON, |
366 | | ScaleRowDown38_2_Box_NEON, |
367 | | ScaleRowDown38_2_Box_C, |
368 | | 8 / 3, |
369 | | 1, |
370 | | 11) |
371 | | #endif |
372 | | #ifdef HAS_SCALEROWDOWN38_MSA |
373 | | SDANY(ScaleRowDown38_Any_MSA, |
374 | | ScaleRowDown38_MSA, |
375 | | ScaleRowDown38_C, |
376 | | 8 / 3, |
377 | | 1, |
378 | | 11) |
379 | | SDANY(ScaleRowDown38_3_Box_Any_MSA, |
380 | | ScaleRowDown38_3_Box_MSA, |
381 | | ScaleRowDown38_3_Box_C, |
382 | | 8 / 3, |
383 | | 1, |
384 | | 11) |
385 | | SDANY(ScaleRowDown38_2_Box_Any_MSA, |
386 | | ScaleRowDown38_2_Box_MSA, |
387 | | ScaleRowDown38_2_Box_C, |
388 | | 8 / 3, |
389 | | 1, |
390 | | 11) |
391 | | #endif |
392 | | #ifdef HAS_SCALEROWDOWN38_LSX |
393 | | SDANY(ScaleRowDown38_Any_LSX, |
394 | | ScaleRowDown38_LSX, |
395 | | ScaleRowDown38_C, |
396 | | 8 / 3, |
397 | | 1, |
398 | | 11) |
399 | | SDANY(ScaleRowDown38_3_Box_Any_LSX, |
400 | | ScaleRowDown38_3_Box_LSX, |
401 | | ScaleRowDown38_3_Box_C, |
402 | | 8 / 3, |
403 | | 1, |
404 | | 11) |
405 | | SDANY(ScaleRowDown38_2_Box_Any_LSX, |
406 | | ScaleRowDown38_2_Box_LSX, |
407 | | ScaleRowDown38_2_Box_C, |
408 | | 8 / 3, |
409 | | 1, |
410 | | 11) |
411 | | #endif |
412 | | |
// 1/2 scale-down wrappers for ARGB rows built from SDANY (FACTOR 2, BPP 4).
// MASK is 3 for SSE2, MSA and LSX and 7 for NEON. SDANY is undefined after
// the last group.
// NOTE(review): no matching undef for SDODD is visible in this part of the
// file - confirm whether that is intentional.
413 | | #ifdef HAS_SCALEARGBROWDOWN2_SSE2 |
414 | | SDANY(ScaleARGBRowDown2_Any_SSE2, |
415 | | ScaleARGBRowDown2_SSE2, |
416 | | ScaleARGBRowDown2_C, |
417 | | 2, |
418 | | 4, |
419 | | 3) |
420 | | SDANY(ScaleARGBRowDown2Linear_Any_SSE2, |
421 | | ScaleARGBRowDown2Linear_SSE2, |
422 | | ScaleARGBRowDown2Linear_C, |
423 | | 2, |
424 | | 4, |
425 | | 3) |
426 | | SDANY(ScaleARGBRowDown2Box_Any_SSE2, |
427 | | ScaleARGBRowDown2Box_SSE2, |
428 | | ScaleARGBRowDown2Box_C, |
429 | | 2, |
430 | | 4, |
431 | | 3) |
432 | | #endif |
433 | | #ifdef HAS_SCALEARGBROWDOWN2_NEON |
434 | | SDANY(ScaleARGBRowDown2_Any_NEON, |
435 | | ScaleARGBRowDown2_NEON, |
436 | | ScaleARGBRowDown2_C, |
437 | | 2, |
438 | | 4, |
439 | | 7) |
440 | | SDANY(ScaleARGBRowDown2Linear_Any_NEON, |
441 | | ScaleARGBRowDown2Linear_NEON, |
442 | | ScaleARGBRowDown2Linear_C, |
443 | | 2, |
444 | | 4, |
445 | | 7) |
446 | | SDANY(ScaleARGBRowDown2Box_Any_NEON, |
447 | | ScaleARGBRowDown2Box_NEON, |
448 | | ScaleARGBRowDown2Box_C, |
449 | | 2, |
450 | | 4, |
451 | | 7) |
452 | | #endif |
453 | | #ifdef HAS_SCALEARGBROWDOWN2_MSA |
454 | | SDANY(ScaleARGBRowDown2_Any_MSA, |
455 | | ScaleARGBRowDown2_MSA, |
456 | | ScaleARGBRowDown2_C, |
457 | | 2, |
458 | | 4, |
459 | | 3) |
460 | | SDANY(ScaleARGBRowDown2Linear_Any_MSA, |
461 | | ScaleARGBRowDown2Linear_MSA, |
462 | | ScaleARGBRowDown2Linear_C, |
463 | | 2, |
464 | | 4, |
465 | | 3) |
466 | | SDANY(ScaleARGBRowDown2Box_Any_MSA, |
467 | | ScaleARGBRowDown2Box_MSA, |
468 | | ScaleARGBRowDown2Box_C, |
469 | | 2, |
470 | | 4, |
471 | | 3) |
472 | | #endif |
473 | | #ifdef HAS_SCALEARGBROWDOWN2_LSX |
474 | | SDANY(ScaleARGBRowDown2_Any_LSX, |
475 | | ScaleARGBRowDown2_LSX, |
476 | | ScaleARGBRowDown2_C, |
477 | | 2, |
478 | | 4, |
479 | | 3) |
480 | | SDANY(ScaleARGBRowDown2Linear_Any_LSX, |
481 | | ScaleARGBRowDown2Linear_LSX, |
482 | | ScaleARGBRowDown2Linear_C, |
483 | | 2, |
484 | | 4, |
485 | | 3) |
486 | | SDANY(ScaleARGBRowDown2Box_Any_LSX, |
487 | | ScaleARGBRowDown2Box_LSX, |
488 | | ScaleARGBRowDown2Box_C, |
489 | | 2, |
490 | | 4, |
491 | | 3) |
492 | | #endif |
493 | | #undef SDANY |
494 | | |
495 | | // Scale down by even scale factor. |
// SDAANY defines
//   NAMEANY(src_ptr, src_stride, src_stepx, dst_ptr, dst_width)
// where the horizontal step src_stepx is a runtime argument instead of a
// compile-time FACTOR. The split uses bit masks:
//   r = dst_width & MASK    pixels left over for SCALEROWDOWN_C
//   n = dst_width & ~MASK   pixels handed to SCALEROWDOWN_SIMD
// so it is only a correct remainder when MASK + 1 is a power of two.
// The C routine is always called; its source pointer is advanced by
// (n * src_stepx) * BPP bytes and its destination pointer by n * BPP bytes.
496 | | #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ |
497 | | void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ |
498 | 0 | uint8_t* dst_ptr, int dst_width) { \ |
499 | 0 | int r = dst_width & MASK; \ |
500 | 0 | int n = dst_width & ~MASK; \ |
501 | 0 | if (n > 0) { \ |
502 | 0 | SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ |
503 | 0 | } \ |
504 | 0 | SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ |
505 | 0 | dst_ptr + n * BPP, r); \ |
506 | 0 | } Unexecuted instantiation: ScaleARGBRowDownEven_Any_SSE2 Unexecuted instantiation: ScaleARGBRowDownEvenBox_Any_SSE2 |
507 | | |
// Even-step scale-down wrappers built from SDAANY. Arguments are
// (wrapper name, SIMD routine, C routine, BPP, MASK). BPP is 4 for the ARGB
// rows and 2 for the UV row; MASK is 3 in every case.
// NOTE(review): no undef for SDAANY is visible in this part of the file.
508 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 |
509 | | SDAANY(ScaleARGBRowDownEven_Any_SSE2, |
510 | | ScaleARGBRowDownEven_SSE2, |
511 | | ScaleARGBRowDownEven_C, |
512 | | 4, |
513 | | 3) |
514 | | SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, |
515 | | ScaleARGBRowDownEvenBox_SSE2, |
516 | | ScaleARGBRowDownEvenBox_C, |
517 | | 4, |
518 | | 3) |
519 | | #endif |
520 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON |
521 | | SDAANY(ScaleARGBRowDownEven_Any_NEON, |
522 | | ScaleARGBRowDownEven_NEON, |
523 | | ScaleARGBRowDownEven_C, |
524 | | 4, |
525 | | 3) |
526 | | SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, |
527 | | ScaleARGBRowDownEvenBox_NEON, |
528 | | ScaleARGBRowDownEvenBox_C, |
529 | | 4, |
530 | | 3) |
531 | | #endif |
532 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA |
533 | | SDAANY(ScaleARGBRowDownEven_Any_MSA, |
534 | | ScaleARGBRowDownEven_MSA, |
535 | | ScaleARGBRowDownEven_C, |
536 | | 4, |
537 | | 3) |
538 | | SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, |
539 | | ScaleARGBRowDownEvenBox_MSA, |
540 | | ScaleARGBRowDownEvenBox_C, |
541 | | 4, |
542 | | 3) |
543 | | #endif |
544 | | #ifdef HAS_SCALEARGBROWDOWNEVEN_LSX |
545 | | SDAANY(ScaleARGBRowDownEven_Any_LSX, |
546 | | ScaleARGBRowDownEven_LSX, |
547 | | ScaleARGBRowDownEven_C, |
548 | | 4, |
549 | | 3) |
550 | | SDAANY(ScaleARGBRowDownEvenBox_Any_LSX, |
551 | | ScaleARGBRowDownEvenBox_LSX, |
552 | | ScaleARGBRowDownEvenBox_C, |
553 | | 4, |
554 | | 3) |
555 | | #endif |
556 | | #ifdef HAS_SCALEUVROWDOWNEVEN_NEON |
557 | | SDAANY(ScaleUVRowDownEven_Any_NEON, |
558 | | ScaleUVRowDownEven_NEON, |
559 | | ScaleUVRowDownEven_C, |
560 | | 2, |
561 | | 3) |
562 | | #endif |
563 | | |
564 | | #ifdef SASIMDONLY |
565 | | // This also works and uses memcpy and SIMD instead of C, but is slower on ARM |
566 | | |
// Add rows box filter scale down, SIMD-only variant (this code sits in the
// SASIMDONLY branch). Using macro from row_any.
//
// SAROW defines
//   void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width)
// The first n = width & ~MASK pixels go straight to ANY_SIMD. The remaining
// r = width & MASK pixels also go through ANY_SIMD, via aligned scratch
// buffers so the kernel can always be asked for MASK + 1 pixels:
//   1. copy the r leftover source bytes and the r current destination values
//      into src_temp / dst_temp,
//   2. run ANY_SIMD on the scratch buffers,
//   3. copy the r resulting values back to the destination.
// SBPP and BPP are the byte sizes of one source and one destination element
// and are used for byte counts. The scratch buffers hold 32 elements, so
// MASK must not exceed 31.
//
// dst_ptr is a uint16_t*, so the tail begins at element offset n
// (dst_ptr + n). Scaling that offset by BPP would address element 2 * n.
// The scratch pass is skipped when r is 0 because it would copy nothing back.
#define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                      \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \
    int r = width & MASK;                                              \
    int n = width & ~MASK;                                             \
    if (n > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_ptr, n);                                   \
    }                                                                  \
    if (r > 0) {                                                       \
      SIMD_ALIGNED(uint16_t dst_temp[32]);                             \
      SIMD_ALIGNED(uint8_t src_temp[32]);                              \
      memset(dst_temp, 0, 32 * 2); /* for msan */                      \
      memcpy(src_temp, src_ptr + n * SBPP, r * SBPP);                  \
      memcpy(dst_temp, dst_ptr + n, r * BPP);                          \
      ANY_SIMD(src_temp, dst_temp, MASK + 1);                          \
      memcpy(dst_ptr + n, dst_temp, r * BPP);                          \
    }                                                                  \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
#endif
#ifdef HAS_SCALEADDROW_NEON
SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_MSA
SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_LSX
SAROW(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, 1, 2, 15)
#endif
// Undefine the macro this section actually defined.
#undef SAROW
600 | | |
601 | | #else |
602 | | |
603 | | // Add rows box filter scale down. |
// SAANY defines NAMEANY(src_ptr, dst_ptr, src_width). The first
// n = src_width & ~MASK pixels go to SCALEADDROW_SIMD (skipped when n is 0);
// the remaining src_width & MASK pixels go to SCALEADDROW_C, which is always
// called. Both pointers are advanced by n elements for the C call: n bytes
// of uint8_t source and n uint16_t destination values.
604 | | #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ |
605 | 756k | void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ |
606 | 756k | int n = src_width & ~MASK; \ |
607 | 756k | if (n > 0) { \ |
608 | 504k | SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ |
609 | 504k | } \ |
610 | 756k | SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ |
611 | 756k | } Unexecuted instantiation: ScaleAddRow_Any_SSE2 Line | Count | Source | 605 | 756k | void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ | 606 | 756k | int n = src_width & ~MASK; \ | 607 | 756k | if (n > 0) { \ | 608 | 504k | SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ | 609 | 504k | } \ | 610 | 756k | SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ | 611 | 756k | } |
|
612 | | |
// ScaleAddRow wrappers built from SAANY, all falling back to ScaleAddRow_C.
// MASK is 31 for AVX2 and 15 for SSE2, NEON, MSA and LSX. SAANY is undefined
// once the wrappers have been generated.
613 | | #ifdef HAS_SCALEADDROW_SSE2 |
614 | | SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) |
615 | | #endif |
616 | | #ifdef HAS_SCALEADDROW_AVX2 |
617 | | SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) |
618 | | #endif |
619 | | #ifdef HAS_SCALEADDROW_NEON |
620 | | SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) |
621 | | #endif |
622 | | #ifdef HAS_SCALEADDROW_MSA |
623 | | SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) |
624 | | #endif |
625 | | #ifdef HAS_SCALEADDROW_LSX |
626 | | SAANY(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, ScaleAddRow_C, 15) |
627 | | #endif |
628 | | #undef SAANY |
629 | | |
630 | | #endif // SASIMDONLY |
631 | | |
632 | | // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols |
// CANY defines NAMEANY(dst_ptr, src_ptr, dst_width, x, dx). The first
// n = dst_width & ~MASK output pixels go to TERP_SIMD with x and dx passed
// through unchanged (skipped when n is 0). TERP_C is always called for the
// remaining r = dst_width & MASK pixels: its destination starts n * BPP
// bytes in, its source pointer is not advanced, and its start position is
// x + n * dx, i.e. x after n steps of dx.
// NOTE(review): x and dx are presumably fixed-point source coordinates -
// confirm against the column routines.
633 | | #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ |
634 | | void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ |
635 | | int dx) { \ |
636 | | int r = dst_width & MASK; \ |
637 | | int n = dst_width & ~MASK; \ |
638 | | if (n > 0) { \ |
639 | | TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ |
640 | | } \ |
641 | | TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ |
642 | | } |
643 | | |
// Column-scaling wrappers built from CANY. Arguments are
// (wrapper name, SIMD routine, C routine, BPP, MASK). BPP is 1 for
// ScaleFilterCols and 4 for the two ARGB routines. Only NEON, MSA and LSX
// groups appear here. CANY is undefined after the last group.
644 | | #ifdef HAS_SCALEFILTERCOLS_NEON |
645 | | CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) |
646 | | #endif |
647 | | #ifdef HAS_SCALEFILTERCOLS_MSA |
648 | | CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) |
649 | | #endif |
650 | | #ifdef HAS_SCALEFILTERCOLS_LSX |
651 | | CANY(ScaleFilterCols_Any_LSX, ScaleFilterCols_LSX, ScaleFilterCols_C, 1, 15) |
652 | | #endif |
653 | | #ifdef HAS_SCALEARGBCOLS_NEON |
654 | | CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) |
655 | | #endif |
656 | | #ifdef HAS_SCALEARGBCOLS_MSA |
657 | | CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) |
658 | | #endif |
659 | | #ifdef HAS_SCALEARGBCOLS_LSX |
660 | | CANY(ScaleARGBCols_Any_LSX, ScaleARGBCols_LSX, ScaleARGBCols_C, 4, 3) |
661 | | #endif |
662 | | #ifdef HAS_SCALEARGBFILTERCOLS_NEON |
663 | | CANY(ScaleARGBFilterCols_Any_NEON, |
664 | | ScaleARGBFilterCols_NEON, |
665 | | ScaleARGBFilterCols_C, |
666 | | 4, |
667 | | 3) |
668 | | #endif |
669 | | #ifdef HAS_SCALEARGBFILTERCOLS_MSA |
670 | | CANY(ScaleARGBFilterCols_Any_MSA, |
671 | | ScaleARGBFilterCols_MSA, |
672 | | ScaleARGBFilterCols_C, |
673 | | 4, |
674 | | 7) |
675 | | #endif |
676 | | #ifdef HAS_SCALEARGBFILTERCOLS_LSX |
677 | | CANY(ScaleARGBFilterCols_Any_LSX, |
678 | | ScaleARGBFilterCols_LSX, |
679 | | ScaleARGBFilterCols_C, |
680 | | 4, |
681 | | 7) |
682 | | #endif |
683 | | #undef CANY |
684 | | |
685 | | // Scale up horizontally 2 times using linear filter. |
// SUH2LANY defines NAME(src_ptr, dst_ptr, dst_width) over pixels of type
// PTYPE. The first output pixel is a copy of src_ptr[0] and the last output
// pixel is a copy of src_ptr[(dst_width - 1) / 2]; both are written
// directly by the wrapper. Starting at output index 1,
// work_width = (dst_width - 1) & ~1 pixels are produced by the routines:
//   n = work_width & ~MASK  by SIMD, writing from dst_ptr + 1
//   r = work_width & MASK   by C, reading from src_ptr + n / 2 and writing
//                           from dst_ptr + n + 1
// Nothing is delegated when work_width is 0. When work_width is positive
// the C routine is called even if r is 0.
// NOTE(review): dst_width - 1 is used as an index, so dst_width is
// presumably always >= 1 - confirm against callers.
686 | | #define SUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ |
687 | 347k | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ |
688 | 347k | int work_width = (dst_width - 1) & ~1; \ |
689 | 347k | int r = work_width & MASK; \ |
690 | 347k | int n = work_width & ~MASK; \ |
691 | 347k | dst_ptr[0] = src_ptr[0]; \ |
692 | 347k | if (work_width > 0) { \ |
693 | 335k | if (n != 0) { \ |
694 | 89.9k | SIMD(src_ptr, dst_ptr + 1, n); \ |
695 | 89.9k | } \ |
696 | 335k | C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ |
697 | 335k | } \ |
698 | 347k | dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ |
699 | 347k | } Unexecuted instantiation: ScaleRowUp2_Linear_Any_C Unexecuted instantiation: ScaleRowUp2_Linear_16_Any_C Unexecuted instantiation: ScaleRowUp2_Linear_Any_SSE2 Unexecuted instantiation: ScaleRowUp2_Linear_Any_SSSE3 Unexecuted instantiation: ScaleRowUp2_Linear_12_Any_SSSE3 Unexecuted instantiation: ScaleRowUp2_Linear_16_Any_SSE2 ScaleRowUp2_Linear_Any_AVX2 Line | Count | Source | 687 | 137k | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ | 688 | 137k | int work_width = (dst_width - 1) & ~1; \ | 689 | 137k | int r = work_width & MASK; \ | 690 | 137k | int n = work_width & ~MASK; \ | 691 | 137k | dst_ptr[0] = src_ptr[0]; \ | 692 | 137k | if (work_width > 0) { \ | 693 | 130k | if (n != 0) { \ | 694 | 13.9k | SIMD(src_ptr, dst_ptr + 1, n); \ | 695 | 13.9k | } \ | 696 | 130k | C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ | 697 | 130k | } \ | 698 | 137k | dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ | 699 | 137k | } |
ScaleRowUp2_Linear_12_Any_AVX2 Line | Count | Source | 687 | 210k | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ | 688 | 210k | int work_width = (dst_width - 1) & ~1; \ | 689 | 210k | int r = work_width & MASK; \ | 690 | 210k | int n = work_width & ~MASK; \ | 691 | 210k | dst_ptr[0] = src_ptr[0]; \ | 692 | 210k | if (work_width > 0) { \ | 693 | 205k | if (n != 0) { \ | 694 | 76.0k | SIMD(src_ptr, dst_ptr + 1, n); \ | 695 | 76.0k | } \ | 696 | 205k | C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ | 697 | 205k | } \ | 698 | 210k | dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ | 699 | 210k | } |
Unexecuted instantiation: ScaleRowUp2_Linear_16_Any_AVX2 |
700 | | |
701 | | // Even the C versions need to be wrapped, because boundary pixels have to |
702 | | // be handled differently |
// Wrappers built from SUH2LANY. Arguments are
// (wrapper name, SIMD routine, C routine, MASK, pixel type).
// The two _Any_C wrappers pass the C routine in both slots with MASK 0, so
// n equals work_width and the second call receives a width of 0.
// The 12-bit and 16-bit SIMD routines all fall back to
// ScaleRowUp2_Linear_16_C. NEON for uint8_t uses MASK 31 when __aarch64__
// is defined and 15 otherwise. SUH2LANY is undefined after the last group.
703 | | |
704 | | SUH2LANY(ScaleRowUp2_Linear_Any_C, |
705 | | ScaleRowUp2_Linear_C, |
706 | | ScaleRowUp2_Linear_C, |
707 | | 0, |
708 | | uint8_t) |
709 | | |
710 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_C, |
711 | | ScaleRowUp2_Linear_16_C, |
712 | | ScaleRowUp2_Linear_16_C, |
713 | | 0, |
714 | | uint16_t) |
715 | | |
716 | | #ifdef HAS_SCALEROWUP2_LINEAR_SSE2 |
717 | | SUH2LANY(ScaleRowUp2_Linear_Any_SSE2, |
718 | | ScaleRowUp2_Linear_SSE2, |
719 | | ScaleRowUp2_Linear_C, |
720 | | 15, |
721 | | uint8_t) |
722 | | #endif |
723 | | |
724 | | #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3 |
725 | | SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3, |
726 | | ScaleRowUp2_Linear_SSSE3, |
727 | | ScaleRowUp2_Linear_C, |
728 | | 15, |
729 | | uint8_t) |
730 | | #endif |
731 | | |
732 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3 |
733 | | SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3, |
734 | | ScaleRowUp2_Linear_12_SSSE3, |
735 | | ScaleRowUp2_Linear_16_C, |
736 | | 15, |
737 | | uint16_t) |
738 | | #endif |
739 | | |
740 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2 |
741 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2, |
742 | | ScaleRowUp2_Linear_16_SSE2, |
743 | | ScaleRowUp2_Linear_16_C, |
744 | | 7, |
745 | | uint16_t) |
746 | | #endif |
747 | | |
748 | | #ifdef HAS_SCALEROWUP2_LINEAR_AVX2 |
749 | | SUH2LANY(ScaleRowUp2_Linear_Any_AVX2, |
750 | | ScaleRowUp2_Linear_AVX2, |
751 | | ScaleRowUp2_Linear_C, |
752 | | 31, |
753 | | uint8_t) |
754 | | #endif |
755 | | |
756 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2 |
757 | | SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2, |
758 | | ScaleRowUp2_Linear_12_AVX2, |
759 | | ScaleRowUp2_Linear_16_C, |
760 | | 31, |
761 | | uint16_t) |
762 | | #endif |
763 | | |
764 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2 |
765 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2, |
766 | | ScaleRowUp2_Linear_16_AVX2, |
767 | | ScaleRowUp2_Linear_16_C, |
768 | | 15, |
769 | | uint16_t) |
770 | | #endif |
771 | | |
772 | | #ifdef HAS_SCALEROWUP2_LINEAR_NEON |
773 | | #ifdef __aarch64__ |
774 | | SUH2LANY(ScaleRowUp2_Linear_Any_NEON, |
775 | | ScaleRowUp2_Linear_NEON, |
776 | | ScaleRowUp2_Linear_C, |
777 | | 31, |
778 | | uint8_t) |
779 | | #else |
780 | | SUH2LANY(ScaleRowUp2_Linear_Any_NEON, |
781 | | ScaleRowUp2_Linear_NEON, |
782 | | ScaleRowUp2_Linear_C, |
783 | | 15, |
784 | | uint8_t) |
785 | | #endif |
786 | | #endif |
787 | | |
788 | | #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON |
789 | | SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON, |
790 | | ScaleRowUp2_Linear_12_NEON, |
791 | | ScaleRowUp2_Linear_16_C, |
792 | | 15, |
793 | | uint16_t) |
794 | | #endif |
795 | | |
796 | | #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON |
797 | | SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON, |
798 | | ScaleRowUp2_Linear_16_NEON, |
799 | | ScaleRowUp2_Linear_16_C, |
800 | | 15, |
801 | | uint16_t) |
802 | | #endif |
803 | | |
804 | | #undef SUH2LANY |
805 | | |
806 | | // Scale up 2 times using bilinear filter. |
807 | | // This function produces 2 rows at a time. |
// SU2BLANY defines
//   NAME(src_ptr, src_stride, dst_ptr, dst_stride, dst_width)
// over pixels of type PTYPE. sa is the source row and sb the row src_stride
// elements after it; da is the destination row and db the row dst_stride
// elements after it. Strides are added to PTYPE pointers, so they count
// elements rather than bytes.
// Output column 0 and output column dst_width - 1 of both rows are written
// by the wrapper itself as 3:1 (row da) and 1:3 (row db) blends of sa and sb
// with rounding (+ 2, then >> 2). The last column reads source column
// (dst_width - 1) / 2.
// Starting at output column 1, work_width = (dst_width - 1) & ~1 columns are
// delegated: n = work_width & ~MASK to SIMD and r = work_width & MASK to C.
// Both routines receive the row distances as sb - sa and db - da. The C call
// reads from sa + n / 2 and writes from da + n + 1, and is made whenever
// work_width is positive, even if r is 0.
// NOTE(review): dst_width - 1 is used as an index, so dst_width is
// presumably always >= 1 - confirm against callers.
808 | | #define SU2BLANY(NAME, SIMD, C, MASK, PTYPE) \ |
809 | | void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ |
810 | 45.7k | ptrdiff_t dst_stride, int dst_width) { \ |
811 | 45.7k | int work_width = (dst_width - 1) & ~1; \ |
812 | 45.7k | int r = work_width & MASK; \ |
813 | 45.7k | int n = work_width & ~MASK; \ |
814 | 45.7k | const PTYPE* sa = src_ptr; \ |
815 | 45.7k | const PTYPE* sb = src_ptr + src_stride; \ |
816 | 45.7k | PTYPE* da = dst_ptr; \ |
817 | 45.7k | PTYPE* db = dst_ptr + dst_stride; \ |
818 | 45.7k | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ |
819 | 45.7k | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ |
820 | 45.7k | if (work_width > 0) { \ |
821 | 36.6k | if (n != 0) { \ |
822 | 33.5k | SIMD(sa, sb - sa, da + 1, db - da, n); \ |
823 | 33.5k | } \ |
824 | 36.6k | C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ |
825 | 36.6k | } \ |
826 | 45.7k | da[dst_width - 1] = \ |
827 | 45.7k | (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ |
828 | 45.7k | db[dst_width - 1] = \ |
829 | 45.7k | (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ |
830 | 45.7k | } Unexecuted instantiation: ScaleRowUp2_Bilinear_Any_C Unexecuted instantiation: ScaleRowUp2_Bilinear_16_Any_C Unexecuted instantiation: ScaleRowUp2_Bilinear_Any_SSE2 Unexecuted instantiation: ScaleRowUp2_Bilinear_12_Any_SSSE3 Unexecuted instantiation: ScaleRowUp2_Bilinear_16_Any_SSE2 Unexecuted instantiation: ScaleRowUp2_Bilinear_Any_SSSE3 ScaleRowUp2_Bilinear_Any_AVX2 Line | Count | Source | 810 | 26.5k | ptrdiff_t dst_stride, int dst_width) { \ | 811 | 26.5k | int work_width = (dst_width - 1) & ~1; \ | 812 | 26.5k | int r = work_width & MASK; \ | 813 | 26.5k | int n = work_width & ~MASK; \ | 814 | 26.5k | const PTYPE* sa = src_ptr; \ | 815 | 26.5k | const PTYPE* sb = src_ptr + src_stride; \ | 816 | 26.5k | PTYPE* da = dst_ptr; \ | 817 | 26.5k | PTYPE* db = dst_ptr + dst_stride; \ | 818 | 26.5k | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ | 819 | 26.5k | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ | 820 | 26.5k | if (work_width > 0) { \ | 821 | 19.2k | if (n != 0) { \ | 822 | 16.4k | SIMD(sa, sb - sa, da + 1, db - da, n); \ | 823 | 16.4k | } \ | 824 | 19.2k | C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ | 825 | 19.2k | } \ | 826 | 26.5k | da[dst_width - 1] = \ | 827 | 26.5k | (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ | 828 | 26.5k | db[dst_width - 1] = \ | 829 | 26.5k | (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ | 830 | 26.5k | } |
ScaleRowUp2_Bilinear_12_Any_AVX2 Line | Count | Source | 810 | 19.2k | ptrdiff_t dst_stride, int dst_width) { \ | 811 | 19.2k | int work_width = (dst_width - 1) & ~1; \ | 812 | 19.2k | int r = work_width & MASK; \ | 813 | 19.2k | int n = work_width & ~MASK; \ | 814 | 19.2k | const PTYPE* sa = src_ptr; \ | 815 | 19.2k | const PTYPE* sb = src_ptr + src_stride; \ | 816 | 19.2k | PTYPE* da = dst_ptr; \ | 817 | 19.2k | PTYPE* db = dst_ptr + dst_stride; \ | 818 | 19.2k | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ | 819 | 19.2k | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ | 820 | 19.2k | if (work_width > 0) { \ | 821 | 17.4k | if (n != 0) { \ | 822 | 17.1k | SIMD(sa, sb - sa, da + 1, db - da, n); \ | 823 | 17.1k | } \ | 824 | 17.4k | C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ | 825 | 17.4k | } \ | 826 | 19.2k | da[dst_width - 1] = \ | 827 | 19.2k | (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ | 828 | 19.2k | db[dst_width - 1] = \ | 829 | 19.2k | (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ | 830 | 19.2k | } |
Unexecuted instantiation: ScaleRowUp2_Bilinear_16_Any_AVX2 |
831 | | |
832 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_C, |
833 | | ScaleRowUp2_Bilinear_C, |
834 | | ScaleRowUp2_Bilinear_C, |
835 | | 0, /* MASK 0: r = work_width & 0 is always 0, so the first row function gets the whole work_width */ |
836 | | uint8_t) |
837 | | |
838 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C, |
839 | | ScaleRowUp2_Bilinear_16_C, |
840 | | ScaleRowUp2_Bilinear_16_C, |
841 | | 0, /* MASK 0: no remainder split, same as the uint8_t C variant */ |
842 | | uint16_t) |
843 | | |
844 | | #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2 |
845 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2, |
846 | | ScaleRowUp2_Bilinear_SSE2, |
847 | | ScaleRowUp2_Bilinear_C, |
848 | | 15, /* MASK 15: SIMD gets n = work_width & ~15, the C function gets r = work_width & 15 */ |
849 | | uint8_t) |
850 | | #endif |
851 | | |
852 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3 |
853 | | SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3, |
854 | | ScaleRowUp2_Bilinear_12_SSSE3, |
855 | | ScaleRowUp2_Bilinear_16_C, /* the 12-bit SIMD variant uses the 16-bit C function for the remainder */ |
856 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
857 | | uint16_t) |
858 | | #endif |
859 | | |
860 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2 |
861 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2, |
862 | | ScaleRowUp2_Bilinear_16_SSE2, |
863 | | ScaleRowUp2_Bilinear_16_C, |
864 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
865 | | uint16_t) |
866 | | #endif |
867 | | |
868 | | #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3 |
869 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3, |
870 | | ScaleRowUp2_Bilinear_SSSE3, |
871 | | ScaleRowUp2_Bilinear_C, |
872 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
873 | | uint8_t) |
874 | | #endif |
875 | | |
876 | | #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2 |
877 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2, |
878 | | ScaleRowUp2_Bilinear_AVX2, |
879 | | ScaleRowUp2_Bilinear_C, |
880 | | 31, /* MASK 31: SIMD width n is a multiple of 32 */ |
881 | | uint8_t) |
882 | | #endif |
883 | | |
884 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2 |
885 | | SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2, |
886 | | ScaleRowUp2_Bilinear_12_AVX2, |
887 | | ScaleRowUp2_Bilinear_16_C, /* the 12-bit SIMD variant uses the 16-bit C function for the remainder */ |
888 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
889 | | uint16_t) |
890 | | #endif |
891 | | |
892 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2 |
893 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2, |
894 | | ScaleRowUp2_Bilinear_16_AVX2, |
895 | | ScaleRowUp2_Bilinear_16_C, |
896 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
897 | | uint16_t) |
898 | | #endif |
899 | | |
900 | | #ifdef HAS_SCALEROWUP2_BILINEAR_NEON |
901 | | SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON, |
902 | | ScaleRowUp2_Bilinear_NEON, |
903 | | ScaleRowUp2_Bilinear_C, |
904 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
905 | | uint8_t) |
906 | | #endif |
907 | | |
908 | | #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON |
909 | | SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON, |
910 | | ScaleRowUp2_Bilinear_12_NEON, |
911 | | ScaleRowUp2_Bilinear_16_C, /* the 12-bit SIMD variant uses the 16-bit C function for the remainder */ |
912 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
913 | | uint16_t) |
914 | | #endif |
915 | | |
916 | | #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON |
917 | | SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON, |
918 | | ScaleRowUp2_Bilinear_16_NEON, |
919 | | ScaleRowUp2_Bilinear_16_C, |
920 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
921 | | uint16_t) |
922 | | #endif |
923 | | |
924 | | #undef SU2BLANY |
925 | | |
926 | | // Scale bi-planar plane up horizontally 2 times using linear filter. Wrapper for any dst_width: the first and last output pairs are copied here and the width in between is split between SIMD and C by MASK. |
927 | | #define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE) /* NAME: generated function; SIMD, C: row functions; MASK: SIMD step - 1; PTYPE: element type */ \ |
928 | 0 | void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ |
929 | 0 | int work_width = (dst_width - 1) & ~1; /* width handed to the row functions, rounded down to even */ \ |
930 | 0 | int r = work_width & MASK; /* remainder width for C */ \ |
931 | 0 | int n = work_width & ~MASK; /* width for SIMD */ \ |
932 | 0 | dst_ptr[0] = src_ptr[0]; /* first output pair is copied unfiltered */ \ |
933 | 0 | dst_ptr[1] = src_ptr[1]; \ |
934 | 0 | if (work_width > 0) { /* true only when dst_width >= 3 */ \ |
935 | 0 | if (n != 0) { \ |
936 | 0 | SIMD(src_ptr, dst_ptr + 2, n); /* output starts after the first pair */ \ |
937 | 0 | } \ |
938 | 0 | C(src_ptr + n, dst_ptr + 2 * n + 2, r); /* continues where SIMD stopped; r may be 0 */ \ |
939 | 0 | } \ |
940 | 0 | dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; /* last output pair is copied unfiltered */ \ |
941 | 0 | dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \ |
942 | 0 | } Unexecuted instantiation: ScaleUVRowUp2_Linear_Any_C Unexecuted instantiation: ScaleUVRowUp2_Linear_16_Any_C Unexecuted instantiation: ScaleUVRowUp2_Linear_Any_SSSE3 Unexecuted instantiation: ScaleUVRowUp2_Linear_Any_AVX2 Unexecuted instantiation: ScaleUVRowUp2_Linear_16_Any_SSE41 Unexecuted instantiation: ScaleUVRowUp2_Linear_16_Any_AVX2 |
943 | | |
944 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_C, |
945 | | ScaleUVRowUp2_Linear_C, |
946 | | ScaleUVRowUp2_Linear_C, |
947 | | 0, /* MASK 0: r = work_width & 0 is always 0, so the first row function gets the whole work_width */ |
948 | | uint8_t) |
949 | | |
950 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C, |
951 | | ScaleUVRowUp2_Linear_16_C, |
952 | | ScaleUVRowUp2_Linear_16_C, |
953 | | 0, /* MASK 0: no remainder split, same as the uint8_t C variant */ |
954 | | uint16_t) |
955 | | |
956 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3 |
957 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3, |
958 | | ScaleUVRowUp2_Linear_SSSE3, |
959 | | ScaleUVRowUp2_Linear_C, |
960 | | 7, /* MASK 7: SIMD gets n = work_width & ~7, the C function gets r = work_width & 7 */ |
961 | | uint8_t) |
962 | | #endif |
963 | | |
964 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2 |
965 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2, |
966 | | ScaleUVRowUp2_Linear_AVX2, |
967 | | ScaleUVRowUp2_Linear_C, |
968 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
969 | | uint8_t) |
970 | | #endif |
971 | | |
972 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 |
973 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41, |
974 | | ScaleUVRowUp2_Linear_16_SSE41, |
975 | | ScaleUVRowUp2_Linear_16_C, |
976 | | 3, /* MASK 3: SIMD width n is a multiple of 4 */ |
977 | | uint16_t) |
978 | | #endif |
979 | | |
980 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 |
981 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2, |
982 | | ScaleUVRowUp2_Linear_16_AVX2, |
983 | | ScaleUVRowUp2_Linear_16_C, |
984 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
985 | | uint16_t) |
986 | | #endif |
987 | | |
988 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_NEON |
989 | | SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON, |
990 | | ScaleUVRowUp2_Linear_NEON, |
991 | | ScaleUVRowUp2_Linear_C, |
992 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
993 | | uint8_t) |
994 | | #endif |
995 | | |
996 | | #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON |
997 | | SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON, |
998 | | ScaleUVRowUp2_Linear_16_NEON, |
999 | | ScaleUVRowUp2_Linear_16_C, |
1000 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
1001 | | uint16_t) |
1002 | | #endif |
1003 | | |
1004 | | #undef SBUH2LANY |
1005 | | |
1006 | | // Scale bi-planar plane up 2 times using bilinear filter. Wrapper for any dst_width: the first and last output pairs are computed here and the width in between is split between SIMD and C by MASK. |
1007 | | // This function produces 2 rows at a time: da at dst_ptr and db at dst_ptr + dst_stride, both read from source rows sa and sb. |
1008 | | #define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE) /* NAME: generated function; SIMD, C: row functions; MASK: SIMD step - 1; PTYPE: element type */ \ |
1009 | | void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ |
1010 | 0 | ptrdiff_t dst_stride, int dst_width) { \ |
1011 | 0 | int work_width = (dst_width - 1) & ~1; /* width handed to the row functions, rounded down to even */ \ |
1012 | 0 | int r = work_width & MASK; /* remainder width for C */ \ |
1013 | 0 | int n = work_width & ~MASK; /* width for SIMD */ \ |
1014 | 0 | const PTYPE* sa = src_ptr; /* source row at src_ptr */ \ |
1015 | 0 | const PTYPE* sb = src_ptr + src_stride; /* source row one stride further */ \ |
1016 | 0 | PTYPE* da = dst_ptr; /* destination row at dst_ptr */ \ |
1017 | 0 | PTYPE* db = dst_ptr + dst_stride; /* destination row one stride further */ \ |
1018 | 0 | da[0] = (3 * sa[0] + sb[0] + 2) >> 2; /* first pair: 3:1 blend of the two source rows, rounded */ \ |
1019 | 0 | db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; /* same with the row weights swapped (1:3) */ \ |
1020 | 0 | da[1] = (3 * sa[1] + sb[1] + 2) >> 2; \ |
1021 | 0 | db[1] = (sa[1] + 3 * sb[1] + 2) >> 2; \ |
1022 | 0 | if (work_width > 0) { /* true only when dst_width >= 3 */ \ |
1023 | 0 | if (n != 0) { \ |
1024 | 0 | SIMD(sa, sb - sa, da + 2, db - da, n); /* strides are passed as the row pointer differences */ \ |
1025 | 0 | } \ |
1026 | 0 | C(sa + n, sb - sa, da + 2 * n + 2, db - da, r); /* continues where SIMD stopped; r may be 0 */ \ |
1027 | 0 | } \ |
1028 | 0 | da[2 * dst_width - 2] = (3 * sa[((dst_width + 1) & ~1) - 2] + \ |
1029 | 0 | sb[((dst_width + 1) & ~1) - 2] + 2) >> \ |
1030 | 0 | 2; /* last pair uses the same 3:1 / 1:3 row blend as the first pair */ \ |
1031 | 0 | db[2 * dst_width - 2] = (sa[((dst_width + 1) & ~1) - 2] + \ |
1032 | 0 | 3 * sb[((dst_width + 1) & ~1) - 2] + 2) >> \ |
1033 | 0 | 2; \ |
1034 | 0 | da[2 * dst_width - 1] = (3 * sa[((dst_width + 1) & ~1) - 1] + \ |
1035 | 0 | sb[((dst_width + 1) & ~1) - 1] + 2) >> \ |
1036 | 0 | 2; \ |
1037 | 0 | db[2 * dst_width - 1] = (sa[((dst_width + 1) & ~1) - 1] + \ |
1038 | 0 | 3 * sb[((dst_width + 1) & ~1) - 1] + 2) >> \ |
1039 | 0 | 2; \ |
1040 | 0 | } Unexecuted instantiation: ScaleUVRowUp2_Bilinear_Any_C Unexecuted instantiation: ScaleUVRowUp2_Bilinear_16_Any_C Unexecuted instantiation: ScaleUVRowUp2_Bilinear_Any_SSSE3 Unexecuted instantiation: ScaleUVRowUp2_Bilinear_Any_AVX2 Unexecuted instantiation: ScaleUVRowUp2_Bilinear_16_Any_SSE41 Unexecuted instantiation: ScaleUVRowUp2_Bilinear_16_Any_AVX2 |
1041 | | |
1042 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C, |
1043 | | ScaleUVRowUp2_Bilinear_C, |
1044 | | ScaleUVRowUp2_Bilinear_C, |
1045 | | 0, /* MASK 0: r = work_width & 0 is always 0, so the first row function gets the whole work_width */ |
1046 | | uint8_t) |
1047 | | |
1048 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C, |
1049 | | ScaleUVRowUp2_Bilinear_16_C, |
1050 | | ScaleUVRowUp2_Bilinear_16_C, |
1051 | | 0, /* MASK 0: no remainder split, same as the uint8_t C variant */ |
1052 | | uint16_t) |
1053 | | |
1054 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3 |
1055 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3, |
1056 | | ScaleUVRowUp2_Bilinear_SSSE3, |
1057 | | ScaleUVRowUp2_Bilinear_C, |
1058 | | 7, /* MASK 7: SIMD gets n = work_width & ~7, the C function gets r = work_width & 7 */ |
1059 | | uint8_t) |
1060 | | #endif |
1061 | | |
1062 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2 |
1063 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2, |
1064 | | ScaleUVRowUp2_Bilinear_AVX2, |
1065 | | ScaleUVRowUp2_Bilinear_C, |
1066 | | 15, /* MASK 15: SIMD width n is a multiple of 16 */ |
1067 | | uint8_t) |
1068 | | #endif |
1069 | | |
1070 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 |
1071 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41, |
1072 | | ScaleUVRowUp2_Bilinear_16_SSE41, |
1073 | | ScaleUVRowUp2_Bilinear_16_C, |
1074 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
1075 | | uint16_t) |
1076 | | #endif |
1077 | | |
1078 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 |
1079 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2, |
1080 | | ScaleUVRowUp2_Bilinear_16_AVX2, |
1081 | | ScaleUVRowUp2_Bilinear_16_C, |
1082 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
1083 | | uint16_t) |
1084 | | #endif |
1085 | | |
1086 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON |
1087 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON, |
1088 | | ScaleUVRowUp2_Bilinear_NEON, |
1089 | | ScaleUVRowUp2_Bilinear_C, |
1090 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
1091 | | uint8_t) |
1092 | | #endif |
1093 | | |
1094 | | #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON |
1095 | | SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON, |
1096 | | ScaleUVRowUp2_Bilinear_16_NEON, |
1097 | | ScaleUVRowUp2_Bilinear_16_C, |
1098 | | 7, /* MASK 7: SIMD width n is a multiple of 8 */ |
1099 | | uint16_t) |
1100 | | #endif |
1101 | | |
1102 | | #undef SBU2BLANY |
1103 | | |
1104 | | #ifdef __cplusplus |
1105 | | } // extern "C" |
1106 | | } // namespace libyuv |
1107 | | #endif |