/src/libavif/ext/libyuv/source/scale_common.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "libyuv/scale.h" |
12 | | |
13 | | #include <assert.h> |
14 | | #include <string.h> |
15 | | |
16 | | #include "libyuv/cpu_id.h" |
17 | | #include "libyuv/planar_functions.h" // For CopyARGB |
18 | | #include "libyuv/row.h" |
19 | | #include "libyuv/scale_row.h" |
20 | | |
21 | | #ifdef __cplusplus |
22 | | namespace libyuv { |
23 | | extern "C" { |
24 | | #endif |
25 | | |
26 | | #ifdef __cplusplus |
27 | 0 | #define STATIC_CAST(type, expr) static_cast<type>(expr) |
28 | | #else |
29 | | #define STATIC_CAST(type, expr) (type)(expr) |
30 | | #endif |
31 | | |
32 | | // TODO(fbarchard): make clamp255 preserve negative values. |
33 | 0 | static __inline int32_t clamp255(int32_t v) { |
34 | 0 | return (-(v >= 255) | v) & 255; |
35 | 0 | } |
36 | | |
37 | | // Use scale to convert lsb formats to msb, depending how many bits there are: |
38 | | // 32768 = 9 bits |
39 | | // 16384 = 10 bits |
40 | | // 4096 = 12 bits |
41 | | // 256 = 16 bits |
42 | | // TODO(fbarchard): change scale to bits |
43 | | #define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16) |
44 | | |
45 | 40.3k | static __inline int Abs(int v) { |
46 | 40.3k | return v >= 0 ? v : -v; |
47 | 40.3k | } |
48 | | |
49 | | // CPU agnostic row functions |
50 | | void ScaleRowDown2_C(const uint8_t* src_ptr, |
51 | | ptrdiff_t src_stride, |
52 | | uint8_t* dst, |
53 | 0 | int dst_width) { |
54 | 0 | int x; |
55 | 0 | (void)src_stride; |
56 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
57 | 0 | dst[0] = src_ptr[1]; |
58 | 0 | dst[1] = src_ptr[3]; |
59 | 0 | dst += 2; |
60 | 0 | src_ptr += 4; |
61 | 0 | } |
62 | 0 | if (dst_width & 1) { |
63 | 0 | dst[0] = src_ptr[1]; |
64 | 0 | } |
65 | 0 | } |
66 | | |
67 | | void ScaleRowDown2_16_C(const uint16_t* src_ptr, |
68 | | ptrdiff_t src_stride, |
69 | | uint16_t* dst, |
70 | 0 | int dst_width) { |
71 | 0 | int x; |
72 | 0 | (void)src_stride; |
73 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
74 | 0 | dst[0] = src_ptr[1]; |
75 | 0 | dst[1] = src_ptr[3]; |
76 | 0 | dst += 2; |
77 | 0 | src_ptr += 4; |
78 | 0 | } |
79 | 0 | if (dst_width & 1) { |
80 | 0 | dst[0] = src_ptr[1]; |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | | void ScaleRowDown2_16To8_C(const uint16_t* src_ptr, |
85 | | ptrdiff_t src_stride, |
86 | | uint8_t* dst, |
87 | | int dst_width, |
88 | 0 | int scale) { |
89 | 0 | int x; |
90 | 0 | (void)src_stride; |
91 | 0 | assert(scale >= 256); |
92 | 0 | assert(scale <= 32768); |
93 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
94 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); |
95 | 0 | dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale)); |
96 | 0 | dst += 2; |
97 | 0 | src_ptr += 4; |
98 | 0 | } |
99 | 0 | if (dst_width & 1) { |
100 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | | void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr, |
105 | | ptrdiff_t src_stride, |
106 | | uint8_t* dst, |
107 | | int dst_width, |
108 | 0 | int scale) { |
109 | 0 | int x; |
110 | 0 | (void)src_stride; |
111 | 0 | assert(scale >= 256); |
112 | 0 | assert(scale <= 32768); |
113 | 0 | dst_width -= 1; |
114 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
115 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); |
116 | 0 | dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale)); |
117 | 0 | dst += 2; |
118 | 0 | src_ptr += 4; |
119 | 0 | } |
120 | 0 | if (dst_width & 1) { |
121 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); |
122 | 0 | dst += 1; |
123 | 0 | src_ptr += 2; |
124 | 0 | } |
125 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale)); |
126 | 0 | } |
127 | | |
128 | | void ScaleRowDown2Linear_C(const uint8_t* src_ptr, |
129 | | ptrdiff_t src_stride, |
130 | | uint8_t* dst, |
131 | 0 | int dst_width) { |
132 | 0 | const uint8_t* s = src_ptr; |
133 | 0 | int x; |
134 | 0 | (void)src_stride; |
135 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
136 | 0 | dst[0] = (s[0] + s[1] + 1) >> 1; |
137 | 0 | dst[1] = (s[2] + s[3] + 1) >> 1; |
138 | 0 | dst += 2; |
139 | 0 | s += 4; |
140 | 0 | } |
141 | 0 | if (dst_width & 1) { |
142 | 0 | dst[0] = (s[0] + s[1] + 1) >> 1; |
143 | 0 | } |
144 | 0 | } |
145 | | |
146 | | void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, |
147 | | ptrdiff_t src_stride, |
148 | | uint16_t* dst, |
149 | 0 | int dst_width) { |
150 | 0 | const uint16_t* s = src_ptr; |
151 | 0 | int x; |
152 | 0 | (void)src_stride; |
153 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
154 | 0 | dst[0] = (s[0] + s[1] + 1) >> 1; |
155 | 0 | dst[1] = (s[2] + s[3] + 1) >> 1; |
156 | 0 | dst += 2; |
157 | 0 | s += 4; |
158 | 0 | } |
159 | 0 | if (dst_width & 1) { |
160 | 0 | dst[0] = (s[0] + s[1] + 1) >> 1; |
161 | 0 | } |
162 | 0 | } |
163 | | |
164 | | void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr, |
165 | | ptrdiff_t src_stride, |
166 | | uint8_t* dst, |
167 | | int dst_width, |
168 | 0 | int scale) { |
169 | 0 | const uint16_t* s = src_ptr; |
170 | 0 | int x; |
171 | 0 | (void)src_stride; |
172 | 0 | assert(scale >= 256); |
173 | 0 | assert(scale <= 32768); |
174 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
175 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); |
176 | 0 | dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale)); |
177 | 0 | dst += 2; |
178 | 0 | s += 4; |
179 | 0 | } |
180 | 0 | if (dst_width & 1) { |
181 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); |
182 | 0 | } |
183 | 0 | } |
184 | | |
185 | | void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr, |
186 | | ptrdiff_t src_stride, |
187 | | uint8_t* dst, |
188 | | int dst_width, |
189 | 0 | int scale) { |
190 | 0 | const uint16_t* s = src_ptr; |
191 | 0 | int x; |
192 | 0 | (void)src_stride; |
193 | 0 | assert(scale >= 256); |
194 | 0 | assert(scale <= 32768); |
195 | 0 | dst_width -= 1; |
196 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
197 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); |
198 | 0 | dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale)); |
199 | 0 | dst += 2; |
200 | 0 | s += 4; |
201 | 0 | } |
202 | 0 | if (dst_width & 1) { |
203 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); |
204 | 0 | dst += 1; |
205 | 0 | s += 2; |
206 | 0 | } |
207 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale)); |
208 | 0 | } |
209 | | |
210 | | void ScaleRowDown2Box_C(const uint8_t* src_ptr, |
211 | | ptrdiff_t src_stride, |
212 | | uint8_t* dst, |
213 | 1.61k | int dst_width) { |
214 | 1.61k | const uint8_t* s = src_ptr; |
215 | 1.61k | const uint8_t* t = src_ptr + src_stride; |
216 | 1.61k | int x; |
217 | 16.3k | for (x = 0; x < dst_width - 1; x += 2) { |
218 | 14.6k | dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; |
219 | 14.6k | dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; |
220 | 14.6k | dst += 2; |
221 | 14.6k | s += 4; |
222 | 14.6k | t += 4; |
223 | 14.6k | } |
224 | 1.61k | if (dst_width & 1) { |
225 | 349 | dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; |
226 | 349 | } |
227 | 1.61k | } |
228 | | |
229 | | void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, |
230 | | ptrdiff_t src_stride, |
231 | | uint8_t* dst, |
232 | 0 | int dst_width) { |
233 | 0 | const uint8_t* s = src_ptr; |
234 | 0 | const uint8_t* t = src_ptr + src_stride; |
235 | 0 | int x; |
236 | 0 | dst_width -= 1; |
237 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
238 | 0 | dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; |
239 | 0 | dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; |
240 | 0 | dst += 2; |
241 | 0 | s += 4; |
242 | 0 | t += 4; |
243 | 0 | } |
244 | 0 | if (dst_width & 1) { |
245 | 0 | dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; |
246 | 0 | dst += 1; |
247 | 0 | s += 2; |
248 | 0 | t += 2; |
249 | 0 | } |
250 | 0 | dst[0] = (s[0] + t[0] + 1) >> 1; |
251 | 0 | } |
252 | | |
253 | | void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, |
254 | | ptrdiff_t src_stride, |
255 | | uint16_t* dst, |
256 | 2.28k | int dst_width) { |
257 | 2.28k | const uint16_t* s = src_ptr; |
258 | 2.28k | const uint16_t* t = src_ptr + src_stride; |
259 | 2.28k | int x; |
260 | 30.0k | for (x = 0; x < dst_width - 1; x += 2) { |
261 | 27.7k | dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; |
262 | 27.7k | dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; |
263 | 27.7k | dst += 2; |
264 | 27.7k | s += 4; |
265 | 27.7k | t += 4; |
266 | 27.7k | } |
267 | 2.28k | if (dst_width & 1) { |
268 | 211 | dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; |
269 | 211 | } |
270 | 2.28k | } |
271 | | |
272 | | void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr, |
273 | | ptrdiff_t src_stride, |
274 | | uint8_t* dst, |
275 | | int dst_width, |
276 | 0 | int scale) { |
277 | 0 | const uint16_t* s = src_ptr; |
278 | 0 | const uint16_t* t = src_ptr + src_stride; |
279 | 0 | int x; |
280 | 0 | assert(scale >= 256); |
281 | 0 | assert(scale <= 32768); |
282 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
283 | 0 | dst[0] = STATIC_CAST(uint8_t, |
284 | 0 | C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); |
285 | 0 | dst[1] = STATIC_CAST(uint8_t, |
286 | 0 | C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale)); |
287 | 0 | dst += 2; |
288 | 0 | s += 4; |
289 | 0 | t += 4; |
290 | 0 | } |
291 | 0 | if (dst_width & 1) { |
292 | 0 | dst[0] = STATIC_CAST(uint8_t, |
293 | 0 | C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | | void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr, |
298 | | ptrdiff_t src_stride, |
299 | | uint8_t* dst, |
300 | | int dst_width, |
301 | 0 | int scale) { |
302 | 0 | const uint16_t* s = src_ptr; |
303 | 0 | const uint16_t* t = src_ptr + src_stride; |
304 | 0 | int x; |
305 | 0 | assert(scale >= 256); |
306 | 0 | assert(scale <= 32768); |
307 | 0 | dst_width -= 1; |
308 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
309 | 0 | dst[0] = STATIC_CAST(uint8_t, |
310 | 0 | C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); |
311 | 0 | dst[1] = STATIC_CAST(uint8_t, |
312 | 0 | C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale)); |
313 | 0 | dst += 2; |
314 | 0 | s += 4; |
315 | 0 | t += 4; |
316 | 0 | } |
317 | 0 | if (dst_width & 1) { |
318 | 0 | dst[0] = STATIC_CAST(uint8_t, |
319 | 0 | C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); |
320 | 0 | dst += 1; |
321 | 0 | s += 2; |
322 | 0 | t += 2; |
323 | 0 | } |
324 | 0 | dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale)); |
325 | 0 | } |
326 | | |
327 | | void ScaleRowDown4_C(const uint8_t* src_ptr, |
328 | | ptrdiff_t src_stride, |
329 | | uint8_t* dst, |
330 | 0 | int dst_width) { |
331 | 0 | int x; |
332 | 0 | (void)src_stride; |
333 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
334 | 0 | dst[0] = src_ptr[2]; |
335 | 0 | dst[1] = src_ptr[6]; |
336 | 0 | dst += 2; |
337 | 0 | src_ptr += 8; |
338 | 0 | } |
339 | 0 | if (dst_width & 1) { |
340 | 0 | dst[0] = src_ptr[2]; |
341 | 0 | } |
342 | 0 | } |
343 | | |
344 | | void ScaleRowDown4_16_C(const uint16_t* src_ptr, |
345 | | ptrdiff_t src_stride, |
346 | | uint16_t* dst, |
347 | 0 | int dst_width) { |
348 | 0 | int x; |
349 | 0 | (void)src_stride; |
350 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
351 | 0 | dst[0] = src_ptr[2]; |
352 | 0 | dst[1] = src_ptr[6]; |
353 | 0 | dst += 2; |
354 | 0 | src_ptr += 8; |
355 | 0 | } |
356 | 0 | if (dst_width & 1) { |
357 | 0 | dst[0] = src_ptr[2]; |
358 | 0 | } |
359 | 0 | } |
360 | | |
361 | | void ScaleRowDown4Box_C(const uint8_t* src_ptr, |
362 | | ptrdiff_t src_stride, |
363 | | uint8_t* dst, |
364 | 454 | int dst_width) { |
365 | 454 | intptr_t stride = src_stride; |
366 | 454 | int x; |
367 | 2.26k | for (x = 0; x < dst_width - 1; x += 2) { |
368 | 1.81k | dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + |
369 | 1.81k | src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + |
370 | 1.81k | src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + |
371 | 1.81k | src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + |
372 | 1.81k | src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + |
373 | 1.81k | src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + |
374 | 1.81k | src_ptr[stride * 3 + 3] + 8) >> |
375 | 1.81k | 4; |
376 | 1.81k | dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + |
377 | 1.81k | src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] + |
378 | 1.81k | src_ptr[stride + 7] + src_ptr[stride * 2 + 4] + |
379 | 1.81k | src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] + |
380 | 1.81k | src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] + |
381 | 1.81k | src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] + |
382 | 1.81k | src_ptr[stride * 3 + 7] + 8) >> |
383 | 1.81k | 4; |
384 | 1.81k | dst += 2; |
385 | 1.81k | src_ptr += 8; |
386 | 1.81k | } |
387 | 454 | if (dst_width & 1) { |
388 | 205 | dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + |
389 | 205 | src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + |
390 | 205 | src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + |
391 | 205 | src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + |
392 | 205 | src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + |
393 | 205 | src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + |
394 | 205 | src_ptr[stride * 3 + 3] + 8) >> |
395 | 205 | 4; |
396 | 205 | } |
397 | 454 | } |
398 | | |
399 | | void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, |
400 | | ptrdiff_t src_stride, |
401 | | uint16_t* dst, |
402 | 268 | int dst_width) { |
403 | 268 | intptr_t stride = src_stride; |
404 | 268 | int x; |
405 | 10.4k | for (x = 0; x < dst_width - 1; x += 2) { |
406 | 10.2k | dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + |
407 | 10.2k | src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + |
408 | 10.2k | src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + |
409 | 10.2k | src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + |
410 | 10.2k | src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + |
411 | 10.2k | src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + |
412 | 10.2k | src_ptr[stride * 3 + 3] + 8) >> |
413 | 10.2k | 4; |
414 | 10.2k | dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + |
415 | 10.2k | src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] + |
416 | 10.2k | src_ptr[stride + 7] + src_ptr[stride * 2 + 4] + |
417 | 10.2k | src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] + |
418 | 10.2k | src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] + |
419 | 10.2k | src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] + |
420 | 10.2k | src_ptr[stride * 3 + 7] + 8) >> |
421 | 10.2k | 4; |
422 | 10.2k | dst += 2; |
423 | 10.2k | src_ptr += 8; |
424 | 10.2k | } |
425 | 268 | if (dst_width & 1) { |
426 | 18 | dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + |
427 | 18 | src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + |
428 | 18 | src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + |
429 | 18 | src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + |
430 | 18 | src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + |
431 | 18 | src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + |
432 | 18 | src_ptr[stride * 3 + 3] + 8) >> |
433 | 18 | 4; |
434 | 18 | } |
435 | 268 | } |
436 | | |
437 | | void ScaleRowDown34_C(const uint8_t* src_ptr, |
438 | | ptrdiff_t src_stride, |
439 | | uint8_t* dst, |
440 | 0 | int dst_width) { |
441 | 0 | int x; |
442 | 0 | (void)src_stride; |
443 | 0 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
444 | 0 | for (x = 0; x < dst_width; x += 3) { |
445 | 0 | dst[0] = src_ptr[0]; |
446 | 0 | dst[1] = src_ptr[1]; |
447 | 0 | dst[2] = src_ptr[3]; |
448 | 0 | dst += 3; |
449 | 0 | src_ptr += 4; |
450 | 0 | } |
451 | 0 | } |
452 | | |
453 | | void ScaleRowDown34_16_C(const uint16_t* src_ptr, |
454 | | ptrdiff_t src_stride, |
455 | | uint16_t* dst, |
456 | 0 | int dst_width) { |
457 | 0 | int x; |
458 | 0 | (void)src_stride; |
459 | 0 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
460 | 0 | for (x = 0; x < dst_width; x += 3) { |
461 | 0 | dst[0] = src_ptr[0]; |
462 | 0 | dst[1] = src_ptr[1]; |
463 | 0 | dst[2] = src_ptr[3]; |
464 | 0 | dst += 3; |
465 | 0 | src_ptr += 4; |
466 | 0 | } |
467 | 0 | } |
468 | | |
469 | | // Filter rows 0 and 1 together, 3 : 1 |
470 | | void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, |
471 | | ptrdiff_t src_stride, |
472 | | uint8_t* d, |
473 | 1.18k | int dst_width) { |
474 | 1.18k | const uint8_t* s = src_ptr; |
475 | 1.18k | const uint8_t* t = src_ptr + src_stride; |
476 | 1.18k | int x; |
477 | 1.18k | assert((dst_width % 3 == 0) && (dst_width > 0)); |
478 | 3.49k | for (x = 0; x < dst_width; x += 3) { |
479 | 2.31k | uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; |
480 | 2.31k | uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; |
481 | 2.31k | uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; |
482 | 2.31k | uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; |
483 | 2.31k | uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; |
484 | 2.31k | uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; |
485 | 2.31k | d[0] = (a0 * 3 + b0 + 2) >> 2; |
486 | 2.31k | d[1] = (a1 * 3 + b1 + 2) >> 2; |
487 | 2.31k | d[2] = (a2 * 3 + b2 + 2) >> 2; |
488 | 2.31k | d += 3; |
489 | 2.31k | s += 4; |
490 | 2.31k | t += 4; |
491 | 2.31k | } |
492 | 1.18k | } |
493 | | |
494 | | void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, |
495 | | ptrdiff_t src_stride, |
496 | | uint16_t* d, |
497 | 1.18k | int dst_width) { |
498 | 1.18k | const uint16_t* s = src_ptr; |
499 | 1.18k | const uint16_t* t = src_ptr + src_stride; |
500 | 1.18k | int x; |
501 | 1.18k | assert((dst_width % 3 == 0) && (dst_width > 0)); |
502 | 3.49k | for (x = 0; x < dst_width; x += 3) { |
503 | 2.31k | uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; |
504 | 2.31k | uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; |
505 | 2.31k | uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; |
506 | 2.31k | uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; |
507 | 2.31k | uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; |
508 | 2.31k | uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; |
509 | 2.31k | d[0] = (a0 * 3 + b0 + 2) >> 2; |
510 | 2.31k | d[1] = (a1 * 3 + b1 + 2) >> 2; |
511 | 2.31k | d[2] = (a2 * 3 + b2 + 2) >> 2; |
512 | 2.31k | d += 3; |
513 | 2.31k | s += 4; |
514 | 2.31k | t += 4; |
515 | 2.31k | } |
516 | 1.18k | } |
517 | | |
518 | | // Filter rows 1 and 2 together, 1 : 1 |
519 | | void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, |
520 | | ptrdiff_t src_stride, |
521 | | uint8_t* d, |
522 | 591 | int dst_width) { |
523 | 591 | const uint8_t* s = src_ptr; |
524 | 591 | const uint8_t* t = src_ptr + src_stride; |
525 | 591 | int x; |
526 | 591 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
527 | 1.74k | for (x = 0; x < dst_width; x += 3) { |
528 | 1.15k | uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; |
529 | 1.15k | uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; |
530 | 1.15k | uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; |
531 | 1.15k | uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; |
532 | 1.15k | uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; |
533 | 1.15k | uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; |
534 | 1.15k | d[0] = (a0 + b0 + 1) >> 1; |
535 | 1.15k | d[1] = (a1 + b1 + 1) >> 1; |
536 | 1.15k | d[2] = (a2 + b2 + 1) >> 1; |
537 | 1.15k | d += 3; |
538 | 1.15k | s += 4; |
539 | 1.15k | t += 4; |
540 | 1.15k | } |
541 | 591 | } |
542 | | |
543 | | void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, |
544 | | ptrdiff_t src_stride, |
545 | | uint16_t* d, |
546 | 591 | int dst_width) { |
547 | 591 | const uint16_t* s = src_ptr; |
548 | 591 | const uint16_t* t = src_ptr + src_stride; |
549 | 591 | int x; |
550 | 591 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
551 | 1.74k | for (x = 0; x < dst_width; x += 3) { |
552 | 1.15k | uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; |
553 | 1.15k | uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; |
554 | 1.15k | uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; |
555 | 1.15k | uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; |
556 | 1.15k | uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; |
557 | 1.15k | uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; |
558 | 1.15k | d[0] = (a0 + b0 + 1) >> 1; |
559 | 1.15k | d[1] = (a1 + b1 + 1) >> 1; |
560 | 1.15k | d[2] = (a2 + b2 + 1) >> 1; |
561 | 1.15k | d += 3; |
562 | 1.15k | s += 4; |
563 | 1.15k | t += 4; |
564 | 1.15k | } |
565 | 591 | } |
566 | | |
567 | | // Sample position: (O is src sample position, X is dst sample position) |
568 | | // |
569 | | // v dst_ptr at here v stop at here |
570 | | // X O X X O X X O X X O X X O X |
571 | | // ^ src_ptr at here |
572 | | void ScaleRowUp2_Linear_C(const uint8_t* src_ptr, |
573 | | uint8_t* dst_ptr, |
574 | 128k | int dst_width) { |
575 | 128k | int src_width = dst_width >> 1; |
576 | 128k | int x; |
577 | 128k | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
578 | 1.01M | for (x = 0; x < src_width; ++x) { |
579 | 883k | dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2; |
580 | 883k | dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2; |
581 | 883k | } |
582 | 128k | } |
583 | | |
584 | | // Sample position: (O is src sample position, X is dst sample position) |
585 | | // |
586 | | // src_ptr at here |
587 | | // X v X X X X X X X X X |
588 | | // O O O O O |
589 | | // X X X X X X X X X X |
590 | | // ^ dst_ptr at here ^ stop at here |
591 | | // X X X X X X X X X X |
592 | | // O O O O O |
593 | | // X X X X X X X X X X |
594 | | void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr, |
595 | | ptrdiff_t src_stride, |
596 | | uint8_t* dst_ptr, |
597 | | ptrdiff_t dst_stride, |
598 | 18.8k | int dst_width) { |
599 | 18.8k | const uint8_t* s = src_ptr; |
600 | 18.8k | const uint8_t* t = src_ptr + src_stride; |
601 | 18.8k | uint8_t* d = dst_ptr; |
602 | 18.8k | uint8_t* e = dst_ptr + dst_stride; |
603 | 18.8k | int src_width = dst_width >> 1; |
604 | 18.8k | int x; |
605 | 18.8k | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
606 | 264k | for (x = 0; x < src_width; ++x) { |
607 | 246k | d[2 * x + 0] = |
608 | 246k | (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4; |
609 | 246k | d[2 * x + 1] = |
610 | 246k | (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4; |
611 | 246k | e[2 * x + 0] = |
612 | 246k | (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4; |
613 | 246k | e[2 * x + 1] = |
614 | 246k | (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4; |
615 | 246k | } |
616 | 18.8k | } |
617 | | |
618 | | // Only suitable for at most 14 bit range. |
619 | | void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr, |
620 | | uint16_t* dst_ptr, |
621 | 200k | int dst_width) { |
622 | 200k | int src_width = dst_width >> 1; |
623 | 200k | int x; |
624 | 200k | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
625 | 1.03M | for (x = 0; x < src_width; ++x) { |
626 | 838k | dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2; |
627 | 838k | dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2; |
628 | 838k | } |
629 | 200k | } |
630 | | |
631 | | // Only suitable for at most 12bit range. |
632 | | void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr, |
633 | | ptrdiff_t src_stride, |
634 | | uint16_t* dst_ptr, |
635 | | ptrdiff_t dst_stride, |
636 | 17.2k | int dst_width) { |
637 | 17.2k | const uint16_t* s = src_ptr; |
638 | 17.2k | const uint16_t* t = src_ptr + src_stride; |
639 | 17.2k | uint16_t* d = dst_ptr; |
640 | 17.2k | uint16_t* e = dst_ptr + dst_stride; |
641 | 17.2k | int src_width = dst_width >> 1; |
642 | 17.2k | int x; |
643 | 17.2k | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
644 | 126k | for (x = 0; x < src_width; ++x) { |
645 | 109k | d[2 * x + 0] = |
646 | 109k | (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4; |
647 | 109k | d[2 * x + 1] = |
648 | 109k | (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4; |
649 | 109k | e[2 * x + 0] = |
650 | 109k | (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4; |
651 | 109k | e[2 * x + 1] = |
652 | 109k | (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4; |
653 | 109k | } |
654 | 17.2k | } |
655 | | |
656 | | // Scales a single row of pixels using point sampling. |
657 | | void ScaleCols_C(uint8_t* dst_ptr, |
658 | | const uint8_t* src_ptr, |
659 | | int dst_width, |
660 | | int x, |
661 | 520k | int dx) { |
662 | 520k | int j; |
663 | 166M | for (j = 0; j < dst_width - 1; j += 2) { |
664 | 166M | dst_ptr[0] = src_ptr[x >> 16]; |
665 | 166M | x += dx; |
666 | 166M | dst_ptr[1] = src_ptr[x >> 16]; |
667 | 166M | x += dx; |
668 | 166M | dst_ptr += 2; |
669 | 166M | } |
670 | 520k | if (dst_width & 1) { |
671 | 341k | dst_ptr[0] = src_ptr[x >> 16]; |
672 | 341k | } |
673 | 520k | } |
674 | | |
675 | | void ScaleCols_16_C(uint16_t* dst_ptr, |
676 | | const uint16_t* src_ptr, |
677 | | int dst_width, |
678 | | int x, |
679 | 528k | int dx) { |
680 | 528k | int j; |
681 | 275M | for (j = 0; j < dst_width - 1; j += 2) { |
682 | 274M | dst_ptr[0] = src_ptr[x >> 16]; |
683 | 274M | x += dx; |
684 | 274M | dst_ptr[1] = src_ptr[x >> 16]; |
685 | 274M | x += dx; |
686 | 274M | dst_ptr += 2; |
687 | 274M | } |
688 | 528k | if (dst_width & 1) { |
689 | 388k | dst_ptr[0] = src_ptr[x >> 16]; |
690 | 388k | } |
691 | 528k | } |
692 | | |
693 | | // Scales a single row of pixels up by 2x using point sampling. |
694 | | void ScaleColsUp2_C(uint8_t* dst_ptr, |
695 | | const uint8_t* src_ptr, |
696 | | int dst_width, |
697 | | int x, |
698 | 51.9k | int dx) { |
699 | 51.9k | int j; |
700 | 51.9k | (void)x; |
701 | 51.9k | (void)dx; |
702 | 103k | for (j = 0; j < dst_width - 1; j += 2) { |
703 | 51.9k | dst_ptr[1] = dst_ptr[0] = src_ptr[0]; |
704 | 51.9k | src_ptr += 1; |
705 | 51.9k | dst_ptr += 2; |
706 | 51.9k | } |
707 | 51.9k | if (dst_width & 1) { |
708 | 0 | dst_ptr[0] = src_ptr[0]; |
709 | 0 | } |
710 | 51.9k | } |
711 | | |
712 | | void ScaleColsUp2_16_C(uint16_t* dst_ptr, |
713 | | const uint16_t* src_ptr, |
714 | | int dst_width, |
715 | | int x, |
716 | 7.16k | int dx) { |
717 | 7.16k | int j; |
718 | 7.16k | (void)x; |
719 | 7.16k | (void)dx; |
720 | 14.3k | for (j = 0; j < dst_width - 1; j += 2) { |
721 | 7.16k | dst_ptr[1] = dst_ptr[0] = src_ptr[0]; |
722 | 7.16k | src_ptr += 1; |
723 | 7.16k | dst_ptr += 2; |
724 | 7.16k | } |
725 | 7.16k | if (dst_width & 1) { |
726 | 0 | dst_ptr[0] = src_ptr[0]; |
727 | 0 | } |
728 | 7.16k | } |
729 | | |
730 | | // (1-f)a + fb can be replaced with a + f(b-a) |
731 | | #if defined(__arm__) || defined(__aarch64__) |
732 | | #define BLENDER(a, b, f) \ |
733 | | (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) |
734 | | #else |
735 | | // Intel uses 7 bit math with rounding. |
736 | | #define BLENDER(a, b, f) \ |
737 | 0 | (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) |
738 | | #endif |
739 | | |
740 | | void ScaleFilterCols_C(uint8_t* dst_ptr, |
741 | | const uint8_t* src_ptr, |
742 | | int dst_width, |
743 | | int x, |
744 | 0 | int dx) { |
745 | 0 | int j; |
746 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
747 | 0 | int xi = x >> 16; |
748 | 0 | int a = src_ptr[xi]; |
749 | 0 | int b = src_ptr[xi + 1]; |
750 | 0 | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
751 | 0 | x += dx; |
752 | 0 | xi = x >> 16; |
753 | 0 | a = src_ptr[xi]; |
754 | 0 | b = src_ptr[xi + 1]; |
755 | 0 | dst_ptr[1] = BLENDER(a, b, x & 0xffff); |
756 | 0 | x += dx; |
757 | 0 | dst_ptr += 2; |
758 | 0 | } |
759 | 0 | if (dst_width & 1) { |
760 | 0 | int xi = x >> 16; |
761 | 0 | int a = src_ptr[xi]; |
762 | 0 | int b = src_ptr[xi + 1]; |
763 | 0 | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
764 | 0 | } |
765 | 0 | } |
766 | | |
767 | | void ScaleFilterCols64_C(uint8_t* dst_ptr, |
768 | | const uint8_t* src_ptr, |
769 | | int dst_width, |
770 | | int x32, |
771 | 0 | int dx) { |
772 | 0 | int64_t x = (int64_t)(x32); |
773 | 0 | int j; |
774 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
775 | 0 | int64_t xi = x >> 16; |
776 | 0 | int a = src_ptr[xi]; |
777 | 0 | int b = src_ptr[xi + 1]; |
778 | 0 | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
779 | 0 | x += dx; |
780 | 0 | xi = x >> 16; |
781 | 0 | a = src_ptr[xi]; |
782 | 0 | b = src_ptr[xi + 1]; |
783 | 0 | dst_ptr[1] = BLENDER(a, b, x & 0xffff); |
784 | 0 | x += dx; |
785 | 0 | dst_ptr += 2; |
786 | 0 | } |
787 | 0 | if (dst_width & 1) { |
788 | 0 | int64_t xi = x >> 16; |
789 | 0 | int a = src_ptr[xi]; |
790 | 0 | int b = src_ptr[xi + 1]; |
791 | 0 | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
792 | 0 | } |
793 | 0 | } |
794 | | #undef BLENDER |
795 | | |
796 | | // Same as 8 bit arm blender but return is cast to uint16_t |
797 | | #define BLENDER(a, b, f) \ |
798 | 70.7M | (uint16_t)( \ |
799 | 70.7M | (int)(a) + \ |
800 | 70.7M | (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16)) |
801 | | |
802 | | void ScaleFilterCols_16_C(uint16_t* dst_ptr, |
803 | | const uint16_t* src_ptr, |
804 | | int dst_width, |
805 | | int x, |
806 | 548k | int dx) { |
807 | 548k | int j; |
808 | 35.8M | for (j = 0; j < dst_width - 1; j += 2) { |
809 | 35.2M | int xi = x >> 16; |
810 | 35.2M | int a = src_ptr[xi]; |
811 | 35.2M | int b = src_ptr[xi + 1]; |
812 | 35.2M | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
813 | 35.2M | x += dx; |
814 | 35.2M | xi = x >> 16; |
815 | 35.2M | a = src_ptr[xi]; |
816 | 35.2M | b = src_ptr[xi + 1]; |
817 | 35.2M | dst_ptr[1] = BLENDER(a, b, x & 0xffff); |
818 | 35.2M | x += dx; |
819 | 35.2M | dst_ptr += 2; |
820 | 35.2M | } |
821 | 548k | if (dst_width & 1) { |
822 | 153k | int xi = x >> 16; |
823 | 153k | int a = src_ptr[xi]; |
824 | 153k | int b = src_ptr[xi + 1]; |
825 | 153k | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
826 | 153k | } |
827 | 548k | } |
828 | | |
829 | | void ScaleFilterCols64_16_C(uint16_t* dst_ptr, |
830 | | const uint16_t* src_ptr, |
831 | | int dst_width, |
832 | | int x32, |
833 | 0 | int dx) { |
834 | 0 | int64_t x = (int64_t)(x32); |
835 | 0 | int j; |
836 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
837 | 0 | int64_t xi = x >> 16; |
838 | 0 | int a = src_ptr[xi]; |
839 | 0 | int b = src_ptr[xi + 1]; |
840 | 0 | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
841 | 0 | x += dx; |
842 | 0 | xi = x >> 16; |
843 | 0 | a = src_ptr[xi]; |
844 | 0 | b = src_ptr[xi + 1]; |
845 | 0 | dst_ptr[1] = BLENDER(a, b, x & 0xffff); |
846 | 0 | x += dx; |
847 | 0 | dst_ptr += 2; |
848 | 0 | } |
849 | 0 | if (dst_width & 1) { |
850 | 0 | int64_t xi = x >> 16; |
851 | 0 | int a = src_ptr[xi]; |
852 | 0 | int b = src_ptr[xi + 1]; |
853 | 0 | dst_ptr[0] = BLENDER(a, b, x & 0xffff); |
854 | 0 | } |
855 | 0 | } |
856 | | #undef BLENDER |
857 | | |
858 | | void ScaleRowDown38_C(const uint8_t* src_ptr, |
859 | | ptrdiff_t src_stride, |
860 | | uint8_t* dst, |
861 | 0 | int dst_width) { |
862 | 0 | int x; |
863 | 0 | (void)src_stride; |
864 | 0 | assert(dst_width % 3 == 0); |
865 | 0 | for (x = 0; x < dst_width; x += 3) { |
866 | 0 | dst[0] = src_ptr[0]; |
867 | 0 | dst[1] = src_ptr[3]; |
868 | 0 | dst[2] = src_ptr[6]; |
869 | 0 | dst += 3; |
870 | 0 | src_ptr += 8; |
871 | 0 | } |
872 | 0 | } |
873 | | |
874 | | void ScaleRowDown38_16_C(const uint16_t* src_ptr, |
875 | | ptrdiff_t src_stride, |
876 | | uint16_t* dst, |
877 | 0 | int dst_width) { |
878 | 0 | int x; |
879 | 0 | (void)src_stride; |
880 | 0 | assert(dst_width % 3 == 0); |
881 | 0 | for (x = 0; x < dst_width; x += 3) { |
882 | 0 | dst[0] = src_ptr[0]; |
883 | 0 | dst[1] = src_ptr[3]; |
884 | 0 | dst[2] = src_ptr[6]; |
885 | 0 | dst += 3; |
886 | 0 | src_ptr += 8; |
887 | 0 | } |
888 | 0 | } |
889 | | |
890 | | // 8x3 -> 3x1 |
891 | | void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, |
892 | | ptrdiff_t src_stride, |
893 | | uint8_t* dst_ptr, |
894 | 470 | int dst_width) { |
895 | 470 | intptr_t stride = src_stride; |
896 | 470 | int i; |
897 | 470 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
898 | 940 | for (i = 0; i < dst_width; i += 3) { |
899 | 470 | dst_ptr[0] = |
900 | 470 | (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + |
901 | 470 | src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + |
902 | 470 | src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * |
903 | 470 | (65536 / 9) >> |
904 | 470 | 16; |
905 | 470 | dst_ptr[1] = |
906 | 470 | (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + |
907 | 470 | src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + |
908 | 470 | src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * |
909 | 470 | (65536 / 9) >> |
910 | 470 | 16; |
911 | 470 | dst_ptr[2] = |
912 | 470 | (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] + |
913 | 470 | src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * |
914 | 470 | (65536 / 6) >> |
915 | 470 | 16; |
916 | 470 | src_ptr += 8; |
917 | 470 | dst_ptr += 3; |
918 | 470 | } |
919 | 470 | } |
920 | | |
921 | | void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, |
922 | | ptrdiff_t src_stride, |
923 | | uint16_t* dst_ptr, |
924 | 420 | int dst_width) { |
925 | 420 | intptr_t stride = src_stride; |
926 | 420 | int i; |
927 | 420 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
928 | 840 | for (i = 0; i < dst_width; i += 3) { |
929 | 420 | dst_ptr[0] = |
930 | 420 | (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + |
931 | 420 | src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + |
932 | 420 | src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * |
933 | 420 | (65536u / 9u) >> |
934 | 420 | 16; |
935 | 420 | dst_ptr[1] = |
936 | 420 | (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + |
937 | 420 | src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + |
938 | 420 | src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * |
939 | 420 | (65536u / 9u) >> |
940 | 420 | 16; |
941 | 420 | dst_ptr[2] = |
942 | 420 | (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] + |
943 | 420 | src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * |
944 | 420 | (65536u / 6u) >> |
945 | 420 | 16; |
946 | 420 | src_ptr += 8; |
947 | 420 | dst_ptr += 3; |
948 | 420 | } |
949 | 420 | } |
950 | | |
951 | | // 8x2 -> 3x1 |
952 | | void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, |
953 | | ptrdiff_t src_stride, |
954 | | uint8_t* dst_ptr, |
955 | 235 | int dst_width) { |
956 | 235 | intptr_t stride = src_stride; |
957 | 235 | int i; |
958 | 235 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
959 | 470 | for (i = 0; i < dst_width; i += 3) { |
960 | 235 | dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + |
961 | 235 | src_ptr[stride + 1] + src_ptr[stride + 2]) * |
962 | 235 | (65536 / 6) >> |
963 | 235 | 16; |
964 | 235 | dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + |
965 | 235 | src_ptr[stride + 4] + src_ptr[stride + 5]) * |
966 | 235 | (65536 / 6) >> |
967 | 235 | 16; |
968 | 235 | dst_ptr[2] = |
969 | 235 | (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) * |
970 | 235 | (65536 / 4) >> |
971 | 235 | 16; |
972 | 235 | src_ptr += 8; |
973 | 235 | dst_ptr += 3; |
974 | 235 | } |
975 | 235 | } |
976 | | |
977 | | void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, |
978 | | ptrdiff_t src_stride, |
979 | | uint16_t* dst_ptr, |
980 | 210 | int dst_width) { |
981 | 210 | intptr_t stride = src_stride; |
982 | 210 | int i; |
983 | 210 | assert((dst_width % 3 == 0) && (dst_width > 0)); |
984 | 420 | for (i = 0; i < dst_width; i += 3) { |
985 | 210 | dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + |
986 | 210 | src_ptr[stride + 1] + src_ptr[stride + 2]) * |
987 | 210 | (65536u / 6u) >> |
988 | 210 | 16; |
989 | 210 | dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + |
990 | 210 | src_ptr[stride + 4] + src_ptr[stride + 5]) * |
991 | 210 | (65536u / 6u) >> |
992 | 210 | 16; |
993 | 210 | dst_ptr[2] = |
994 | 210 | (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) * |
995 | 210 | (65536u / 4u) >> |
996 | 210 | 16; |
997 | 210 | src_ptr += 8; |
998 | 210 | dst_ptr += 3; |
999 | 210 | } |
1000 | 210 | } |
1001 | | |
1002 | 693k | void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { |
1003 | 693k | int x; |
1004 | 693k | assert(src_width > 0); |
1005 | 7.04M | for (x = 0; x < src_width - 1; x += 2) { |
1006 | 6.34M | dst_ptr[0] += src_ptr[0]; |
1007 | 6.34M | dst_ptr[1] += src_ptr[1]; |
1008 | 6.34M | src_ptr += 2; |
1009 | 6.34M | dst_ptr += 2; |
1010 | 6.34M | } |
1011 | 693k | if (src_width & 1) { |
1012 | 71.4k | dst_ptr[0] += src_ptr[0]; |
1013 | 71.4k | } |
1014 | 693k | } |
1015 | | |
1016 | | void ScaleAddRow_16_C(const uint16_t* src_ptr, |
1017 | | uint32_t* dst_ptr, |
1018 | 620k | int src_width) { |
1019 | 620k | int x; |
1020 | 620k | assert(src_width > 0); |
1021 | 44.8M | for (x = 0; x < src_width - 1; x += 2) { |
1022 | 44.2M | dst_ptr[0] += src_ptr[0]; |
1023 | 44.2M | dst_ptr[1] += src_ptr[1]; |
1024 | 44.2M | src_ptr += 2; |
1025 | 44.2M | dst_ptr += 2; |
1026 | 44.2M | } |
1027 | 620k | if (src_width & 1) { |
1028 | 253k | dst_ptr[0] += src_ptr[0]; |
1029 | 253k | } |
1030 | 620k | } |
1031 | | |
1032 | | // ARGB scale row functions |
1033 | | |
1034 | | void ScaleARGBRowDown2_C(const uint8_t* src_argb, |
1035 | | ptrdiff_t src_stride, |
1036 | | uint8_t* dst_argb, |
1037 | 0 | int dst_width) { |
1038 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1039 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1040 | 0 | int x; |
1041 | 0 | (void)src_stride; |
1042 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
1043 | 0 | dst[0] = src[1]; |
1044 | 0 | dst[1] = src[3]; |
1045 | 0 | src += 4; |
1046 | 0 | dst += 2; |
1047 | 0 | } |
1048 | 0 | if (dst_width & 1) { |
1049 | 0 | dst[0] = src[1]; |
1050 | 0 | } |
1051 | 0 | } |
1052 | | |
1053 | | void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, |
1054 | | ptrdiff_t src_stride, |
1055 | | uint8_t* dst_argb, |
1056 | 0 | int dst_width) { |
1057 | 0 | int x; |
1058 | 0 | (void)src_stride; |
1059 | 0 | for (x = 0; x < dst_width; ++x) { |
1060 | 0 | dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; |
1061 | 0 | dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; |
1062 | 0 | dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1; |
1063 | 0 | dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1; |
1064 | 0 | src_argb += 8; |
1065 | 0 | dst_argb += 4; |
1066 | 0 | } |
1067 | 0 | } |
1068 | | |
1069 | | void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, |
1070 | | ptrdiff_t src_stride, |
1071 | | uint8_t* dst_argb, |
1072 | 0 | int dst_width) { |
1073 | 0 | int x; |
1074 | 0 | for (x = 0; x < dst_width; ++x) { |
1075 | 0 | dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] + |
1076 | 0 | src_argb[src_stride + 4] + 2) >> |
1077 | 0 | 2; |
1078 | 0 | dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] + |
1079 | 0 | src_argb[src_stride + 5] + 2) >> |
1080 | 0 | 2; |
1081 | 0 | dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] + |
1082 | 0 | src_argb[src_stride + 6] + 2) >> |
1083 | 0 | 2; |
1084 | 0 | dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] + |
1085 | 0 | src_argb[src_stride + 7] + 2) >> |
1086 | 0 | 2; |
1087 | 0 | src_argb += 8; |
1088 | 0 | dst_argb += 4; |
1089 | 0 | } |
1090 | 0 | } |
1091 | | |
1092 | | void ScaleARGBRowDownEven_C(const uint8_t* src_argb, |
1093 | | ptrdiff_t src_stride, |
1094 | | int src_stepx, |
1095 | | uint8_t* dst_argb, |
1096 | 0 | int dst_width) { |
1097 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1098 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1099 | 0 | (void)src_stride; |
1100 | 0 | int x; |
1101 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
1102 | 0 | dst[0] = src[0]; |
1103 | 0 | dst[1] = src[src_stepx]; |
1104 | 0 | src += src_stepx * 2; |
1105 | 0 | dst += 2; |
1106 | 0 | } |
1107 | 0 | if (dst_width & 1) { |
1108 | 0 | dst[0] = src[0]; |
1109 | 0 | } |
1110 | 0 | } |
1111 | | |
1112 | | void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, |
1113 | | ptrdiff_t src_stride, |
1114 | | int src_stepx, |
1115 | | uint8_t* dst_argb, |
1116 | 0 | int dst_width) { |
1117 | 0 | int x; |
1118 | 0 | for (x = 0; x < dst_width; ++x) { |
1119 | 0 | dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] + |
1120 | 0 | src_argb[src_stride + 4] + 2) >> |
1121 | 0 | 2; |
1122 | 0 | dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] + |
1123 | 0 | src_argb[src_stride + 5] + 2) >> |
1124 | 0 | 2; |
1125 | 0 | dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] + |
1126 | 0 | src_argb[src_stride + 6] + 2) >> |
1127 | 0 | 2; |
1128 | 0 | dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] + |
1129 | 0 | src_argb[src_stride + 7] + 2) >> |
1130 | 0 | 2; |
1131 | 0 | src_argb += src_stepx * 4; |
1132 | 0 | dst_argb += 4; |
1133 | 0 | } |
1134 | 0 | } |
1135 | | |
1136 | | // Scales a single row of pixels using point sampling. |
1137 | | void ScaleARGBCols_C(uint8_t* dst_argb, |
1138 | | const uint8_t* src_argb, |
1139 | | int dst_width, |
1140 | | int x, |
1141 | 0 | int dx) { |
1142 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1143 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1144 | 0 | int j; |
1145 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1146 | 0 | dst[0] = src[x >> 16]; |
1147 | 0 | x += dx; |
1148 | 0 | dst[1] = src[x >> 16]; |
1149 | 0 | x += dx; |
1150 | 0 | dst += 2; |
1151 | 0 | } |
1152 | 0 | if (dst_width & 1) { |
1153 | 0 | dst[0] = src[x >> 16]; |
1154 | 0 | } |
1155 | 0 | } |
1156 | | |
1157 | | void ScaleARGBCols64_C(uint8_t* dst_argb, |
1158 | | const uint8_t* src_argb, |
1159 | | int dst_width, |
1160 | | int x32, |
1161 | 0 | int dx) { |
1162 | 0 | int64_t x = (int64_t)(x32); |
1163 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1164 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1165 | 0 | int j; |
1166 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1167 | 0 | dst[0] = src[x >> 16]; |
1168 | 0 | x += dx; |
1169 | 0 | dst[1] = src[x >> 16]; |
1170 | 0 | x += dx; |
1171 | 0 | dst += 2; |
1172 | 0 | } |
1173 | 0 | if (dst_width & 1) { |
1174 | 0 | dst[0] = src[x >> 16]; |
1175 | 0 | } |
1176 | 0 | } |
1177 | | |
1178 | | // Scales a single row of pixels up by 2x using point sampling. |
1179 | | void ScaleARGBColsUp2_C(uint8_t* dst_argb, |
1180 | | const uint8_t* src_argb, |
1181 | | int dst_width, |
1182 | | int x, |
1183 | 0 | int dx) { |
1184 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1185 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1186 | 0 | int j; |
1187 | 0 | (void)x; |
1188 | 0 | (void)dx; |
1189 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1190 | 0 | dst[1] = dst[0] = src[0]; |
1191 | 0 | src += 1; |
1192 | 0 | dst += 2; |
1193 | 0 | } |
1194 | 0 | if (dst_width & 1) { |
1195 | 0 | dst[0] = src[0]; |
1196 | 0 | } |
1197 | 0 | } |
1198 | | |
1199 | | // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607. |
1200 | | // Mimics SSSE3 blender |
1201 | 0 | #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7 |
1202 | | #define BLENDERC(a, b, f, s) \ |
1203 | 0 | (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) |
1204 | | #define BLENDER(a, b, f) \ |
1205 | 0 | BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \ |
1206 | 0 | BLENDERC(a, b, f, 0) |
1207 | | |
1208 | | void ScaleARGBFilterCols_C(uint8_t* dst_argb, |
1209 | | const uint8_t* src_argb, |
1210 | | int dst_width, |
1211 | | int x, |
1212 | 0 | int dx) { |
1213 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1214 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1215 | 0 | int j; |
1216 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1217 | 0 | int xi = x >> 16; |
1218 | 0 | int xf = (x >> 9) & 0x7f; |
1219 | 0 | uint32_t a = src[xi]; |
1220 | 0 | uint32_t b = src[xi + 1]; |
1221 | 0 | dst[0] = BLENDER(a, b, xf); |
1222 | 0 | x += dx; |
1223 | 0 | xi = x >> 16; |
1224 | 0 | xf = (x >> 9) & 0x7f; |
1225 | 0 | a = src[xi]; |
1226 | 0 | b = src[xi + 1]; |
1227 | 0 | dst[1] = BLENDER(a, b, xf); |
1228 | 0 | x += dx; |
1229 | 0 | dst += 2; |
1230 | 0 | } |
1231 | 0 | if (dst_width & 1) { |
1232 | 0 | int xi = x >> 16; |
1233 | 0 | int xf = (x >> 9) & 0x7f; |
1234 | 0 | uint32_t a = src[xi]; |
1235 | 0 | uint32_t b = src[xi + 1]; |
1236 | 0 | dst[0] = BLENDER(a, b, xf); |
1237 | 0 | } |
1238 | 0 | } |
1239 | | |
1240 | | void ScaleARGBFilterCols64_C(uint8_t* dst_argb, |
1241 | | const uint8_t* src_argb, |
1242 | | int dst_width, |
1243 | | int x32, |
1244 | 0 | int dx) { |
1245 | 0 | int64_t x = (int64_t)(x32); |
1246 | 0 | const uint32_t* src = (const uint32_t*)(src_argb); |
1247 | 0 | uint32_t* dst = (uint32_t*)(dst_argb); |
1248 | 0 | int j; |
1249 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1250 | 0 | int64_t xi = x >> 16; |
1251 | 0 | int xf = (x >> 9) & 0x7f; |
1252 | 0 | uint32_t a = src[xi]; |
1253 | 0 | uint32_t b = src[xi + 1]; |
1254 | 0 | dst[0] = BLENDER(a, b, xf); |
1255 | 0 | x += dx; |
1256 | 0 | xi = x >> 16; |
1257 | 0 | xf = (x >> 9) & 0x7f; |
1258 | 0 | a = src[xi]; |
1259 | 0 | b = src[xi + 1]; |
1260 | 0 | dst[1] = BLENDER(a, b, xf); |
1261 | 0 | x += dx; |
1262 | 0 | dst += 2; |
1263 | 0 | } |
1264 | 0 | if (dst_width & 1) { |
1265 | 0 | int64_t xi = x >> 16; |
1266 | 0 | int xf = (x >> 9) & 0x7f; |
1267 | 0 | uint32_t a = src[xi]; |
1268 | 0 | uint32_t b = src[xi + 1]; |
1269 | 0 | dst[0] = BLENDER(a, b, xf); |
1270 | 0 | } |
1271 | 0 | } |
1272 | | #undef BLENDER1 |
1273 | | #undef BLENDERC |
1274 | | #undef BLENDER |
1275 | | |
1276 | | // UV scale row functions |
1277 | | // same as ARGB but 2 channels |
1278 | | |
1279 | | void ScaleUVRowDown2_C(const uint8_t* src_uv, |
1280 | | ptrdiff_t src_stride, |
1281 | | uint8_t* dst_uv, |
1282 | 0 | int dst_width) { |
1283 | 0 | int x; |
1284 | 0 | (void)src_stride; |
1285 | 0 | for (x = 0; x < dst_width; ++x) { |
1286 | 0 | dst_uv[0] = src_uv[2]; // Store the 2nd UV |
1287 | 0 | dst_uv[1] = src_uv[3]; |
1288 | 0 | src_uv += 4; |
1289 | 0 | dst_uv += 2; |
1290 | 0 | } |
1291 | 0 | } |
1292 | | |
1293 | | void ScaleUVRowDown2Linear_C(const uint8_t* src_uv, |
1294 | | ptrdiff_t src_stride, |
1295 | | uint8_t* dst_uv, |
1296 | 0 | int dst_width) { |
1297 | 0 | int x; |
1298 | 0 | (void)src_stride; |
1299 | 0 | for (x = 0; x < dst_width; ++x) { |
1300 | 0 | dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1; |
1301 | 0 | dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1; |
1302 | 0 | src_uv += 4; |
1303 | 0 | dst_uv += 2; |
1304 | 0 | } |
1305 | 0 | } |
1306 | | |
1307 | | void ScaleUVRowDown2Box_C(const uint8_t* src_uv, |
1308 | | ptrdiff_t src_stride, |
1309 | | uint8_t* dst_uv, |
1310 | 0 | int dst_width) { |
1311 | 0 | int x; |
1312 | 0 | for (x = 0; x < dst_width; ++x) { |
1313 | 0 | dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] + |
1314 | 0 | src_uv[src_stride + 2] + 2) >> |
1315 | 0 | 2; |
1316 | 0 | dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] + |
1317 | 0 | src_uv[src_stride + 3] + 2) >> |
1318 | 0 | 2; |
1319 | 0 | src_uv += 4; |
1320 | 0 | dst_uv += 2; |
1321 | 0 | } |
1322 | 0 | } |
1323 | | |
1324 | | void ScaleUVRowDownEven_C(const uint8_t* src_uv, |
1325 | | ptrdiff_t src_stride, |
1326 | | int src_stepx, |
1327 | | uint8_t* dst_uv, |
1328 | 0 | int dst_width) { |
1329 | 0 | const uint16_t* src = (const uint16_t*)(src_uv); |
1330 | 0 | uint16_t* dst = (uint16_t*)(dst_uv); |
1331 | 0 | (void)src_stride; |
1332 | 0 | int x; |
1333 | 0 | for (x = 0; x < dst_width - 1; x += 2) { |
1334 | 0 | dst[0] = src[0]; |
1335 | 0 | dst[1] = src[src_stepx]; |
1336 | 0 | src += src_stepx * 2; |
1337 | 0 | dst += 2; |
1338 | 0 | } |
1339 | 0 | if (dst_width & 1) { |
1340 | 0 | dst[0] = src[0]; |
1341 | 0 | } |
1342 | 0 | } |
1343 | | |
1344 | | void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv, |
1345 | | ptrdiff_t src_stride, |
1346 | | int src_stepx, |
1347 | | uint8_t* dst_uv, |
1348 | 0 | int dst_width) { |
1349 | 0 | int x; |
1350 | 0 | for (x = 0; x < dst_width; ++x) { |
1351 | 0 | dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] + |
1352 | 0 | src_uv[src_stride + 2] + 2) >> |
1353 | 0 | 2; |
1354 | 0 | dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] + |
1355 | 0 | src_uv[src_stride + 3] + 2) >> |
1356 | 0 | 2; |
1357 | 0 | src_uv += src_stepx * 2; |
1358 | 0 | dst_uv += 2; |
1359 | 0 | } |
1360 | 0 | } |
1361 | | |
1362 | | void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr, |
1363 | | uint8_t* dst_ptr, |
1364 | 0 | int dst_width) { |
1365 | 0 | int src_width = dst_width >> 1; |
1366 | 0 | int x; |
1367 | 0 | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
1368 | 0 | for (x = 0; x < src_width; ++x) { |
1369 | 0 | dst_ptr[4 * x + 0] = |
1370 | 0 | (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2; |
1371 | 0 | dst_ptr[4 * x + 1] = |
1372 | 0 | (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2; |
1373 | 0 | dst_ptr[4 * x + 2] = |
1374 | 0 | (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2; |
1375 | 0 | dst_ptr[4 * x + 3] = |
1376 | 0 | (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2; |
1377 | 0 | } |
1378 | 0 | } |
1379 | | |
1380 | | void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr, |
1381 | | ptrdiff_t src_stride, |
1382 | | uint8_t* dst_ptr, |
1383 | | ptrdiff_t dst_stride, |
1384 | 0 | int dst_width) { |
1385 | 0 | const uint8_t* s = src_ptr; |
1386 | 0 | const uint8_t* t = src_ptr + src_stride; |
1387 | 0 | uint8_t* d = dst_ptr; |
1388 | 0 | uint8_t* e = dst_ptr + dst_stride; |
1389 | 0 | int src_width = dst_width >> 1; |
1390 | 0 | int x; |
1391 | 0 | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
1392 | 0 | for (x = 0; x < src_width; ++x) { |
1393 | 0 | d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 + |
1394 | 0 | t[2 * x + 2] * 1 + 8) >> |
1395 | 0 | 4; |
1396 | 0 | d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 + |
1397 | 0 | t[2 * x + 3] * 1 + 8) >> |
1398 | 0 | 4; |
1399 | 0 | d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 + |
1400 | 0 | t[2 * x + 2] * 3 + 8) >> |
1401 | 0 | 4; |
1402 | 0 | d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 + |
1403 | 0 | t[2 * x + 3] * 3 + 8) >> |
1404 | 0 | 4; |
1405 | 0 | e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 + |
1406 | 0 | t[2 * x + 2] * 3 + 8) >> |
1407 | 0 | 4; |
1408 | 0 | e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 + |
1409 | 0 | t[2 * x + 3] * 3 + 8) >> |
1410 | 0 | 4; |
1411 | 0 | e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 + |
1412 | 0 | t[2 * x + 2] * 9 + 8) >> |
1413 | 0 | 4; |
1414 | 0 | e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 + |
1415 | 0 | t[2 * x + 3] * 9 + 8) >> |
1416 | 0 | 4; |
1417 | 0 | } |
1418 | 0 | } |
1419 | | |
1420 | | void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr, |
1421 | | uint16_t* dst_ptr, |
1422 | 0 | int dst_width) { |
1423 | 0 | int src_width = dst_width >> 1; |
1424 | 0 | int x; |
1425 | 0 | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
1426 | 0 | for (x = 0; x < src_width; ++x) { |
1427 | 0 | dst_ptr[4 * x + 0] = |
1428 | 0 | (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2; |
1429 | 0 | dst_ptr[4 * x + 1] = |
1430 | 0 | (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2; |
1431 | 0 | dst_ptr[4 * x + 2] = |
1432 | 0 | (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2; |
1433 | 0 | dst_ptr[4 * x + 3] = |
1434 | 0 | (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2; |
1435 | 0 | } |
1436 | 0 | } |
1437 | | |
1438 | | void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr, |
1439 | | ptrdiff_t src_stride, |
1440 | | uint16_t* dst_ptr, |
1441 | | ptrdiff_t dst_stride, |
1442 | 0 | int dst_width) { |
1443 | 0 | const uint16_t* s = src_ptr; |
1444 | 0 | const uint16_t* t = src_ptr + src_stride; |
1445 | 0 | uint16_t* d = dst_ptr; |
1446 | 0 | uint16_t* e = dst_ptr + dst_stride; |
1447 | 0 | int src_width = dst_width >> 1; |
1448 | 0 | int x; |
1449 | 0 | assert((dst_width % 2 == 0) && (dst_width >= 0)); |
1450 | 0 | for (x = 0; x < src_width; ++x) { |
1451 | 0 | d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 + |
1452 | 0 | t[2 * x + 2] * 1 + 8) >> |
1453 | 0 | 4; |
1454 | 0 | d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 + |
1455 | 0 | t[2 * x + 3] * 1 + 8) >> |
1456 | 0 | 4; |
1457 | 0 | d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 + |
1458 | 0 | t[2 * x + 2] * 3 + 8) >> |
1459 | 0 | 4; |
1460 | 0 | d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 + |
1461 | 0 | t[2 * x + 3] * 3 + 8) >> |
1462 | 0 | 4; |
1463 | 0 | e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 + |
1464 | 0 | t[2 * x + 2] * 3 + 8) >> |
1465 | 0 | 4; |
1466 | 0 | e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 + |
1467 | 0 | t[2 * x + 3] * 3 + 8) >> |
1468 | 0 | 4; |
1469 | 0 | e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 + |
1470 | 0 | t[2 * x + 2] * 9 + 8) >> |
1471 | 0 | 4; |
1472 | 0 | e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 + |
1473 | 0 | t[2 * x + 3] * 9 + 8) >> |
1474 | 0 | 4; |
1475 | 0 | } |
1476 | 0 | } |
1477 | | |
1478 | | // Scales a single row of pixels using point sampling. |
1479 | | void ScaleUVCols_C(uint8_t* dst_uv, |
1480 | | const uint8_t* src_uv, |
1481 | | int dst_width, |
1482 | | int x, |
1483 | 0 | int dx) { |
1484 | 0 | const uint16_t* src = (const uint16_t*)(src_uv); |
1485 | 0 | uint16_t* dst = (uint16_t*)(dst_uv); |
1486 | 0 | int j; |
1487 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1488 | 0 | dst[0] = src[x >> 16]; |
1489 | 0 | x += dx; |
1490 | 0 | dst[1] = src[x >> 16]; |
1491 | 0 | x += dx; |
1492 | 0 | dst += 2; |
1493 | 0 | } |
1494 | 0 | if (dst_width & 1) { |
1495 | 0 | dst[0] = src[x >> 16]; |
1496 | 0 | } |
1497 | 0 | } |
1498 | | |
1499 | | void ScaleUVCols64_C(uint8_t* dst_uv, |
1500 | | const uint8_t* src_uv, |
1501 | | int dst_width, |
1502 | | int x32, |
1503 | 0 | int dx) { |
1504 | 0 | int64_t x = (int64_t)(x32); |
1505 | 0 | const uint16_t* src = (const uint16_t*)(src_uv); |
1506 | 0 | uint16_t* dst = (uint16_t*)(dst_uv); |
1507 | 0 | int j; |
1508 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1509 | 0 | dst[0] = src[x >> 16]; |
1510 | 0 | x += dx; |
1511 | 0 | dst[1] = src[x >> 16]; |
1512 | 0 | x += dx; |
1513 | 0 | dst += 2; |
1514 | 0 | } |
1515 | 0 | if (dst_width & 1) { |
1516 | 0 | dst[0] = src[x >> 16]; |
1517 | 0 | } |
1518 | 0 | } |
1519 | | |
1520 | | // Scales a single row of pixels up by 2x using point sampling. |
1521 | | void ScaleUVColsUp2_C(uint8_t* dst_uv, |
1522 | | const uint8_t* src_uv, |
1523 | | int dst_width, |
1524 | | int x, |
1525 | 0 | int dx) { |
1526 | 0 | const uint16_t* src = (const uint16_t*)(src_uv); |
1527 | 0 | uint16_t* dst = (uint16_t*)(dst_uv); |
1528 | 0 | int j; |
1529 | 0 | (void)x; |
1530 | 0 | (void)dx; |
1531 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1532 | 0 | dst[1] = dst[0] = src[0]; |
1533 | 0 | src += 1; |
1534 | 0 | dst += 2; |
1535 | 0 | } |
1536 | 0 | if (dst_width & 1) { |
1537 | 0 | dst[0] = src[0]; |
1538 | 0 | } |
1539 | 0 | } |
1540 | | |
1541 | | // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607. |
1542 | | // Mimics SSSE3 blender |
1543 | 0 | #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7 |
1544 | | #define BLENDERC(a, b, f, s) \ |
1545 | 0 | (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) |
1546 | 0 | #define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) |
1547 | | |
1548 | | void ScaleUVFilterCols_C(uint8_t* dst_uv, |
1549 | | const uint8_t* src_uv, |
1550 | | int dst_width, |
1551 | | int x, |
1552 | 0 | int dx) { |
1553 | 0 | const uint16_t* src = (const uint16_t*)(src_uv); |
1554 | 0 | uint16_t* dst = (uint16_t*)(dst_uv); |
1555 | 0 | int j; |
1556 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1557 | 0 | int xi = x >> 16; |
1558 | 0 | int xf = (x >> 9) & 0x7f; |
1559 | 0 | uint16_t a = src[xi]; |
1560 | 0 | uint16_t b = src[xi + 1]; |
1561 | 0 | dst[0] = BLENDER(a, b, xf); |
1562 | 0 | x += dx; |
1563 | 0 | xi = x >> 16; |
1564 | 0 | xf = (x >> 9) & 0x7f; |
1565 | 0 | a = src[xi]; |
1566 | 0 | b = src[xi + 1]; |
1567 | 0 | dst[1] = BLENDER(a, b, xf); |
1568 | 0 | x += dx; |
1569 | 0 | dst += 2; |
1570 | 0 | } |
1571 | 0 | if (dst_width & 1) { |
1572 | 0 | int xi = x >> 16; |
1573 | 0 | int xf = (x >> 9) & 0x7f; |
1574 | 0 | uint16_t a = src[xi]; |
1575 | 0 | uint16_t b = src[xi + 1]; |
1576 | 0 | dst[0] = BLENDER(a, b, xf); |
1577 | 0 | } |
1578 | 0 | } |
1579 | | |
1580 | | void ScaleUVFilterCols64_C(uint8_t* dst_uv, |
1581 | | const uint8_t* src_uv, |
1582 | | int dst_width, |
1583 | | int x32, |
1584 | 0 | int dx) { |
1585 | 0 | int64_t x = (int64_t)(x32); |
1586 | 0 | const uint16_t* src = (const uint16_t*)(src_uv); |
1587 | 0 | uint16_t* dst = (uint16_t*)(dst_uv); |
1588 | 0 | int j; |
1589 | 0 | for (j = 0; j < dst_width - 1; j += 2) { |
1590 | 0 | int64_t xi = x >> 16; |
1591 | 0 | int xf = (x >> 9) & 0x7f; |
1592 | 0 | uint16_t a = src[xi]; |
1593 | 0 | uint16_t b = src[xi + 1]; |
1594 | 0 | dst[0] = BLENDER(a, b, xf); |
1595 | 0 | x += dx; |
1596 | 0 | xi = x >> 16; |
1597 | 0 | xf = (x >> 9) & 0x7f; |
1598 | 0 | a = src[xi]; |
1599 | 0 | b = src[xi + 1]; |
1600 | 0 | dst[1] = BLENDER(a, b, xf); |
1601 | 0 | x += dx; |
1602 | 0 | dst += 2; |
1603 | 0 | } |
1604 | 0 | if (dst_width & 1) { |
1605 | 0 | int64_t xi = x >> 16; |
1606 | 0 | int xf = (x >> 9) & 0x7f; |
1607 | 0 | uint16_t a = src[xi]; |
1608 | 0 | uint16_t b = src[xi + 1]; |
1609 | 0 | dst[0] = BLENDER(a, b, xf); |
1610 | 0 | } |
1611 | 0 | } |
1612 | | #undef BLENDER1 |
1613 | | #undef BLENDERC |
1614 | | #undef BLENDER |
1615 | | |
1616 | | // Scale plane vertically with bilinear interpolation. |
1617 | | void ScalePlaneVertical(int src_height, |
1618 | | int dst_width, |
1619 | | int dst_height, |
1620 | | int src_stride, |
1621 | | int dst_stride, |
1622 | | const uint8_t* src_argb, |
1623 | | uint8_t* dst_argb, |
1624 | | int x, |
1625 | | int y, |
1626 | | int dy, |
1627 | | int bpp, // bytes per pixel. 4 for ARGB. |
1628 | 5.16k | enum FilterMode filtering) { |
1629 | | // TODO(fbarchard): Allow higher bpp. |
1630 | 5.16k | int dst_width_bytes = dst_width * bpp; |
1631 | 5.16k | void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, |
1632 | 5.16k | ptrdiff_t src_stride, int dst_width, |
1633 | 5.16k | int source_y_fraction) = InterpolateRow_C; |
1634 | 5.16k | const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; |
1635 | 5.16k | int j; |
1636 | 5.16k | assert(bpp >= 1 && bpp <= 4); |
1637 | 5.16k | assert(src_height != 0); |
1638 | 5.16k | assert(dst_width > 0); |
1639 | 5.16k | assert(dst_height > 0); |
1640 | 5.16k | src_argb += (x >> 16) * bpp; |
1641 | 5.16k | #if defined(HAS_INTERPOLATEROW_SSSE3) |
1642 | 5.16k | if (TestCpuFlag(kCpuHasSSSE3)) { |
1643 | 5.16k | InterpolateRow = InterpolateRow_Any_SSSE3; |
1644 | 5.16k | if (IS_ALIGNED(dst_width_bytes, 16)) { |
1645 | 422 | InterpolateRow = InterpolateRow_SSSE3; |
1646 | 422 | } |
1647 | 5.16k | } |
1648 | 5.16k | #endif |
1649 | 5.16k | #if defined(HAS_INTERPOLATEROW_AVX2) |
1650 | 5.16k | if (TestCpuFlag(kCpuHasAVX2)) { |
1651 | 5.16k | InterpolateRow = InterpolateRow_Any_AVX2; |
1652 | 5.16k | if (IS_ALIGNED(dst_width_bytes, 32)) { |
1653 | 292 | InterpolateRow = InterpolateRow_AVX2; |
1654 | 292 | } |
1655 | 5.16k | } |
1656 | 5.16k | #endif |
1657 | | #if defined(HAS_INTERPOLATEROW_NEON) |
1658 | | if (TestCpuFlag(kCpuHasNEON)) { |
1659 | | InterpolateRow = InterpolateRow_Any_NEON; |
1660 | | if (IS_ALIGNED(dst_width_bytes, 16)) { |
1661 | | InterpolateRow = InterpolateRow_NEON; |
1662 | | } |
1663 | | } |
1664 | | #endif |
1665 | | #if defined(HAS_INTERPOLATEROW_SME) |
1666 | | if (TestCpuFlag(kCpuHasSME)) { |
1667 | | InterpolateRow = InterpolateRow_SME; |
1668 | | } |
1669 | | #endif |
1670 | | #if defined(HAS_INTERPOLATEROW_MSA) |
1671 | | if (TestCpuFlag(kCpuHasMSA)) { |
1672 | | InterpolateRow = InterpolateRow_Any_MSA; |
1673 | | if (IS_ALIGNED(dst_width_bytes, 32)) { |
1674 | | InterpolateRow = InterpolateRow_MSA; |
1675 | | } |
1676 | | } |
1677 | | #endif |
1678 | | #if defined(HAS_INTERPOLATEROW_LSX) |
1679 | | if (TestCpuFlag(kCpuHasLSX)) { |
1680 | | InterpolateRow = InterpolateRow_Any_LSX; |
1681 | | if (IS_ALIGNED(dst_width_bytes, 32)) { |
1682 | | InterpolateRow = InterpolateRow_LSX; |
1683 | | } |
1684 | | } |
1685 | | #endif |
1686 | | #if defined(HAS_INTERPOLATEROW_RVV) |
1687 | | if (TestCpuFlag(kCpuHasRVV)) { |
1688 | | InterpolateRow = InterpolateRow_RVV; |
1689 | | } |
1690 | | #endif |
1691 | | |
1692 | 733k | for (j = 0; j < dst_height; ++j) { |
1693 | 728k | int yi; |
1694 | 728k | int yf; |
1695 | 728k | if (y > max_y) { |
1696 | 0 | y = max_y; |
1697 | 0 | } |
1698 | 728k | yi = y >> 16; |
1699 | 728k | yf = filtering ? ((y >> 8) & 255) : 0; |
1700 | 728k | InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride, |
1701 | 728k | dst_width_bytes, yf); |
1702 | 728k | dst_argb += dst_stride; |
1703 | 728k | y += dy; |
1704 | 728k | } |
1705 | 5.16k | } |
1706 | | |
1707 | | void ScalePlaneVertical_16(int src_height, |
1708 | | int dst_width, |
1709 | | int dst_height, |
1710 | | int src_stride, |
1711 | | int dst_stride, |
1712 | | const uint16_t* src_argb, |
1713 | | uint16_t* dst_argb, |
1714 | | int x, |
1715 | | int y, |
1716 | | int dy, |
1717 | | int wpp, /* words per pixel. normally 1 */ |
1718 | 1.58k | enum FilterMode filtering) { |
1719 | | // TODO(fbarchard): Allow higher wpp. |
1720 | 1.58k | int dst_width_words = dst_width * wpp; |
1721 | 1.58k | void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb, |
1722 | 1.58k | ptrdiff_t src_stride, int dst_width, |
1723 | 1.58k | int source_y_fraction) = InterpolateRow_16_C; |
1724 | 1.58k | const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; |
1725 | 1.58k | int j; |
1726 | 1.58k | assert(wpp >= 1 && wpp <= 2); |
1727 | 1.58k | assert(src_height != 0); |
1728 | 1.58k | assert(dst_width > 0); |
1729 | 1.58k | assert(dst_height > 0); |
1730 | 1.58k | src_argb += (x >> 16) * wpp; |
1731 | | #if defined(HAS_INTERPOLATEROW_16_SSE2) |
1732 | | if (TestCpuFlag(kCpuHasSSE2)) { |
1733 | | InterpolateRow = InterpolateRow_16_Any_SSE2; |
1734 | | if (IS_ALIGNED(dst_width_words, 16)) { |
1735 | | InterpolateRow = InterpolateRow_16_SSE2; |
1736 | | } |
1737 | | } |
1738 | | #endif |
1739 | | #if defined(HAS_INTERPOLATEROW_16_SSSE3) |
1740 | | if (TestCpuFlag(kCpuHasSSSE3)) { |
1741 | | InterpolateRow = InterpolateRow_16_Any_SSSE3; |
1742 | | if (IS_ALIGNED(dst_width_words, 16)) { |
1743 | | InterpolateRow = InterpolateRow_16_SSSE3; |
1744 | | } |
1745 | | } |
1746 | | #endif |
1747 | | #if defined(HAS_INTERPOLATEROW_16_AVX2) |
1748 | | if (TestCpuFlag(kCpuHasAVX2)) { |
1749 | | InterpolateRow = InterpolateRow_16_Any_AVX2; |
1750 | | if (IS_ALIGNED(dst_width_words, 32)) { |
1751 | | InterpolateRow = InterpolateRow_16_AVX2; |
1752 | | } |
1753 | | } |
1754 | | #endif |
1755 | | #if defined(HAS_INTERPOLATEROW_16_NEON) |
1756 | | if (TestCpuFlag(kCpuHasNEON)) { |
1757 | | InterpolateRow = InterpolateRow_16_Any_NEON; |
1758 | | if (IS_ALIGNED(dst_width_words, 8)) { |
1759 | | InterpolateRow = InterpolateRow_16_NEON; |
1760 | | } |
1761 | | } |
1762 | | #endif |
1763 | | #if defined(HAS_INTERPOLATEROW_16_SME) |
1764 | | if (TestCpuFlag(kCpuHasSME)) { |
1765 | | InterpolateRow = InterpolateRow_16_SME; |
1766 | | } |
1767 | | #endif |
1768 | 658k | for (j = 0; j < dst_height; ++j) { |
1769 | 657k | int yi; |
1770 | 657k | int yf; |
1771 | 657k | if (y > max_y) { |
1772 | 0 | y = max_y; |
1773 | 0 | } |
1774 | 657k | yi = y >> 16; |
1775 | 657k | yf = filtering ? ((y >> 8) & 255) : 0; |
1776 | 657k | InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride, |
1777 | 657k | dst_width_words, yf); |
1778 | 657k | dst_argb += dst_stride; |
1779 | 657k | y += dy; |
1780 | 657k | } |
1781 | 1.58k | } |
1782 | | |
1783 | | // Use scale to convert lsb formats to msb, depending how many bits there are: |
1784 | | // 32768 = 9 bits |
1785 | | // 16384 = 10 bits |
1786 | | // 4096 = 12 bits |
1787 | | // 256 = 16 bits |
1788 | | // TODO(fbarchard): change scale to bits |
1789 | | void ScalePlaneVertical_16To8(int src_height, |
1790 | | int dst_width, |
1791 | | int dst_height, |
1792 | | int src_stride, |
1793 | | int dst_stride, |
1794 | | const uint16_t* src_argb, |
1795 | | uint8_t* dst_argb, |
1796 | | int x, |
1797 | | int y, |
1798 | | int dy, |
1799 | | int wpp, /* words per pixel. normally 1 */ |
1800 | | int scale, |
1801 | 0 | enum FilterMode filtering) { |
1802 | | // TODO(fbarchard): Allow higher wpp. |
1803 | 0 | int dst_width_words = dst_width * wpp; |
1804 | | // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions. |
1805 | 0 | void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb, |
1806 | 0 | ptrdiff_t src_stride, int scale, int dst_width, |
1807 | 0 | int source_y_fraction) = InterpolateRow_16To8_C; |
1808 | 0 | const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; |
1809 | 0 | int j; |
1810 | 0 | assert(wpp >= 1 && wpp <= 2); |
1811 | 0 | assert(src_height != 0); |
1812 | 0 | assert(dst_width > 0); |
1813 | 0 | assert(dst_height > 0); |
1814 | 0 | src_argb += (x >> 16) * wpp; |
1815 | |
|
1816 | | #if defined(HAS_INTERPOLATEROW_16TO8_NEON) |
1817 | | if (TestCpuFlag(kCpuHasNEON)) { |
1818 | | InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON; |
1819 | | if (IS_ALIGNED(dst_width, 8)) { |
1820 | | InterpolateRow_16To8 = InterpolateRow_16To8_NEON; |
1821 | | } |
1822 | | } |
1823 | | #endif |
1824 | | #if defined(HAS_INTERPOLATEROW_16TO8_SME) |
1825 | | if (TestCpuFlag(kCpuHasSME)) { |
1826 | | InterpolateRow_16To8 = InterpolateRow_16To8_SME; |
1827 | | } |
1828 | | #endif |
1829 | 0 | #if defined(HAS_INTERPOLATEROW_16TO8_AVX2) |
1830 | 0 | if (TestCpuFlag(kCpuHasAVX2)) { |
1831 | 0 | InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2; |
1832 | 0 | if (IS_ALIGNED(dst_width, 32)) { |
1833 | 0 | InterpolateRow_16To8 = InterpolateRow_16To8_AVX2; |
1834 | 0 | } |
1835 | 0 | } |
1836 | 0 | #endif |
1837 | 0 | for (j = 0; j < dst_height; ++j) { |
1838 | 0 | int yi; |
1839 | 0 | int yf; |
1840 | 0 | if (y > max_y) { |
1841 | 0 | y = max_y; |
1842 | 0 | } |
1843 | 0 | yi = y >> 16; |
1844 | 0 | yf = filtering ? ((y >> 8) & 255) : 0; |
1845 | 0 | InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride, |
1846 | 0 | scale, dst_width_words, yf); |
1847 | 0 | dst_argb += dst_stride; |
1848 | 0 | y += dy; |
1849 | 0 | } |
1850 | 0 | } |
1851 | | |
1852 | | // Simplify the filtering based on scale factors. |
1853 | | enum FilterMode ScaleFilterReduce(int src_width, |
1854 | | int src_height, |
1855 | | int dst_width, |
1856 | | int dst_height, |
1857 | 44.6k | enum FilterMode filtering) { |
1858 | 44.6k | if (src_width < 0) { |
1859 | 0 | src_width = -src_width; |
1860 | 0 | } |
1861 | 44.6k | if (src_height < 0) { |
1862 | 0 | src_height = -src_height; |
1863 | 0 | } |
1864 | 44.6k | if (filtering == kFilterBox) { |
1865 | | // If scaling either axis to 0.5 or larger, switch from Box to Bilinear. |
1866 | 31.9k | if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) { |
1867 | 28.4k | filtering = kFilterBilinear; |
1868 | 28.4k | } |
1869 | 31.9k | } |
1870 | 44.6k | if (filtering == kFilterBilinear) { |
1871 | 36.6k | if (src_height == 1) { |
1872 | 2.52k | filtering = kFilterLinear; |
1873 | 2.52k | } |
1874 | | // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. |
1875 | 36.6k | if (dst_height == src_height || dst_height * 3 == src_height) { |
1876 | 4.54k | filtering = kFilterLinear; |
1877 | 4.54k | } |
1878 | | // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to |
1879 | | // avoid reading 2 pixels horizontally that causes memory exception. |
1880 | 36.6k | if (src_width == 1) { |
1881 | 1.90k | filtering = kFilterNone; |
1882 | 1.90k | } |
1883 | 36.6k | } |
1884 | 44.6k | if (filtering == kFilterLinear) { |
1885 | 9.35k | if (src_width == 1) { |
1886 | 0 | filtering = kFilterNone; |
1887 | 0 | } |
1888 | | // TODO(fbarchard): Detect any odd scale factor and reduce to None. |
1889 | 9.35k | if (dst_width == src_width || dst_width * 3 == src_width) { |
1890 | 744 | filtering = kFilterNone; |
1891 | 744 | } |
1892 | 9.35k | } |
1893 | 44.6k | return filtering; |
1894 | 44.6k | } |
1895 | | |
1896 | | // Divide num by div and return as 16.16 fixed point result. |
1897 | 0 | int FixedDiv_C(int num, int div) { |
1898 | 0 | return (int)(((int64_t)(num) << 16) / div); |
1899 | 0 | } |
1900 | | |
1901 | | // Divide num - 1 by div - 1 and return as 16.16 fixed point result. |
1902 | 0 | int FixedDiv1_C(int num, int div) { |
1903 | 0 | return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); |
1904 | 0 | } |
1905 | | |
1906 | 16.2k | #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) |
1907 | | |
1908 | | // Compute slope values for stepping. |
1909 | | void ScaleSlope(int src_width, |
1910 | | int src_height, |
1911 | | int dst_width, |
1912 | | int dst_height, |
1913 | | enum FilterMode filtering, |
1914 | | int* x, |
1915 | | int* y, |
1916 | | int* dx, |
1917 | 22.2k | int* dy) { |
1918 | 22.2k | assert(x != NULL); |
1919 | 22.2k | assert(y != NULL); |
1920 | 22.2k | assert(dx != NULL); |
1921 | 22.2k | assert(dy != NULL); |
1922 | 22.2k | assert(src_width != 0); |
1923 | 22.2k | assert(src_height != 0); |
1924 | 22.2k | assert(dst_width > 0); |
1925 | 22.2k | assert(dst_height > 0); |
1926 | | // Check for 1 pixel and avoid FixedDiv overflow. |
1927 | 22.2k | if (dst_width == 1 && src_width >= 32768) { |
1928 | 0 | dst_width = src_width; |
1929 | 0 | } |
1930 | 22.2k | if (dst_height == 1 && src_height >= 32768) { |
1931 | 0 | dst_height = src_height; |
1932 | 0 | } |
1933 | 22.2k | if (filtering == kFilterBox) { |
1934 | | // Scale step for point sampling duplicates all pixels equally. |
1935 | 2.24k | *dx = FixedDiv(Abs(src_width), dst_width); |
1936 | 2.24k | *dy = FixedDiv(src_height, dst_height); |
1937 | 2.24k | *x = 0; |
1938 | 2.24k | *y = 0; |
1939 | 19.9k | } else if (filtering == kFilterBilinear) { |
1940 | | // Scale step for bilinear sampling renders last pixel once for upsample. |
1941 | 13.2k | if (dst_width <= Abs(src_width)) { |
1942 | 4.20k | *dx = FixedDiv(Abs(src_width), dst_width); |
1943 | 4.20k | *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. |
1944 | 9.01k | } else if (src_width > 1 && dst_width > 1) { |
1945 | 9.01k | *dx = FixedDiv1(Abs(src_width), dst_width); |
1946 | 9.01k | *x = 0; |
1947 | 9.01k | } |
1948 | 13.2k | if (dst_height <= src_height) { |
1949 | 5.49k | *dy = FixedDiv(src_height, dst_height); |
1950 | 5.49k | *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. |
1951 | 7.71k | } else if (src_height > 1 && dst_height > 1) { |
1952 | 7.71k | *dy = FixedDiv1(src_height, dst_height); |
1953 | 7.71k | *y = 0; |
1954 | 7.71k | } |
1955 | 13.2k | } else if (filtering == kFilterLinear) { |
1956 | | // Scale step for bilinear sampling renders last pixel once for upsample. |
1957 | 4.95k | if (dst_width <= Abs(src_width)) { |
1958 | 2.97k | *dx = FixedDiv(Abs(src_width), dst_width); |
1959 | 2.97k | *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. |
1960 | 2.97k | } else if (src_width > 1 && dst_width > 1) { |
1961 | 1.98k | *dx = FixedDiv1(Abs(src_width), dst_width); |
1962 | 1.98k | *x = 0; |
1963 | 1.98k | } |
1964 | 4.95k | *dy = FixedDiv(src_height, dst_height); |
1965 | 4.95k | *y = *dy >> 1; |
1966 | 4.95k | } else { |
1967 | | // Scale step for point sampling duplicates all pixels equally. |
1968 | 1.79k | *dx = FixedDiv(Abs(src_width), dst_width); |
1969 | 1.79k | *dy = FixedDiv(src_height, dst_height); |
1970 | 1.79k | *x = CENTERSTART(*dx, 0); |
1971 | 1.79k | *y = CENTERSTART(*dy, 0); |
1972 | 1.79k | } |
1973 | | // Negative src_width means horizontally mirror. |
1974 | 22.2k | if (src_width < 0) { |
1975 | 0 | *x += (dst_width - 1) * *dx; |
1976 | 0 | *dx = -*dx; |
1977 | | // src_width = -src_width; // Caller must do this. |
1978 | 0 | } |
1979 | 22.2k | } |
1980 | | #undef CENTERSTART |
1981 | | |
1982 | | #ifdef __cplusplus |
1983 | | } // extern "C" |
1984 | | } // namespace libyuv |
1985 | | #endif |