/work/dav1d/src/mc_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/attributes.h" |
34 | | #include "common/intops.h" |
35 | | |
36 | | #include "src/mc.h" |
37 | | #include "src/tables.h" |
38 | | |
/*
 * Bit-depth dependent parameters of the int16_t intermediate format that the
 * prep functions in this file write and the compound functions read back.
 */
#if BITDEPTH != 8
// 4 for 10 bits/component, 2 for 12 bits/component
#define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))
// Output in interval [-20588, 36956] (10-bit), [-20602, 36983] (12-bit)
// Subtract a bias to ensure the output fits in int16_t
#define PREP_BIAS 8192
#else
// 8 bits/component: the intermediate precision is a fixed 4 bits
#define get_intermediate_bits(bitdepth_max) 4
// Output in interval [-5132, 9212], fits in int16_t as is
#define PREP_BIAS 0
#endif
50 | | |
51 | | static NOINLINE void |
52 | | put_c(pixel *dst, const ptrdiff_t dst_stride, |
53 | | const pixel *src, const ptrdiff_t src_stride, const int w, int h) |
54 | 0 | { |
55 | 0 | do { |
56 | 0 | pixel_copy(dst, src, w); |
57 | |
|
58 | 0 | dst += dst_stride; |
59 | 0 | src += src_stride; |
60 | 0 | } while (--h); |
61 | 0 | } |
62 | | |
63 | | static NOINLINE void |
64 | | prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride, |
65 | | const int w, int h HIGHBD_DECL_SUFFIX) |
66 | 0 | { |
67 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
68 | 0 | do { |
69 | 0 | for (int x = 0; x < w; x++) |
70 | 0 | tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS; |
71 | |
|
72 | 0 | tmp += w; |
73 | 0 | src += src_stride; |
74 | 0 | } while (--h); |
75 | 0 | } |
76 | | |
/*
 * 8-tap filter helpers.
 *
 * FILTER_8TAP:  sum of F[k] * src[x + (k - 3) * stride] for k = 0..7, i.e. the
 *               taps cover three samples before and four samples after x.
 * FILTER_8TAP2: same sum, but the eight taps come from eight separate row
 *               pointers, src[0][x] .. src[7][x].
 *
 * Every macro parameter is parenthesized in the expansion so that compound
 * arguments (e.g. `buf + off` for src, `a + b` for stride) expand correctly.
 */
#define FILTER_8TAP(src, x, F, stride) \
    ((F)[0] * (src)[(x) + -3 * (stride)] + \
     (F)[1] * (src)[(x) + -2 * (stride)] + \
     (F)[2] * (src)[(x) + -1 * (stride)] + \
     (F)[3] * (src)[(x) + +0 * (stride)] + \
     (F)[4] * (src)[(x) + +1 * (stride)] + \
     (F)[5] * (src)[(x) + +2 * (stride)] + \
     (F)[6] * (src)[(x) + +3 * (stride)] + \
     (F)[7] * (src)[(x) + +4 * (stride)])

#define FILTER_8TAP2(src, x, F) \
    ((F)[0] * (src)[0][x] + \
     (F)[1] * (src)[1][x] + \
     (F)[2] * (src)[2][x] + \
     (F)[3] * (src)[3][x] + \
     (F)[4] * (src)[4][x] + \
     (F)[5] * (src)[5][x] + \
     (F)[6] * (src)[6][x] + \
     (F)[7] * (src)[7][x])

/* Filter, add half of (1 << sh) for rounding, then shift right by sh. */
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
    ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))

/* As above, but with a caller-supplied rounding constant. */
#define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \
    ((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh))

/* Rounded variant of FILTER_8TAP2 (row-pointer input). */
#define DAV1D_FILTER_8TAP_RND3(src, x, F, sh) \
    ((FILTER_8TAP2(src, x, F) + ((1 << (sh)) >> 1)) >> (sh))

/* The *_CLIP* variants pass the rounded result through iclip_pixel(). */
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))

#define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh))

#define DAV1D_FILTER_8TAP_CLIP3(src, x, F, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND3(src, x, F, sh))
114 | | |
/*
 * Filter-selection helpers. Each one declares a local pointer (fh or fv) into
 * dav1d_mc_subpel_filters, or NULL when the subpel position is 0.
 *
 * They read `filter_type` and `w` (horizontal) or `h` (vertical) from the
 * enclosing scope: the low two bits of filter_type pick the horizontal filter
 * set and the bits above pick the vertical one. When the relevant dimension is
 * 4 or less, table entries 3/4 are used instead, chosen by the low bit of the
 * respective filter index.
 */
#define GET_H_FILTER(mx) \
    const int8_t *const fh = (mx) ? \
        dav1d_mc_subpel_filters[w > 4 ? filter_type & 3 : \
                                        3 + (filter_type & 1)][(mx) - 1] : \
        NULL

#define GET_V_FILTER(my) \
    const int8_t *const fv = (my) ? \
        dav1d_mc_subpel_filters[h > 4 ? filter_type >> 2 : \
                                        3 + ((filter_type >> 2) & 1)][(my) - 1] : \
        NULL

/* Declares both fh and fv from the enclosing scope's mx and my. */
#define GET_FILTERS() \
    GET_H_FILTER(mx); \
    GET_V_FILTER(my)
128 | | |
129 | | static NOINLINE void |
130 | | put_8tap_c(pixel *dst, ptrdiff_t dst_stride, |
131 | | const pixel *src, ptrdiff_t src_stride, |
132 | | const int w, int h, const int mx, const int my, |
133 | | const int filter_type HIGHBD_DECL_SUFFIX) |
134 | 0 | { |
135 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
136 | 0 | const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1); |
137 | |
|
138 | 0 | GET_FILTERS(); |
139 | 0 | dst_stride = PXSTRIDE(dst_stride); |
140 | 0 | src_stride = PXSTRIDE(src_stride); |
141 | |
|
142 | 0 | if (fh) { |
143 | 0 | if (fv) { |
144 | 0 | int tmp_h = h + 7; |
145 | 0 | int16_t mid[128 * 135], *mid_ptr = mid; |
146 | |
|
147 | 0 | src -= src_stride * 3; |
148 | 0 | do { |
149 | 0 | for (int x = 0; x < w; x++) |
150 | 0 | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
151 | 0 | 6 - intermediate_bits); |
152 | |
|
153 | 0 | mid_ptr += 128; |
154 | 0 | src += src_stride; |
155 | 0 | } while (--tmp_h); |
156 | |
|
157 | 0 | mid_ptr = mid + 128 * 3; |
158 | 0 | do { |
159 | 0 | for (int x = 0; x < w; x++) |
160 | 0 | dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, |
161 | 0 | 6 + intermediate_bits); |
162 | |
|
163 | 0 | mid_ptr += 128; |
164 | 0 | dst += dst_stride; |
165 | 0 | } while (--h); |
166 | 0 | } else { |
167 | 0 | do { |
168 | 0 | for (int x = 0; x < w; x++) { |
169 | 0 | dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1, |
170 | 0 | intermediate_rnd, 6); |
171 | 0 | } |
172 | |
|
173 | 0 | dst += dst_stride; |
174 | 0 | src += src_stride; |
175 | 0 | } while (--h); |
176 | 0 | } |
177 | 0 | } else if (fv) { |
178 | 0 | do { |
179 | 0 | for (int x = 0; x < w; x++) |
180 | 0 | dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6); |
181 | |
|
182 | 0 | dst += dst_stride; |
183 | 0 | src += src_stride; |
184 | 0 | } while (--h); |
185 | 0 | } else |
186 | 0 | put_c(dst, dst_stride, src, src_stride, w, h); |
187 | 0 | } |
188 | | |
189 | | static NOINLINE void |
190 | | put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, |
191 | | const pixel *src, ptrdiff_t src_stride, |
192 | | const int w, int h, const int mx, int my, |
193 | | const int dx, const int dy, const int filter_type |
194 | | HIGHBD_DECL_SUFFIX) |
195 | 0 | { |
196 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
197 | 0 | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
198 | 0 | int16_t mid[128 * 8]; |
199 | 0 | int16_t *mid_ptrs[8]; |
200 | 0 | int in_y = -8; |
201 | 0 | src_stride = PXSTRIDE(src_stride); |
202 | |
|
203 | 0 | for (int i = 0; i < 8; i++) |
204 | 0 | mid_ptrs[i] = &mid[128 * i]; |
205 | |
|
206 | 0 | src -= src_stride * 3; |
207 | |
|
208 | 0 | for (int y = 0; y < h; y++) { |
209 | 0 | int x; |
210 | 0 | int src_y = my >> 10; |
211 | 0 | GET_V_FILTER((my & 0x3ff) >> 6); |
212 | |
|
213 | 0 | while (in_y < src_y) { |
214 | 0 | int imx = mx, ioff = 0; |
215 | 0 | int16_t *mid_ptr = mid_ptrs[0]; |
216 | |
|
217 | 0 | for (int i = 0; i < 7; i++) |
218 | 0 | mid_ptrs[i] = mid_ptrs[i + 1]; |
219 | 0 | mid_ptrs[7] = mid_ptr; |
220 | |
|
221 | 0 | for (x = 0; x < w; x++) { |
222 | 0 | GET_H_FILTER(imx >> 6); |
223 | 0 | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
224 | 0 | 6 - intermediate_bits) : |
225 | 0 | src[ioff] << intermediate_bits; |
226 | 0 | imx += dx; |
227 | 0 | ioff += imx >> 10; |
228 | 0 | imx &= 0x3ff; |
229 | 0 | } |
230 | |
|
231 | 0 | src += src_stride; |
232 | 0 | in_y++; |
233 | 0 | } |
234 | |
|
235 | 0 | for (x = 0; x < w; x++) |
236 | 0 | dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP3(mid_ptrs, x, fv, |
237 | 0 | 6 + intermediate_bits) : |
238 | 0 | iclip_pixel((mid_ptrs[3][x] + intermediate_rnd) >> |
239 | 0 | intermediate_bits); |
240 | |
|
241 | 0 | my += dy; |
242 | 0 | dst += PXSTRIDE(dst_stride); |
243 | 0 | } |
244 | 0 | } |
245 | | |
246 | | static NOINLINE void |
247 | | prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
248 | | const int w, int h, const int mx, const int my, |
249 | | const int filter_type HIGHBD_DECL_SUFFIX) |
250 | 0 | { |
251 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
252 | 0 | GET_FILTERS(); |
253 | 0 | src_stride = PXSTRIDE(src_stride); |
254 | |
|
255 | 0 | if (fh) { |
256 | 0 | if (fv) { |
257 | 0 | int tmp_h = h + 7; |
258 | 0 | int16_t mid[128 * 135], *mid_ptr = mid; |
259 | |
|
260 | 0 | src -= src_stride * 3; |
261 | 0 | do { |
262 | 0 | for (int x = 0; x < w; x++) |
263 | 0 | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
264 | 0 | 6 - intermediate_bits); |
265 | |
|
266 | 0 | mid_ptr += 128; |
267 | 0 | src += src_stride; |
268 | 0 | } while (--tmp_h); |
269 | |
|
270 | 0 | mid_ptr = mid + 128 * 3; |
271 | 0 | do { |
272 | 0 | for (int x = 0; x < w; x++) { |
273 | 0 | int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) - |
274 | 0 | PREP_BIAS; |
275 | 0 | assert(t >= INT16_MIN && t <= INT16_MAX); |
276 | 0 | tmp[x] = t; |
277 | 0 | } |
278 | | |
279 | 0 | mid_ptr += 128; |
280 | 0 | tmp += w; |
281 | 0 | } while (--h); |
282 | 0 | } else { |
283 | 0 | do { |
284 | 0 | for (int x = 0; x < w; x++) |
285 | 0 | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
286 | 0 | 6 - intermediate_bits) - |
287 | 0 | PREP_BIAS; |
288 | |
|
289 | 0 | tmp += w; |
290 | 0 | src += src_stride; |
291 | 0 | } while (--h); |
292 | 0 | } |
293 | 0 | } else if (fv) { |
294 | 0 | do { |
295 | 0 | for (int x = 0; x < w; x++) |
296 | 0 | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, |
297 | 0 | 6 - intermediate_bits) - |
298 | 0 | PREP_BIAS; |
299 | |
|
300 | 0 | tmp += w; |
301 | 0 | src += src_stride; |
302 | 0 | } while (--h); |
303 | 0 | } else |
304 | 0 | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
305 | 0 | } |
306 | | |
307 | | static NOINLINE void |
308 | | prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
309 | | const int w, int h, const int mx, int my, |
310 | | const int dx, const int dy, const int filter_type |
311 | | HIGHBD_DECL_SUFFIX) |
312 | 0 | { |
313 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
314 | 0 | int16_t mid[128 * 8]; |
315 | 0 | int16_t *mid_ptrs[8]; |
316 | 0 | int in_y = -8; |
317 | 0 | src_stride = PXSTRIDE(src_stride); |
318 | |
|
319 | 0 | for (int i = 0; i < 8; i++) |
320 | 0 | mid_ptrs[i] = &mid[128 * i]; |
321 | |
|
322 | 0 | src -= src_stride * 3; |
323 | |
|
324 | 0 | for (int y = 0; y < h; y++) { |
325 | 0 | int x; |
326 | 0 | int src_y = my >> 10; |
327 | 0 | GET_V_FILTER((my & 0x3ff) >> 6); |
328 | |
|
329 | 0 | while (in_y < src_y) { |
330 | 0 | int imx = mx, ioff = 0; |
331 | 0 | int16_t *mid_ptr = mid_ptrs[0]; |
332 | |
|
333 | 0 | for (int i = 0; i < 7; i++) |
334 | 0 | mid_ptrs[i] = mid_ptrs[i + 1]; |
335 | 0 | mid_ptrs[7] = mid_ptr; |
336 | |
|
337 | 0 | for (x = 0; x < w; x++) { |
338 | 0 | GET_H_FILTER(imx >> 6); |
339 | 0 | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
340 | 0 | 6 - intermediate_bits) : |
341 | 0 | src[ioff] << intermediate_bits; |
342 | 0 | imx += dx; |
343 | 0 | ioff += imx >> 10; |
344 | 0 | imx &= 0x3ff; |
345 | 0 | } |
346 | |
|
347 | 0 | src += src_stride; |
348 | 0 | in_y++; |
349 | 0 | } |
350 | |
|
351 | 0 | for (x = 0; x < w; x++) |
352 | 0 | tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND3(mid_ptrs, x, fv, 6) |
353 | 0 | : mid_ptrs[3][x]) - PREP_BIAS; |
354 | |
|
355 | 0 | my += dy; |
356 | 0 | tmp += w; |
357 | 0 | } |
358 | 0 | } |
359 | | |
360 | | #define filter_fns(type, type_h, type_v) \ |
361 | | static void put_8tap_##type##_c(pixel *const dst, \ |
362 | | const ptrdiff_t dst_stride, \ |
363 | | const pixel *const src, \ |
364 | | const ptrdiff_t src_stride, \ |
365 | | const int w, const int h, \ |
366 | | const int mx, const int my \ |
367 | 0 | HIGHBD_DECL_SUFFIX) \ |
368 | 0 | { \ |
369 | 0 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ |
370 | 0 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
371 | 0 | } \ Unexecuted instantiation: mc_tmpl.c:put_8tap_regular_c Unexecuted instantiation: mc_tmpl.c:put_8tap_regular_smooth_c Unexecuted instantiation: mc_tmpl.c:put_8tap_regular_sharp_c Unexecuted instantiation: mc_tmpl.c:put_8tap_sharp_regular_c Unexecuted instantiation: mc_tmpl.c:put_8tap_sharp_smooth_c Unexecuted instantiation: mc_tmpl.c:put_8tap_sharp_c Unexecuted instantiation: mc_tmpl.c:put_8tap_smooth_regular_c Unexecuted instantiation: mc_tmpl.c:put_8tap_smooth_c Unexecuted instantiation: mc_tmpl.c:put_8tap_smooth_sharp_c |
372 | | static void put_8tap_##type##_scaled_c(pixel *const dst, \ |
373 | | const ptrdiff_t dst_stride, \ |
374 | | const pixel *const src, \ |
375 | | const ptrdiff_t src_stride, \ |
376 | | const int w, const int h, \ |
377 | | const int mx, const int my, \ |
378 | | const int dx, const int dy \ |
379 | 0 | HIGHBD_DECL_SUFFIX) \ |
380 | 0 | { \ |
381 | 0 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ |
382 | 0 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
383 | 0 | } \ Unexecuted instantiation: mc_tmpl.c:put_8tap_regular_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_regular_smooth_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_regular_sharp_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_sharp_regular_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_sharp_smooth_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_sharp_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_smooth_regular_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_smooth_scaled_c Unexecuted instantiation: mc_tmpl.c:put_8tap_smooth_sharp_scaled_c |
384 | | static void prep_8tap_##type##_c(int16_t *const tmp, \ |
385 | | const pixel *const src, \ |
386 | | const ptrdiff_t src_stride, \ |
387 | | const int w, const int h, \ |
388 | | const int mx, const int my \ |
389 | 0 | HIGHBD_DECL_SUFFIX) \ |
390 | 0 | { \ |
391 | 0 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ |
392 | 0 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
393 | 0 | } \ Unexecuted instantiation: mc_tmpl.c:prep_8tap_regular_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_regular_smooth_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_regular_sharp_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_sharp_regular_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_sharp_smooth_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_sharp_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_regular_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_sharp_c |
394 | | static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \ |
395 | | const pixel *const src, \ |
396 | | const ptrdiff_t src_stride, \ |
397 | | const int w, const int h, \ |
398 | | const int mx, const int my, \ |
399 | | const int dx, const int dy \ |
400 | 0 | HIGHBD_DECL_SUFFIX) \ |
401 | 0 | { \ |
402 | 0 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ |
403 | 0 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
404 | 0 | } Unexecuted instantiation: mc_tmpl.c:prep_8tap_regular_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_regular_smooth_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_regular_sharp_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_sharp_regular_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_sharp_smooth_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_sharp_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_regular_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_scaled_c Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_sharp_scaled_c |
405 | | |
406 | | filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR) |
407 | | filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP) |
408 | | filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH) |
409 | | filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH) |
410 | | filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR) |
411 | | filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP) |
412 | | filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP) |
413 | | filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR) |
414 | | filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH) |
415 | | |
/*
 * Bilinear filter helpers. mxy is the blend weight in sixteenths:
 *
 * FILTER_BILIN:  16 * src[x] + mxy * (src[x + stride] - src[x])
 * FILTER_BILIN2: same, but the two samples come from separate rows src1/src2.
 *
 * The *_RND variants add half of (1 << sh) and shift right by sh; the *_CLIP
 * variants additionally pass the result through iclip_pixel().
 *
 * Every macro parameter is parenthesized in the expansion so that compound
 * arguments (e.g. `buf + off` for src) expand correctly.
 */
#define FILTER_BILIN(src, x, mxy, stride) \
    (16 * (src)[x] + ((mxy) * ((src)[(x) + (stride)] - (src)[x])))

#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
    ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh))

#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
    iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))

#define FILTER_BILIN2(src1, src2, x, mxy) \
    (16 * (src1)[x] + ((mxy) * ((src2)[x] - (src1)[x])))

#define FILTER_BILIN_RND2(src1, src2, x, mxy, sh) \
    ((FILTER_BILIN2(src1, src2, x, mxy) + ((1 << (sh)) >> 1)) >> (sh))

#define FILTER_BILIN_CLIP2(src1, src2, x, mxy, sh) \
    iclip_pixel(FILTER_BILIN_RND2(src1, src2, x, mxy, sh))
433 | | |
434 | | static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride, |
435 | | const pixel *src, ptrdiff_t src_stride, |
436 | | const int w, int h, const int mx, const int my |
437 | | HIGHBD_DECL_SUFFIX) |
438 | 0 | { |
439 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
440 | 0 | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
441 | 0 | dst_stride = PXSTRIDE(dst_stride); |
442 | 0 | src_stride = PXSTRIDE(src_stride); |
443 | |
|
444 | 0 | if (mx) { |
445 | 0 | if (my) { |
446 | 0 | int16_t mid[128 * 129], *mid_ptr = mid; |
447 | 0 | int tmp_h = h + 1; |
448 | |
|
449 | 0 | do { |
450 | 0 | for (int x = 0; x < w; x++) |
451 | 0 | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
452 | 0 | 4 - intermediate_bits); |
453 | |
|
454 | 0 | mid_ptr += 128; |
455 | 0 | src += src_stride; |
456 | 0 | } while (--tmp_h); |
457 | |
|
458 | 0 | mid_ptr = mid; |
459 | 0 | do { |
460 | 0 | for (int x = 0; x < w; x++) |
461 | 0 | dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, |
462 | 0 | 4 + intermediate_bits); |
463 | |
|
464 | 0 | mid_ptr += 128; |
465 | 0 | dst += dst_stride; |
466 | 0 | } while (--h); |
467 | 0 | } else { |
468 | 0 | do { |
469 | 0 | for (int x = 0; x < w; x++) { |
470 | 0 | const int px = FILTER_BILIN_RND(src, x, mx, 1, |
471 | 0 | 4 - intermediate_bits); |
472 | 0 | dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits); |
473 | 0 | } |
474 | |
|
475 | 0 | dst += dst_stride; |
476 | 0 | src += src_stride; |
477 | 0 | } while (--h); |
478 | 0 | } |
479 | 0 | } else if (my) { |
480 | 0 | do { |
481 | 0 | for (int x = 0; x < w; x++) |
482 | 0 | dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4); |
483 | |
|
484 | 0 | dst += dst_stride; |
485 | 0 | src += src_stride; |
486 | 0 | } while (--h); |
487 | 0 | } else |
488 | 0 | put_c(dst, dst_stride, src, src_stride, w, h); |
489 | 0 | } |
490 | | |
491 | | static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, |
492 | | const pixel *src, ptrdiff_t src_stride, |
493 | | const int w, int h, const int mx, int my, |
494 | | const int dx, const int dy |
495 | | HIGHBD_DECL_SUFFIX) |
496 | 0 | { |
497 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
498 | 0 | int16_t mid[128 * 2]; |
499 | 0 | int in_y = -2; |
500 | |
|
501 | 0 | do { |
502 | 0 | int x; |
503 | 0 | int y = my >> 10; |
504 | 0 | int16_t *mid1 = &mid[(y & 1) * 128]; |
505 | 0 | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
506 | 0 | int dmy = my & 0x3ff; |
507 | |
|
508 | 0 | while (in_y < y) { |
509 | 0 | int imx = mx, ioff = 0; |
510 | 0 | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
511 | |
|
512 | 0 | for (x = 0; x < w; x++) { |
513 | 0 | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
514 | 0 | 4 - intermediate_bits); |
515 | 0 | imx += dx; |
516 | 0 | ioff += imx >> 10; |
517 | 0 | imx &= 0x3ff; |
518 | 0 | } |
519 | |
|
520 | 0 | src += PXSTRIDE(src_stride); |
521 | 0 | in_y++; |
522 | 0 | } |
523 | |
|
524 | 0 | for (x = 0; x < w; x++) |
525 | 0 | dst[x] = FILTER_BILIN_CLIP2(mid1, mid2, x, dmy >> 6, |
526 | 0 | 4 + intermediate_bits); |
527 | |
|
528 | 0 | my += dy; |
529 | 0 | dst += PXSTRIDE(dst_stride); |
530 | 0 | } while (--h); |
531 | 0 | } |
532 | | |
533 | | static void prep_bilin_c(int16_t *tmp, |
534 | | const pixel *src, ptrdiff_t src_stride, |
535 | | const int w, int h, const int mx, const int my |
536 | | HIGHBD_DECL_SUFFIX) |
537 | 0 | { |
538 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
539 | 0 | src_stride = PXSTRIDE(src_stride); |
540 | |
|
541 | 0 | if (mx) { |
542 | 0 | if (my) { |
543 | 0 | int16_t mid[128 * 129], *mid_ptr = mid; |
544 | 0 | int tmp_h = h + 1; |
545 | |
|
546 | 0 | do { |
547 | 0 | for (int x = 0; x < w; x++) |
548 | 0 | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
549 | 0 | 4 - intermediate_bits); |
550 | |
|
551 | 0 | mid_ptr += 128; |
552 | 0 | src += src_stride; |
553 | 0 | } while (--tmp_h); |
554 | |
|
555 | 0 | mid_ptr = mid; |
556 | 0 | do { |
557 | 0 | for (int x = 0; x < w; x++) |
558 | 0 | tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4) - |
559 | 0 | PREP_BIAS; |
560 | |
|
561 | 0 | mid_ptr += 128; |
562 | 0 | tmp += w; |
563 | 0 | } while (--h); |
564 | 0 | } else { |
565 | 0 | do { |
566 | 0 | for (int x = 0; x < w; x++) |
567 | 0 | tmp[x] = FILTER_BILIN_RND(src, x, mx, 1, |
568 | 0 | 4 - intermediate_bits) - |
569 | 0 | PREP_BIAS; |
570 | |
|
571 | 0 | tmp += w; |
572 | 0 | src += src_stride; |
573 | 0 | } while (--h); |
574 | 0 | } |
575 | 0 | } else if (my) { |
576 | 0 | do { |
577 | 0 | for (int x = 0; x < w; x++) |
578 | 0 | tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride, |
579 | 0 | 4 - intermediate_bits) - PREP_BIAS; |
580 | |
|
581 | 0 | tmp += w; |
582 | 0 | src += src_stride; |
583 | 0 | } while (--h); |
584 | 0 | } else |
585 | 0 | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
586 | 0 | } |
587 | | |
588 | | static void prep_bilin_scaled_c(int16_t *tmp, |
589 | | const pixel *src, ptrdiff_t src_stride, |
590 | | const int w, int h, const int mx, int my, |
591 | | const int dx, const int dy HIGHBD_DECL_SUFFIX) |
592 | 0 | { |
593 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
594 | 0 | int16_t mid[128 * 2]; |
595 | 0 | int in_y = -2; |
596 | |
|
597 | 0 | do { |
598 | 0 | int x; |
599 | 0 | int y = my >> 10; |
600 | 0 | int16_t *mid1 = &mid[(y & 1) * 128]; |
601 | 0 | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
602 | 0 | int dmy = my & 0x3ff; |
603 | |
|
604 | 0 | while (in_y < y) { |
605 | 0 | int imx = mx, ioff = 0; |
606 | 0 | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
607 | |
|
608 | 0 | for (x = 0; x < w; x++) { |
609 | 0 | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
610 | 0 | 4 - intermediate_bits); |
611 | 0 | imx += dx; |
612 | 0 | ioff += imx >> 10; |
613 | 0 | imx &= 0x3ff; |
614 | 0 | } |
615 | |
|
616 | 0 | src += PXSTRIDE(src_stride); |
617 | 0 | in_y++; |
618 | 0 | } |
619 | |
|
620 | 0 | for (x = 0; x < w; x++) |
621 | 0 | tmp[x] = FILTER_BILIN_RND2(mid1, mid2, x, dmy >> 6, 4) - PREP_BIAS; |
622 | |
|
623 | 0 | my += dy; |
624 | 0 | tmp += w; |
625 | 0 | } while (--h); |
626 | 0 | } |
627 | | |
628 | | static void avg_c(pixel *dst, const ptrdiff_t dst_stride, |
629 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h |
630 | | HIGHBD_DECL_SUFFIX) |
631 | 0 | { |
632 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
633 | 0 | const int sh = intermediate_bits + 1; |
634 | 0 | const int rnd = (1 << intermediate_bits) + PREP_BIAS * 2; |
635 | 0 | do { |
636 | 0 | for (int x = 0; x < w; x++) |
637 | 0 | dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh); |
638 | |
|
639 | 0 | tmp1 += w; |
640 | 0 | tmp2 += w; |
641 | 0 | dst += PXSTRIDE(dst_stride); |
642 | 0 | } while (--h); |
643 | 0 | } |
644 | | |
645 | | static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, |
646 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
647 | | const int weight HIGHBD_DECL_SUFFIX) |
648 | 0 | { |
649 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
650 | 0 | const int sh = intermediate_bits + 4; |
651 | 0 | const int rnd = (8 << intermediate_bits) + PREP_BIAS * 16; |
652 | 0 | do { |
653 | 0 | for (int x = 0; x < w; x++) |
654 | 0 | dst[x] = iclip_pixel((tmp1[x] * weight + |
655 | 0 | tmp2[x] * (16 - weight) + rnd) >> sh); |
656 | |
|
657 | 0 | tmp1 += w; |
658 | 0 | tmp2 += w; |
659 | 0 | dst += PXSTRIDE(dst_stride); |
660 | 0 | } while (--h); |
661 | 0 | } |
662 | | |
663 | | static void mask_c(pixel *dst, const ptrdiff_t dst_stride, |
664 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
665 | | const uint8_t *mask HIGHBD_DECL_SUFFIX) |
666 | 0 | { |
667 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
668 | 0 | const int sh = intermediate_bits + 6; |
669 | 0 | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
670 | 0 | do { |
671 | 0 | for (int x = 0; x < w; x++) |
672 | 0 | dst[x] = iclip_pixel((tmp1[x] * mask[x] + |
673 | 0 | tmp2[x] * (64 - mask[x]) + rnd) >> sh); |
674 | |
|
675 | 0 | tmp1 += w; |
676 | 0 | tmp2 += w; |
677 | 0 | mask += w; |
678 | 0 | dst += PXSTRIDE(dst_stride); |
679 | 0 | } while (--h); |
680 | 0 | } |
681 | | |
/*
 * Blend of a and b with weight m / 64 on b, rounded to nearest:
 * (a * (64 - m) + b * m + 32) >> 6. Arguments are parenthesized so compound
 * expressions expand correctly; note that m is evaluated twice.
 */
#define blend_px(a, b, m) ((((a) * (64 - (m)) + (b) * (m)) + 32) >> 6)
683 | | static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
684 | | const int w, int h, const uint8_t *mask) |
685 | 0 | { |
686 | 0 | do { |
687 | 0 | for (int x = 0; x < w; x++) { |
688 | 0 | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
689 | 0 | } |
690 | 0 | dst += PXSTRIDE(dst_stride); |
691 | 0 | tmp += w; |
692 | 0 | mask += w; |
693 | 0 | } while (--h); |
694 | 0 | } |
695 | | |
696 | | static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
697 | | const int w, int h) |
698 | 0 | { |
699 | 0 | const uint8_t *const mask = &dav1d_obmc_masks[w]; |
700 | 0 | do { |
701 | 0 | for (int x = 0; x < (w * 3) >> 2; x++) { |
702 | 0 | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
703 | 0 | } |
704 | 0 | dst += PXSTRIDE(dst_stride); |
705 | 0 | tmp += w; |
706 | 0 | } while (--h); |
707 | 0 | } |
708 | | |
709 | | static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
710 | | const int w, int h) |
711 | 0 | { |
712 | 0 | const uint8_t *mask = &dav1d_obmc_masks[h]; |
713 | 0 | h = (h * 3) >> 2; |
714 | 0 | do { |
715 | 0 | const int m = *mask++; |
716 | 0 | for (int x = 0; x < w; x++) { |
717 | 0 | dst[x] = blend_px(dst[x], tmp[x], m); |
718 | 0 | } |
719 | 0 | dst += PXSTRIDE(dst_stride); |
720 | 0 | tmp += w; |
721 | 0 | } while (--h); |
722 | 0 | } |
723 | | |
724 | | static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, |
725 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
726 | | uint8_t *mask, const int sign, |
727 | | const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX) |
728 | 0 | { |
729 | | // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows, |
730 | | // and then load this intermediate to calculate final value for odd rows |
731 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
732 | 0 | const int bitdepth = bitdepth_from_max(bitdepth_max); |
733 | 0 | const int sh = intermediate_bits + 6; |
734 | 0 | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
735 | 0 | const int mask_sh = bitdepth + intermediate_bits - 4; |
736 | 0 | const int mask_rnd = 1 << (mask_sh - 5); |
737 | 0 | do { |
738 | 0 | for (int x = 0; x < w; x++) { |
739 | 0 | const int tmpdiff = tmp1[x] - tmp2[x]; |
740 | 0 | const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
741 | 0 | dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh); |
742 | |
|
743 | 0 | if (ss_hor) { |
744 | 0 | x++; |
745 | |
|
746 | 0 | const int tmpdiff = tmp1[x] - tmp2[x]; |
747 | 0 | const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
748 | 0 | dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh); |
749 | |
|
750 | 0 | if (h & ss_ver) { |
751 | 0 | mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2; |
752 | 0 | } else if (ss_ver) { |
753 | 0 | mask[x >> 1] = m + n; |
754 | 0 | } else { |
755 | 0 | mask[x >> 1] = (m + n + 1 - sign) >> 1; |
756 | 0 | } |
757 | 0 | } else { |
758 | 0 | mask[x] = m; |
759 | 0 | } |
760 | 0 | } |
761 | |
|
762 | 0 | tmp1 += w; |
763 | 0 | tmp2 += w; |
764 | 0 | dst += PXSTRIDE(dst_stride); |
765 | 0 | if (!ss_ver || (h & 1)) mask += w >> ss_hor; |
766 | 0 | } while (--h); |
767 | 0 | } |
768 | | |
769 | | #define w_mask_fns(ssn, ss_hor, ss_ver) \ |
770 | | static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \ |
771 | | const int16_t *const tmp1, const int16_t *const tmp2, \ |
772 | | const int w, const int h, uint8_t *mask, \ |
773 | 0 | const int sign HIGHBD_DECL_SUFFIX) \ |
774 | 0 | { \ |
775 | 0 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ |
776 | 0 | HIGHBD_TAIL_SUFFIX); \ |
777 | 0 | } Unexecuted instantiation: mc_tmpl.c:w_mask_444_c Unexecuted instantiation: mc_tmpl.c:w_mask_422_c Unexecuted instantiation: mc_tmpl.c:w_mask_420_c |
778 | | |
779 | | w_mask_fns(444, 0, 0); |
780 | | w_mask_fns(422, 1, 0); |
781 | | w_mask_fns(420, 1, 1); |
782 | | |
783 | | #undef w_mask_fns |
784 | | |
/*
 * 8-tap warp filter: sum of F[k] * src[x + (k - 3) * stride] for k = 0..7,
 * plus half of (1 << sh) for rounding, shifted right by sh.
 *
 * Every macro parameter is parenthesized in the expansion so that compound
 * arguments (e.g. `buf + off` for src, `a + b` for stride) expand correctly.
 */
#define FILTER_WARP_RND(src, x, F, stride, sh) \
    (((F)[0] * (src)[(x) - 3 * (stride)] + \
      (F)[1] * (src)[(x) - 2 * (stride)] + \
      (F)[2] * (src)[(x) - 1 * (stride)] + \
      (F)[3] * (src)[(x) + 0 * (stride)] + \
      (F)[4] * (src)[(x) + 1 * (stride)] + \
      (F)[5] * (src)[(x) + 2 * (stride)] + \
      (F)[6] * (src)[(x) + 3 * (stride)] + \
      (F)[7] * (src)[(x) + 4 * (stride)] + \
      ((1 << (sh)) >> 1)) >> (sh))

/* As above, with the result passed through iclip_pixel(). */
#define FILTER_WARP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh))
798 | | |
799 | | static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, |
800 | | const pixel *src, const ptrdiff_t src_stride, |
801 | | const int16_t *const abcd, int mx, int my |
802 | | HIGHBD_DECL_SUFFIX) |
803 | 0 | { |
804 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
805 | 0 | int16_t mid[15 * 8], *mid_ptr = mid; |
806 | |
|
807 | 0 | src -= 3 * PXSTRIDE(src_stride); |
808 | 0 | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
809 | 0 | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
810 | 0 | const int8_t *const filter = |
811 | 0 | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
812 | |
|
813 | 0 | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
814 | 0 | 7 - intermediate_bits); |
815 | 0 | } |
816 | 0 | src += PXSTRIDE(src_stride); |
817 | 0 | mid_ptr += 8; |
818 | 0 | } |
819 | |
|
820 | 0 | mid_ptr = &mid[3 * 8]; |
821 | 0 | for (int y = 0; y < 8; y++, my += abcd[3]) { |
822 | 0 | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
823 | 0 | const int8_t *const filter = |
824 | 0 | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
825 | |
|
826 | 0 | dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, |
827 | 0 | 7 + intermediate_bits); |
828 | 0 | } |
829 | 0 | mid_ptr += 8; |
830 | 0 | dst += PXSTRIDE(dst_stride); |
831 | 0 | } |
832 | 0 | } |
833 | | |
834 | | static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride, |
835 | | const pixel *src, const ptrdiff_t src_stride, |
836 | | const int16_t *const abcd, int mx, int my |
837 | | HIGHBD_DECL_SUFFIX) |
838 | 0 | { |
839 | 0 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
840 | 0 | int16_t mid[15 * 8], *mid_ptr = mid; |
841 | |
|
842 | 0 | src -= 3 * PXSTRIDE(src_stride); |
843 | 0 | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
844 | 0 | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
845 | 0 | const int8_t *const filter = |
846 | 0 | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
847 | |
|
848 | 0 | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
849 | 0 | 7 - intermediate_bits); |
850 | 0 | } |
851 | 0 | src += PXSTRIDE(src_stride); |
852 | 0 | mid_ptr += 8; |
853 | 0 | } |
854 | |
|
855 | 0 | mid_ptr = &mid[3 * 8]; |
856 | 0 | for (int y = 0; y < 8; y++, my += abcd[3]) { |
857 | 0 | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
858 | 0 | const int8_t *const filter = |
859 | 0 | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
860 | |
|
861 | 0 | tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7) - PREP_BIAS; |
862 | 0 | } |
863 | 0 | mid_ptr += 8; |
864 | 0 | tmp += tmp_stride; |
865 | 0 | } |
866 | 0 | } |
867 | | |
868 | | static void emu_edge_c(const intptr_t bw, const intptr_t bh, |
869 | | const intptr_t iw, const intptr_t ih, |
870 | | const intptr_t x, const intptr_t y, |
871 | | pixel *dst, const ptrdiff_t dst_stride, |
872 | | const pixel *ref, const ptrdiff_t ref_stride) |
873 | 0 | { |
874 | | // find offset in reference of visible block to copy |
875 | 0 | ref += iclip((int) y, 0, (int) ih - 1) * PXSTRIDE(ref_stride) + |
876 | 0 | iclip((int) x, 0, (int) iw - 1); |
877 | | |
878 | | // number of pixels to extend (left, right, top, bottom) |
879 | 0 | const int left_ext = iclip((int) -x, 0, (int) bw - 1); |
880 | 0 | const int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1); |
881 | 0 | assert(left_ext + right_ext < bw); |
882 | 0 | const int top_ext = iclip((int) -y, 0, (int) bh - 1); |
883 | 0 | const int bottom_ext = iclip((int) (y + bh - ih), 0, (int) bh - 1); |
884 | 0 | assert(top_ext + bottom_ext < bh); |
885 | | |
886 | | // copy visible portion first |
887 | 0 | pixel *blk = dst + top_ext * PXSTRIDE(dst_stride); |
888 | 0 | const int center_w = (int) (bw - left_ext - right_ext); |
889 | 0 | const int center_h = (int) (bh - top_ext - bottom_ext); |
890 | 0 | for (int y = 0; y < center_h; y++) { |
891 | 0 | pixel_copy(blk + left_ext, ref, center_w); |
892 | | // extend left edge for this line |
893 | 0 | if (left_ext) |
894 | 0 | pixel_set(blk, blk[left_ext], left_ext); |
895 | | // extend right edge for this line |
896 | 0 | if (right_ext) |
897 | 0 | pixel_set(blk + left_ext + center_w, blk[left_ext + center_w - 1], |
898 | 0 | right_ext); |
899 | 0 | ref += PXSTRIDE(ref_stride); |
900 | 0 | blk += PXSTRIDE(dst_stride); |
901 | 0 | } |
902 | | |
903 | | // copy top |
904 | 0 | blk = dst + top_ext * PXSTRIDE(dst_stride); |
905 | 0 | for (int y = 0; y < top_ext; y++) { |
906 | 0 | pixel_copy(dst, blk, bw); |
907 | 0 | dst += PXSTRIDE(dst_stride); |
908 | 0 | } |
909 | | |
910 | | // copy bottom |
911 | 0 | dst += center_h * PXSTRIDE(dst_stride); |
912 | 0 | for (int y = 0; y < bottom_ext; y++) { |
913 | 0 | pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], bw); |
914 | 0 | dst += PXSTRIDE(dst_stride); |
915 | 0 | } |
916 | 0 | } |
917 | | |
918 | | static void resize_c(pixel *dst, const ptrdiff_t dst_stride, |
919 | | const pixel *src, const ptrdiff_t src_stride, |
920 | | const int dst_w, int h, const int src_w, |
921 | | const int dx, const int mx0 HIGHBD_DECL_SUFFIX) |
922 | 0 | { |
923 | 0 | do { |
924 | 0 | int mx = mx0, src_x = -1; |
925 | 0 | for (int x = 0; x < dst_w; x++) { |
926 | 0 | const int8_t *const F = dav1d_resize_filter[mx >> 8]; |
927 | 0 | dst[x] = iclip_pixel((-(F[0] * src[iclip(src_x - 3, 0, src_w - 1)] + |
928 | 0 | F[1] * src[iclip(src_x - 2, 0, src_w - 1)] + |
929 | 0 | F[2] * src[iclip(src_x - 1, 0, src_w - 1)] + |
930 | 0 | F[3] * src[iclip(src_x + 0, 0, src_w - 1)] + |
931 | 0 | F[4] * src[iclip(src_x + 1, 0, src_w - 1)] + |
932 | 0 | F[5] * src[iclip(src_x + 2, 0, src_w - 1)] + |
933 | 0 | F[6] * src[iclip(src_x + 3, 0, src_w - 1)] + |
934 | 0 | F[7] * src[iclip(src_x + 4, 0, src_w - 1)]) + |
935 | 0 | 64) >> 7); |
936 | 0 | mx += dx; |
937 | 0 | src_x += mx >> 14; |
938 | 0 | mx &= 0x3fff; |
939 | 0 | } |
940 | |
|
941 | 0 | dst += PXSTRIDE(dst_stride); |
942 | 0 | src += PXSTRIDE(src_stride); |
943 | 0 | } while (--h); |
944 | 0 | } |
945 | | |
946 | | #if HAVE_ASM |
947 | | #if ARCH_AARCH64 || ARCH_ARM |
948 | | #include "src/arm/mc.h" |
949 | | #elif ARCH_LOONGARCH64 |
950 | | #include "src/loongarch/mc.h" |
951 | | #elif ARCH_PPC64LE |
952 | | #include "src/ppc/mc.h" |
953 | | #elif ARCH_RISCV |
954 | | #include "src/riscv/mc.h" |
955 | | #elif ARCH_X86 |
956 | | #include "src/x86/mc.h" |
957 | | #endif |
958 | | #endif |
959 | | |
960 | 0 | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { |
961 | 0 | #define init_mc_fns(type, name) do { \ |
962 | 0 | c->mc [type] = put_##name##_c; \ |
963 | 0 | c->mc_scaled [type] = put_##name##_scaled_c; \ |
964 | 0 | c->mct [type] = prep_##name##_c; \ |
965 | 0 | c->mct_scaled[type] = prep_##name##_scaled_c; \ |
966 | 0 | } while (0) |
967 | |
|
968 | 0 | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); |
969 | 0 | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); |
970 | 0 | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); |
971 | 0 | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); |
972 | 0 | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); |
973 | 0 | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); |
974 | 0 | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); |
975 | 0 | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); |
976 | 0 | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); |
977 | 0 | init_mc_fns(FILTER_2D_BILINEAR, bilin); |
978 | |
|
979 | 0 | c->avg = avg_c; |
980 | 0 | c->w_avg = w_avg_c; |
981 | 0 | c->mask = mask_c; |
982 | 0 | c->blend = blend_c; |
983 | 0 | c->blend_v = blend_v_c; |
984 | 0 | c->blend_h = blend_h_c; |
985 | 0 | c->w_mask[0] = w_mask_444_c; |
986 | 0 | c->w_mask[1] = w_mask_422_c; |
987 | 0 | c->w_mask[2] = w_mask_420_c; |
988 | 0 | c->warp8x8 = warp_affine_8x8_c; |
989 | 0 | c->warp8x8t = warp_affine_8x8t_c; |
990 | 0 | c->emu_edge = emu_edge_c; |
991 | 0 | c->resize = resize_c; |
992 | |
|
993 | | #if HAVE_ASM |
994 | | #if ARCH_AARCH64 || ARCH_ARM |
995 | | mc_dsp_init_arm(c); |
996 | | #elif ARCH_LOONGARCH64 |
997 | | mc_dsp_init_loongarch(c); |
998 | | #elif ARCH_PPC64LE |
999 | | mc_dsp_init_ppc(c); |
1000 | | #elif ARCH_RISCV |
1001 | | mc_dsp_init_riscv(c); |
1002 | | #elif ARCH_X86 |
1003 | | mc_dsp_init_x86(c); |
1004 | | #endif |
1005 | | #endif |
1006 | 0 | } Unexecuted instantiation: dav1d_mc_dsp_init_8bpc Unexecuted instantiation: dav1d_mc_dsp_init_16bpc |