/work/dav1d/src/mc_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/attributes.h" |
34 | | #include "common/intops.h" |
35 | | |
36 | | #include "src/mc.h" |
37 | | #include "src/tables.h" |
38 | | |
#if BITDEPTH != 8
// High bit-depth build: the intermediate precision is derived from the
// maximum pixel value, giving 4 bits for 10 bits/component and 2 bits for
// 12 bits/component
#define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))
// Output in interval [-20588, 36956] (10-bit), [-20602, 36983] (12-bit)
// Subtract a bias to ensure the output fits in int16_t
#define PREP_BIAS 8192
#else
// 8 bits/component build: the intermediate precision is a fixed 4 bits, the
// macro argument is accepted but never expanded
#define get_intermediate_bits(bitdepth_max) 4
// Output in interval [-5132, 9212], fits in int16_t as is
#define PREP_BIAS 0
#endif
50 | | |
51 | | static NOINLINE void |
52 | | put_c(pixel *dst, const ptrdiff_t dst_stride, |
53 | | const pixel *src, const ptrdiff_t src_stride, const int w, int h) |
54 | 174k | { |
55 | 2.03M | do { |
56 | 2.03M | pixel_copy(dst, src, w); |
57 | | |
58 | 2.03M | dst += dst_stride; |
59 | 2.03M | src += src_stride; |
60 | 2.03M | } while (--h); |
61 | 174k | } |
62 | | |
63 | | static NOINLINE void |
64 | | prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride, |
65 | | const int w, int h HIGHBD_DECL_SUFFIX) |
66 | 5.63k | { |
67 | 5.63k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
68 | 133k | do { |
69 | 5.70M | for (int x = 0; x < w; x++) |
70 | 5.57M | tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS; |
71 | | |
72 | 133k | tmp += w; |
73 | 133k | src += src_stride; |
74 | 133k | } while (--h); |
75 | 5.63k | } |
76 | | |
/*
 * Eight-tap FIR helpers.
 *
 * FILTER_8TAP   - dot product of the eight coefficients F[0..7] with the
 *                 samples src[x - 3 * stride] .. src[x + 4 * stride].
 * FILTER_8TAP2  - same dot product, but the eight samples come from eight
 *                 separate row pointers src[0..7], all read at column x.
 * *_RND         - adds half of (1 << sh) and shifts right by sh.
 * *_RND2        - adds the caller supplied rounding term rnd, then shifts.
 * *_CLIP*       - the matching *_RND* result passed through iclip_pixel().
 *
 * Every macro parameter is parenthesized in the expansion so that arguments
 * which are expressions (for example a stride of "s + 1" or a source of
 * "buf + off") bind as a unit. Arguments may still be evaluated more than
 * once and must therefore be free of side effects.
 */
#define FILTER_8TAP(src, x, F, stride) \
    ((F)[0] * (src)[(x) + -3 * (stride)] + \
     (F)[1] * (src)[(x) + -2 * (stride)] + \
     (F)[2] * (src)[(x) + -1 * (stride)] + \
     (F)[3] * (src)[(x) + +0 * (stride)] + \
     (F)[4] * (src)[(x) + +1 * (stride)] + \
     (F)[5] * (src)[(x) + +2 * (stride)] + \
     (F)[6] * (src)[(x) + +3 * (stride)] + \
     (F)[7] * (src)[(x) + +4 * (stride)])

#define FILTER_8TAP2(src, x, F) \
    ((F)[0] * (src)[0][(x)] + \
     (F)[1] * (src)[1][(x)] + \
     (F)[2] * (src)[2][(x)] + \
     (F)[3] * (src)[3][(x)] + \
     (F)[4] * (src)[4][(x)] + \
     (F)[5] * (src)[5][(x)] + \
     (F)[6] * (src)[6][(x)] + \
     (F)[7] * (src)[7][(x)])

#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
    ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))

#define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \
    ((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh))

#define DAV1D_FILTER_8TAP_RND3(src, x, F, sh) \
    ((FILTER_8TAP2(src, x, F) + ((1 << (sh)) >> 1)) >> (sh))

#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))

#define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh))

#define DAV1D_FILTER_8TAP_CLIP3(src, x, F, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND3(src, x, F, sh))
114 | | |
/*
 * Filter lookup helpers. Each one declares a local coefficient pointer in
 * the enclosing scope and relies on names already visible there:
 *
 * GET_H_FILTER(mx) declares "fh" and reads "w" and "filter_type";
 * GET_V_FILTER(my) declares "fv" and reads "h" and "filter_type";
 * GET_FILTERS()    does both, using the enclosing "mx" and "my".
 *
 * A zero position yields a NULL pointer. Otherwise the pointer refers to
 * row (position - 1) of a dav1d_mc_subpel_filters table: for a dimension
 * larger than 4 the table is picked by the filter type itself (low two bits
 * of filter_type horizontally, the bits above them vertically); for a
 * dimension of 4 or less it is table 3 or 4, chosen by the low bit of the
 * respective filter type.
 */
#define GET_H_FILTER(mx) \
    const int8_t *const fh = (mx) == 0 ? NULL : \
        dav1d_mc_subpel_filters[w <= 4 ? 3 + (filter_type & 1) \
                                       : (filter_type & 3)][(mx) - 1]

#define GET_V_FILTER(my) \
    const int8_t *const fv = (my) == 0 ? NULL : \
        dav1d_mc_subpel_filters[h <= 4 ? 3 + ((filter_type >> 2) & 1) \
                                       : (filter_type >> 2)][(my) - 1]

#define GET_FILTERS() \
    GET_H_FILTER(mx); \
    GET_V_FILTER(my)
128 | | |
129 | | static NOINLINE void |
130 | | put_8tap_c(pixel *dst, ptrdiff_t dst_stride, |
131 | | const pixel *src, ptrdiff_t src_stride, |
132 | | const int w, int h, const int mx, const int my, |
133 | | const int filter_type HIGHBD_DECL_SUFFIX) |
134 | 42.1k | { |
135 | 42.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
136 | 42.1k | const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1); |
137 | | |
138 | 42.1k | GET_FILTERS(); |
139 | 42.1k | dst_stride = PXSTRIDE(dst_stride); |
140 | 42.1k | src_stride = PXSTRIDE(src_stride); |
141 | | |
142 | 42.1k | if (fh) { |
143 | 8.84k | if (fv) { |
144 | 5.40k | int tmp_h = h + 7; |
145 | 5.40k | int16_t mid[128 * 135], *mid_ptr = mid; |
146 | | |
147 | 5.40k | src -= src_stride * 3; |
148 | 112k | do { |
149 | 2.67M | for (int x = 0; x < w; x++) |
150 | 2.56M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
151 | 112k | 6 - intermediate_bits); |
152 | | |
153 | 112k | mid_ptr += 128; |
154 | 112k | src += src_stride; |
155 | 112k | } while (--tmp_h); |
156 | | |
157 | 5.40k | mid_ptr = mid + 128 * 3; |
158 | 75.1k | do { |
159 | 2.16M | for (int x = 0; x < w; x++) |
160 | 2.09M | dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, |
161 | 75.1k | 6 + intermediate_bits); |
162 | | |
163 | 75.1k | mid_ptr += 128; |
164 | 75.1k | dst += dst_stride; |
165 | 75.1k | } while (--h); |
166 | 5.40k | } else { |
167 | 46.2k | do { |
168 | 1.51M | for (int x = 0; x < w; x++) { |
169 | 1.47M | dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1, |
170 | 1.47M | intermediate_rnd, 6); |
171 | 1.47M | } |
172 | | |
173 | 46.2k | dst += dst_stride; |
174 | 46.2k | src += src_stride; |
175 | 46.2k | } while (--h); |
176 | 3.43k | } |
177 | 33.3k | } else if (fv) { |
178 | 33.1k | do { |
179 | 772k | for (int x = 0; x < w; x++) |
180 | 739k | dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6); |
181 | | |
182 | 33.1k | dst += dst_stride; |
183 | 33.1k | src += src_stride; |
184 | 33.1k | } while (--h); |
185 | 2.53k | } else |
186 | 30.7k | put_c(dst, dst_stride, src, src_stride, w, h); |
187 | 42.1k | } |
188 | | |
189 | | static NOINLINE void |
190 | | put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, |
191 | | const pixel *src, ptrdiff_t src_stride, |
192 | | const int w, int h, const int mx, int my, |
193 | | const int dx, const int dy, const int filter_type |
194 | | HIGHBD_DECL_SUFFIX) |
195 | 16.1k | { |
196 | 16.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
197 | 16.1k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
198 | 16.1k | int16_t mid[128 * 8]; |
199 | 16.1k | int16_t *mid_ptrs[8]; |
200 | 16.1k | int in_y = -8; |
201 | 16.1k | src_stride = PXSTRIDE(src_stride); |
202 | | |
203 | 145k | for (int i = 0; i < 8; i++) |
204 | 129k | mid_ptrs[i] = &mid[128 * i]; |
205 | | |
206 | 16.1k | src -= src_stride * 3; |
207 | | |
208 | 255k | for (int y = 0; y < h; y++) { |
209 | 239k | int x; |
210 | 239k | int src_y = my >> 10; |
211 | 239k | GET_V_FILTER((my & 0x3ff) >> 6); |
212 | | |
213 | 557k | while (in_y < src_y) { |
214 | 317k | int imx = mx, ioff = 0; |
215 | 317k | int16_t *mid_ptr = mid_ptrs[0]; |
216 | | |
217 | 2.53M | for (int i = 0; i < 7; i++) |
218 | 2.22M | mid_ptrs[i] = mid_ptrs[i + 1]; |
219 | 317k | mid_ptrs[7] = mid_ptr; |
220 | | |
221 | 8.47M | for (x = 0; x < w; x++) { |
222 | 8.15M | GET_H_FILTER(imx >> 6); |
223 | 8.15M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
224 | 8.15M | 6 - intermediate_bits) : |
225 | 8.15M | src[ioff] << intermediate_bits; |
226 | 8.15M | imx += dx; |
227 | 8.15M | ioff += imx >> 10; |
228 | 8.15M | imx &= 0x3ff; |
229 | 8.15M | } |
230 | | |
231 | 317k | src += src_stride; |
232 | 317k | in_y++; |
233 | 317k | } |
234 | | |
235 | 6.84M | for (x = 0; x < w; x++) |
236 | 6.60M | dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP3(mid_ptrs, x, fv, |
237 | 6.60M | 6 + intermediate_bits) : |
238 | 6.60M | iclip_pixel((mid_ptrs[3][x] + intermediate_rnd) >> |
239 | 2.58M | intermediate_bits); |
240 | | |
241 | 239k | my += dy; |
242 | 239k | dst += PXSTRIDE(dst_stride); |
243 | 239k | } |
244 | 16.1k | } |
245 | | |
246 | | static NOINLINE void |
247 | | prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
248 | | const int w, int h, const int mx, const int my, |
249 | | const int filter_type HIGHBD_DECL_SUFFIX) |
250 | 8.92k | { |
251 | 8.92k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
252 | 8.92k | GET_FILTERS(); |
253 | 8.92k | src_stride = PXSTRIDE(src_stride); |
254 | | |
255 | 8.92k | if (fh) { |
256 | 4.32k | if (fv) { |
257 | 2.29k | int tmp_h = h + 7; |
258 | 2.29k | int16_t mid[128 * 135], *mid_ptr = mid; |
259 | | |
260 | 2.29k | src -= src_stride * 3; |
261 | 78.3k | do { |
262 | 2.80M | for (int x = 0; x < w; x++) |
263 | 2.73M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
264 | 78.3k | 6 - intermediate_bits); |
265 | | |
266 | 78.3k | mid_ptr += 128; |
267 | 78.3k | src += src_stride; |
268 | 78.3k | } while (--tmp_h); |
269 | | |
270 | 2.29k | mid_ptr = mid + 128 * 3; |
271 | 62.3k | do { |
272 | 2.47M | for (int x = 0; x < w; x++) { |
273 | 2.41M | int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) - |
274 | 2.41M | PREP_BIAS; |
275 | 2.41M | assert(t >= INT16_MIN && t <= INT16_MAX); |
276 | 2.41M | tmp[x] = t; |
277 | 2.41M | } |
278 | | |
279 | 62.3k | mid_ptr += 128; |
280 | 62.3k | tmp += w; |
281 | 62.3k | } while (--h); |
282 | 2.29k | } else { |
283 | 63.1k | do { |
284 | 3.11M | for (int x = 0; x < w; x++) |
285 | 3.05M | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
286 | 3.05M | 6 - intermediate_bits) - |
287 | 3.05M | PREP_BIAS; |
288 | | |
289 | 63.1k | tmp += w; |
290 | 63.1k | src += src_stride; |
291 | 63.1k | } while (--h); |
292 | 2.03k | } |
293 | 4.59k | } else if (fv) { |
294 | 31.7k | do { |
295 | 1.14M | for (int x = 0; x < w; x++) |
296 | 1.11M | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, |
297 | 1.11M | 6 - intermediate_bits) - |
298 | 1.11M | PREP_BIAS; |
299 | | |
300 | 31.7k | tmp += w; |
301 | 31.7k | src += src_stride; |
302 | 31.7k | } while (--h); |
303 | 1.30k | } else |
304 | 3.29k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
305 | 8.92k | } |
306 | | |
307 | | static NOINLINE void |
308 | | prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
309 | | const int w, int h, const int mx, int my, |
310 | | const int dx, const int dy, const int filter_type |
311 | | HIGHBD_DECL_SUFFIX) |
312 | 4.82k | { |
313 | 4.82k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
314 | 4.82k | int16_t mid[128 * 8]; |
315 | 4.82k | int16_t *mid_ptrs[8]; |
316 | 4.82k | int in_y = -8; |
317 | 4.82k | src_stride = PXSTRIDE(src_stride); |
318 | | |
319 | 43.3k | for (int i = 0; i < 8; i++) |
320 | 38.5k | mid_ptrs[i] = &mid[128 * i]; |
321 | | |
322 | 4.82k | src -= src_stride * 3; |
323 | | |
324 | 128k | for (int y = 0; y < h; y++) { |
325 | 123k | int x; |
326 | 123k | int src_y = my >> 10; |
327 | 123k | GET_V_FILTER((my & 0x3ff) >> 6); |
328 | | |
329 | 270k | while (in_y < src_y) { |
330 | 147k | int imx = mx, ioff = 0; |
331 | 147k | int16_t *mid_ptr = mid_ptrs[0]; |
332 | | |
333 | 1.17M | for (int i = 0; i < 7; i++) |
334 | 1.02M | mid_ptrs[i] = mid_ptrs[i + 1]; |
335 | 147k | mid_ptrs[7] = mid_ptr; |
336 | | |
337 | 4.40M | for (x = 0; x < w; x++) { |
338 | 4.25M | GET_H_FILTER(imx >> 6); |
339 | 4.25M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
340 | 4.25M | 6 - intermediate_bits) : |
341 | 4.25M | src[ioff] << intermediate_bits; |
342 | 4.25M | imx += dx; |
343 | 4.25M | ioff += imx >> 10; |
344 | 4.25M | imx &= 0x3ff; |
345 | 4.25M | } |
346 | | |
347 | 147k | src += src_stride; |
348 | 147k | in_y++; |
349 | 147k | } |
350 | | |
351 | 4.13M | for (x = 0; x < w; x++) |
352 | 4.00M | tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND3(mid_ptrs, x, fv, 6) |
353 | 4.00M | : mid_ptrs[3][x]) - PREP_BIAS; |
354 | | |
355 | 123k | my += dy; |
356 | 123k | tmp += w; |
357 | 123k | } |
358 | 4.82k | } |
359 | | |
360 | | #define filter_fns(type, type_h, type_v) \ |
361 | | static void put_8tap_##type##_c(pixel *const dst, \ |
362 | | const ptrdiff_t dst_stride, \ |
363 | | const pixel *const src, \ |
364 | | const ptrdiff_t src_stride, \ |
365 | | const int w, const int h, \ |
366 | | const int mx, const int my \ |
367 | 42.1k | HIGHBD_DECL_SUFFIX) \ |
368 | 42.1k | { \ |
369 | 42.1k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ |
370 | 42.1k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
371 | 42.1k | } \ mc_tmpl.c:put_8tap_regular_c Line | Count | Source | 367 | 24.5k | HIGHBD_DECL_SUFFIX) \ | 368 | 24.5k | { \ | 369 | 24.5k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 24.5k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 24.5k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_c Line | Count | Source | 367 | 550 | HIGHBD_DECL_SUFFIX) \ | 368 | 550 | { \ | 369 | 550 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 550 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 550 | } \ |
mc_tmpl.c:put_8tap_regular_sharp_c Line | Count | Source | 367 | 351 | HIGHBD_DECL_SUFFIX) \ | 368 | 351 | { \ | 369 | 351 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 351 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 351 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_c Line | Count | Source | 367 | 252 | HIGHBD_DECL_SUFFIX) \ | 368 | 252 | { \ | 369 | 252 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 252 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 252 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_c Line | Count | Source | 367 | 133 | HIGHBD_DECL_SUFFIX) \ | 368 | 133 | { \ | 369 | 133 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 133 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 133 | } \ |
mc_tmpl.c:put_8tap_sharp_c Line | Count | Source | 367 | 8.30k | HIGHBD_DECL_SUFFIX) \ | 368 | 8.30k | { \ | 369 | 8.30k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 8.30k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 8.30k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_c Line | Count | Source | 367 | 652 | HIGHBD_DECL_SUFFIX) \ | 368 | 652 | { \ | 369 | 652 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 652 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 652 | } \ |
mc_tmpl.c:put_8tap_smooth_c Line | Count | Source | 367 | 7.16k | HIGHBD_DECL_SUFFIX) \ | 368 | 7.16k | { \ | 369 | 7.16k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 7.16k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 7.16k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_c Line | Count | Source | 367 | 224 | HIGHBD_DECL_SUFFIX) \ | 368 | 224 | { \ | 369 | 224 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 224 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 224 | } \ |
|
372 | | static void put_8tap_##type##_scaled_c(pixel *const dst, \ |
373 | | const ptrdiff_t dst_stride, \ |
374 | | const pixel *const src, \ |
375 | | const ptrdiff_t src_stride, \ |
376 | | const int w, const int h, \ |
377 | | const int mx, const int my, \ |
378 | | const int dx, const int dy \ |
379 | 16.1k | HIGHBD_DECL_SUFFIX) \ |
380 | 16.1k | { \ |
381 | 16.1k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ |
382 | 16.1k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
383 | 16.1k | } \ mc_tmpl.c:put_8tap_regular_scaled_c Line | Count | Source | 379 | 9.89k | HIGHBD_DECL_SUFFIX) \ | 380 | 9.89k | { \ | 381 | 9.89k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 9.89k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 9.89k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_scaled_c Line | Count | Source | 379 | 255 | HIGHBD_DECL_SUFFIX) \ | 380 | 255 | { \ | 381 | 255 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 255 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 255 | } \ |
mc_tmpl.c:put_8tap_regular_sharp_scaled_c Line | Count | Source | 379 | 204 | HIGHBD_DECL_SUFFIX) \ | 380 | 204 | { \ | 381 | 204 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 204 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 204 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_scaled_c Line | Count | Source | 379 | 201 | HIGHBD_DECL_SUFFIX) \ | 380 | 201 | { \ | 381 | 201 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 201 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 201 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_scaled_c Line | Count | Source | 379 | 71 | HIGHBD_DECL_SUFFIX) \ | 380 | 71 | { \ | 381 | 71 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 71 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 71 | } \ |
mc_tmpl.c:put_8tap_sharp_scaled_c Line | Count | Source | 379 | 1.67k | HIGHBD_DECL_SUFFIX) \ | 380 | 1.67k | { \ | 381 | 1.67k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 1.67k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 1.67k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_scaled_c Line | Count | Source | 379 | 354 | HIGHBD_DECL_SUFFIX) \ | 380 | 354 | { \ | 381 | 354 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 354 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 354 | } \ |
mc_tmpl.c:put_8tap_smooth_scaled_c Line | Count | Source | 379 | 3.17k | HIGHBD_DECL_SUFFIX) \ | 380 | 3.17k | { \ | 381 | 3.17k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 3.17k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 3.17k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_scaled_c Line | Count | Source | 379 | 353 | HIGHBD_DECL_SUFFIX) \ | 380 | 353 | { \ | 381 | 353 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 353 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 353 | } \ |
|
384 | | static void prep_8tap_##type##_c(int16_t *const tmp, \ |
385 | | const pixel *const src, \ |
386 | | const ptrdiff_t src_stride, \ |
387 | | const int w, const int h, \ |
388 | | const int mx, const int my \ |
389 | 8.92k | HIGHBD_DECL_SUFFIX) \ |
390 | 8.92k | { \ |
391 | 8.92k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ |
392 | 8.92k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
393 | 8.92k | } \ mc_tmpl.c:prep_8tap_regular_c Line | Count | Source | 389 | 3.21k | HIGHBD_DECL_SUFFIX) \ | 390 | 3.21k | { \ | 391 | 3.21k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 3.21k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 3.21k | } \ |
mc_tmpl.c:prep_8tap_regular_smooth_c Line | Count | Source | 389 | 355 | HIGHBD_DECL_SUFFIX) \ | 390 | 355 | { \ | 391 | 355 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 355 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 355 | } \ |
mc_tmpl.c:prep_8tap_regular_sharp_c Line | Count | Source | 389 | 359 | HIGHBD_DECL_SUFFIX) \ | 390 | 359 | { \ | 391 | 359 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 359 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 359 | } \ |
mc_tmpl.c:prep_8tap_sharp_regular_c Line | Count | Source | 389 | 594 | HIGHBD_DECL_SUFFIX) \ | 390 | 594 | { \ | 391 | 594 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 594 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 594 | } \ |
mc_tmpl.c:prep_8tap_sharp_smooth_c Line | Count | Source | 389 | 221 | HIGHBD_DECL_SUFFIX) \ | 390 | 221 | { \ | 391 | 221 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 221 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 221 | } \ |
mc_tmpl.c:prep_8tap_sharp_c Line | Count | Source | 389 | 2.22k | HIGHBD_DECL_SUFFIX) \ | 390 | 2.22k | { \ | 391 | 2.22k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 2.22k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 2.22k | } \ |
mc_tmpl.c:prep_8tap_smooth_regular_c Line | Count | Source | 389 | 412 | HIGHBD_DECL_SUFFIX) \ | 390 | 412 | { \ | 391 | 412 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 412 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 412 | } \ |
mc_tmpl.c:prep_8tap_smooth_c Line | Count | Source | 389 | 1.30k | HIGHBD_DECL_SUFFIX) \ | 390 | 1.30k | { \ | 391 | 1.30k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 1.30k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 1.30k | } \ |
mc_tmpl.c:prep_8tap_smooth_sharp_c Line | Count | Source | 389 | 245 | HIGHBD_DECL_SUFFIX) \ | 390 | 245 | { \ | 391 | 245 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 245 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 245 | } \ |
|
394 | | static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \ |
395 | | const pixel *const src, \ |
396 | | const ptrdiff_t src_stride, \ |
397 | | const int w, const int h, \ |
398 | | const int mx, const int my, \ |
399 | | const int dx, const int dy \ |
400 | 4.82k | HIGHBD_DECL_SUFFIX) \ |
401 | 4.82k | { \ |
402 | 4.82k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ |
403 | 4.82k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
404 | 4.82k | } mc_tmpl.c:prep_8tap_regular_scaled_c Line | Count | Source | 400 | 1.63k | HIGHBD_DECL_SUFFIX) \ | 401 | 1.63k | { \ | 402 | 1.63k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 1.63k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 1.63k | } |
mc_tmpl.c:prep_8tap_regular_smooth_scaled_c Line | Count | Source | 400 | 297 | HIGHBD_DECL_SUFFIX) \ | 401 | 297 | { \ | 402 | 297 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 297 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 297 | } |
mc_tmpl.c:prep_8tap_regular_sharp_scaled_c Line | Count | Source | 400 | 292 | HIGHBD_DECL_SUFFIX) \ | 401 | 292 | { \ | 402 | 292 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 292 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 292 | } |
mc_tmpl.c:prep_8tap_sharp_regular_scaled_c Line | Count | Source | 400 | 301 | HIGHBD_DECL_SUFFIX) \ | 401 | 301 | { \ | 402 | 301 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 301 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 301 | } |
mc_tmpl.c:prep_8tap_sharp_smooth_scaled_c Line | Count | Source | 400 | 210 | HIGHBD_DECL_SUFFIX) \ | 401 | 210 | { \ | 402 | 210 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 210 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 210 | } |
mc_tmpl.c:prep_8tap_sharp_scaled_c Line | Count | Source | 400 | 722 | HIGHBD_DECL_SUFFIX) \ | 401 | 722 | { \ | 402 | 722 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 722 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 722 | } |
mc_tmpl.c:prep_8tap_smooth_regular_scaled_c Line | Count | Source | 400 | 278 | HIGHBD_DECL_SUFFIX) \ | 401 | 278 | { \ | 402 | 278 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 278 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 278 | } |
mc_tmpl.c:prep_8tap_smooth_scaled_c Line | Count | Source | 400 | 964 | HIGHBD_DECL_SUFFIX) \ | 401 | 964 | { \ | 402 | 964 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 964 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 964 | } |
mc_tmpl.c:prep_8tap_smooth_sharp_scaled_c Line | Count | Source | 400 | 128 | HIGHBD_DECL_SUFFIX) \ | 401 | 128 | { \ | 402 | 128 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 128 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 128 | } |
|
405 | | |
406 | | filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR) |
407 | | filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP) |
408 | | filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH) |
409 | | filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH) |
410 | | filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR) |
411 | | filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP) |
412 | | filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP) |
413 | | filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR) |
414 | | filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH) |
415 | | |
/*
 * Bilinear interpolation helpers.
 *
 * FILTER_BILIN   - 16 * a + mxy * (b - a) with a = src[x] and
 *                  b = src[x + stride]; mxy is the weight given to b in
 *                  sixteenths.
 * FILTER_BILIN2  - same expression with a = src1[x] and b = src2[x], for
 *                  samples that live in two separate rows.
 * *_RND / *_RND2 - adds half of (1 << sh) and shifts right by sh.
 * *_CLIP / *_CLIP2 - the matching rounded result passed through
 *                  iclip_pixel().
 *
 * Every macro parameter is parenthesized in the expansion so that arguments
 * which are expressions (for example a stride of "s << 1" or a source of
 * "buf + off") bind as a unit. Arguments may still be evaluated more than
 * once and must therefore be free of side effects.
 */
#define FILTER_BILIN(src, x, mxy, stride) \
    (16 * (src)[(x)] + ((mxy) * ((src)[(x) + (stride)] - (src)[(x)])))

#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
    ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh))

#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
    iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))

#define FILTER_BILIN2(src1, src2, x, mxy) \
    (16 * (src1)[(x)] + ((mxy) * ((src2)[(x)] - (src1)[(x)])))

#define FILTER_BILIN_RND2(src1, src2, x, mxy, sh) \
    ((FILTER_BILIN2(src1, src2, x, mxy) + ((1 << (sh)) >> 1)) >> (sh))

#define FILTER_BILIN_CLIP2(src1, src2, x, mxy, sh) \
    iclip_pixel(FILTER_BILIN_RND2(src1, src2, x, mxy, sh))
433 | | |
434 | | static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride, |
435 | | const pixel *src, ptrdiff_t src_stride, |
436 | | const int w, int h, const int mx, const int my |
437 | | HIGHBD_DECL_SUFFIX) |
438 | 147k | { |
439 | 147k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
440 | 147k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
441 | 147k | dst_stride = PXSTRIDE(dst_stride); |
442 | 147k | src_stride = PXSTRIDE(src_stride); |
443 | | |
444 | 147k | if (mx) { |
445 | 2.51k | if (my) { |
446 | 1.36k | int16_t mid[128 * 129], *mid_ptr = mid; |
447 | 1.36k | int tmp_h = h + 1; |
448 | | |
449 | 22.6k | do { |
450 | 763k | for (int x = 0; x < w; x++) |
451 | 740k | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
452 | 22.6k | 4 - intermediate_bits); |
453 | | |
454 | 22.6k | mid_ptr += 128; |
455 | 22.6k | src += src_stride; |
456 | 22.6k | } while (--tmp_h); |
457 | | |
458 | 1.36k | mid_ptr = mid; |
459 | 21.2k | do { |
460 | 742k | for (int x = 0; x < w; x++) |
461 | 721k | dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, |
462 | 21.2k | 4 + intermediate_bits); |
463 | | |
464 | 21.2k | mid_ptr += 128; |
465 | 21.2k | dst += dst_stride; |
466 | 21.2k | } while (--h); |
467 | 1.36k | } else { |
468 | 14.2k | do { |
469 | 517k | for (int x = 0; x < w; x++) { |
470 | 503k | const int px = FILTER_BILIN_RND(src, x, mx, 1, |
471 | 503k | 4 - intermediate_bits); |
472 | 503k | dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits); |
473 | 503k | } |
474 | | |
475 | 14.2k | dst += dst_stride; |
476 | 14.2k | src += src_stride; |
477 | 14.2k | } while (--h); |
478 | 1.14k | } |
479 | 144k | } else if (my) { |
480 | 17.4k | do { |
481 | 508k | for (int x = 0; x < w; x++) |
482 | 491k | dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4); |
483 | | |
484 | 17.4k | dst += dst_stride; |
485 | 17.4k | src += src_stride; |
486 | 17.4k | } while (--h); |
487 | 1.22k | } else |
488 | 143k | put_c(dst, dst_stride, src, src_stride, w, h); |
489 | 147k | } |
490 | | |
491 | | static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, |
492 | | const pixel *src, ptrdiff_t src_stride, |
493 | | const int w, int h, const int mx, int my, |
494 | | const int dx, const int dy |
495 | | HIGHBD_DECL_SUFFIX) |
496 | 10.3k | { |
497 | 10.3k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
498 | 10.3k | int16_t mid[128 * 2]; |
499 | 10.3k | int in_y = -2; |
500 | | |
501 | 290k | do { |
502 | 290k | int x; |
503 | 290k | int y = my >> 10; |
504 | 290k | int16_t *mid1 = &mid[(y & 1) * 128]; |
505 | 290k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
506 | 290k | int dmy = my & 0x3ff; |
507 | | |
508 | 742k | while (in_y < y) { |
509 | 451k | int imx = mx, ioff = 0; |
510 | 451k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
511 | | |
512 | 11.0M | for (x = 0; x < w; x++) { |
513 | 10.6M | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
514 | 10.6M | 4 - intermediate_bits); |
515 | 10.6M | imx += dx; |
516 | 10.6M | ioff += imx >> 10; |
517 | 10.6M | imx &= 0x3ff; |
518 | 10.6M | } |
519 | | |
520 | 451k | src += PXSTRIDE(src_stride); |
521 | 451k | in_y++; |
522 | 451k | } |
523 | | |
524 | 7.01M | for (x = 0; x < w; x++) |
525 | 6.72M | dst[x] = FILTER_BILIN_CLIP2(mid1, mid2, x, dmy >> 6, |
526 | 290k | 4 + intermediate_bits); |
527 | | |
528 | 290k | my += dy; |
529 | 290k | dst += PXSTRIDE(dst_stride); |
530 | 290k | } while (--h); |
531 | 10.3k | } |
532 | | |
// Unscaled bilinear "prep" filter: reads a w x h block from src and writes
// int16_t intermediates to tmp, w values per row, each with PREP_BIAS
// subtracted.
//
//   tmp         destination for the intermediates; advanced by w after each row
//   src         source pixels
//   src_stride  row stride of src; run through PXSTRIDE() once on entry
//   w, h        block width and height; h is consumed as a do/while(--h) row
//               counter, so it must be at least 1
//   mx, my      horizontal / vertical filter arguments handed to
//               FILTER_BILIN_RND; a zero value skips filtering in that direction
//
// FILTER_BILIN_RND, PREP_BIAS, PXSTRIDE and prep_c are defined outside this
// excerpt, so their exact arithmetic is not restated here.
533 | | static void prep_bilin_c(int16_t *tmp, |
534 | | const pixel *src, ptrdiff_t src_stride, |
535 | | const int w, int h, const int mx, const int my |
536 | | HIGHBD_DECL_SUFFIX) |
537 | 5.42k | { |
538 | 5.42k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// From here on src_stride holds the PXSTRIDE()-converted value that the
// loops below add to src.
539 | 5.42k | src_stride = PXSTRIDE(src_stride); |
540 | | |
541 | 5.42k | if (mx) { |
542 | 2.53k | if (my) { |
// Both directions: a horizontal pass into mid (row pitch 128), followed by
// a vertical pass over mid.
543 | 1.21k | int16_t mid[128 * 129], *mid_ptr = mid; |
// The horizontal pass produces h + 1 rows, one more than the output height.
// NOTE(review): presumably because the vertical filter also reads the row
// after the current one - confirm against FILTER_BILIN_RND.
544 | 1.21k | int tmp_h = h + 1; |
545 | | |
546 | 31.2k | do { |
547 | 1.39M | for (int x = 0; x < w; x++) |
548 | 1.36M | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
549 | 31.2k | 4 - intermediate_bits); |
550 | | |
551 | 31.2k | mid_ptr += 128; |
552 | 31.2k | src += src_stride; |
553 | 31.2k | } while (--tmp_h); |
554 | | |
// Vertical pass: step 128 between rows of mid, fixed shift of 4 (the first
// pass used 4 - intermediate_bits).
555 | 1.21k | mid_ptr = mid; |
556 | 30.0k | do { |
557 | 1.36M | for (int x = 0; x < w; x++) |
558 | 1.33M | tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4) - |
559 | 1.33M | PREP_BIAS; |
560 | | |
561 | 30.0k | mid_ptr += 128; |
562 | 30.0k | tmp += w; |
563 | 30.0k | } while (--h); |
564 | 1.32k | } else { |
// Horizontal only (my == 0): filter straight from src into tmp.
565 | 38.3k | do { |
566 | 1.87M | for (int x = 0; x < w; x++) |
567 | 1.83M | tmp[x] = FILTER_BILIN_RND(src, x, mx, 1, |
568 | 1.83M | 4 - intermediate_bits) - |
569 | 1.83M | PREP_BIAS; |
570 | | |
571 | 38.3k | tmp += w; |
572 | 38.3k | src += src_stride; |
573 | 38.3k | } while (--h); |
574 | 1.32k | } |
575 | 2.89k | } else if (my) { |
// Vertical only (mx == 0): the filter step is one source row.
576 | 15.8k | do { |
577 | 788k | for (int x = 0; x < w; x++) |
578 | 772k | tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride, |
579 | 772k | 4 - intermediate_bits) - PREP_BIAS; |
580 | | |
581 | 15.8k | tmp += w; |
582 | 15.8k | src += src_stride; |
583 | 15.8k | } while (--h); |
584 | 554 | } else |
// mx == 0 and my == 0: delegate to prep_c.
585 | 2.33k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
586 | 5.42k | } |
587 | | |
// Scaled bilinear "prep" filter: writes w x h int16_t intermediates to tmp
// (w values per row, PREP_BIAS subtracted) while stepping through src in
// fixed-point increments.
//
//   mx, dx   starting horizontal position and per-output-column step
//   my, dy   starting vertical position and per-output-row step
//
// Positions carry a 10-bit fraction: ">> 10" yields the integer part and
// "& 0x3ff" the fraction; the fraction is reduced with ">> 6" before it is
// handed to the filter macros. h is a do/while(--h) counter and must be at
// least 1. FILTER_BILIN_RND, FILTER_BILIN_RND2 and PREP_BIAS are defined
// outside this excerpt.
588 | | static void prep_bilin_scaled_c(int16_t *tmp, |
589 | | const pixel *src, ptrdiff_t src_stride, |
590 | | const int w, int h, const int mx, int my, |
591 | | const int dx, const int dy HIGHBD_DECL_SUFFIX) |
592 | 797 | { |
593 | 797 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// Two rows of horizontally filtered samples, 128 entries each; a row lands
// in the half selected by the parity of its row counter.
594 | 797 | int16_t mid[128 * 2]; |
// Counter of horizontally filtered rows. It starts at -2, so the first
// output row (y >= 0) triggers at least two row fills.
595 | 797 | int in_y = -2; |
596 | | |
597 | 14.7k | do { |
598 | 14.7k | int x; |
599 | 14.7k | int y = my >> 10; |
// mid1 / mid2 are the two halves of mid chosen by the parity of y and y + 1.
600 | 14.7k | int16_t *mid1 = &mid[(y & 1) * 128]; |
601 | 14.7k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
602 | 14.7k | int dmy = my & 0x3ff; |
603 | | |
// Filter further source rows horizontally until in_y catches up with y;
// each iteration consumes one source row.
604 | 27.8k | while (in_y < y) { |
605 | 13.1k | int imx = mx, ioff = 0; |
606 | 13.1k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
607 | | |
608 | 420k | for (x = 0; x < w; x++) { |
609 | 407k | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
610 | 407k | 4 - intermediate_bits); |
// Advance the horizontal position: the integer carry moves the source
// offset, the low 10 bits stay as the fraction.
611 | 407k | imx += dx; |
612 | 407k | ioff += imx >> 10; |
613 | 407k | imx &= 0x3ff; |
614 | 407k | } |
615 | | |
616 | 13.1k | src += PXSTRIDE(src_stride); |
617 | 13.1k | in_y++; |
618 | 13.1k | } |
619 | | |
// Vertical blend of the two buffered rows using the vertical fraction.
620 | 467k | for (x = 0; x < w; x++) |
621 | 452k | tmp[x] = FILTER_BILIN_RND2(mid1, mid2, x, dmy >> 6, 4) - PREP_BIAS; |
622 | | |
623 | 14.7k | my += dy; |
624 | 14.7k | tmp += w; |
625 | 14.7k | } while (--h); |
626 | 797 | } |
627 | | |
// Equal-weight average of two int16_t intermediate blocks into pixels:
//   dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh)
//
//   dst, dst_stride  output pixels; dst advances by PXSTRIDE(dst_stride) per row
//   tmp1, tmp2       inputs, w values per row (both advance by w)
//   w, h             block size; h is a do/while(--h) counter, must be >= 1
628 | | static void avg_c(pixel *dst, const ptrdiff_t dst_stride, |
629 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h |
630 | | HIGHBD_DECL_SUFFIX) |
631 | 6.86k | { |
632 | 6.86k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
633 | 6.86k | const int sh = intermediate_bits + 1; |
// (1 << intermediate_bits) is half of (1 << sh), i.e. the rounding term.
// PREP_BIAS * 2 adds one PREP_BIAS per input; the prep functions in this
// file subtract PREP_BIAS when producing intermediates.
634 | 6.86k | const int rnd = (1 << intermediate_bits) + PREP_BIAS * 2; |
635 | 177k | do { |
636 | 7.14M | for (int x = 0; x < w; x++) |
637 | 6.96M | dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh); |
638 | | |
639 | 177k | tmp1 += w; |
640 | 177k | tmp2 += w; |
641 | 177k | dst += PXSTRIDE(dst_stride); |
642 | 177k | } while (--h); |
643 | 6.86k | } |
644 | | |
// Weighted average of two int16_t intermediate blocks into pixels, using one
// weight for the whole block:
//   dst[x] = iclip_pixel((tmp1[x] * weight + tmp2[x] * (16 - weight) + rnd) >> sh)
//
//   weight           weight applied to tmp1; tmp2 receives 16 - weight
//   dst, dst_stride  output pixels; dst advances by PXSTRIDE(dst_stride) per row
//   tmp1, tmp2       inputs, w values per row
//   w, h             block size; h is a do/while(--h) counter, must be >= 1
645 | | static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, |
646 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
647 | | const int weight HIGHBD_DECL_SUFFIX) |
648 | 1.04k | { |
649 | 1.04k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// The extra 4 bits of shift match the weights summing to 16.
650 | 1.04k | const int sh = intermediate_bits + 4; |
// (8 << intermediate_bits) is half of (1 << sh); PREP_BIAS * 16 is
// PREP_BIAS multiplied by the sum of the two weights.
651 | 1.04k | const int rnd = (8 << intermediate_bits) + PREP_BIAS * 16; |
652 | 24.7k | do { |
653 | 978k | for (int x = 0; x < w; x++) |
654 | 953k | dst[x] = iclip_pixel((tmp1[x] * weight + |
655 | 953k | tmp2[x] * (16 - weight) + rnd) >> sh); |
656 | | |
657 | 24.7k | tmp1 += w; |
658 | 24.7k | tmp2 += w; |
659 | 24.7k | dst += PXSTRIDE(dst_stride); |
660 | 24.7k | } while (--h); |
661 | 1.04k | } |
662 | | |
// Per-pixel masked blend of two int16_t intermediate blocks into pixels:
//   dst[x] = iclip_pixel((tmp1[x] * mask[x] + tmp2[x] * (64 - mask[x]) + rnd) >> sh)
//
//   mask             one weight per output pixel, w entries per row (advanced
//                    by w each row); it weights tmp1, tmp2 gets 64 - mask[x]
//   dst, dst_stride  output pixels; dst advances by PXSTRIDE(dst_stride) per row
//   tmp1, tmp2       inputs, w values per row
//   w, h             block size; h is a do/while(--h) counter, must be >= 1
663 | | static void mask_c(pixel *dst, const ptrdiff_t dst_stride, |
664 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
665 | | const uint8_t *mask HIGHBD_DECL_SUFFIX) |
666 | 1.77k | { |
667 | 1.77k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// The extra 6 bits of shift match the weights summing to 64.
668 | 1.77k | const int sh = intermediate_bits + 6; |
// (32 << intermediate_bits) is half of (1 << sh); PREP_BIAS * 64 is
// PREP_BIAS multiplied by the sum of the two weights.
669 | 1.77k | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
670 | 40.4k | do { |
671 | 1.53M | for (int x = 0; x < w; x++) |
672 | 1.49M | dst[x] = iclip_pixel((tmp1[x] * mask[x] + |
673 | 1.49M | tmp2[x] * (64 - mask[x]) + rnd) >> sh); |
674 | | |
675 | 40.4k | tmp1 += w; |
676 | 40.4k | tmp2 += w; |
677 | 40.4k | mask += w; |
678 | 40.4k | dst += PXSTRIDE(dst_stride); |
679 | 40.4k | } while (--h); |
680 | 1.77k | } |
681 | | |
// blend_px(a, b, m): rounded mix of a and b where b gets weight m out of 64
// and a gets 64 - m. The macro arguments are not parenthesized in the
// expansion, so callers should pass simple operands (as the blend functions
// in this file do).
682 | 2.41M | #define blend_px(a, b, m) (((a * (64 - m) + b * m) + 32) >> 6) |
// Blends tmp into dst in place, with one mask weight per pixel:
//   dst[x] = blend_px(dst[x], tmp[x], mask[x])
//
//   dst, dst_stride  pixels updated in place; dst advances by
//                    PXSTRIDE(dst_stride) per row
//   tmp              pixels to blend in, w per row
//   mask             per-pixel weights, w per row
//   w, h             block size; h is a do/while(--h) counter, must be >= 1
683 | | static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
684 | | const int w, int h, const uint8_t *mask) |
685 | 3.34k | { |
686 | 36.3k | do { |
687 | 521k | for (int x = 0; x < w; x++) { |
688 | 485k | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
689 | 485k | } |
690 | 36.3k | dst += PXSTRIDE(dst_stride); |
691 | 36.3k | tmp += w; |
692 | 36.3k | mask += w; |
693 | 36.3k | } while (--h); |
694 | 3.34k | } |
695 | | |
// Blends tmp into dst in place with a weight that depends only on the column.
// The weights are read from dav1d_obmc_masks starting at index w, and the
// same weights are reused for every row.
//
//   dst, dst_stride  pixels updated in place; dst advances by
//                    PXSTRIDE(dst_stride) per row
//   tmp              pixels to blend in, w per row
//   w, h             block size; h is a do/while(--h) counter, must be >= 1
696 | | static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
697 | | const int w, int h) |
698 | 10.4k | { |
699 | 10.4k | const uint8_t *const mask = &dav1d_obmc_masks[w]; |
700 | 205k | do { |
// Only the first (w * 3) >> 2 columns are blended; the remaining columns of
// dst are left unchanged.
701 | 1.51M | for (int x = 0; x < (w * 3) >> 2; x++) { |
702 | 1.30M | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
703 | 1.30M | } |
704 | 205k | dst += PXSTRIDE(dst_stride); |
705 | 205k | tmp += w; |
706 | 205k | } while (--h); |
707 | 10.4k | } |
708 | | |
// Blends tmp into dst in place with a weight that depends only on the row.
// The weights are read from dav1d_obmc_masks starting at index h, one entry
// per processed row.
//
//   dst, dst_stride  pixels updated in place; dst advances by
//                    PXSTRIDE(dst_stride) per row
//   tmp              pixels to blend in, w per row
//   w, h             block size; only (h * 3) >> 2 rows are processed, and that
//                    count drives a do/while(--h) loop, so it must be >= 1
709 | | static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
710 | | const int w, int h) |
711 | 8.13k | { |
// The mask start is taken from the original h, before h is reduced below.
712 | 8.13k | const uint8_t *mask = &dav1d_obmc_masks[h]; |
713 | 8.13k | h = (h * 3) >> 2; |
714 | 43.0k | do { |
715 | 43.0k | const int m = *mask++; |
716 | 661k | for (int x = 0; x < w; x++) { |
717 | 618k | dst[x] = blend_px(dst[x], tmp[x], m); |
718 | 618k | } |
719 | 43.0k | dst += PXSTRIDE(dst_stride); |
720 | 43.0k | tmp += w; |
721 | 43.0k | } while (--h); |
722 | 8.13k | } |
723 | | |
// Blends two int16_t intermediate blocks with a per-pixel weight derived from
// their absolute difference, and stores the weights in mask.
//
//   dst, dst_stride  output pixels; dst advances by PXSTRIDE(dst_stride) per row
//   tmp1, tmp2       inputs, w values per row
//   w, h             block size; h is a do/while(--h) counter, must be >= 1
//   mask             output weights; layout depends on ss_hor / ss_ver (below)
//   sign             subtracted from the rounding term when weights are
//                    combined for a subsampled mask
//   ss_hor, ss_ver   0 or 1 in the callers visible in this file; select
//                    horizontal / vertical 2:1 reduction of the stored mask
//
// Per pixel: m = min(38 + ((|tmp1 - tmp2| + mask_rnd) >> mask_sh), 64) and
// dst = iclip_pixel((tmpdiff * m + tmp2 * 64 + rnd) >> sh), which is
// algebraically tmp1 * m + tmp2 * (64 - m) before rounding and shifting.
724 | | static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, |
725 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
726 | | uint8_t *mask, const int sign, |
727 | | const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX) |
728 | 633 | { |
729 | | // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows, |
730 | | // and then load this intermediate to calculate final value for odd rows |
731 | 633 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
732 | 633 | const int bitdepth = bitdepth_from_max(bitdepth_max); |
733 | 633 | const int sh = intermediate_bits + 6; |
734 | 633 | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
735 | 633 | const int mask_sh = bitdepth + intermediate_bits - 4; |
736 | 633 | const int mask_rnd = 1 << (mask_sh - 5); |
737 | 25.0k | do { |
738 | 1.20M | for (int x = 0; x < w; x++) { |
739 | 1.17M | const int tmpdiff = tmp1[x] - tmp2[x]; |
740 | 1.17M | const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
741 | 1.17M | dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh); |
742 | | |
743 | 1.17M | if (ss_hor) { |
// Horizontal subsampling: handle the next pixel in the same iteration so
// the pair (m, n) can be folded into one mask entry at x >> 1.
744 | 155k | x++; |
745 | | |
// This tmpdiff shadows the outer one for the second pixel of the pair.
746 | 155k | const int tmpdiff = tmp1[x] - tmp2[x]; |
747 | 155k | const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
748 | 155k | dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh); |
749 | | |
// h counts down, so "h & ss_ver" is true on rows where the remaining row
// count is odd (with ss_ver set): combine with the sum stored by the
// previous row and finish the 4-sample average.
750 | 155k | if (h & ss_ver) { |
751 | 72.4k | mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2; |
752 | 83.1k | } else if (ss_ver) { |
// Remaining row count even: store only the 2x1 sum for the next row.
753 | 72.4k | mask[x >> 1] = m + n; |
754 | 72.4k | } else { |
// Horizontal subsampling only: average the pair.
755 | 10.7k | mask[x >> 1] = (m + n + 1 - sign) >> 1; |
756 | 10.7k | } |
757 | 1.02M | } else { |
758 | 1.02M | mask[x] = m; |
759 | 1.02M | } |
760 | 1.17M | } |
761 | | |
762 | 25.0k | tmp1 += w; |
763 | 25.0k | tmp2 += w; |
764 | 25.0k | dst += PXSTRIDE(dst_stride); |
// With vertical subsampling the mask row is only advanced after the row
// that finalized it (remaining count odd).
765 | 25.0k | if (!ss_ver || (h & 1)) mask += w >> ss_hor; |
766 | 25.0k | } while (--h); |
767 | 633 | } |
768 | | |
// w_mask_fns(ssn, ss_hor, ss_ver): defines w_mask_<ssn>_c, a wrapper that
// forwards its arguments to w_mask_c with the given constant ss_hor / ss_ver.
// The rows after the closing brace that repeat "Line | Count | Source" are
// per-instantiation copies emitted by the coverage report.
769 | | #define w_mask_fns(ssn, ss_hor, ss_ver) \ |
770 | | static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \ |
771 | | const int16_t *const tmp1, const int16_t *const tmp2, \ |
772 | | const int w, const int h, uint8_t *mask, \ |
773 | 633 | const int sign HIGHBD_DECL_SUFFIX) \ |
774 | 633 | { \ |
775 | 633 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ |
776 | 633 | HIGHBD_TAIL_SUFFIX); \ |
777 | 633 | } Line | Count | Source | 773 | 373 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 373 | { \ | 775 | 373 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 373 | HIGHBD_TAIL_SUFFIX); \ | 777 | 373 | } |
Line | Count | Source | 773 | 54 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 54 | { \ | 775 | 54 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 54 | HIGHBD_TAIL_SUFFIX); \ | 777 | 54 | } |
Line | Count | Source | 773 | 206 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 206 | { \ | 775 | 206 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 206 | HIGHBD_TAIL_SUFFIX); \ | 777 | 206 | } |
|
778 | | |
// Instantiations: 444 stores the mask at full resolution, 422 halves it
// horizontally, 420 halves it in both directions.
779 | | w_mask_fns(444, 0, 0); |
780 | | w_mask_fns(422, 1, 0); |
781 | | w_mask_fns(420, 1, 1); |
782 | | |
783 | | #undef w_mask_fns |
784 | | |
// FILTER_WARP_RND(src, x, F, stride, sh): 8-tap filter around src[x].
// Coefficient F[k] multiplies src[x + (k - 3) * stride] for k = 0..7, so the
// taps span 3 samples before and 4 samples after x along the stride. Half of
// (1 << sh) is added for rounding before the right shift by sh.
// The x and stride arguments are used unparenthesized in the expansion.
785 | | #define FILTER_WARP_RND(src, x, F, stride, sh) \ |
786 | 10.2M | ((F[0] * src[x - 3 * stride] + \ |
787 | 10.2M | F[1] * src[x - 2 * stride] + \ |
788 | 10.2M | F[2] * src[x - 1 * stride] + \ |
789 | 10.2M | F[3] * src[x + 0 * stride] + \ |
790 | 10.2M | F[4] * src[x + 1 * stride] + \ |
791 | 10.2M | F[5] * src[x + 2 * stride] + \ |
792 | 10.2M | F[6] * src[x + 3 * stride] + \ |
793 | 10.2M | F[7] * src[x + 4 * stride] + \ |
794 | 10.2M | ((1 << (sh)) >> 1)) >> (sh)) |
795 | | |
// FILTER_WARP_CLIP: same filter, with the result passed through iclip_pixel.
796 | | #define FILTER_WARP_CLIP(src, x, F, stride, sh) \ |
797 | 2.62M | iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh)) |
798 | | |
// Warp filter for one 8x8 block, writing clipped pixels to dst.
//
//   dst, dst_stride  output; dst advances by PXSTRIDE(dst_stride) per row
//   src, src_stride  source pixels; the function reads rows -3..+11 relative
//                    to src and, through the 8-tap filter, columns -3..+11
//   abcd             four step values: abcd[0] / abcd[1] move the horizontal
//                    filter position per column / per row, abcd[2] / abcd[3]
//                    do the same for the vertical filter position
//   mx, my           starting horizontal / vertical filter positions
//
// Filters are looked up as dav1d_mc_warp_filter[64 + ((pos + 512) >> 10)];
// the table itself is defined outside this excerpt.
799 | | static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, |
800 | | const pixel *src, const ptrdiff_t src_stride, |
801 | | const int16_t *const abcd, int mx, int my |
802 | | HIGHBD_DECL_SUFFIX) |
803 | 41.0k | { |
804 | 41.0k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// 15 rows of 8 horizontally filtered samples: 8 output rows plus the 3 rows
// above and 4 rows below that the vertical 8-tap filter reads.
805 | 41.0k | int16_t mid[15 * 8], *mid_ptr = mid; |
806 | | |
// Horizontal pass, starting 3 rows above the block.
807 | 41.0k | src -= 3 * PXSTRIDE(src_stride); |
808 | 656k | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
809 | 5.53M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
810 | 4.92M | const int8_t *const filter = |
811 | 4.92M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
812 | | |
813 | 4.92M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
814 | 4.92M | 7 - intermediate_bits); |
815 | 4.92M | } |
816 | 615k | src += PXSTRIDE(src_stride); |
817 | 615k | mid_ptr += 8; |
818 | 615k | } |
819 | | |
// Vertical pass over mid (row pitch 8), starting at its 4th row so that the
// taps at -3..+4 stay inside the 15 buffered rows.
820 | 41.0k | mid_ptr = &mid[3 * 8]; |
821 | 369k | for (int y = 0; y < 8; y++, my += abcd[3]) { |
822 | 2.95M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
823 | 2.62M | const int8_t *const filter = |
824 | 2.62M | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
825 | | |
826 | 2.62M | dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, |
827 | 2.62M | 7 + intermediate_bits); |
828 | 2.62M | } |
829 | 328k | mid_ptr += 8; |
830 | 328k | dst += PXSTRIDE(dst_stride); |
831 | 328k | } |
832 | 41.0k | } |
833 | | |
// Warp filter for one 8x8 block, writing int16_t intermediates (PREP_BIAS
// subtracted) to tmp instead of clipped pixels.
//
//   tmp, tmp_stride  output; tmp advances by tmp_stride elements per row
//                    (no PXSTRIDE conversion is applied)
//   src, src_stride  source pixels; the function reads rows -3..+11 relative
//                    to src and, through the 8-tap filter, columns -3..+11
//   abcd             four step values: abcd[0] / abcd[1] move the horizontal
//                    filter position per column / per row, abcd[2] / abcd[3]
//                    do the same for the vertical filter position
//   mx, my           starting horizontal / vertical filter positions
//
// Filters are looked up as dav1d_mc_warp_filter[64 + ((pos + 512) >> 10)];
// the table itself is defined outside this excerpt.
834 | | static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride, |
835 | | const pixel *src, const ptrdiff_t src_stride, |
836 | | const int16_t *const abcd, int mx, int my |
837 | | HIGHBD_DECL_SUFFIX) |
838 | 14.4k | { |
839 | 14.4k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// 15 rows of 8 horizontally filtered samples: 8 output rows plus the 3 rows
// above and 4 rows below that the vertical 8-tap filter reads.
840 | 14.4k | int16_t mid[15 * 8], *mid_ptr = mid; |
841 | | |
// Horizontal pass, starting 3 rows above the block.
842 | 14.4k | src -= 3 * PXSTRIDE(src_stride); |
843 | 231k | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
844 | 1.95M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
845 | 1.73M | const int8_t *const filter = |
846 | 1.73M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
847 | | |
848 | 1.73M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
849 | 1.73M | 7 - intermediate_bits); |
850 | 1.73M | } |
851 | 217k | src += PXSTRIDE(src_stride); |
852 | 217k | mid_ptr += 8; |
853 | 217k | } |
854 | | |
// Vertical pass over mid (row pitch 8), starting at its 4th row; the shift
// is a fixed 7 and the result is kept at intermediate precision.
855 | 14.4k | mid_ptr = &mid[3 * 8]; |
856 | 130k | for (int y = 0; y < 8; y++, my += abcd[3]) { |
857 | 1.04M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
858 | 927k | const int8_t *const filter = |
859 | 927k | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
860 | | |
861 | 927k | tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7) - PREP_BIAS; |
862 | 927k | } |
863 | 115k | mid_ptr += 8; |
864 | 115k | tmp += tmp_stride; |
865 | 115k | } |
866 | 14.4k | } |
867 | | |
// Builds a bw x bh block in dst from a reference picture of size iw x ih,
// for a block whose top-left corner is at (x, y) in that picture. Parts of
// the block that fall outside the picture are filled by repeating the nearest
// copied row or column.
//
//   bw, bh           size of the block to produce
//   iw, ih           size of the reference picture
//   x, y             block position in the reference; may be negative or extend
//                    past iw / ih
//   dst, dst_stride  output block
//   ref, ref_stride  reference picture origin
//
// pixel_copy and pixel_set are defined outside this excerpt.
// NOTE(review): they look like memcpy / memset style helpers over pixels -
// confirm against their definitions.
868 | | static void emu_edge_c(const intptr_t bw, const intptr_t bh, |
869 | | const intptr_t iw, const intptr_t ih, |
870 | | const intptr_t x, const intptr_t y, |
871 | | pixel *dst, const ptrdiff_t dst_stride, |
872 | | const pixel *ref, const ptrdiff_t ref_stride) |
873 | 126k | { |
874 | | // find offset in reference of visible block to copy |
875 | 126k | ref += iclip((int) y, 0, (int) ih - 1) * PXSTRIDE(ref_stride) + |
876 | 126k | iclip((int) x, 0, (int) iw - 1); |
877 | | |
878 | | // number of pixels to extend (left, right, top, bottom) |
879 | 126k | const int left_ext = iclip((int) -x, 0, (int) bw - 1); |
880 | 126k | const int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1); |
881 | 126k | assert(left_ext + right_ext < bw); |
882 | 126k | const int top_ext = iclip((int) -y, 0, (int) bh - 1); |
883 | 126k | const int bottom_ext = iclip((int) (y + bh - ih), 0, (int) bh - 1); |
884 | 126k | assert(top_ext + bottom_ext < bh); |
885 | | |
886 | | // copy visible portion first |
887 | 126k | pixel *blk = dst + top_ext * PXSTRIDE(dst_stride); |
888 | 126k | const int center_w = (int) (bw - left_ext - right_ext); |
889 | 126k | const int center_h = (int) (bh - top_ext - bottom_ext); |
// The loop variable y below shadows the function parameter y.
890 | 2.28M | for (int y = 0; y < center_h; y++) { |
891 | 2.15M | pixel_copy(blk + left_ext, ref, center_w); |
892 | | // extend left edge for this line |
893 | 2.15M | if (left_ext) |
894 | 271k | pixel_set(blk, blk[left_ext], left_ext); |
895 | | // extend right edge for this line |
896 | 2.15M | if (right_ext) |
897 | 1.33M | pixel_set(blk + left_ext + center_w, blk[left_ext + center_w - 1], |
898 | 1.33M | right_ext); |
899 | 2.15M | ref += PXSTRIDE(ref_stride); |
900 | 2.15M | blk += PXSTRIDE(dst_stride); |
901 | 2.15M | } |
902 | | |
903 | | // copy top |
// blk is reset to the first copied row; each of the top_ext rows above it
// receives a full-width copy of that row. dst ends up at the first copied row.
904 | 126k | blk = dst + top_ext * PXSTRIDE(dst_stride); |
905 | 320k | for (int y = 0; y < top_ext; y++) { |
906 | 193k | pixel_copy(dst, blk, bw); |
907 | 193k | dst += PXSTRIDE(dst_stride); |
908 | 193k | } |
909 | | |
910 | | // copy bottom |
// Skip over the copied rows, then fill each remaining row from the row
// directly above it.
911 | 126k | dst += center_h * PXSTRIDE(dst_stride); |
912 | 896k | for (int y = 0; y < bottom_ext; y++) { |
913 | 770k | pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], bw); |
914 | 770k | dst += PXSTRIDE(dst_stride); |
915 | 770k | } |
916 | 126k | } |
917 | | |
// Horizontal resize of h rows from src (src_w pixels wide) to dst (dst_w
// pixels wide) using an 8-tap filter per output pixel.
//
//   dst, dst_stride  output rows; dst advances by PXSTRIDE(dst_stride) per row
//   src, src_stride  input rows; src advances by PXSTRIDE(src_stride) per row
//   dst_w, src_w     output / input row widths
//   h                row count; do/while(--h) counter, must be >= 1
//   dx               fixed-point step of the source position per output pixel
//   mx0              starting fractional position, reused at the start of
//                    every row
//
// The position carries a 14-bit fraction: "mx >> 14" is the integer carry
// added to src_x and "mx & 0x3fff" remains as the fraction; "mx >> 8"
// selects the row of dav1d_resize_filter (table defined outside this excerpt).
918 | | static void resize_c(pixel *dst, const ptrdiff_t dst_stride, |
919 | | const pixel *src, const ptrdiff_t src_stride, |
920 | | const int dst_w, int h, const int src_w, |
921 | | const int dx, const int mx0 HIGHBD_DECL_SUFFIX) |
922 | 31.5k | { |
923 | 1.25M | do { |
924 | 1.25M | int mx = mx0, src_x = -1; |
925 | 117M | for (int x = 0; x < dst_w; x++) { |
926 | 116M | const int8_t *const F = dav1d_resize_filter[mx >> 8]; |
// Taps cover src_x - 3 .. src_x + 4; every index is clamped to
// [0, src_w - 1], so the row edges are effectively replicated. The weighted
// sum is negated before the rounding add of 64 and the shift by 7.
// NOTE(review): presumably the table stores negated coefficients - confirm
// against dav1d_resize_filter.
927 | 116M | dst[x] = iclip_pixel((-(F[0] * src[iclip(src_x - 3, 0, src_w - 1)] + |
928 | 116M | F[1] * src[iclip(src_x - 2, 0, src_w - 1)] + |
929 | 116M | F[2] * src[iclip(src_x - 1, 0, src_w - 1)] + |
930 | 116M | F[3] * src[iclip(src_x + 0, 0, src_w - 1)] + |
931 | 116M | F[4] * src[iclip(src_x + 1, 0, src_w - 1)] + |
932 | 116M | F[5] * src[iclip(src_x + 2, 0, src_w - 1)] + |
933 | 116M | F[6] * src[iclip(src_x + 3, 0, src_w - 1)] + |
934 | 116M | F[7] * src[iclip(src_x + 4, 0, src_w - 1)]) + |
935 | 116M | 64) >> 7); |
936 | 116M | mx += dx; |
937 | 116M | src_x += mx >> 14; |
938 | 116M | mx &= 0x3fff; |
939 | 116M | } |
940 | | |
941 | 1.25M | dst += PXSTRIDE(dst_stride); |
942 | 1.25M | src += PXSTRIDE(src_stride); |
943 | 1.25M | } while (--h); |
944 | 31.5k | } |
945 | | |
946 | | #if HAVE_ASM |
947 | | #if ARCH_AARCH64 || ARCH_ARM |
948 | | #include "src/arm/mc.h" |
949 | | #elif ARCH_LOONGARCH64 |
950 | | #include "src/loongarch/mc.h" |
951 | | #elif ARCH_PPC64LE |
952 | | #include "src/ppc/mc.h" |
953 | | #elif ARCH_RISCV |
954 | | #include "src/riscv/mc.h" |
955 | | #elif ARCH_X86 |
956 | | #include "src/x86/mc.h" |
957 | | #endif |
958 | | #endif |
959 | | |
// Fills the function-pointer table *c with the C implementations from this
// file. When HAVE_ASM is set, the architecture-specific init function is then
// called on the same table.
// NOTE(review): the arch init presumably replaces entries with optimized
// versions - confirm in the arch headers.
// The function name is wrapped in bitfn(), and the report shows two
// instantiations of it after the closing brace (the "Line | Count | Source"
// residue).
960 | 18.4k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { |
// init_mc_fns(type, name): sets the four per-filter-type entries (mc,
// mc_scaled, mct, mct_scaled) to put_<name>_c, put_<name>_scaled_c,
// prep_<name>_c and prep_<name>_scaled_c.
961 | 184k | #define init_mc_fns(type, name) do { \ |
962 | 184k | c->mc [type] = put_##name##_c; \ |
963 | 184k | c->mc_scaled [type] = put_##name##_scaled_c; \ |
964 | 184k | c->mct [type] = prep_##name##_c; \ |
965 | 184k | c->mct_scaled[type] = prep_##name##_scaled_c; \ |
966 | 184k | } while (0) |
967 | | |
968 | 18.4k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); |
969 | 18.4k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); |
970 | 18.4k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); |
971 | 18.4k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); |
972 | 18.4k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); |
973 | 18.4k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); |
974 | 18.4k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); |
975 | 18.4k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); |
976 | 18.4k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); |
977 | 18.4k | init_mc_fns(FILTER_2D_BILINEAR, bilin); |
978 | | |
// Single-entry hooks for compound averaging, blending, warping, edge
// emulation and resizing.
979 | 18.4k | c->avg = avg_c; |
980 | 18.4k | c->w_avg = w_avg_c; |
981 | 18.4k | c->mask = mask_c; |
982 | 18.4k | c->blend = blend_c; |
983 | 18.4k | c->blend_v = blend_v_c; |
984 | 18.4k | c->blend_h = blend_h_c; |
// w_mask is indexed 0 / 1 / 2 for the 444 / 422 / 420 variants.
985 | 18.4k | c->w_mask[0] = w_mask_444_c; |
986 | 18.4k | c->w_mask[1] = w_mask_422_c; |
987 | 18.4k | c->w_mask[2] = w_mask_420_c; |
988 | 18.4k | c->warp8x8 = warp_affine_8x8_c; |
989 | 18.4k | c->warp8x8t = warp_affine_8x8t_c; |
990 | 18.4k | c->emu_edge = emu_edge_c; |
991 | 18.4k | c->resize = resize_c; |
992 | | |
// Architecture-specific setup runs last, after all C defaults are in place.
993 | | #if HAVE_ASM |
994 | | #if ARCH_AARCH64 || ARCH_ARM |
995 | | mc_dsp_init_arm(c); |
996 | | #elif ARCH_LOONGARCH64 |
997 | | mc_dsp_init_loongarch(c); |
998 | | #elif ARCH_PPC64LE |
999 | | mc_dsp_init_ppc(c); |
1000 | | #elif ARCH_RISCV |
1001 | | mc_dsp_init_riscv(c); |
1002 | | #elif ARCH_X86 |
1003 | | mc_dsp_init_x86(c); |
1004 | | #endif |
1005 | | #endif |
1006 | 18.4k | } Line | Count | Source | 960 | 8.44k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 8.44k | #define init_mc_fns(type, name) do { \ | 962 | 8.44k | c->mc [type] = put_##name##_c; \ | 963 | 8.44k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 8.44k | c->mct [type] = prep_##name##_c; \ | 965 | 8.44k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 8.44k | } while (0) | 967 | | | 968 | 8.44k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 8.44k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 8.44k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 8.44k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 8.44k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 8.44k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 8.44k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 8.44k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 8.44k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 8.44k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 8.44k | c->avg = avg_c; | 980 | 8.44k | c->w_avg = w_avg_c; | 981 | 8.44k | c->mask = mask_c; | 982 | 8.44k | c->blend = blend_c; | 983 | 8.44k | c->blend_v = blend_v_c; | 984 | 8.44k | c->blend_h = blend_h_c; | 985 | 8.44k | c->w_mask[0] = w_mask_444_c; | 986 | 8.44k | c->w_mask[1] = w_mask_422_c; | 987 | 8.44k | c->w_mask[2] = w_mask_420_c; | 988 | 8.44k | c->warp8x8 = warp_affine_8x8_c; | 989 | 8.44k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 8.44k | c->emu_edge = emu_edge_c; | 991 | 8.44k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 
1002 | | #elif ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 8.44k | } |
Line | Count | Source | 960 | 9.97k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 9.97k | #define init_mc_fns(type, name) do { \ | 962 | 9.97k | c->mc [type] = put_##name##_c; \ | 963 | 9.97k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 9.97k | c->mct [type] = prep_##name##_c; \ | 965 | 9.97k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 9.97k | } while (0) | 967 | | | 968 | 9.97k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 9.97k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 9.97k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 9.97k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 9.97k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 9.97k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 9.97k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 9.97k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 9.97k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 9.97k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 9.97k | c->avg = avg_c; | 980 | 9.97k | c->w_avg = w_avg_c; | 981 | 9.97k | c->mask = mask_c; | 982 | 9.97k | c->blend = blend_c; | 983 | 9.97k | c->blend_v = blend_v_c; | 984 | 9.97k | c->blend_h = blend_h_c; | 985 | 9.97k | c->w_mask[0] = w_mask_444_c; | 986 | 9.97k | c->w_mask[1] = w_mask_422_c; | 987 | 9.97k | c->w_mask[2] = w_mask_420_c; | 988 | 9.97k | c->warp8x8 = warp_affine_8x8_c; | 989 | 9.97k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 9.97k | c->emu_edge = emu_edge_c; | 991 | 9.97k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 1002 | | #elif 
ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 9.97k | } |
|