/work/dav1d/src/mc_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/attributes.h" |
34 | | #include "common/intops.h" |
35 | | |
36 | | #include "src/mc.h" |
37 | | #include "src/tables.h" |
38 | | |
// Constants of the 16-bit intermediate sample format used by the functions
// below: get_intermediate_bits() is the left shift applied to a pixel and
// PREP_BIAS is the offset subtracted from each value stored by the prep_*
// functions (see prep_c). The 8-bit variant ignores its argument.
39 | | #if BITDEPTH == 8 |
40 | 921k | #define get_intermediate_bits(bitdepth_max) 4 |
41 | | // Output in interval [-5132, 9212], fits in int16_t as is |
42 | 104M | #define PREP_BIAS 0 |
43 | | #else |
44 | | // 4 for 10 bits/component, 2 for 12 bits/component |
45 | | #define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max)) |
46 | | // Output in interval [-20588, 36956] (10-bit), [-20602, 36983] (12-bit) |
47 | | // Subtract a bias to ensure the output fits in int16_t |
48 | | #define PREP_BIAS 8192 |
49 | | #endif |
50 | | |
// put_c: unfiltered block copy. Copies h rows of w pixels from src to dst
// with pixel_copy(), stepping each pointer by its own stride after every row.
// The strides are added directly to pixel pointers, so they are counted in
// pixels here; the callers in this file convert them with PXSTRIDE() first.
51 | | static NOINLINE void |
52 | | put_c(pixel *dst, const ptrdiff_t dst_stride, |
53 | | const pixel *src, const ptrdiff_t src_stride, const int w, int h) |
54 | 264k | { |
// do/while: the body runs before h is tested, so h is assumed to be >= 1
// -- TODO confirm that every caller guarantees this.
55 | 5.68M | do { |
56 | 5.68M | pixel_copy(dst, src, w); |
57 | | |
58 | 5.68M | dst += dst_stride; |
59 | 5.68M | src += src_stride; |
60 | 5.68M | } while (--h); |
61 | 264k | } |
62 | | |
// prep_c: unfiltered conversion of a w x h pixel block into the 16-bit
// intermediate format. Each pixel is shifted left by the intermediate bit
// count and PREP_BIAS is subtracted. Output rows are packed back to back in
// tmp (row pitch w); src advances by src_stride, counted in pixels.
63 | | static NOINLINE void |
64 | | prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride, |
65 | | const int w, int h HIGHBD_DECL_SUFFIX) |
66 | 42.6k | { |
67 | 42.6k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// do/while: h is assumed to be >= 1 on entry -- TODO confirm with callers.
68 | 1.47M | do { |
69 | 62.1M | for (int x = 0; x < w; x++) |
70 | 60.6M | tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS; |
71 | | |
72 | 1.47M | tmp += w; |
73 | 1.47M | src += src_stride; |
74 | 1.47M | } while (--h); |
75 | 42.6k | } |
76 | | |
// FILTER_8TAP: 8-tap dot product of coefficients F[0..7] with the samples at
// src[x - 3 * stride] .. src[x + 4 * stride]. stride = 1 filters along a row;
// a row pitch filters along a column.
// NOTE(review): the macro arguments are not parenthesised in the expansion;
// every use in this file passes a plain identifier or a constant.
77 | | #define FILTER_8TAP(src, x, F, stride) \ |
78 | 150M | (F[0] * src[x + -3 * stride] + \ |
79 | 150M | F[1] * src[x + -2 * stride] + \ |
80 | 150M | F[2] * src[x + -1 * stride] + \ |
81 | 150M | F[3] * src[x + +0 * stride] + \ |
82 | 150M | F[4] * src[x + +1 * stride] + \ |
83 | 150M | F[5] * src[x + +2 * stride] + \ |
84 | 150M | F[6] * src[x + +3 * stride] + \ |
85 | 150M | F[7] * src[x + +4 * stride]) |
86 | | |
// FILTER_8TAP2: same dot product, but the eight taps come from eight separate
// row pointers src[0]..src[7], all read at column x.
87 | | #define FILTER_8TAP2(src, x, F) \ |
88 | 103M | (F[0] * src[0][x] + \ |
89 | 103M | F[1] * src[1][x] + \ |
90 | 103M | F[2] * src[2][x] + \ |
91 | 103M | F[3] * src[3][x] + \ |
92 | 103M | F[4] * src[4][x] + \ |
93 | 103M | F[5] * src[5][x] + \ |
94 | 103M | F[6] * src[6][x] + \ |
95 | 103M | F[7] * src[7][x]) |
96 | | |
// _RND: add half of (1 << sh) and shift right by sh. The added term
// (1 << sh) >> 1 is 0 when sh is 0, so nothing is added in that case.
97 | | #define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \ |
98 | 132M | ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh)) |
99 | | |
// _RND2: like _RND, but the rounding constant is supplied by the caller.
100 | | #define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \ |
101 | 18.8M | ((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh)) |
102 | | |
// _RND3: _RND applied to the row-pointer form FILTER_8TAP2.
103 | | #define DAV1D_FILTER_8TAP_RND3(src, x, F, sh) \ |
104 | 103M | ((FILTER_8TAP2(src, x, F) + ((1 << (sh)) >> 1)) >> (sh)) |
105 | | |
// _CLIP / _CLIP2 / _CLIP3: the matching _RND variant passed through
// iclip_pixel().
106 | | #define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \ |
107 | 32.2M | iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh)) |
108 | | |
109 | | #define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \ |
110 | 18.8M | iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh)) |
111 | | |
112 | | #define DAV1D_FILTER_8TAP_CLIP3(src, x, F, sh) \ |
113 | 80.4M | iclip_pixel(DAV1D_FILTER_8TAP_RND3(src, x, F, sh)) |
114 | | |
// GET_H_FILTER: declares `fh`, the horizontal coefficient row. It is NULL when
// mx is 0; otherwise it is row (mx - 1) of a dav1d_mc_subpel_filters table:
// table (filter_type & 3) when w > 4, else table 3 + (filter_type & 1).
// Uses `w` and `filter_type` from the enclosing scope.
115 | | #define GET_H_FILTER(mx) \ |
116 | 60.2M | const int8_t *const fh = !(mx) ? NULL : w > 4 ? \ |
117 | 54.1M | dav1d_mc_subpel_filters[filter_type & 3][(mx) - 1] : \ |
118 | 54.1M | dav1d_mc_subpel_filters[3 + (filter_type & 1)][(mx) - 1] |
119 | | |
// GET_V_FILTER: declares `fv`, the vertical counterpart. It is selected from
// filter_type >> 2 and from `h` instead of `w`; NULL when my is 0.
120 | | #define GET_V_FILTER(my) \ |
121 | 2.62M | const int8_t *const fv = !(my) ? NULL : h > 4 ? \ |
122 | 2.12M | dav1d_mc_subpel_filters[filter_type >> 2][(my) - 1] : \ |
123 | 2.12M | dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][(my) - 1] |
124 | | |
// GET_FILTERS: declares both `fh` and `fv` from the enclosing scope's `mx`
// and `my`.
125 | | #define GET_FILTERS() \ |
126 | 335k | GET_H_FILTER(mx); \ |
127 | 335k | GET_V_FILTER(my) |
128 | | |
// put_8tap_c: 8-tap subpel filter of a w x h block from src into dst, with the
// result clipped to pixel range. mx and my select the horizontal and vertical
// coefficient rows (0 = no filtering in that direction). filter_type packs the
// horizontal type in its low 2 bits and the vertical type above them (see
// GET_H_FILTER / GET_V_FILTER). Both strides are converted with PXSTRIDE()
// before use. The loops are do/while, so h is assumed to be >= 1.
129 | | static NOINLINE void |
130 | | put_8tap_c(pixel *dst, ptrdiff_t dst_stride, |
131 | | const pixel *src, ptrdiff_t src_stride, |
132 | | const int w, int h, const int mx, const int my, |
133 | | const int filter_type HIGHBD_DECL_SUFFIX) |
134 | 276k | { |
135 | 276k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// Rounding constant for the horizontal-only path: half of the final >> 6
// plus half of a shift by (6 - intermediate_bits).
136 | 276k | const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1); |
137 | | |
138 | 276k | GET_FILTERS(); |
139 | 276k | dst_stride = PXSTRIDE(dst_stride); |
140 | 276k | src_stride = PXSTRIDE(src_stride); |
141 | | |
142 | 276k | if (fh) { |
143 | 93.2k | if (fv) { |
// Both directions: the horizontal pass fills h + 7 rows of `mid`, then the
// vertical pass runs over `mid`. `mid` has a fixed pitch of 128 and 135 rows
// (128 + 7) -- presumably w and h never exceed 128; this is not checked here.
144 | 46.2k | int tmp_h = h + 7; |
145 | 46.2k | int16_t mid[128 * 135], *mid_ptr = mid; |
146 | | |
// Start 3 rows above the block: the vertical taps reach rows -3 .. +4.
147 | 46.2k | src -= src_stride * 3; |
148 | 970k | do { |
149 | 30.7M | for (int x = 0; x < w; x++) |
150 | 29.7M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
151 | 970k | 6 - intermediate_bits); |
152 | | |
153 | 970k | mid_ptr += 128; |
154 | 970k | src += src_stride; |
155 | 970k | } while (--tmp_h); |
156 | | |
// Row 3 of `mid` corresponds to the first output row.
157 | 46.2k | mid_ptr = mid + 128 * 3; |
158 | 673k | do { |
159 | 24.2M | for (int x = 0; x < w; x++) |
160 | 23.6M | dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, |
161 | 673k | 6 + intermediate_bits); |
162 | | |
163 | 673k | mid_ptr += 128; |
164 | 673k | dst += dst_stride; |
165 | 673k | } while (--h); |
166 | 47.0k | } else { |
// Horizontal only: one pass straight into dst using intermediate_rnd.
167 | 600k | do { |
168 | 19.4M | for (int x = 0; x < w; x++) { |
169 | 18.8M | dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1, |
170 | 18.8M | intermediate_rnd, 6); |
171 | 18.8M | } |
172 | | |
173 | 600k | dst += dst_stride; |
174 | 600k | src += src_stride; |
175 | 600k | } while (--h); |
176 | 47.0k | } |
177 | 183k | } else if (fv) { |
// Vertical only: filter down the columns of src (tap step = src_stride).
178 | 285k | do { |
179 | 8.95M | for (int x = 0; x < w; x++) |
180 | 8.66M | dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6); |
181 | | |
182 | 285k | dst += dst_stride; |
183 | 285k | src += src_stride; |
184 | 285k | } while (--h); |
185 | 24.2k | } else |
// No subpel offset in either direction: plain copy.
186 | 159k | put_c(dst, dst_stride, src, src_stride, w, h); |
187 | 276k | } |
188 | | |
// put_8tap_scaled_c: 8-tap filter with a per-pixel source step (scaled
// prediction); the result is clipped to pixel range and written to dst.
// mx/my are the starting positions and dx/dy the per-output-pixel steps.
// Positions carry 10 fractional bits: `>> 10` gives the integer sample
// offset, and the top 4 fraction bits (`(v & 0x3ff) >> 6`) select the
// coefficient row.
189 | | static NOINLINE void |
190 | | put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, |
191 | | const pixel *src, ptrdiff_t src_stride, |
192 | | const int w, int h, const int mx, int my, |
193 | | const int dx, const int dy, const int filter_type |
194 | | HIGHBD_DECL_SUFFIX) |
195 | 63.3k | { |
196 | 63.3k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
197 | 63.3k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
// `mid` holds the 8 most recent horizontally filtered rows (pitch 128).
// mid_ptrs[] orders them oldest (0) to newest (7).
198 | 63.3k | int16_t mid[128 * 8]; |
199 | 63.3k | int16_t *mid_ptrs[8]; |
// in_y is incremented once per row filtered into `mid`. Starting at -8 makes
// the first output row run the fill loop (my >> 10) + 8 times.
200 | 63.3k | int in_y = -8; |
201 | 63.3k | src_stride = PXSTRIDE(src_stride); |
202 | | |
203 | 570k | for (int i = 0; i < 8; i++) |
204 | 507k | mid_ptrs[i] = &mid[128 * i]; |
205 | | |
// The first row fed to the filter lies 3 rows above src.
206 | 63.3k | src -= src_stride * 3; |
207 | | |
208 | 1.77M | for (int y = 0; y < h; y++) { |
209 | 1.71M | int x; |
210 | 1.71M | int src_y = my >> 10; |
211 | 1.71M | GET_V_FILTER((my & 0x3ff) >> 6); |
212 | | |
// Produce source rows until in_y reaches the integer source row of this
// output row.
213 | 2.83M | while (in_y < src_y) { |
214 | 1.12M | int imx = mx, ioff = 0; |
215 | 1.12M | int16_t *mid_ptr = mid_ptrs[0]; |
216 | | |
// Rotate the window: the oldest row's storage is reused for the new row.
217 | 8.97M | for (int i = 0; i < 7; i++) |
218 | 7.85M | mid_ptrs[i] = mid_ptrs[i + 1]; |
219 | 1.12M | mid_ptrs[7] = mid_ptr; |
220 | | |
// Horizontal pass with a per-pixel phase: imx is the fractional position and
// ioff the integer column. When the phase selects no filter (fh == NULL) the
// pixel is only shifted left by intermediate_bits.
221 | 48.6M | for (x = 0; x < w; x++) { |
222 | 47.5M | GET_H_FILTER(imx >> 6); |
223 | 47.5M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
224 | 47.5M | 6 - intermediate_bits) : |
225 | 47.5M | src[ioff] << intermediate_bits; |
226 | 47.5M | imx += dx; |
227 | 47.5M | ioff += imx >> 10; |
228 | 47.5M | imx &= 0x3ff; |
229 | 47.5M | } |
230 | | |
231 | 1.12M | src += src_stride; |
232 | 1.12M | in_y++; |
233 | 1.12M | } |
234 | | |
// Vertical pass over the 8-row window. Without a vertical filter, row 3 of
// the window is rounded back down by intermediate_bits and clipped.
235 | 91.1M | for (x = 0; x < w; x++) |
236 | 89.3M | dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP3(mid_ptrs, x, fv, |
237 | 89.3M | 6 + intermediate_bits) : |
238 | 89.3M | iclip_pixel((mid_ptrs[3][x] + intermediate_rnd) >> |
239 | 8.91M | intermediate_bits); |
240 | | |
241 | 1.71M | my += dy; |
242 | 1.71M | dst += PXSTRIDE(dst_stride); |
243 | 1.71M | } |
244 | 63.3k | } |
245 | | |
// prep_8tap_c: 8-tap subpel filter of a w x h block from src into the 16-bit
// intermediate buffer tmp (rows packed with pitch w, PREP_BIAS subtracted, no
// clipping). mx/my/filter_type select the coefficients exactly as in
// GET_H_FILTER / GET_V_FILTER. src_stride is converted with PXSTRIDE().
// The loops are do/while, so h is assumed to be >= 1.
246 | | static NOINLINE void |
247 | | prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
248 | | const int w, int h, const int mx, const int my, |
249 | | const int filter_type HIGHBD_DECL_SUFFIX) |
250 | 59.0k | { |
251 | 59.0k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
252 | 59.0k | GET_FILTERS(); |
253 | 59.0k | src_stride = PXSTRIDE(src_stride); |
254 | | |
255 | 59.0k | if (fh) { |
256 | 14.1k | if (fv) { |
// Both directions: the horizontal pass fills h + 7 rows of `mid` (pitch 128,
// 135 rows -- presumably w, h <= 128; not checked here), then the vertical
// pass runs over `mid`.
257 | 8.70k | int tmp_h = h + 7; |
258 | 8.70k | int16_t mid[128 * 135], *mid_ptr = mid; |
259 | | |
// Start 3 rows above the block: the vertical taps reach rows -3 .. +4.
260 | 8.70k | src -= src_stride * 3; |
261 | 183k | do { |
262 | 4.37M | for (int x = 0; x < w; x++) |
263 | 4.19M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
264 | 183k | 6 - intermediate_bits); |
265 | | |
266 | 183k | mid_ptr += 128; |
267 | 183k | src += src_stride; |
268 | 183k | } while (--tmp_h); |
269 | | |
270 | 8.70k | mid_ptr = mid + 128 * 3; |
271 | 123k | do { |
272 | 3.51M | for (int x = 0; x < w; x++) { |
// The vertical shift here is 6 (put_8tap_c uses 6 + intermediate_bits), so
// the result stays at intermediate precision. The assert checks that the
// biased value fits in int16_t before it is stored.
273 | 3.39M | int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) - |
274 | 3.39M | PREP_BIAS; |
275 | 3.39M | assert(t >= INT16_MIN && t <= INT16_MAX); |
276 | 3.39M | tmp[x] = t; |
277 | 3.39M | } |
278 | | |
279 | 123k | mid_ptr += 128; |
280 | 123k | tmp += w; |
281 | 123k | } while (--h); |
282 | 8.70k | } else { |
// Horizontal only.
283 | 122k | do { |
284 | 4.45M | for (int x = 0; x < w; x++) |
285 | 4.33M | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
286 | 4.33M | 6 - intermediate_bits) - |
287 | 4.33M | PREP_BIAS; |
288 | | |
289 | 122k | tmp += w; |
290 | 122k | src += src_stride; |
291 | 122k | } while (--h); |
292 | 5.40k | } |
293 | 44.9k | } else if (fv) { |
// Vertical only: filter down the columns of src (tap step = src_stride).
294 | 119k | do { |
295 | 4.12M | for (int x = 0; x < w; x++) |
296 | 4.00M | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, |
297 | 4.00M | 6 - intermediate_bits) - |
298 | 4.00M | PREP_BIAS; |
299 | | |
300 | 119k | tmp += w; |
301 | 119k | src += src_stride; |
302 | 119k | } while (--h); |
303 | 5.59k | } else |
// No subpel offset in either direction: plain format conversion.
304 | 39.3k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
305 | 59.0k | } |
306 | | |
// prep_8tap_scaled_c: scaled 8-tap filter writing the 16-bit intermediate
// format to tmp (rows packed with pitch w, PREP_BIAS subtracted, no clipping).
// mx/my are the starting positions and dx/dy the per-output-pixel steps.
// Positions carry 10 fractional bits, and the top 4 fraction bits select the
// coefficient row.
307 | | static NOINLINE void |
308 | | prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
309 | | const int w, int h, const int mx, int my, |
310 | | const int dx, const int dy, const int filter_type |
311 | | HIGHBD_DECL_SUFFIX) |
312 | 18.1k | { |
313 | 18.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// `mid` holds the 8 most recent horizontally filtered rows (pitch 128).
// mid_ptrs[] orders them oldest (0) to newest (7).
314 | 18.1k | int16_t mid[128 * 8]; |
315 | 18.1k | int16_t *mid_ptrs[8]; |
// in_y is incremented once per row filtered into `mid`. Starting at -8 makes
// the first output row run the fill loop (my >> 10) + 8 times.
316 | 18.1k | int in_y = -8; |
317 | 18.1k | src_stride = PXSTRIDE(src_stride); |
318 | | |
319 | 163k | for (int i = 0; i < 8; i++) |
320 | 145k | mid_ptrs[i] = &mid[128 * i]; |
321 | | |
// The first row fed to the filter lies 3 rows above src.
322 | 18.1k | src -= src_stride * 3; |
323 | | |
324 | 597k | for (int y = 0; y < h; y++) { |
325 | 579k | int x; |
326 | 579k | int src_y = my >> 10; |
327 | 579k | GET_V_FILTER((my & 0x3ff) >> 6); |
328 | | |
// Produce source rows until in_y reaches the integer source row of this
// output row.
329 | 921k | while (in_y < src_y) { |
330 | 341k | int imx = mx, ioff = 0; |
331 | 341k | int16_t *mid_ptr = mid_ptrs[0]; |
332 | | |
// Rotate the window: the oldest row's storage is reused for the new row.
333 | 2.73M | for (int i = 0; i < 7; i++) |
334 | 2.39M | mid_ptrs[i] = mid_ptrs[i + 1]; |
335 | 341k | mid_ptrs[7] = mid_ptr; |
336 | | |
// Horizontal pass with a per-pixel phase: imx is the fractional position and
// ioff the integer column. With no filter selected (fh == NULL) the pixel is
// only shifted left by intermediate_bits.
337 | 12.7M | for (x = 0; x < w; x++) { |
338 | 12.3M | GET_H_FILTER(imx >> 6); |
339 | 12.3M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
340 | 12.3M | 6 - intermediate_bits) : |
341 | 12.3M | src[ioff] << intermediate_bits; |
342 | 12.3M | imx += dx; |
343 | 12.3M | ioff += imx >> 10; |
344 | 12.3M | imx &= 0x3ff; |
345 | 12.3M | } |
346 | | |
347 | 341k | src += src_stride; |
348 | 341k | in_y++; |
349 | 341k | } |
350 | | |
// Vertical pass over the 8-row window (shift 6), or row 3 of the window taken
// as is when there is no vertical filter. PREP_BIAS is subtracted either way.
351 | 26.7M | for (x = 0; x < w; x++) |
352 | 26.1M | tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND3(mid_ptrs, x, fv, 6) |
353 | 26.1M | : mid_ptrs[3][x]) - PREP_BIAS; |
354 | | |
355 | 579k | my += dy; |
356 | 579k | tmp += w; |
357 | 579k | } |
358 | 18.1k | } |
359 | | |
// filter_fns(type, type_h, type_v): defines four static wrappers for one
// horizontal/vertical filter combination:
//   put_8tap_<type>_c,  put_8tap_<type>_scaled_c,
//   prep_8tap_<type>_c, prep_8tap_<type>_scaled_c.
// Each forwards its arguments unchanged to the generic function of the same
// family and passes filter_type = type_h | (type_v << 2).
// In this listing, the text starting at "mc_tmpl.c:<function>" on a row is
// not source. It repeats the wrapper body with the hit counts of one macro
// instantiation; for put_8tap_* the nine per-instantiation counts add up to
// the 276k total shown in the numbered rows.
360 | | #define filter_fns(type, type_h, type_v) \ |
361 | | static void put_8tap_##type##_c(pixel *const dst, \ |
362 | | const ptrdiff_t dst_stride, \ |
363 | | const pixel *const src, \ |
364 | | const ptrdiff_t src_stride, \ |
365 | | const int w, const int h, \ |
366 | | const int mx, const int my \ |
367 | 276k | HIGHBD_DECL_SUFFIX) \ |
368 | 276k | { \ |
369 | 276k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ |
370 | 276k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
371 | 276k | } \ mc_tmpl.c:put_8tap_regular_c Line | Count | Source | 367 | 228k | HIGHBD_DECL_SUFFIX) \ | 368 | 228k | { \ | 369 | 228k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 228k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 228k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_c Line | Count | Source | 367 | 5.50k | HIGHBD_DECL_SUFFIX) \ | 368 | 5.50k | { \ | 369 | 5.50k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 5.50k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 5.50k | } \ |
mc_tmpl.c:put_8tap_regular_sharp_c Line | Count | Source | 367 | 431 | HIGHBD_DECL_SUFFIX) \ | 368 | 431 | { \ | 369 | 431 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 431 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 431 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_c Line | Count | Source | 367 | 331 | HIGHBD_DECL_SUFFIX) \ | 368 | 331 | { \ | 369 | 331 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 331 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 331 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_c Line | Count | Source | 367 | 493 | HIGHBD_DECL_SUFFIX) \ | 368 | 493 | { \ | 369 | 493 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 493 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 493 | } \ |
mc_tmpl.c:put_8tap_sharp_c Line | Count | Source | 367 | 4.52k | HIGHBD_DECL_SUFFIX) \ | 368 | 4.52k | { \ | 369 | 4.52k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 4.52k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 4.52k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_c Line | Count | Source | 367 | 3.41k | HIGHBD_DECL_SUFFIX) \ | 368 | 3.41k | { \ | 369 | 3.41k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 3.41k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 3.41k | } \ |
mc_tmpl.c:put_8tap_smooth_c Line | Count | Source | 367 | 33.2k | HIGHBD_DECL_SUFFIX) \ | 368 | 33.2k | { \ | 369 | 33.2k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 33.2k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 33.2k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_c Line | Count | Source | 367 | 263 | HIGHBD_DECL_SUFFIX) \ | 368 | 263 | { \ | 369 | 263 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 263 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 263 | } \ |
|
372 | | static void put_8tap_##type##_scaled_c(pixel *const dst, \ |
373 | | const ptrdiff_t dst_stride, \ |
374 | | const pixel *const src, \ |
375 | | const ptrdiff_t src_stride, \ |
376 | | const int w, const int h, \ |
377 | | const int mx, const int my, \ |
378 | | const int dx, const int dy \ |
379 | 63.4k | HIGHBD_DECL_SUFFIX) \ |
380 | 63.4k | { \ |
381 | 63.4k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ |
382 | 63.4k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
383 | 63.4k | } \ mc_tmpl.c:put_8tap_regular_scaled_c Line | Count | Source | 379 | 38.0k | HIGHBD_DECL_SUFFIX) \ | 380 | 38.0k | { \ | 381 | 38.0k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 38.0k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 38.0k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_scaled_c Line | Count | Source | 379 | 1.06k | HIGHBD_DECL_SUFFIX) \ | 380 | 1.06k | { \ | 381 | 1.06k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 1.06k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 1.06k | } \ |
mc_tmpl.c:put_8tap_regular_sharp_scaled_c Line | Count | Source | 379 | 434 | HIGHBD_DECL_SUFFIX) \ | 380 | 434 | { \ | 381 | 434 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 434 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 434 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_scaled_c Line | Count | Source | 379 | 232 | HIGHBD_DECL_SUFFIX) \ | 380 | 232 | { \ | 381 | 232 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 232 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 232 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_scaled_c Line | Count | Source | 379 | 181 | HIGHBD_DECL_SUFFIX) \ | 380 | 181 | { \ | 381 | 181 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 181 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 181 | } \ |
mc_tmpl.c:put_8tap_sharp_scaled_c Line | Count | Source | 379 | 1.46k | HIGHBD_DECL_SUFFIX) \ | 380 | 1.46k | { \ | 381 | 1.46k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 1.46k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 1.46k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_scaled_c Line | Count | Source | 379 | 2.03k | HIGHBD_DECL_SUFFIX) \ | 380 | 2.03k | { \ | 381 | 2.03k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 2.03k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 2.03k | } \ |
mc_tmpl.c:put_8tap_smooth_scaled_c Line | Count | Source | 379 | 19.7k | HIGHBD_DECL_SUFFIX) \ | 380 | 19.7k | { \ | 381 | 19.7k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 19.7k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 19.7k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_scaled_c Line | Count | Source | 379 | 197 | HIGHBD_DECL_SUFFIX) \ | 380 | 197 | { \ | 381 | 197 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 197 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 197 | } \ |
|
384 | | static void prep_8tap_##type##_c(int16_t *const tmp, \ |
385 | | const pixel *const src, \ |
386 | | const ptrdiff_t src_stride, \ |
387 | | const int w, const int h, \ |
388 | | const int mx, const int my \ |
389 | 59.0k | HIGHBD_DECL_SUFFIX) \ |
390 | 59.0k | { \ |
391 | 59.0k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ |
392 | 59.0k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
393 | 59.0k | } \ mc_tmpl.c:prep_8tap_regular_c Line | Count | Source | 389 | 31.2k | HIGHBD_DECL_SUFFIX) \ | 390 | 31.2k | { \ | 391 | 31.2k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 31.2k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 31.2k | } \ |
mc_tmpl.c:prep_8tap_regular_smooth_c Line | Count | Source | 389 | 462 | HIGHBD_DECL_SUFFIX) \ | 390 | 462 | { \ | 391 | 462 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 462 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 462 | } \ |
mc_tmpl.c:prep_8tap_regular_sharp_c Line | Count | Source | 389 | 923 | HIGHBD_DECL_SUFFIX) \ | 390 | 923 | { \ | 391 | 923 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 923 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 923 | } \ |
mc_tmpl.c:prep_8tap_sharp_regular_c Line | Count | Source | 389 | 1.32k | HIGHBD_DECL_SUFFIX) \ | 390 | 1.32k | { \ | 391 | 1.32k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 1.32k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 1.32k | } \ |
mc_tmpl.c:prep_8tap_sharp_smooth_c Line | Count | Source | 389 | 844 | HIGHBD_DECL_SUFFIX) \ | 390 | 844 | { \ | 391 | 844 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 844 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 844 | } \ |
mc_tmpl.c:prep_8tap_sharp_c Line | Count | Source | 389 | 4.14k | HIGHBD_DECL_SUFFIX) \ | 390 | 4.14k | { \ | 391 | 4.14k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 4.14k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 4.14k | } \ |
mc_tmpl.c:prep_8tap_smooth_regular_c Line | Count | Source | 389 | 804 | HIGHBD_DECL_SUFFIX) \ | 390 | 804 | { \ | 391 | 804 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 804 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 804 | } \ |
mc_tmpl.c:prep_8tap_smooth_c Line | Count | Source | 389 | 18.9k | HIGHBD_DECL_SUFFIX) \ | 390 | 18.9k | { \ | 391 | 18.9k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 18.9k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 18.9k | } \ |
mc_tmpl.c:prep_8tap_smooth_sharp_c Line | Count | Source | 389 | 463 | HIGHBD_DECL_SUFFIX) \ | 390 | 463 | { \ | 391 | 463 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 463 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 463 | } \ |
|
394 | | static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \ |
395 | | const pixel *const src, \ |
396 | | const ptrdiff_t src_stride, \ |
397 | | const int w, const int h, \ |
398 | | const int mx, const int my, \ |
399 | | const int dx, const int dy \ |
400 | 18.1k | HIGHBD_DECL_SUFFIX) \ |
401 | 18.1k | { \ |
402 | 18.1k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ |
403 | 18.1k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
404 | 18.1k | } mc_tmpl.c:prep_8tap_regular_scaled_c Line | Count | Source | 400 | 7.36k | HIGHBD_DECL_SUFFIX) \ | 401 | 7.36k | { \ | 402 | 7.36k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 7.36k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 7.36k | } |
mc_tmpl.c:prep_8tap_regular_smooth_scaled_c Line | Count | Source | 400 | 339 | HIGHBD_DECL_SUFFIX) \ | 401 | 339 | { \ | 402 | 339 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 339 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 339 | } |
mc_tmpl.c:prep_8tap_regular_sharp_scaled_c Line | Count | Source | 400 | 2.45k | HIGHBD_DECL_SUFFIX) \ | 401 | 2.45k | { \ | 402 | 2.45k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 2.45k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 2.45k | } |
mc_tmpl.c:prep_8tap_sharp_regular_scaled_c Line | Count | Source | 400 | 647 | HIGHBD_DECL_SUFFIX) \ | 401 | 647 | { \ | 402 | 647 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 647 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 647 | } |
mc_tmpl.c:prep_8tap_sharp_smooth_scaled_c Line | Count | Source | 400 | 406 | HIGHBD_DECL_SUFFIX) \ | 401 | 406 | { \ | 402 | 406 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 406 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 406 | } |
mc_tmpl.c:prep_8tap_sharp_scaled_c Line | Count | Source | 400 | 1.71k | HIGHBD_DECL_SUFFIX) \ | 401 | 1.71k | { \ | 402 | 1.71k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 1.71k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 1.71k | } |
mc_tmpl.c:prep_8tap_smooth_regular_scaled_c Line | Count | Source | 400 | 504 | HIGHBD_DECL_SUFFIX) \ | 401 | 504 | { \ | 402 | 504 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 504 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 504 | } |
mc_tmpl.c:prep_8tap_smooth_scaled_c Line | Count | Source | 400 | 4.47k | HIGHBD_DECL_SUFFIX) \ | 401 | 4.47k | { \ | 402 | 4.47k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 4.47k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 4.47k | } |
mc_tmpl.c:prep_8tap_smooth_sharp_scaled_c Line | Count | Source | 400 | 237 | HIGHBD_DECL_SUFFIX) \ | 401 | 237 | { \ | 402 | 237 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 237 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 237 | } |
|
405 | | |
// Instantiate the wrappers for all nine (horizontal, vertical) pairs of
// REGULAR, SMOOTH and SHARP. In a two-word name the first word is the
// horizontal type (type_h) and the second the vertical type (type_v); a
// one-word name uses the same type for both.
406 | | filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR) |
407 | | filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP) |
408 | | filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH) |
409 | | filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH) |
410 | | filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR) |
411 | | filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP) |
412 | | filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP) |
413 | | filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR) |
414 | | filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH) |
415 | | |
// FILTER_BILIN: two-tap interpolation between src[x] and src[x + stride] with
// weight mxy out of 16, i.e. (16 - mxy) * src[x] + mxy * src[x + stride].
// The result is scaled by 16; the _RND forms shift it back down.
416 | | #define FILTER_BILIN(src, x, mxy, stride) \ |
417 | 23.7M | (16 * src[x] + ((mxy) * (src[x + stride] - src[x]))) |
418 | | |
// _RND: add half of (1 << sh) and shift right by sh. The added term is 0
// when sh is 0.
419 | | #define FILTER_BILIN_RND(src, x, mxy, stride, sh) \ |
420 | 23.7M | ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh)) |
421 | | |
// _CLIP: the _RND result passed through iclip_pixel().
422 | | #define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \ |
423 | 3.41M | iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh)) |
424 | | |
// The *2 forms take the two samples from two separate row pointers (src1,
// src2) at the same column x instead of one pointer plus a stride.
425 | | #define FILTER_BILIN2(src1, src2, x, mxy) \ |
426 | 10.2M | (16 * src1[x] + ((mxy) * (src2[x] - src1[x]))) |
427 | | |
428 | | #define FILTER_BILIN_RND2(src1, src2, x, mxy, sh) \ |
429 | 10.2M | ((FILTER_BILIN2(src1, src2, x, mxy) + ((1 << (sh)) >> 1)) >> (sh)) |
430 | | |
431 | | #define FILTER_BILIN_CLIP2(src1, src2, x, mxy, sh) \ |
432 | 9.24M | iclip_pixel(FILTER_BILIN_RND2(src1, src2, x, mxy, sh)) |
433 | | |
// put_bilin_c: bilinear subpel filter of a w x h block from src into dst, with
// the result clipped to pixel range. mx and my are the horizontal and vertical
// weights out of 16 (0 = no filtering in that direction). Both strides are
// converted with PXSTRIDE(). The loops are do/while, so h is assumed >= 1.
434 | | static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride, |
435 | | const pixel *src, ptrdiff_t src_stride, |
436 | | const int w, int h, const int mx, const int my |
437 | | HIGHBD_DECL_SUFFIX) |
438 | 156k | { |
439 | 156k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
440 | 156k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
441 | 156k | dst_stride = PXSTRIDE(dst_stride); |
442 | 156k | src_stride = PXSTRIDE(src_stride); |
443 | | |
444 | 156k | if (mx) { |
445 | 35.7k | if (my) { |
// Both directions: the horizontal pass fills h + 1 rows of `mid` (pitch 128,
// 129 rows -- presumably w, h <= 128; not checked here), then the vertical
// pass blends each row with the one below it.
446 | 17.5k | int16_t mid[128 * 129], *mid_ptr = mid; |
447 | 17.5k | int tmp_h = h + 1; |
448 | | |
449 | 140k | do { |
450 | 1.72M | for (int x = 0; x < w; x++) |
451 | 1.58M | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
452 | 140k | 4 - intermediate_bits); |
453 | | |
454 | 140k | mid_ptr += 128; |
455 | 140k | src += src_stride; |
456 | 140k | } while (--tmp_h); |
457 | | |
458 | 17.5k | mid_ptr = mid; |
459 | 122k | do { |
460 | 1.57M | for (int x = 0; x < w; x++) |
461 | 1.45M | dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, |
462 | 122k | 4 + intermediate_bits); |
463 | | |
464 | 122k | mid_ptr += 128; |
465 | 122k | dst += dst_stride; |
466 | 122k | } while (--h); |
467 | 18.2k | } else { |
// Horizontal only: rounded in two steps, first by (4 - intermediate_bits),
// then by intermediate_bits using intermediate_rnd.
468 | 256k | do { |
469 | 9.27M | for (int x = 0; x < w; x++) { |
470 | 9.01M | const int px = FILTER_BILIN_RND(src, x, mx, 1, |
471 | 9.01M | 4 - intermediate_bits); |
472 | 9.01M | dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits); |
473 | 9.01M | } |
474 | | |
475 | 256k | dst += dst_stride; |
476 | 256k | src += src_stride; |
477 | 256k | } while (--h); |
478 | 18.2k | } |
479 | 120k | } else if (my) { |
// Vertical only: blend each row of src with the row src_stride below it.
480 | 121k | do { |
481 | 2.09M | for (int x = 0; x < w; x++) |
482 | 1.96M | dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4); |
483 | | |
484 | 121k | dst += dst_stride; |
485 | 121k | src += src_stride; |
486 | 121k | } while (--h); |
487 | 15.4k | } else |
// No subpel offset in either direction: plain copy.
488 | 105k | put_c(dst, dst_stride, src, src_stride, w, h); |
489 | 156k | } |
490 | | |
// put_bilin_scaled_c: bilinear filter with a per-pixel source step (scaled
// prediction); the result is clipped to pixel range and written to dst.
// mx/my are the starting positions and dx/dy the per-output-pixel steps.
// Positions carry 10 fractional bits, and the top 4 fraction bits
// (`frac >> 6`) are the blend weight out of 16.
// The outer loop is do/while, so h is assumed >= 1.
491 | | static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, |
492 | | const pixel *src, ptrdiff_t src_stride, |
493 | | const int w, int h, const int mx, int my, |
494 | | const int dx, const int dy |
495 | | HIGHBD_DECL_SUFFIX) |
496 | 32.2k | { |
497 | 32.2k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
// Two horizontally filtered rows (pitch 128), addressed by row parity.
// in_y is incremented once per row filtered into `mid`. Starting at -2 makes
// the first output row run the fill loop (my >> 10) + 2 times.
498 | 32.2k | int16_t mid[128 * 2]; |
499 | 32.2k | int in_y = -2; |
500 | | |
501 | 323k | do { |
502 | 323k | int x; |
503 | 323k | int y = my >> 10; |
// mid1 is the slot with parity y, mid2 the slot with parity y + 1.
504 | 323k | int16_t *mid1 = &mid[(y & 1) * 128]; |
505 | 323k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
506 | 323k | int dmy = my & 0x3ff; |
507 | | |
// Produce source rows until in_y reaches y; each new row overwrites the slot
// with parity in_y.
508 | 467k | while (in_y < y) { |
509 | 144k | int imx = mx, ioff = 0; |
510 | 144k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
511 | | |
// Horizontal pass with a per-pixel phase: imx is the fractional position and
// ioff the integer column.
512 | 3.94M | for (x = 0; x < w; x++) { |
513 | 3.80M | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
514 | 3.80M | 4 - intermediate_bits); |
515 | 3.80M | imx += dx; |
516 | 3.80M | ioff += imx >> 10; |
517 | 3.80M | imx &= 0x3ff; |
518 | 3.80M | } |
519 | | |
520 | 144k | src += PXSTRIDE(src_stride); |
521 | 144k | in_y++; |
522 | 144k | } |
523 | | |
// Vertical blend of the two buffered rows, rounded back to pixel precision
// and clipped.
524 | 9.56M | for (x = 0; x < w; x++) |
525 | 9.24M | dst[x] = FILTER_BILIN_CLIP2(mid1, mid2, x, dmy >> 6, |
526 | 323k | 4 + intermediate_bits); |
527 | | |
528 | 323k | my += dy; |
529 | 323k | dst += PXSTRIDE(dst_stride); |
530 | 323k | } while (--h); |
531 | 32.2k | } |
532 | | |
// prep_bilin_c: bilinear subpel filter of a w x h block from src into the
// 16-bit intermediate buffer tmp (rows packed with pitch w, PREP_BIAS
// subtracted, no clipping). mx and my are the horizontal and vertical weights
// out of 16 (0 = no filtering in that direction). src_stride is converted
// with PXSTRIDE(). The loops are do/while, so h is assumed >= 1.
533 | | static void prep_bilin_c(int16_t *tmp, |
534 | | const pixel *src, ptrdiff_t src_stride, |
535 | | const int w, int h, const int mx, const int my |
536 | | HIGHBD_DECL_SUFFIX) |
537 | 13.1k | { |
538 | 13.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
539 | 13.1k | src_stride = PXSTRIDE(src_stride); |
540 | | |
541 | 13.1k | if (mx) { |
542 | 7.91k | if (my) { |
// Both directions: the horizontal pass fills h + 1 rows of `mid` (pitch 128,
// 129 rows -- presumably w, h <= 128; not checked here), then the vertical
// pass blends each row with the one below it.
543 | 6.08k | int16_t mid[128 * 129], *mid_ptr = mid; |
544 | 6.08k | int tmp_h = h + 1; |
545 | | |
546 | 74.4k | do { |
547 | 1.88M | for (int x = 0; x < w; x++) |
548 | 1.80M | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
549 | 74.4k | 4 - intermediate_bits); |
550 | | |
551 | 74.4k | mid_ptr += 128; |
552 | 74.4k | src += src_stride; |
553 | 74.4k | } while (--tmp_h); |
554 | | |
555 | 6.08k | mid_ptr = mid; |
// The vertical shift is 4 (put_bilin_c uses 4 + intermediate_bits), so the
// result stays at intermediate precision.
556 | 68.3k | do { |
557 | 1.78M | for (int x = 0; x < w; x++) |
558 | 1.71M | tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4) - |
559 | 1.71M | PREP_BIAS; |
560 | | |
561 | 68.3k | mid_ptr += 128; |
562 | 68.3k | tmp += w; |
563 | 68.3k | } while (--h); |
564 | 6.08k | } else { |
// Horizontal only.
565 | 24.7k | do { |
566 | 543k | for (int x = 0; x < w; x++) |
567 | 518k | tmp[x] = FILTER_BILIN_RND(src, x, mx, 1, |
568 | 518k | 4 - intermediate_bits) - |
569 | 518k | PREP_BIAS; |
570 | | |
571 | 24.7k | tmp += w; |
572 | 24.7k | src += src_stride; |
573 | 24.7k | } while (--h); |
574 | 1.82k | } |
575 | 7.91k | } else if (my) { |
// Vertical only: blend each row of src with the row src_stride below it.
576 | 33.1k | do { |
577 | 1.23M | for (int x = 0; x < w; x++) |
578 | 1.19M | tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride, |
579 | 1.19M | 4 - intermediate_bits) - PREP_BIAS; |
580 | | |
581 | 33.1k | tmp += w; |
582 | 33.1k | src += src_stride; |
583 | 33.1k | } while (--h); |
584 | 1.96k | } else |
// No subpel offset in either direction: plain format conversion.
585 | 3.26k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
586 | 13.1k | } |
587 | | |
588 | | static void prep_bilin_scaled_c(int16_t *tmp, |
589 | | const pixel *src, ptrdiff_t src_stride, |
590 | | const int w, int h, const int mx, int my, |
591 | | const int dx, const int dy HIGHBD_DECL_SUFFIX) |
592 | 1.24k | { |
593 | 1.24k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
594 | 1.24k | int16_t mid[128 * 2]; |
595 | 1.24k | int in_y = -2; |
596 | | |
597 | 19.6k | do { |
598 | 19.6k | int x; |
599 | 19.6k | int y = my >> 10; |
600 | 19.6k | int16_t *mid1 = &mid[(y & 1) * 128]; |
601 | 19.6k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
602 | 19.6k | int dmy = my & 0x3ff; |
603 | | |
604 | 34.6k | while (in_y < y) { |
605 | 15.0k | int imx = mx, ioff = 0; |
606 | 15.0k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
607 | | |
608 | 734k | for (x = 0; x < w; x++) { |
609 | 719k | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
610 | 719k | 4 - intermediate_bits); |
611 | 719k | imx += dx; |
612 | 719k | ioff += imx >> 10; |
613 | 719k | imx &= 0x3ff; |
614 | 719k | } |
615 | | |
616 | 15.0k | src += PXSTRIDE(src_stride); |
617 | 15.0k | in_y++; |
618 | 15.0k | } |
619 | | |
620 | 1.02M | for (x = 0; x < w; x++) |
621 | 1.00M | tmp[x] = FILTER_BILIN_RND2(mid1, mid2, x, dmy >> 6, 4) - PREP_BIAS; |
622 | | |
623 | 19.6k | my += dy; |
624 | 19.6k | tmp += w; |
625 | 19.6k | } while (--h); |
626 | 1.24k | } |
627 | | |
628 | | static void avg_c(pixel *dst, const ptrdiff_t dst_stride, |
629 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h |
630 | | HIGHBD_DECL_SUFFIX) |
631 | 37.6k | { |
632 | 37.6k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
633 | 37.6k | const int sh = intermediate_bits + 1; |
634 | 37.6k | const int rnd = (1 << intermediate_bits) + PREP_BIAS * 2; |
635 | 840k | do { |
636 | 31.1M | for (int x = 0; x < w; x++) |
637 | 30.3M | dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh); |
638 | | |
639 | 840k | tmp1 += w; |
640 | 840k | tmp2 += w; |
641 | 840k | dst += PXSTRIDE(dst_stride); |
642 | 840k | } while (--h); |
643 | 37.6k | } |
644 | | |
645 | | static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, |
646 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
647 | | const int weight HIGHBD_DECL_SUFFIX) |
648 | 3.81k | { |
649 | 3.81k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
650 | 3.81k | const int sh = intermediate_bits + 4; |
651 | 3.81k | const int rnd = (8 << intermediate_bits) + PREP_BIAS * 16; |
652 | 170k | do { |
653 | 8.31M | for (int x = 0; x < w; x++) |
654 | 8.14M | dst[x] = iclip_pixel((tmp1[x] * weight + |
655 | 8.14M | tmp2[x] * (16 - weight) + rnd) >> sh); |
656 | | |
657 | 170k | tmp1 += w; |
658 | 170k | tmp2 += w; |
659 | 170k | dst += PXSTRIDE(dst_stride); |
660 | 170k | } while (--h); |
661 | 3.81k | } |
662 | | |
663 | | static void mask_c(pixel *dst, const ptrdiff_t dst_stride, |
664 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
665 | | const uint8_t *mask HIGHBD_DECL_SUFFIX) |
666 | 5.85k | { |
667 | 5.85k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
668 | 5.85k | const int sh = intermediate_bits + 6; |
669 | 5.85k | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
670 | 165k | do { |
671 | 5.91M | for (int x = 0; x < w; x++) |
672 | 5.74M | dst[x] = iclip_pixel((tmp1[x] * mask[x] + |
673 | 5.74M | tmp2[x] * (64 - mask[x]) + rnd) >> sh); |
674 | | |
675 | 165k | tmp1 += w; |
676 | 165k | tmp2 += w; |
677 | 165k | mask += w; |
678 | 165k | dst += PXSTRIDE(dst_stride); |
679 | 165k | } while (--h); |
680 | 5.85k | } |
681 | | |
// Blend a and b with weight m out of 64, rounding to nearest:
// m == 0 yields a, m == 64 yields b. Every argument is parenthesized so
// that expression arguments (e.g. "p + 1" or "m0 + m1") expand with the
// intended precedence. Note that m is evaluated twice.
#define blend_px(a, b, m) ((((a) * (64 - (m)) + (b) * (m)) + 32) >> 6)
683 | | static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
684 | | const int w, int h, const uint8_t *mask) |
685 | 5.74k | { |
686 | 59.4k | do { |
687 | 688k | for (int x = 0; x < w; x++) { |
688 | 629k | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
689 | 629k | } |
690 | 59.4k | dst += PXSTRIDE(dst_stride); |
691 | 59.4k | tmp += w; |
692 | 59.4k | mask += w; |
693 | 59.4k | } while (--h); |
694 | 5.74k | } |
695 | | |
696 | | static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
697 | | const int w, int h) |
698 | 44.6k | { |
699 | 44.6k | const uint8_t *const mask = &dav1d_obmc_masks[w]; |
700 | 521k | do { |
701 | 4.50M | for (int x = 0; x < (w * 3) >> 2; x++) { |
702 | 3.98M | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
703 | 3.98M | } |
704 | 521k | dst += PXSTRIDE(dst_stride); |
705 | 521k | tmp += w; |
706 | 521k | } while (--h); |
707 | 44.6k | } |
708 | | |
709 | | static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
710 | | const int w, int h) |
711 | 40.8k | { |
712 | 40.8k | const uint8_t *mask = &dav1d_obmc_masks[h]; |
713 | 40.8k | h = (h * 3) >> 2; |
714 | 176k | do { |
715 | 176k | const int m = *mask++; |
716 | 2.94M | for (int x = 0; x < w; x++) { |
717 | 2.76M | dst[x] = blend_px(dst[x], tmp[x], m); |
718 | 2.76M | } |
719 | 176k | dst += PXSTRIDE(dst_stride); |
720 | 176k | tmp += w; |
721 | 176k | } while (--h); |
722 | 40.8k | } |
723 | | |
724 | | static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, |
725 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
726 | | uint8_t *mask, const int sign, |
727 | | const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX) |
728 | 2.19k | { |
729 | | // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows, |
730 | | // and then load this intermediate to calculate final value for odd rows |
731 | 2.19k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
732 | 2.19k | const int bitdepth = bitdepth_from_max(bitdepth_max); |
733 | 2.19k | const int sh = intermediate_bits + 6; |
734 | 2.19k | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
735 | 2.19k | const int mask_sh = bitdepth + intermediate_bits - 4; |
736 | 2.19k | const int mask_rnd = 1 << (mask_sh - 5); |
737 | 149k | do { |
738 | 5.67M | for (int x = 0; x < w; x++) { |
739 | 5.52M | const int tmpdiff = tmp1[x] - tmp2[x]; |
740 | 5.52M | const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
741 | 5.52M | dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh); |
742 | | |
743 | 5.52M | if (ss_hor) { |
744 | 2.33M | x++; |
745 | | |
746 | 2.33M | const int tmpdiff = tmp1[x] - tmp2[x]; |
747 | 2.33M | const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
748 | 2.33M | dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh); |
749 | | |
750 | 2.33M | if (h & ss_ver) { |
751 | 1.16M | mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2; |
752 | 1.16M | } else if (ss_ver) { |
753 | 1.16M | mask[x >> 1] = m + n; |
754 | 18.4E | } else { |
755 | 18.4E | mask[x >> 1] = (m + n + 1 - sign) >> 1; |
756 | 18.4E | } |
757 | 3.19M | } else { |
758 | 3.19M | mask[x] = m; |
759 | 3.19M | } |
760 | 5.52M | } |
761 | | |
762 | 149k | tmp1 += w; |
763 | 149k | tmp2 += w; |
764 | 149k | dst += PXSTRIDE(dst_stride); |
765 | 149k | if (!ss_ver || (h & 1)) mask += w >> ss_hor; |
766 | 149k | } while (--h); |
767 | 2.19k | } |
768 | | |
769 | | #define w_mask_fns(ssn, ss_hor, ss_ver) \ |
770 | | static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \ |
771 | | const int16_t *const tmp1, const int16_t *const tmp2, \ |
772 | | const int w, const int h, uint8_t *mask, \ |
773 | 2.19k | const int sign HIGHBD_DECL_SUFFIX) \ |
774 | 2.19k | { \ |
775 | 2.19k | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ |
776 | 2.19k | HIGHBD_TAIL_SUFFIX); \ |
777 | 2.19k | } Line | Count | Source | 773 | 850 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 850 | { \ | 775 | 850 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 850 | HIGHBD_TAIL_SUFFIX); \ | 777 | 850 | } |
Line | Count | Source | 773 | 107 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 107 | { \ | 775 | 107 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 107 | HIGHBD_TAIL_SUFFIX); \ | 777 | 107 | } |
Line | Count | Source | 773 | 1.23k | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 1.23k | { \ | 775 | 1.23k | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 1.23k | HIGHBD_TAIL_SUFFIX); \ | 777 | 1.23k | } |
|
778 | | |
779 | | w_mask_fns(444, 0, 0); |
780 | | w_mask_fns(422, 1, 0); |
781 | | w_mask_fns(420, 1, 1); |
782 | | |
783 | | #undef w_mask_fns |
784 | | |
// 8-tap filter with rounding: multiplies the taps F[0..7] with the eight
// samples at src[x - 3 * stride] .. src[x + 4 * stride], adds half of
// (1 << sh) and shifts the sum right by sh. All arguments are
// parenthesized so that expression arguments expand with the intended
// precedence; src, x, F and stride are evaluated more than once.
#define FILTER_WARP_RND(src, x, F, stride, sh) \
    (((F)[0] * (src)[(x) - 3 * (stride)] + \
      (F)[1] * (src)[(x) - 2 * (stride)] + \
      (F)[2] * (src)[(x) - 1 * (stride)] + \
      (F)[3] * (src)[(x) + 0 * (stride)] + \
      (F)[4] * (src)[(x) + 1 * (stride)] + \
      (F)[5] * (src)[(x) + 2 * (stride)] + \
      (F)[6] * (src)[(x) + 3 * (stride)] + \
      (F)[7] * (src)[(x) + 4 * (stride)] + \
      ((1 << (sh)) >> 1)) >> (sh))

// Same as FILTER_WARP_RND, with the result passed through iclip_pixel().
#define FILTER_WARP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh))
798 | | |
799 | | static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, |
800 | | const pixel *src, const ptrdiff_t src_stride, |
801 | | const int16_t *const abcd, int mx, int my |
802 | | HIGHBD_DECL_SUFFIX) |
803 | 183k | { |
804 | 183k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
805 | 183k | int16_t mid[15 * 8], *mid_ptr = mid; |
806 | | |
807 | 183k | src -= 3 * PXSTRIDE(src_stride); |
808 | 2.90M | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
809 | 24.1M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
810 | 21.3M | const int8_t *const filter = |
811 | 21.3M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
812 | | |
813 | 21.3M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
814 | 21.3M | 7 - intermediate_bits); |
815 | 21.3M | } |
816 | 2.72M | src += PXSTRIDE(src_stride); |
817 | 2.72M | mid_ptr += 8; |
818 | 2.72M | } |
819 | | |
820 | 183k | mid_ptr = &mid[3 * 8]; |
821 | 1.64M | for (int y = 0; y < 8; y++, my += abcd[3]) { |
822 | 12.8M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
823 | 11.3M | const int8_t *const filter = |
824 | 11.3M | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
825 | | |
826 | 11.3M | dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, |
827 | 11.3M | 7 + intermediate_bits); |
828 | 11.3M | } |
829 | 1.45M | mid_ptr += 8; |
830 | 1.45M | dst += PXSTRIDE(dst_stride); |
831 | 1.45M | } |
832 | 183k | } |
833 | | |
834 | | static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride, |
835 | | const pixel *src, const ptrdiff_t src_stride, |
836 | | const int16_t *const abcd, int mx, int my |
837 | | HIGHBD_DECL_SUFFIX) |
838 | 25.3k | { |
839 | 25.3k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
840 | 25.3k | int16_t mid[15 * 8], *mid_ptr = mid; |
841 | | |
842 | 25.3k | src -= 3 * PXSTRIDE(src_stride); |
843 | 406k | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
844 | 3.42M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
845 | 3.04M | const int8_t *const filter = |
846 | 3.04M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
847 | | |
848 | 3.04M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
849 | 3.04M | 7 - intermediate_bits); |
850 | 3.04M | } |
851 | 380k | src += PXSTRIDE(src_stride); |
852 | 380k | mid_ptr += 8; |
853 | 380k | } |
854 | | |
855 | 25.3k | mid_ptr = &mid[3 * 8]; |
856 | 228k | for (int y = 0; y < 8; y++, my += abcd[3]) { |
857 | 1.82M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
858 | 1.62M | const int8_t *const filter = |
859 | 1.62M | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
860 | | |
861 | 1.62M | tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7) - PREP_BIAS; |
862 | 1.62M | } |
863 | 203k | mid_ptr += 8; |
864 | 203k | tmp += tmp_stride; |
865 | 203k | } |
866 | 25.3k | } |
867 | | |
868 | | static void emu_edge_c(const intptr_t bw, const intptr_t bh, |
869 | | const intptr_t iw, const intptr_t ih, |
870 | | const intptr_t x, const intptr_t y, |
871 | | pixel *dst, const ptrdiff_t dst_stride, |
872 | | const pixel *ref, const ptrdiff_t ref_stride) |
873 | 477k | { |
874 | | // find offset in reference of visible block to copy |
875 | 477k | ref += iclip((int) y, 0, (int) ih - 1) * PXSTRIDE(ref_stride) + |
876 | 477k | iclip((int) x, 0, (int) iw - 1); |
877 | | |
878 | | // number of pixels to extend (left, right, top, bottom) |
879 | 477k | const int left_ext = iclip((int) -x, 0, (int) bw - 1); |
880 | 477k | const int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1); |
881 | 477k | assert(left_ext + right_ext < bw); |
882 | 477k | const int top_ext = iclip((int) -y, 0, (int) bh - 1); |
883 | 477k | const int bottom_ext = iclip((int) (y + bh - ih), 0, (int) bh - 1); |
884 | 477k | assert(top_ext + bottom_ext < bh); |
885 | | |
886 | | // copy visible portion first |
887 | 477k | pixel *blk = dst + top_ext * PXSTRIDE(dst_stride); |
888 | 477k | const int center_w = (int) (bw - left_ext - right_ext); |
889 | 477k | const int center_h = (int) (bh - top_ext - bottom_ext); |
890 | 6.11M | for (int y = 0; y < center_h; y++) { |
891 | 5.63M | pixel_copy(blk + left_ext, ref, center_w); |
892 | | // extend left edge for this line |
893 | 5.63M | if (left_ext) |
894 | 1.22M | pixel_set(blk, blk[left_ext], left_ext); |
895 | | // extend right edge for this line |
896 | 5.63M | if (right_ext) |
897 | 4.06M | pixel_set(blk + left_ext + center_w, blk[left_ext + center_w - 1], |
898 | 4.06M | right_ext); |
899 | 5.63M | ref += PXSTRIDE(ref_stride); |
900 | 5.63M | blk += PXSTRIDE(dst_stride); |
901 | 5.63M | } |
902 | | |
903 | | // copy top |
904 | 477k | blk = dst + top_ext * PXSTRIDE(dst_stride); |
905 | 1.04M | for (int y = 0; y < top_ext; y++) { |
906 | 563k | pixel_copy(dst, blk, bw); |
907 | 563k | dst += PXSTRIDE(dst_stride); |
908 | 563k | } |
909 | | |
910 | | // copy bottom |
911 | 477k | dst += center_h * PXSTRIDE(dst_stride); |
912 | 4.17M | for (int y = 0; y < bottom_ext; y++) { |
913 | 3.69M | pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], bw); |
914 | 3.69M | dst += PXSTRIDE(dst_stride); |
915 | 3.69M | } |
916 | 477k | } |
917 | | |
918 | | static void resize_c(pixel *dst, const ptrdiff_t dst_stride, |
919 | | const pixel *src, const ptrdiff_t src_stride, |
920 | | const int dst_w, int h, const int src_w, |
921 | | const int dx, const int mx0 HIGHBD_DECL_SUFFIX) |
922 | 59.4k | { |
923 | 1.84M | do { |
924 | 1.84M | int mx = mx0, src_x = -1; |
925 | 471M | for (int x = 0; x < dst_w; x++) { |
926 | 469M | const int8_t *const F = dav1d_resize_filter[mx >> 8]; |
927 | 469M | dst[x] = iclip_pixel((-(F[0] * src[iclip(src_x - 3, 0, src_w - 1)] + |
928 | 469M | F[1] * src[iclip(src_x - 2, 0, src_w - 1)] + |
929 | 469M | F[2] * src[iclip(src_x - 1, 0, src_w - 1)] + |
930 | 469M | F[3] * src[iclip(src_x + 0, 0, src_w - 1)] + |
931 | 469M | F[4] * src[iclip(src_x + 1, 0, src_w - 1)] + |
932 | 469M | F[5] * src[iclip(src_x + 2, 0, src_w - 1)] + |
933 | 469M | F[6] * src[iclip(src_x + 3, 0, src_w - 1)] + |
934 | 469M | F[7] * src[iclip(src_x + 4, 0, src_w - 1)]) + |
935 | 469M | 64) >> 7); |
936 | 469M | mx += dx; |
937 | 469M | src_x += mx >> 14; |
938 | 469M | mx &= 0x3fff; |
939 | 469M | } |
940 | | |
941 | 1.84M | dst += PXSTRIDE(dst_stride); |
942 | 1.84M | src += PXSTRIDE(src_stride); |
943 | 1.84M | } while (--h); |
944 | 59.4k | } |
945 | | |
946 | | #if HAVE_ASM |
947 | | #if ARCH_AARCH64 || ARCH_ARM |
948 | | #include "src/arm/mc.h" |
949 | | #elif ARCH_LOONGARCH64 |
950 | | #include "src/loongarch/mc.h" |
951 | | #elif ARCH_PPC64LE |
952 | | #include "src/ppc/mc.h" |
953 | | #elif ARCH_RISCV |
954 | | #include "src/riscv/mc.h" |
955 | | #elif ARCH_X86 |
956 | | #include "src/x86/mc.h" |
957 | | #endif |
958 | | #endif |
959 | | |
960 | 12.2k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { |
961 | 122k | #define init_mc_fns(type, name) do { \ |
962 | 122k | c->mc [type] = put_##name##_c; \ |
963 | 122k | c->mc_scaled [type] = put_##name##_scaled_c; \ |
964 | 122k | c->mct [type] = prep_##name##_c; \ |
965 | 122k | c->mct_scaled[type] = prep_##name##_scaled_c; \ |
966 | 122k | } while (0) |
967 | | |
968 | 12.2k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); |
969 | 12.2k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); |
970 | 12.2k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); |
971 | 12.2k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); |
972 | 12.2k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); |
973 | 12.2k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); |
974 | 12.2k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); |
975 | 12.2k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); |
976 | 12.2k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); |
977 | 12.2k | init_mc_fns(FILTER_2D_BILINEAR, bilin); |
978 | | |
979 | 12.2k | c->avg = avg_c; |
980 | 12.2k | c->w_avg = w_avg_c; |
981 | 12.2k | c->mask = mask_c; |
982 | 12.2k | c->blend = blend_c; |
983 | 12.2k | c->blend_v = blend_v_c; |
984 | 12.2k | c->blend_h = blend_h_c; |
985 | 12.2k | c->w_mask[0] = w_mask_444_c; |
986 | 12.2k | c->w_mask[1] = w_mask_422_c; |
987 | 12.2k | c->w_mask[2] = w_mask_420_c; |
988 | 12.2k | c->warp8x8 = warp_affine_8x8_c; |
989 | 12.2k | c->warp8x8t = warp_affine_8x8t_c; |
990 | 12.2k | c->emu_edge = emu_edge_c; |
991 | 12.2k | c->resize = resize_c; |
992 | | |
993 | | #if HAVE_ASM |
994 | | #if ARCH_AARCH64 || ARCH_ARM |
995 | | mc_dsp_init_arm(c); |
996 | | #elif ARCH_LOONGARCH64 |
997 | | mc_dsp_init_loongarch(c); |
998 | | #elif ARCH_PPC64LE |
999 | | mc_dsp_init_ppc(c); |
1000 | | #elif ARCH_RISCV |
1001 | | mc_dsp_init_riscv(c); |
1002 | | #elif ARCH_X86 |
1003 | | mc_dsp_init_x86(c); |
1004 | | #endif |
1005 | | #endif |
1006 | 12.2k | } Line | Count | Source | 960 | 6.01k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 6.01k | #define init_mc_fns(type, name) do { \ | 962 | 6.01k | c->mc [type] = put_##name##_c; \ | 963 | 6.01k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 6.01k | c->mct [type] = prep_##name##_c; \ | 965 | 6.01k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 6.01k | } while (0) | 967 | | | 968 | 6.01k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 6.01k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 6.01k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 6.01k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 6.01k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 6.01k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 6.01k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 6.01k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 6.01k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 6.01k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 6.01k | c->avg = avg_c; | 980 | 6.01k | c->w_avg = w_avg_c; | 981 | 6.01k | c->mask = mask_c; | 982 | 6.01k | c->blend = blend_c; | 983 | 6.01k | c->blend_v = blend_v_c; | 984 | 6.01k | c->blend_h = blend_h_c; | 985 | 6.01k | c->w_mask[0] = w_mask_444_c; | 986 | 6.01k | c->w_mask[1] = w_mask_422_c; | 987 | 6.01k | c->w_mask[2] = w_mask_420_c; | 988 | 6.01k | c->warp8x8 = warp_affine_8x8_c; | 989 | 6.01k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 6.01k | c->emu_edge = emu_edge_c; | 991 | 6.01k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 
1002 | | #elif ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 6.01k | } |
Line | Count | Source | 960 | 6.23k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 6.23k | #define init_mc_fns(type, name) do { \ | 962 | 6.23k | c->mc [type] = put_##name##_c; \ | 963 | 6.23k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 6.23k | c->mct [type] = prep_##name##_c; \ | 965 | 6.23k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 6.23k | } while (0) | 967 | | | 968 | 6.23k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 6.23k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 6.23k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 6.23k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 6.23k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 6.23k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 6.23k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 6.23k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 6.23k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 6.23k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 6.23k | c->avg = avg_c; | 980 | 6.23k | c->w_avg = w_avg_c; | 981 | 6.23k | c->mask = mask_c; | 982 | 6.23k | c->blend = blend_c; | 983 | 6.23k | c->blend_v = blend_v_c; | 984 | 6.23k | c->blend_h = blend_h_c; | 985 | 6.23k | c->w_mask[0] = w_mask_444_c; | 986 | 6.23k | c->w_mask[1] = w_mask_422_c; | 987 | 6.23k | c->w_mask[2] = w_mask_420_c; | 988 | 6.23k | c->warp8x8 = warp_affine_8x8_c; | 989 | 6.23k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 6.23k | c->emu_edge = emu_edge_c; | 991 | 6.23k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 1002 | | #elif 
ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 6.23k | } |
|