/work/dav1d/src/mc_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/attributes.h" |
34 | | #include "common/intops.h" |
35 | | |
36 | | #include "src/mc.h" |
37 | | #include "src/tables.h" |
38 | | |
#if BITDEPTH == 8
/* 8-bit build: the intermediate precision is a fixed 4 extra bits and the
 * macro argument is not evaluated. */
#define get_intermediate_bits(bitdepth_max) 4
/* prep output lies in [-5132, 9212], which already fits int16_t, so no
 * bias needs to be removed. */
#define PREP_BIAS 0
#else
/* High-bitdepth build: 4 extra bits for 10 bits/component, 2 extra bits for
 * 12 bits/component. */
#define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))
/* prep output lies in [-20588, 36956] (10-bit) or [-20602, 36983] (12-bit);
 * a constant bias is subtracted so the stored value fits int16_t. */
#define PREP_BIAS 8192
#endif
50 | | |
51 | | static NOINLINE void |
52 | | put_c(pixel *dst, const ptrdiff_t dst_stride, |
53 | | const pixel *src, const ptrdiff_t src_stride, const int w, int h) |
54 | 281k | { |
55 | 5.55M | do { |
56 | 5.55M | pixel_copy(dst, src, w); |
57 | | |
58 | 5.55M | dst += dst_stride; |
59 | 5.55M | src += src_stride; |
60 | 5.55M | } while (--h); |
61 | 281k | } |
62 | | |
63 | | static NOINLINE void |
64 | | prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride, |
65 | | const int w, int h HIGHBD_DECL_SUFFIX) |
66 | 28.4k | { |
67 | 28.4k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
68 | 995k | do { |
69 | 45.6M | for (int x = 0; x < w; x++) |
70 | 44.6M | tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS; |
71 | | |
72 | 995k | tmp += w; |
73 | 995k | src += src_stride; |
74 | 995k | } while (--h); |
75 | 28.4k | } |
76 | | |
/*
 * 8-tap FIR helpers.
 *
 * Every macro parameter is parenthesized in the expansion so that expression
 * arguments (e.g. a stride written as `a + b`) are evaluated as a unit
 * instead of being torn apart by operator precedence.
 * Note that the arguments are still evaluated several times: do not pass
 * expressions with side effects.
 */

/* Weighted sum of the 8 samples src[x - 3*stride] .. src[x + 4*stride]
 * with the coefficients F[0..7]. */
#define FILTER_8TAP(src, x, F, stride) \
    ((F)[0] * (src)[(x) + -3 * (stride)] + \
     (F)[1] * (src)[(x) + -2 * (stride)] + \
     (F)[2] * (src)[(x) + -1 * (stride)] + \
     (F)[3] * (src)[(x) + +0 * (stride)] + \
     (F)[4] * (src)[(x) + +1 * (stride)] + \
     (F)[5] * (src)[(x) + +2 * (stride)] + \
     (F)[6] * (src)[(x) + +3 * (stride)] + \
     (F)[7] * (src)[(x) + +4 * (stride)])

/* Same weighted sum, but the 8 taps come from 8 separate row pointers
 * src[0] .. src[7], all read at column x. */
#define FILTER_8TAP2(src, x, F) \
    ((F)[0] * (src)[0][(x)] + \
     (F)[1] * (src)[1][(x)] + \
     (F)[2] * (src)[2][(x)] + \
     (F)[3] * (src)[3][(x)] + \
     (F)[4] * (src)[4][(x)] + \
     (F)[5] * (src)[5][(x)] + \
     (F)[6] * (src)[6][(x)] + \
     (F)[7] * (src)[7][(x)])

/* FILTER_8TAP followed by a rounding right shift by sh
 * (adds half of 1 << sh before shifting; adds 0 when sh == 0). */
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
    ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))

/* FILTER_8TAP with a caller-supplied rounding constant rnd, then >> sh. */
#define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \
    ((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh))

/* Row-pointer variant (FILTER_8TAP2) with a rounding right shift by sh. */
#define DAV1D_FILTER_8TAP_RND3(src, x, F, sh) \
    ((FILTER_8TAP2(src, x, F) + ((1 << (sh)) >> 1)) >> (sh))

/* The *_CLIP* variants pass the rounded result through iclip_pixel(). */
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))

#define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh))

#define DAV1D_FILTER_8TAP_CLIP3(src, x, F, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND3(src, x, F, sh))
114 | | |
/*
 * Filter-selection helpers. Each one *declares* a local coefficient pointer
 * and relies on names from the enclosing scope (filter_type, plus w or h).
 *
 * GET_H_FILTER(mx) declares `fh`:
 *   - NULL when mx is 0 (no horizontal filtering needed);
 *   - otherwise entry (mx - 1) of dav1d_mc_subpel_filters, taken from table
 *     (filter_type & 3) when w > 4, or from table 3 + (filter_type & 1)
 *     when w <= 4.
 */
#define GET_H_FILTER(mx) \
    const int8_t *const fh = (mx) ? \
        (w > 4 ? dav1d_mc_subpel_filters[filter_type & 3][(mx) - 1] : \
                 dav1d_mc_subpel_filters[3 + (filter_type & 1)][(mx) - 1]) : \
        NULL

/*
 * GET_V_FILTER(my) declares `fv` the same way for the vertical direction,
 * using the upper bits of filter_type (filter_type >> 2) and h instead of w.
 */
#define GET_V_FILTER(my) \
    const int8_t *const fv = (my) ? \
        (h > 4 ? dav1d_mc_subpel_filters[filter_type >> 2][(my) - 1] : \
                 dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][(my) - 1]) : \
        NULL

/* Declares both fh and fv from the enclosing scope's mx and my. */
#define GET_FILTERS() \
    GET_H_FILTER(mx); \
    GET_V_FILTER(my)
128 | | |
129 | | static NOINLINE void |
130 | | put_8tap_c(pixel *dst, ptrdiff_t dst_stride, |
131 | | const pixel *src, ptrdiff_t src_stride, |
132 | | const int w, int h, const int mx, const int my, |
133 | | const int filter_type HIGHBD_DECL_SUFFIX) |
134 | 318k | { |
135 | 318k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
136 | 318k | const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1); |
137 | | |
138 | 318k | GET_FILTERS(); |
139 | 318k | dst_stride = PXSTRIDE(dst_stride); |
140 | 318k | src_stride = PXSTRIDE(src_stride); |
141 | | |
142 | 318k | if (fh) { |
143 | 87.0k | if (fv) { |
144 | 40.3k | int tmp_h = h + 7; |
145 | 40.3k | int16_t mid[128 * 135], *mid_ptr = mid; |
146 | | |
147 | 40.3k | src -= src_stride * 3; |
148 | 1.24M | do { |
149 | 54.9M | for (int x = 0; x < w; x++) |
150 | 53.7M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
151 | 1.24M | 6 - intermediate_bits); |
152 | | |
153 | 1.24M | mid_ptr += 128; |
154 | 1.24M | src += src_stride; |
155 | 1.24M | } while (--tmp_h); |
156 | | |
157 | 40.3k | mid_ptr = mid + 128 * 3; |
158 | 1.07M | do { |
159 | 45.1M | for (int x = 0; x < w; x++) |
160 | 44.0M | dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, |
161 | 1.07M | 6 + intermediate_bits); |
162 | | |
163 | 1.07M | mid_ptr += 128; |
164 | 1.07M | dst += dst_stride; |
165 | 1.07M | } while (--h); |
166 | 46.6k | } else { |
167 | 586k | do { |
168 | 17.4M | for (int x = 0; x < w; x++) { |
169 | 16.8M | dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1, |
170 | 16.8M | intermediate_rnd, 6); |
171 | 16.8M | } |
172 | | |
173 | 586k | dst += dst_stride; |
174 | 586k | src += src_stride; |
175 | 586k | } while (--h); |
176 | 46.6k | } |
177 | 231k | } else if (fv) { |
178 | 281k | do { |
179 | 10.5M | for (int x = 0; x < w; x++) |
180 | 10.2M | dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6); |
181 | | |
182 | 281k | dst += dst_stride; |
183 | 281k | src += src_stride; |
184 | 281k | } while (--h); |
185 | 19.9k | } else |
186 | 211k | put_c(dst, dst_stride, src, src_stride, w, h); |
187 | 318k | } |
188 | | |
189 | | static NOINLINE void |
190 | | put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, |
191 | | const pixel *src, ptrdiff_t src_stride, |
192 | | const int w, int h, const int mx, int my, |
193 | | const int dx, const int dy, const int filter_type |
194 | | HIGHBD_DECL_SUFFIX) |
195 | 48.5k | { |
196 | 48.5k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
197 | 48.5k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
198 | 48.5k | int16_t mid[128 * 8]; |
199 | 48.5k | int16_t *mid_ptrs[8]; |
200 | 48.5k | int in_y = -8; |
201 | 48.5k | src_stride = PXSTRIDE(src_stride); |
202 | | |
203 | 436k | for (int i = 0; i < 8; i++) |
204 | 388k | mid_ptrs[i] = &mid[128 * i]; |
205 | | |
206 | 48.5k | src -= src_stride * 3; |
207 | | |
208 | 1.71M | for (int y = 0; y < h; y++) { |
209 | 1.66M | int x; |
210 | 1.66M | int src_y = my >> 10; |
211 | 1.66M | GET_V_FILTER((my & 0x3ff) >> 6); |
212 | | |
213 | 2.66M | while (in_y < src_y) { |
214 | 1.00M | int imx = mx, ioff = 0; |
215 | 1.00M | int16_t *mid_ptr = mid_ptrs[0]; |
216 | | |
217 | 8.00M | for (int i = 0; i < 7; i++) |
218 | 7.00M | mid_ptrs[i] = mid_ptrs[i + 1]; |
219 | 1.00M | mid_ptrs[7] = mid_ptr; |
220 | | |
221 | 48.1M | for (x = 0; x < w; x++) { |
222 | 47.1M | GET_H_FILTER(imx >> 6); |
223 | 47.1M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
224 | 47.1M | 6 - intermediate_bits) : |
225 | 47.1M | src[ioff] << intermediate_bits; |
226 | 47.1M | imx += dx; |
227 | 47.1M | ioff += imx >> 10; |
228 | 47.1M | imx &= 0x3ff; |
229 | 47.1M | } |
230 | | |
231 | 1.00M | src += src_stride; |
232 | 1.00M | in_y++; |
233 | 1.00M | } |
234 | | |
235 | 81.1M | for (x = 0; x < w; x++) |
236 | 79.5M | dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP3(mid_ptrs, x, fv, |
237 | 79.5M | 6 + intermediate_bits) : |
238 | 79.5M | iclip_pixel((mid_ptrs[3][x] + intermediate_rnd) >> |
239 | 5.10M | intermediate_bits); |
240 | | |
241 | 1.66M | my += dy; |
242 | 1.66M | dst += PXSTRIDE(dst_stride); |
243 | 1.66M | } |
244 | 48.5k | } |
245 | | |
246 | | static NOINLINE void |
247 | | prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
248 | | const int w, int h, const int mx, const int my, |
249 | | const int filter_type HIGHBD_DECL_SUFFIX) |
250 | 40.4k | { |
251 | 40.4k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
252 | 40.4k | GET_FILTERS(); |
253 | 40.4k | src_stride = PXSTRIDE(src_stride); |
254 | | |
255 | 40.4k | if (fh) { |
256 | 8.13k | if (fv) { |
257 | 5.26k | int tmp_h = h + 7; |
258 | 5.26k | int16_t mid[128 * 135], *mid_ptr = mid; |
259 | | |
260 | 5.26k | src -= src_stride * 3; |
261 | 115k | do { |
262 | 2.59M | for (int x = 0; x < w; x++) |
263 | 2.48M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
264 | 115k | 6 - intermediate_bits); |
265 | | |
266 | 115k | mid_ptr += 128; |
267 | 115k | src += src_stride; |
268 | 115k | } while (--tmp_h); |
269 | | |
270 | 5.26k | mid_ptr = mid + 128 * 3; |
271 | 78.7k | do { |
272 | 2.07M | for (int x = 0; x < w; x++) { |
273 | 1.99M | int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) - |
274 | 1.99M | PREP_BIAS; |
275 | 1.99M | assert(t >= INT16_MIN && t <= INT16_MAX); |
276 | 1.99M | tmp[x] = t; |
277 | 1.99M | } |
278 | | |
279 | 78.7k | mid_ptr += 128; |
280 | 78.7k | tmp += w; |
281 | 78.7k | } while (--h); |
282 | 5.26k | } else { |
283 | 46.4k | do { |
284 | 1.20M | for (int x = 0; x < w; x++) |
285 | 1.15M | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
286 | 1.15M | 6 - intermediate_bits) - |
287 | 1.15M | PREP_BIAS; |
288 | | |
289 | 46.4k | tmp += w; |
290 | 46.4k | src += src_stride; |
291 | 46.4k | } while (--h); |
292 | 2.87k | } |
293 | 32.3k | } else if (fv) { |
294 | 115k | do { |
295 | 3.57M | for (int x = 0; x < w; x++) |
296 | 3.45M | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, |
297 | 3.45M | 6 - intermediate_bits) - |
298 | 3.45M | PREP_BIAS; |
299 | | |
300 | 115k | tmp += w; |
301 | 115k | src += src_stride; |
302 | 115k | } while (--h); |
303 | 4.76k | } else |
304 | 27.5k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
305 | 40.4k | } |
306 | | |
307 | | static NOINLINE void |
308 | | prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
309 | | const int w, int h, const int mx, int my, |
310 | | const int dx, const int dy, const int filter_type |
311 | | HIGHBD_DECL_SUFFIX) |
312 | 13.9k | { |
313 | 13.9k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
314 | 13.9k | int16_t mid[128 * 8]; |
315 | 13.9k | int16_t *mid_ptrs[8]; |
316 | 13.9k | int in_y = -8; |
317 | 13.9k | src_stride = PXSTRIDE(src_stride); |
318 | | |
319 | 125k | for (int i = 0; i < 8; i++) |
320 | 111k | mid_ptrs[i] = &mid[128 * i]; |
321 | | |
322 | 13.9k | src -= src_stride * 3; |
323 | | |
324 | 697k | for (int y = 0; y < h; y++) { |
325 | 683k | int x; |
326 | 683k | int src_y = my >> 10; |
327 | 683k | GET_V_FILTER((my & 0x3ff) >> 6); |
328 | | |
329 | 990k | while (in_y < src_y) { |
330 | 306k | int imx = mx, ioff = 0; |
331 | 306k | int16_t *mid_ptr = mid_ptrs[0]; |
332 | | |
333 | 2.45M | for (int i = 0; i < 7; i++) |
334 | 2.14M | mid_ptrs[i] = mid_ptrs[i + 1]; |
335 | 306k | mid_ptrs[7] = mid_ptr; |
336 | | |
337 | 12.3M | for (x = 0; x < w; x++) { |
338 | 12.0M | GET_H_FILTER(imx >> 6); |
339 | 12.0M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
340 | 12.0M | 6 - intermediate_bits) : |
341 | 12.0M | src[ioff] << intermediate_bits; |
342 | 12.0M | imx += dx; |
343 | 12.0M | ioff += imx >> 10; |
344 | 12.0M | imx &= 0x3ff; |
345 | 12.0M | } |
346 | | |
347 | 306k | src += src_stride; |
348 | 306k | in_y++; |
349 | 306k | } |
350 | | |
351 | 31.4M | for (x = 0; x < w; x++) |
352 | 30.7M | tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND3(mid_ptrs, x, fv, 6) |
353 | 30.7M | : mid_ptrs[3][x]) - PREP_BIAS; |
354 | | |
355 | 683k | my += dy; |
356 | 683k | tmp += w; |
357 | 683k | } |
358 | 13.9k | } |
359 | | |
360 | | #define filter_fns(type, type_h, type_v) \ |
361 | | static void put_8tap_##type##_c(pixel *const dst, \ |
362 | | const ptrdiff_t dst_stride, \ |
363 | | const pixel *const src, \ |
364 | | const ptrdiff_t src_stride, \ |
365 | | const int w, const int h, \ |
366 | | const int mx, const int my \ |
367 | 318k | HIGHBD_DECL_SUFFIX) \ |
368 | 318k | { \ |
369 | 318k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ |
370 | 318k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
371 | 318k | } \ mc_tmpl.c:put_8tap_regular_c Line | Count | Source | 367 | 268k | HIGHBD_DECL_SUFFIX) \ | 368 | 268k | { \ | 369 | 268k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 268k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 268k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_c Line | Count | Source | 367 | 2.19k | HIGHBD_DECL_SUFFIX) \ | 368 | 2.19k | { \ | 369 | 2.19k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 2.19k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 2.19k | } \ |
mc_tmpl.c:put_8tap_regular_sharp_c Line | Count | Source | 367 | 182 | HIGHBD_DECL_SUFFIX) \ | 368 | 182 | { \ | 369 | 182 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 182 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 182 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_c Line | Count | Source | 367 | 266 | HIGHBD_DECL_SUFFIX) \ | 368 | 266 | { \ | 369 | 266 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 266 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 266 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_c Line | Count | Source | 367 | 242 | HIGHBD_DECL_SUFFIX) \ | 368 | 242 | { \ | 369 | 242 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 242 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 242 | } \ |
mc_tmpl.c:put_8tap_sharp_c Line | Count | Source | 367 | 2.16k | HIGHBD_DECL_SUFFIX) \ | 368 | 2.16k | { \ | 369 | 2.16k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 2.16k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 2.16k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_c Line | Count | Source | 367 | 1.37k | HIGHBD_DECL_SUFFIX) \ | 368 | 1.37k | { \ | 369 | 1.37k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 1.37k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 1.37k | } \ |
mc_tmpl.c:put_8tap_smooth_c Line | Count | Source | 367 | 42.8k | HIGHBD_DECL_SUFFIX) \ | 368 | 42.8k | { \ | 369 | 42.8k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 42.8k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 42.8k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_c Line | Count | Source | 367 | 82 | HIGHBD_DECL_SUFFIX) \ | 368 | 82 | { \ | 369 | 82 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 82 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 82 | } \ |
|
372 | | static void put_8tap_##type##_scaled_c(pixel *const dst, \ |
373 | | const ptrdiff_t dst_stride, \ |
374 | | const pixel *const src, \ |
375 | | const ptrdiff_t src_stride, \ |
376 | | const int w, const int h, \ |
377 | | const int mx, const int my, \ |
378 | | const int dx, const int dy \ |
379 | 48.5k | HIGHBD_DECL_SUFFIX) \ |
380 | 48.5k | { \ |
381 | 48.5k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ |
382 | 48.5k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
383 | 48.5k | } \ mc_tmpl.c:put_8tap_regular_scaled_c Line | Count | Source | 379 | 41.9k | HIGHBD_DECL_SUFFIX) \ | 380 | 41.9k | { \ | 381 | 41.9k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 41.9k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 41.9k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_scaled_c Line | Count | Source | 379 | 45 | HIGHBD_DECL_SUFFIX) \ | 380 | 45 | { \ | 381 | 45 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 45 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 45 | } \ |
mc_tmpl.c:put_8tap_regular_sharp_scaled_c Line | Count | Source | 379 | 36 | HIGHBD_DECL_SUFFIX) \ | 380 | 36 | { \ | 381 | 36 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 36 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 36 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_scaled_c Line | Count | Source | 379 | 50 | HIGHBD_DECL_SUFFIX) \ | 380 | 50 | { \ | 381 | 50 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 50 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 50 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_scaled_c Line | Count | Source | 379 | 8 | HIGHBD_DECL_SUFFIX) \ | 380 | 8 | { \ | 381 | 8 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 8 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 8 | } \ |
mc_tmpl.c:put_8tap_sharp_scaled_c Line | Count | Source | 379 | 2.15k | HIGHBD_DECL_SUFFIX) \ | 380 | 2.15k | { \ | 381 | 2.15k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 2.15k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 2.15k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_scaled_c Line | Count | Source | 379 | 188 | HIGHBD_DECL_SUFFIX) \ | 380 | 188 | { \ | 381 | 188 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 188 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 188 | } \ |
mc_tmpl.c:put_8tap_smooth_scaled_c Line | Count | Source | 379 | 4.02k | HIGHBD_DECL_SUFFIX) \ | 380 | 4.02k | { \ | 381 | 4.02k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 4.02k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 4.02k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_scaled_c Line | Count | Source | 379 | 30 | HIGHBD_DECL_SUFFIX) \ | 380 | 30 | { \ | 381 | 30 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 30 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 30 | } \ |
|
384 | | static void prep_8tap_##type##_c(int16_t *const tmp, \ |
385 | | const pixel *const src, \ |
386 | | const ptrdiff_t src_stride, \ |
387 | | const int w, const int h, \ |
388 | | const int mx, const int my \ |
389 | 40.4k | HIGHBD_DECL_SUFFIX) \ |
390 | 40.4k | { \ |
391 | 40.4k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ |
392 | 40.4k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
393 | 40.4k | } \ mc_tmpl.c:prep_8tap_regular_c Line | Count | Source | 389 | 16.2k | HIGHBD_DECL_SUFFIX) \ | 390 | 16.2k | { \ | 391 | 16.2k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 16.2k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 16.2k | } \ |
mc_tmpl.c:prep_8tap_regular_smooth_c Line | Count | Source | 389 | 198 | HIGHBD_DECL_SUFFIX) \ | 390 | 198 | { \ | 391 | 198 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 198 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 198 | } \ |
mc_tmpl.c:prep_8tap_regular_sharp_c Line | Count | Source | 389 | 820 | HIGHBD_DECL_SUFFIX) \ | 390 | 820 | { \ | 391 | 820 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 820 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 820 | } \ |
mc_tmpl.c:prep_8tap_sharp_regular_c Line | Count | Source | 389 | 788 | HIGHBD_DECL_SUFFIX) \ | 390 | 788 | { \ | 391 | 788 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 788 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 788 | } \ |
mc_tmpl.c:prep_8tap_sharp_smooth_c Line | Count | Source | 389 | 298 | HIGHBD_DECL_SUFFIX) \ | 390 | 298 | { \ | 391 | 298 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 298 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 298 | } \ |
mc_tmpl.c:prep_8tap_sharp_c Line | Count | Source | 389 | 2.30k | HIGHBD_DECL_SUFFIX) \ | 390 | 2.30k | { \ | 391 | 2.30k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 2.30k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 2.30k | } \ |
mc_tmpl.c:prep_8tap_smooth_regular_c Line | Count | Source | 389 | 1.34k | HIGHBD_DECL_SUFFIX) \ | 390 | 1.34k | { \ | 391 | 1.34k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 1.34k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 1.34k | } \ |
mc_tmpl.c:prep_8tap_smooth_c Line | Count | Source | 389 | 18.4k | HIGHBD_DECL_SUFFIX) \ | 390 | 18.4k | { \ | 391 | 18.4k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 18.4k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 18.4k | } \ |
mc_tmpl.c:prep_8tap_smooth_sharp_c Line | Count | Source | 389 | 84 | HIGHBD_DECL_SUFFIX) \ | 390 | 84 | { \ | 391 | 84 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 84 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 84 | } \ |
|
394 | | static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \ |
395 | | const pixel *const src, \ |
396 | | const ptrdiff_t src_stride, \ |
397 | | const int w, const int h, \ |
398 | | const int mx, const int my, \ |
399 | | const int dx, const int dy \ |
400 | 13.9k | HIGHBD_DECL_SUFFIX) \ |
401 | 13.9k | { \ |
402 | 13.9k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ |
403 | 13.9k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
404 | 13.9k | } mc_tmpl.c:prep_8tap_regular_scaled_c Line | Count | Source | 400 | 7.16k | HIGHBD_DECL_SUFFIX) \ | 401 | 7.16k | { \ | 402 | 7.16k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 7.16k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 7.16k | } |
mc_tmpl.c:prep_8tap_regular_smooth_scaled_c Line | Count | Source | 400 | 30 | HIGHBD_DECL_SUFFIX) \ | 401 | 30 | { \ | 402 | 30 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 30 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 30 | } |
mc_tmpl.c:prep_8tap_regular_sharp_scaled_c Line | Count | Source | 400 | 2.73k | HIGHBD_DECL_SUFFIX) \ | 401 | 2.73k | { \ | 402 | 2.73k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 2.73k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 2.73k | } |
mc_tmpl.c:prep_8tap_sharp_regular_scaled_c Line | Count | Source | 400 | 32 | HIGHBD_DECL_SUFFIX) \ | 401 | 32 | { \ | 402 | 32 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 32 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 32 | } |
mc_tmpl.c:prep_8tap_sharp_smooth_scaled_c Line | Count | Source | 400 | 50 | HIGHBD_DECL_SUFFIX) \ | 401 | 50 | { \ | 402 | 50 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 50 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 50 | } |
mc_tmpl.c:prep_8tap_sharp_scaled_c Line | Count | Source | 400 | 1.43k | HIGHBD_DECL_SUFFIX) \ | 401 | 1.43k | { \ | 402 | 1.43k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 1.43k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 1.43k | } |
mc_tmpl.c:prep_8tap_smooth_regular_scaled_c Line | Count | Source | 400 | 594 | HIGHBD_DECL_SUFFIX) \ | 401 | 594 | { \ | 402 | 594 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 594 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 594 | } |
mc_tmpl.c:prep_8tap_smooth_scaled_c Line | Count | Source | 400 | 1.94k | HIGHBD_DECL_SUFFIX) \ | 401 | 1.94k | { \ | 402 | 1.94k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 1.94k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 1.94k | } |
Unexecuted instantiation: mc_tmpl.c:prep_8tap_smooth_sharp_scaled_c |
405 | | |
406 | | filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR) |
407 | | filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP) |
408 | | filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH) |
409 | | filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH) |
410 | | filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR) |
411 | | filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP) |
412 | | filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP) |
413 | | filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR) |
414 | | filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH) |
415 | | |
/*
 * Bilinear interpolation helpers.
 *
 * Every macro parameter is parenthesized in the expansion so that expression
 * arguments (e.g. a conditional or additive stride) are evaluated as a unit
 * instead of being torn apart by operator precedence.
 * Note that the arguments are still evaluated several times: do not pass
 * expressions with side effects.
 */

/* 16 * src[x] + mxy * (src[x + stride] - src[x]): blend of a sample and its
 * neighbour at +stride, with weight mxy out of 16 on the neighbour. */
#define FILTER_BILIN(src, x, mxy, stride) \
    (16 * (src)[(x)] + ((mxy) * ((src)[(x) + (stride)] - (src)[(x)])))

/* FILTER_BILIN followed by a rounding right shift by sh
 * (adds half of 1 << sh before shifting; adds 0 when sh == 0). */
#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
    ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh))

/* Rounded result passed through iclip_pixel(). */
#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
    iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))

/* Same blend, but the two samples come from two separate rows src1 and
 * src2, both read at column x. */
#define FILTER_BILIN2(src1, src2, x, mxy) \
    (16 * (src1)[(x)] + ((mxy) * ((src2)[(x)] - (src1)[(x)])))

/* Two-row variant with a rounding right shift by sh. */
#define FILTER_BILIN_RND2(src1, src2, x, mxy, sh) \
    ((FILTER_BILIN2(src1, src2, x, mxy) + ((1 << (sh)) >> 1)) >> (sh))

/* Two-row variant, rounded and passed through iclip_pixel(). */
#define FILTER_BILIN_CLIP2(src1, src2, x, mxy, sh) \
    iclip_pixel(FILTER_BILIN_RND2(src1, src2, x, mxy, sh))
433 | | |
434 | | static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride, |
435 | | const pixel *src, ptrdiff_t src_stride, |
436 | | const int w, int h, const int mx, const int my |
437 | | HIGHBD_DECL_SUFFIX) |
438 | 113k | { |
439 | 113k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
440 | 113k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
441 | 113k | dst_stride = PXSTRIDE(dst_stride); |
442 | 113k | src_stride = PXSTRIDE(src_stride); |
443 | | |
444 | 113k | if (mx) { |
445 | 30.3k | if (my) { |
446 | 12.5k | int16_t mid[128 * 129], *mid_ptr = mid; |
447 | 12.5k | int tmp_h = h + 1; |
448 | | |
449 | 101k | do { |
450 | 1.37M | for (int x = 0; x < w; x++) |
451 | 1.26M | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
452 | 101k | 4 - intermediate_bits); |
453 | | |
454 | 101k | mid_ptr += 128; |
455 | 101k | src += src_stride; |
456 | 101k | } while (--tmp_h); |
457 | | |
458 | 12.5k | mid_ptr = mid; |
459 | 89.3k | do { |
460 | 1.25M | for (int x = 0; x < w; x++) |
461 | 1.17M | dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, |
462 | 89.3k | 4 + intermediate_bits); |
463 | | |
464 | 89.3k | mid_ptr += 128; |
465 | 89.3k | dst += dst_stride; |
466 | 89.3k | } while (--h); |
467 | 17.7k | } else { |
468 | 336k | do { |
469 | 14.7M | for (int x = 0; x < w; x++) { |
470 | 14.4M | const int px = FILTER_BILIN_RND(src, x, mx, 1, |
471 | 14.4M | 4 - intermediate_bits); |
472 | 14.4M | dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits); |
473 | 14.4M | } |
474 | | |
475 | 336k | dst += dst_stride; |
476 | 336k | src += src_stride; |
477 | 336k | } while (--h); |
478 | 17.7k | } |
479 | 83.5k | } else if (my) { |
480 | 98.7k | do { |
481 | 1.43M | for (int x = 0; x < w; x++) |
482 | 1.33M | dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4); |
483 | | |
484 | 98.7k | dst += dst_stride; |
485 | 98.7k | src += src_stride; |
486 | 98.7k | } while (--h); |
487 | 13.6k | } else |
488 | 69.9k | put_c(dst, dst_stride, src, src_stride, w, h); |
489 | 113k | } |
490 | | |
491 | | static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, |
492 | | const pixel *src, ptrdiff_t src_stride, |
493 | | const int w, int h, const int mx, int my, |
494 | | const int dx, const int dy |
495 | | HIGHBD_DECL_SUFFIX) |
496 | 3.11k | { |
497 | 3.11k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
498 | 3.11k | int16_t mid[128 * 2]; |
499 | 3.11k | int in_y = -2; |
500 | | |
501 | 30.7k | do { |
502 | 30.7k | int x; |
503 | 30.7k | int y = my >> 10; |
504 | 30.7k | int16_t *mid1 = &mid[(y & 1) * 128]; |
505 | 30.7k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
506 | 30.7k | int dmy = my & 0x3ff; |
507 | | |
508 | 56.3k | while (in_y < y) { |
509 | 25.5k | int imx = mx, ioff = 0; |
510 | 25.5k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
511 | | |
512 | 757k | for (x = 0; x < w; x++) { |
513 | 731k | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
514 | 731k | 4 - intermediate_bits); |
515 | 731k | imx += dx; |
516 | 731k | ioff += imx >> 10; |
517 | 731k | imx &= 0x3ff; |
518 | 731k | } |
519 | | |
520 | 25.5k | src += PXSTRIDE(src_stride); |
521 | 25.5k | in_y++; |
522 | 25.5k | } |
523 | | |
524 | 1.03M | for (x = 0; x < w; x++) |
525 | 1.00M | dst[x] = FILTER_BILIN_CLIP2(mid1, mid2, x, dmy >> 6, |
526 | 30.7k | 4 + intermediate_bits); |
527 | | |
528 | 30.7k | my += dy; |
529 | 30.7k | dst += PXSTRIDE(dst_stride); |
530 | 30.7k | } while (--h); |
531 | 3.11k | } |
532 | | |
533 | | static void prep_bilin_c(int16_t *tmp, |
534 | | const pixel *src, ptrdiff_t src_stride, |
535 | | const int w, int h, const int mx, const int my |
536 | | HIGHBD_DECL_SUFFIX) |
537 | 1.57k | { |
538 | 1.57k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
539 | 1.57k | src_stride = PXSTRIDE(src_stride); |
540 | | |
541 | 1.57k | if (mx) { |
542 | 464 | if (my) { |
543 | 386 | int16_t mid[128 * 129], *mid_ptr = mid; |
544 | 386 | int tmp_h = h + 1; |
545 | | |
546 | 9.28k | do { |
547 | 390k | for (int x = 0; x < w; x++) |
548 | 381k | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
549 | 9.28k | 4 - intermediate_bits); |
550 | | |
551 | 9.28k | mid_ptr += 128; |
552 | 9.28k | src += src_stride; |
553 | 9.28k | } while (--tmp_h); |
554 | | |
555 | 386 | mid_ptr = mid; |
556 | 8.89k | do { |
557 | 383k | for (int x = 0; x < w; x++) |
558 | 374k | tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4) - |
559 | 374k | PREP_BIAS; |
560 | | |
561 | 8.89k | mid_ptr += 128; |
562 | 8.89k | tmp += w; |
563 | 8.89k | } while (--h); |
564 | 386 | } else { |
565 | 1.53k | do { |
566 | 61.4k | for (int x = 0; x < w; x++) |
567 | 59.9k | tmp[x] = FILTER_BILIN_RND(src, x, mx, 1, |
568 | 59.9k | 4 - intermediate_bits) - |
569 | 59.9k | PREP_BIAS; |
570 | | |
571 | 1.53k | tmp += w; |
572 | 1.53k | src += src_stride; |
573 | 1.53k | } while (--h); |
574 | 78 | } |
575 | 1.10k | } else if (my) { |
576 | 6.04k | do { |
577 | 238k | for (int x = 0; x < w; x++) |
578 | 232k | tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride, |
579 | 232k | 4 - intermediate_bits) - PREP_BIAS; |
580 | | |
581 | 6.04k | tmp += w; |
582 | 6.04k | src += src_stride; |
583 | 6.04k | } while (--h); |
584 | 228 | } else |
585 | 879 | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
586 | 1.57k | } |
587 | | |
588 | | static void prep_bilin_scaled_c(int16_t *tmp, |
589 | | const pixel *src, ptrdiff_t src_stride, |
590 | | const int w, int h, const int mx, int my, |
591 | | const int dx, const int dy HIGHBD_DECL_SUFFIX) |
592 | 263 | { |
593 | 263 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
594 | 263 | int16_t mid[128 * 2]; |
595 | 263 | int in_y = -2; |
596 | | |
597 | 6.07k | do { |
598 | 6.07k | int x; |
599 | 6.07k | int y = my >> 10; |
600 | 6.07k | int16_t *mid1 = &mid[(y & 1) * 128]; |
601 | 6.07k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
602 | 6.07k | int dmy = my & 0x3ff; |
603 | | |
604 | 13.9k | while (in_y < y) { |
605 | 7.92k | int imx = mx, ioff = 0; |
606 | 7.92k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
607 | | |
608 | 401k | for (x = 0; x < w; x++) { |
609 | 393k | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
610 | 393k | 4 - intermediate_bits); |
611 | 393k | imx += dx; |
612 | 393k | ioff += imx >> 10; |
613 | 393k | imx &= 0x3ff; |
614 | 393k | } |
615 | | |
616 | 7.92k | src += PXSTRIDE(src_stride); |
617 | 7.92k | in_y++; |
618 | 7.92k | } |
619 | | |
620 | 278k | for (x = 0; x < w; x++) |
621 | 272k | tmp[x] = FILTER_BILIN_RND2(mid1, mid2, x, dmy >> 6, 4) - PREP_BIAS; |
622 | | |
623 | 6.07k | my += dy; |
624 | 6.07k | tmp += w; |
625 | 6.07k | } while (--h); |
626 | 263 | } |
627 | | |
628 | | static void avg_c(pixel *dst, const ptrdiff_t dst_stride, |
629 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h |
630 | | HIGHBD_DECL_SUFFIX) |
631 | 25.4k | { |
632 | 25.4k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
633 | 25.4k | const int sh = intermediate_bits + 1; |
634 | 25.4k | const int rnd = (1 << intermediate_bits) + PREP_BIAS * 2; |
635 | 735k | do { |
636 | 29.6M | for (int x = 0; x < w; x++) |
637 | 28.9M | dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh); |
638 | | |
639 | 735k | tmp1 += w; |
640 | 735k | tmp2 += w; |
641 | 735k | dst += PXSTRIDE(dst_stride); |
642 | 735k | } while (--h); |
643 | 25.4k | } |
644 | | |
645 | | static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, |
646 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
647 | | const int weight HIGHBD_DECL_SUFFIX) |
648 | 1.57k | { |
649 | 1.57k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
650 | 1.57k | const int sh = intermediate_bits + 4; |
651 | 1.57k | const int rnd = (8 << intermediate_bits) + PREP_BIAS * 16; |
652 | 123k | do { |
653 | 7.36M | for (int x = 0; x < w; x++) |
654 | 7.24M | dst[x] = iclip_pixel((tmp1[x] * weight + |
655 | 7.24M | tmp2[x] * (16 - weight) + rnd) >> sh); |
656 | | |
657 | 123k | tmp1 += w; |
658 | 123k | tmp2 += w; |
659 | 123k | dst += PXSTRIDE(dst_stride); |
660 | 123k | } while (--h); |
661 | 1.57k | } |
662 | | |
663 | | static void mask_c(pixel *dst, const ptrdiff_t dst_stride, |
664 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
665 | | const uint8_t *mask HIGHBD_DECL_SUFFIX) |
666 | 2.67k | { |
667 | 2.67k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
668 | 2.67k | const int sh = intermediate_bits + 6; |
669 | 2.67k | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
670 | 80.8k | do { |
671 | 2.81M | for (int x = 0; x < w; x++) |
672 | 2.73M | dst[x] = iclip_pixel((tmp1[x] * mask[x] + |
673 | 2.73M | tmp2[x] * (64 - mask[x]) + rnd) >> sh); |
674 | | |
675 | 80.8k | tmp1 += w; |
676 | 80.8k | tmp2 += w; |
677 | 80.8k | mask += w; |
678 | 80.8k | dst += PXSTRIDE(dst_stride); |
679 | 80.8k | } while (--h); |
680 | 2.67k | } |
681 | | |
// Blends two sample values with a weight m out of 64, rounding to nearest:
//   (a * (64 - m) + b * m + 32) >> 6
// Every parameter is parenthesised so that arbitrary argument expressions
// (e.g. `p + q`) are weighted as a whole. Note that m is expanded twice, so
// arguments with side effects must be avoided.
#define blend_px(a, b, m) ((((a) * (64 - (m)) + (b) * (m)) + 32) >> 6)
683 | | static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
684 | | const int w, int h, const uint8_t *mask) |
685 | 5.50k | { |
686 | 48.0k | do { |
687 | 537k | for (int x = 0; x < w; x++) { |
688 | 489k | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
689 | 489k | } |
690 | 48.0k | dst += PXSTRIDE(dst_stride); |
691 | 48.0k | tmp += w; |
692 | 48.0k | mask += w; |
693 | 48.0k | } while (--h); |
694 | 5.50k | } |
695 | | |
696 | | static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
697 | | const int w, int h) |
698 | 53.8k | { |
699 | 53.8k | const uint8_t *const mask = &dav1d_obmc_masks[w]; |
700 | 719k | do { |
701 | 6.90M | for (int x = 0; x < (w * 3) >> 2; x++) { |
702 | 6.18M | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
703 | 6.18M | } |
704 | 719k | dst += PXSTRIDE(dst_stride); |
705 | 719k | tmp += w; |
706 | 719k | } while (--h); |
707 | 53.8k | } |
708 | | |
709 | | static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
710 | | const int w, int h) |
711 | 51.9k | { |
712 | 51.9k | const uint8_t *mask = &dav1d_obmc_masks[h]; |
713 | 51.9k | h = (h * 3) >> 2; |
714 | 230k | do { |
715 | 230k | const int m = *mask++; |
716 | 4.16M | for (int x = 0; x < w; x++) { |
717 | 3.93M | dst[x] = blend_px(dst[x], tmp[x], m); |
718 | 3.93M | } |
719 | 230k | dst += PXSTRIDE(dst_stride); |
720 | 230k | tmp += w; |
721 | 230k | } while (--h); |
722 | 51.9k | } |
723 | | |
724 | | static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, |
725 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
726 | | uint8_t *mask, const int sign, |
727 | | const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX) |
728 | 990 | { |
729 | | // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows, |
730 | | // and then load this intermediate to calculate final value for odd rows |
731 | 990 | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
732 | 990 | const int bitdepth = bitdepth_from_max(bitdepth_max); |
733 | 990 | const int sh = intermediate_bits + 6; |
734 | 990 | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
735 | 990 | const int mask_sh = bitdepth + intermediate_bits - 4; |
736 | 990 | const int mask_rnd = 1 << (mask_sh - 5); |
737 | 64.8k | do { |
738 | 2.46M | for (int x = 0; x < w; x++) { |
739 | 2.40M | const int tmpdiff = tmp1[x] - tmp2[x]; |
740 | 2.40M | const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
741 | 2.40M | dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh); |
742 | | |
743 | 2.40M | if (ss_hor) { |
744 | 1.25M | x++; |
745 | | |
746 | 1.25M | const int tmpdiff = tmp1[x] - tmp2[x]; |
747 | 1.25M | const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
748 | 1.25M | dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh); |
749 | | |
750 | 1.25M | if (h & ss_ver) { |
751 | 627k | mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2; |
752 | 631k | } else if (ss_ver) { |
753 | 627k | mask[x >> 1] = m + n; |
754 | 627k | } else { |
755 | 4.81k | mask[x >> 1] = (m + n + 1 - sign) >> 1; |
756 | 4.81k | } |
757 | 1.25M | } else { |
758 | 1.14M | mask[x] = m; |
759 | 1.14M | } |
760 | 2.40M | } |
761 | | |
762 | 64.8k | tmp1 += w; |
763 | 64.8k | tmp2 += w; |
764 | 64.8k | dst += PXSTRIDE(dst_stride); |
765 | 64.8k | if (!ss_ver || (h & 1)) mask += w >> ss_hor; |
766 | 64.8k | } while (--h); |
767 | 990 | } |
768 | | |
769 | | #define w_mask_fns(ssn, ss_hor, ss_ver) \ |
770 | | static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \ |
771 | | const int16_t *const tmp1, const int16_t *const tmp2, \ |
772 | | const int w, const int h, uint8_t *mask, \ |
773 | 990 | const int sign HIGHBD_DECL_SUFFIX) \ |
774 | 990 | { \ |
775 | 990 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ |
776 | 990 | HIGHBD_TAIL_SUFFIX); \ |
777 | 990 | } Line | Count | Source | 773 | 559 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 559 | { \ | 775 | 559 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 559 | HIGHBD_TAIL_SUFFIX); \ | 777 | 559 | } |
Line | Count | Source | 773 | 16 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 16 | { \ | 775 | 16 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 16 | HIGHBD_TAIL_SUFFIX); \ | 777 | 16 | } |
Line | Count | Source | 773 | 415 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 415 | { \ | 775 | 415 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 415 | HIGHBD_TAIL_SUFFIX); \ | 777 | 415 | } |
|
778 | | |
779 | | w_mask_fns(444, 0, 0); |
780 | | w_mask_fns(422, 1, 0); |
781 | | w_mask_fns(420, 1, 1); |
782 | | |
783 | | #undef w_mask_fns |
784 | | |
// 8-tap filter with rounding: multiplies the eight samples located at
// x - 3*stride .. x + 4*stride in `src` by F[0..7], adds half of (1 << sh)
// and shifts the sum right by sh.
// All parameters are parenthesised so that argument expressions (for example
// a stride written as `a + b`) are evaluated as a whole. Arguments are
// expanded several times and must therefore be free of side effects.
#define FILTER_WARP_RND(src, x, F, stride, sh) \
    (((F)[0] * (src)[(x) - 3 * (stride)] + \
      (F)[1] * (src)[(x) - 2 * (stride)] + \
      (F)[2] * (src)[(x) - 1 * (stride)] + \
      (F)[3] * (src)[(x) + 0 * (stride)] + \
      (F)[4] * (src)[(x) + 1 * (stride)] + \
      (F)[5] * (src)[(x) + 2 * (stride)] + \
      (F)[6] * (src)[(x) + 3 * (stride)] + \
      (F)[7] * (src)[(x) + 4 * (stride)] + \
      ((1 << (sh)) >> 1)) >> (sh))

// Same as FILTER_WARP_RND, with the result passed through iclip_pixel().
#define FILTER_WARP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh))
798 | | |
799 | | static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, |
800 | | const pixel *src, const ptrdiff_t src_stride, |
801 | | const int16_t *const abcd, int mx, int my |
802 | | HIGHBD_DECL_SUFFIX) |
803 | 185k | { |
804 | 185k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
805 | 185k | int16_t mid[15 * 8], *mid_ptr = mid; |
806 | | |
807 | 185k | src -= 3 * PXSTRIDE(src_stride); |
808 | 2.94M | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
809 | 24.5M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
810 | 21.8M | const int8_t *const filter = |
811 | 21.8M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
812 | | |
813 | 21.8M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
814 | 21.8M | 7 - intermediate_bits); |
815 | 21.8M | } |
816 | 2.76M | src += PXSTRIDE(src_stride); |
817 | 2.76M | mid_ptr += 8; |
818 | 2.76M | } |
819 | | |
820 | 185k | mid_ptr = &mid[3 * 8]; |
821 | 1.66M | for (int y = 0; y < 8; y++, my += abcd[3]) { |
822 | 13.0M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
823 | 11.5M | const int8_t *const filter = |
824 | 11.5M | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
825 | | |
826 | 11.5M | dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, |
827 | 11.5M | 7 + intermediate_bits); |
828 | 11.5M | } |
829 | 1.47M | mid_ptr += 8; |
830 | 1.47M | dst += PXSTRIDE(dst_stride); |
831 | 1.47M | } |
832 | 185k | } |
833 | | |
834 | | static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride, |
835 | | const pixel *src, const ptrdiff_t src_stride, |
836 | | const int16_t *const abcd, int mx, int my |
837 | | HIGHBD_DECL_SUFFIX) |
838 | 23.2k | { |
839 | 23.2k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
840 | 23.2k | int16_t mid[15 * 8], *mid_ptr = mid; |
841 | | |
842 | 23.2k | src -= 3 * PXSTRIDE(src_stride); |
843 | 372k | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
844 | 3.14M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
845 | 2.79M | const int8_t *const filter = |
846 | 2.79M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
847 | | |
848 | 2.79M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
849 | 2.79M | 7 - intermediate_bits); |
850 | 2.79M | } |
851 | 349k | src += PXSTRIDE(src_stride); |
852 | 349k | mid_ptr += 8; |
853 | 349k | } |
854 | | |
855 | 23.2k | mid_ptr = &mid[3 * 8]; |
856 | 209k | for (int y = 0; y < 8; y++, my += abcd[3]) { |
857 | 1.67M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
858 | 1.48M | const int8_t *const filter = |
859 | 1.48M | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
860 | | |
861 | 1.48M | tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7) - PREP_BIAS; |
862 | 1.48M | } |
863 | 186k | mid_ptr += 8; |
864 | 186k | tmp += tmp_stride; |
865 | 186k | } |
866 | 23.2k | } |
867 | | |
868 | | static void emu_edge_c(const intptr_t bw, const intptr_t bh, |
869 | | const intptr_t iw, const intptr_t ih, |
870 | | const intptr_t x, const intptr_t y, |
871 | | pixel *dst, const ptrdiff_t dst_stride, |
872 | | const pixel *ref, const ptrdiff_t ref_stride) |
873 | 429k | { |
874 | | // find offset in reference of visible block to copy |
875 | 429k | ref += iclip((int) y, 0, (int) ih - 1) * PXSTRIDE(ref_stride) + |
876 | 429k | iclip((int) x, 0, (int) iw - 1); |
877 | | |
878 | | // number of pixels to extend (left, right, top, bottom) |
879 | 429k | const int left_ext = iclip((int) -x, 0, (int) bw - 1); |
880 | 429k | const int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1); |
881 | 429k | assert(left_ext + right_ext < bw); |
882 | 429k | const int top_ext = iclip((int) -y, 0, (int) bh - 1); |
883 | 429k | const int bottom_ext = iclip((int) (y + bh - ih), 0, (int) bh - 1); |
884 | 429k | assert(top_ext + bottom_ext < bh); |
885 | | |
886 | | // copy visible portion first |
887 | 429k | pixel *blk = dst + top_ext * PXSTRIDE(dst_stride); |
888 | 429k | const int center_w = (int) (bw - left_ext - right_ext); |
889 | 429k | const int center_h = (int) (bh - top_ext - bottom_ext); |
890 | 6.33M | for (int y = 0; y < center_h; y++) { |
891 | 5.90M | pixel_copy(blk + left_ext, ref, center_w); |
892 | | // extend left edge for this line |
893 | 5.90M | if (left_ext) |
894 | 1.52M | pixel_set(blk, blk[left_ext], left_ext); |
895 | | // extend right edge for this line |
896 | 5.90M | if (right_ext) |
897 | 4.22M | pixel_set(blk + left_ext + center_w, blk[left_ext + center_w - 1], |
898 | 4.22M | right_ext); |
899 | 5.90M | ref += PXSTRIDE(ref_stride); |
900 | 5.90M | blk += PXSTRIDE(dst_stride); |
901 | 5.90M | } |
902 | | |
903 | | // copy top |
904 | 429k | blk = dst + top_ext * PXSTRIDE(dst_stride); |
905 | 881k | for (int y = 0; y < top_ext; y++) { |
906 | 451k | pixel_copy(dst, blk, bw); |
907 | 451k | dst += PXSTRIDE(dst_stride); |
908 | 451k | } |
909 | | |
910 | | // copy bottom |
911 | 429k | dst += center_h * PXSTRIDE(dst_stride); |
912 | 4.33M | for (int y = 0; y < bottom_ext; y++) { |
913 | 3.90M | pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], bw); |
914 | 3.90M | dst += PXSTRIDE(dst_stride); |
915 | 3.90M | } |
916 | 429k | } |
917 | | |
918 | | static void resize_c(pixel *dst, const ptrdiff_t dst_stride, |
919 | | const pixel *src, const ptrdiff_t src_stride, |
920 | | const int dst_w, int h, const int src_w, |
921 | | const int dx, const int mx0 HIGHBD_DECL_SUFFIX) |
922 | 16.9k | { |
923 | 592k | do { |
924 | 592k | int mx = mx0, src_x = -1; |
925 | 252M | for (int x = 0; x < dst_w; x++) { |
926 | 251M | const int8_t *const F = dav1d_resize_filter[mx >> 8]; |
927 | 251M | dst[x] = iclip_pixel((-(F[0] * src[iclip(src_x - 3, 0, src_w - 1)] + |
928 | 251M | F[1] * src[iclip(src_x - 2, 0, src_w - 1)] + |
929 | 251M | F[2] * src[iclip(src_x - 1, 0, src_w - 1)] + |
930 | 251M | F[3] * src[iclip(src_x + 0, 0, src_w - 1)] + |
931 | 251M | F[4] * src[iclip(src_x + 1, 0, src_w - 1)] + |
932 | 251M | F[5] * src[iclip(src_x + 2, 0, src_w - 1)] + |
933 | 251M | F[6] * src[iclip(src_x + 3, 0, src_w - 1)] + |
934 | 251M | F[7] * src[iclip(src_x + 4, 0, src_w - 1)]) + |
935 | 251M | 64) >> 7); |
936 | 251M | mx += dx; |
937 | 251M | src_x += mx >> 14; |
938 | 251M | mx &= 0x3fff; |
939 | 251M | } |
940 | | |
941 | 592k | dst += PXSTRIDE(dst_stride); |
942 | 592k | src += PXSTRIDE(src_stride); |
943 | 592k | } while (--h); |
944 | 16.9k | } |
945 | | |
946 | | #if HAVE_ASM |
947 | | #if ARCH_AARCH64 || ARCH_ARM |
948 | | #include "src/arm/mc.h" |
949 | | #elif ARCH_LOONGARCH64 |
950 | | #include "src/loongarch/mc.h" |
951 | | #elif ARCH_PPC64LE |
952 | | #include "src/ppc/mc.h" |
953 | | #elif ARCH_RISCV |
954 | | #include "src/riscv/mc.h" |
955 | | #elif ARCH_X86 |
956 | | #include "src/x86/mc.h" |
957 | | #endif |
958 | | #endif |
959 | | |
960 | 5.12k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { |
961 | 51.2k | #define init_mc_fns(type, name) do { \ |
962 | 51.2k | c->mc [type] = put_##name##_c; \ |
963 | 51.2k | c->mc_scaled [type] = put_##name##_scaled_c; \ |
964 | 51.2k | c->mct [type] = prep_##name##_c; \ |
965 | 51.2k | c->mct_scaled[type] = prep_##name##_scaled_c; \ |
966 | 51.2k | } while (0) |
967 | | |
968 | 5.12k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); |
969 | 5.12k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); |
970 | 5.12k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); |
971 | 5.12k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); |
972 | 5.12k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); |
973 | 5.12k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); |
974 | 5.12k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); |
975 | 5.12k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); |
976 | 5.12k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); |
977 | 5.12k | init_mc_fns(FILTER_2D_BILINEAR, bilin); |
978 | | |
979 | 5.12k | c->avg = avg_c; |
980 | 5.12k | c->w_avg = w_avg_c; |
981 | 5.12k | c->mask = mask_c; |
982 | 5.12k | c->blend = blend_c; |
983 | 5.12k | c->blend_v = blend_v_c; |
984 | 5.12k | c->blend_h = blend_h_c; |
985 | 5.12k | c->w_mask[0] = w_mask_444_c; |
986 | 5.12k | c->w_mask[1] = w_mask_422_c; |
987 | 5.12k | c->w_mask[2] = w_mask_420_c; |
988 | 5.12k | c->warp8x8 = warp_affine_8x8_c; |
989 | 5.12k | c->warp8x8t = warp_affine_8x8t_c; |
990 | 5.12k | c->emu_edge = emu_edge_c; |
991 | 5.12k | c->resize = resize_c; |
992 | | |
993 | | #if HAVE_ASM |
994 | | #if ARCH_AARCH64 || ARCH_ARM |
995 | | mc_dsp_init_arm(c); |
996 | | #elif ARCH_LOONGARCH64 |
997 | | mc_dsp_init_loongarch(c); |
998 | | #elif ARCH_PPC64LE |
999 | | mc_dsp_init_ppc(c); |
1000 | | #elif ARCH_RISCV |
1001 | | mc_dsp_init_riscv(c); |
1002 | | #elif ARCH_X86 |
1003 | | mc_dsp_init_x86(c); |
1004 | | #endif |
1005 | | #endif |
1006 | 5.12k | } Line | Count | Source | 960 | 2.71k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 2.71k | #define init_mc_fns(type, name) do { \ | 962 | 2.71k | c->mc [type] = put_##name##_c; \ | 963 | 2.71k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 2.71k | c->mct [type] = prep_##name##_c; \ | 965 | 2.71k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 2.71k | } while (0) | 967 | | | 968 | 2.71k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 2.71k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 2.71k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 2.71k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 2.71k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 2.71k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 2.71k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 2.71k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 2.71k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 2.71k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 2.71k | c->avg = avg_c; | 980 | 2.71k | c->w_avg = w_avg_c; | 981 | 2.71k | c->mask = mask_c; | 982 | 2.71k | c->blend = blend_c; | 983 | 2.71k | c->blend_v = blend_v_c; | 984 | 2.71k | c->blend_h = blend_h_c; | 985 | 2.71k | c->w_mask[0] = w_mask_444_c; | 986 | 2.71k | c->w_mask[1] = w_mask_422_c; | 987 | 2.71k | c->w_mask[2] = w_mask_420_c; | 988 | 2.71k | c->warp8x8 = warp_affine_8x8_c; | 989 | 2.71k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 2.71k | c->emu_edge = emu_edge_c; | 991 | 2.71k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 
1002 | | #elif ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 2.71k | } |
Line | Count | Source | 960 | 2.41k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 2.41k | #define init_mc_fns(type, name) do { \ | 962 | 2.41k | c->mc [type] = put_##name##_c; \ | 963 | 2.41k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 2.41k | c->mct [type] = prep_##name##_c; \ | 965 | 2.41k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 2.41k | } while (0) | 967 | | | 968 | 2.41k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 2.41k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 2.41k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 2.41k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 2.41k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 2.41k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 2.41k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 2.41k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 2.41k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 2.41k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 2.41k | c->avg = avg_c; | 980 | 2.41k | c->w_avg = w_avg_c; | 981 | 2.41k | c->mask = mask_c; | 982 | 2.41k | c->blend = blend_c; | 983 | 2.41k | c->blend_v = blend_v_c; | 984 | 2.41k | c->blend_h = blend_h_c; | 985 | 2.41k | c->w_mask[0] = w_mask_444_c; | 986 | 2.41k | c->w_mask[1] = w_mask_422_c; | 987 | 2.41k | c->w_mask[2] = w_mask_420_c; | 988 | 2.41k | c->warp8x8 = warp_affine_8x8_c; | 989 | 2.41k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 2.41k | c->emu_edge = emu_edge_c; | 991 | 2.41k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 1002 | | #elif 
ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 2.41k | } |
|