/work/dav1d/src/mc_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/attributes.h" |
34 | | #include "common/intops.h" |
35 | | |
36 | | #include "src/mc.h" |
37 | | #include "src/tables.h" |
38 | | |
#if BITDEPTH != 8
// High bit-depth build.
// Number of extra precision bits kept in intermediates:
// 4 for 10 bits/component, 2 for 12 bits/component.
#define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))
// The prep output spans [-20588, 36956] (10-bit) or [-20602, 36983] (12-bit),
// which does not fit in int16_t; this bias is subtracted so that it does.
#define PREP_BIAS 8192
#else /* BITDEPTH == 8 */
// 8-bit build: the number of intermediate bits is a constant.
#define get_intermediate_bits(bitdepth_max) 4
// The prep output spans [-5132, 9212], which fits in int16_t unmodified,
// so no bias is required.
#define PREP_BIAS 0
#endif /* BITDEPTH != 8 */
50 | | |
51 | | static NOINLINE void |
52 | | put_c(pixel *dst, const ptrdiff_t dst_stride, |
53 | | const pixel *src, const ptrdiff_t src_stride, const int w, int h) |
54 | 29.6k | { |
55 | 471k | do { |
56 | 471k | pixel_copy(dst, src, w); |
57 | | |
58 | 471k | dst += dst_stride; |
59 | 471k | src += src_stride; |
60 | 471k | } while (--h); |
61 | 29.6k | } |
62 | | |
63 | | static NOINLINE void |
64 | | prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride, |
65 | | const int w, int h HIGHBD_DECL_SUFFIX) |
66 | 8.43k | { |
67 | 8.43k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
68 | 118k | do { |
69 | 1.72M | for (int x = 0; x < w; x++) |
70 | 1.60M | tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS; |
71 | | |
72 | 118k | tmp += w; |
73 | 118k | src += src_stride; |
74 | 118k | } while (--h); |
75 | 8.43k | } |
76 | | |
// 8-tap filter helpers.
//
// Every macro argument is parenthesized in the expansion so that expression
// arguments (e.g. a stride of `a + b` or an index of `c ? i : j`) keep their
// intended grouping. Note that `x`, `src`, `stride` and `sh` are expanded
// more than once, so arguments must be free of side effects.

// Weighted sum of the 8 samples src[x - 3*stride] .. src[x + 4*stride],
// using coefficients F[0] .. F[7]. No rounding or shifting is applied.
#define FILTER_8TAP(src, x, F, stride) \
    ((F)[0] * (src)[(x) + -3 * (stride)] + \
     (F)[1] * (src)[(x) + -2 * (stride)] + \
     (F)[2] * (src)[(x) + -1 * (stride)] + \
     (F)[3] * (src)[(x) + +0 * (stride)] + \
     (F)[4] * (src)[(x) + +1 * (stride)] + \
     (F)[5] * (src)[(x) + +2 * (stride)] + \
     (F)[6] * (src)[(x) + +3 * (stride)] + \
     (F)[7] * (src)[(x) + +4 * (stride)])

// Same weighted sum, but the 8 samples come from column x of 8 separately
// addressed rows src[0] .. src[7] (an array of row pointers).
#define FILTER_8TAP2(src, x, F) \
    ((F)[0] * (src)[0][(x)] + \
     (F)[1] * (src)[1][(x)] + \
     (F)[2] * (src)[2][(x)] + \
     (F)[3] * (src)[3][(x)] + \
     (F)[4] * (src)[4][(x)] + \
     (F)[5] * (src)[5][(x)] + \
     (F)[6] * (src)[6][(x)] + \
     (F)[7] * (src)[7][(x)])

// FILTER_8TAP followed by a round-to-nearest right shift by sh
// (the rounding term is half of 1 << sh, which is 0 when sh is 0).
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
    ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))

// FILTER_8TAP with a caller-supplied rounding term rnd, then shifted by sh.
#define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \
    ((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh))

// Row-pointer variant (FILTER_8TAP2) with round-to-nearest shift by sh.
#define DAV1D_FILTER_8TAP_RND3(src, x, F, sh) \
    ((FILTER_8TAP2(src, x, F) + ((1 << (sh)) >> 1)) >> (sh))

// The *_CLIP variants pass the rounded result through iclip_pixel().
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))

#define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh))

#define DAV1D_FILTER_8TAP_CLIP3(src, x, F, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND3(src, x, F, sh))

// Declares `fh`, the horizontal coefficient set for sub-pel position mx.
// fh is NULL when mx is 0. Otherwise the table row is chosen from
// `filter_type & 3` when the enclosing scope's `w` is greater than 4, and
// from `3 + (filter_type & 1)` when it is not. Relies on `w` and
// `filter_type` being visible at the point of use.
#define GET_H_FILTER(mx) \
    const int8_t *const fh = !(mx) ? NULL : w > 4 ? \
        dav1d_mc_subpel_filters[filter_type & 3][(mx) - 1] : \
        dav1d_mc_subpel_filters[3 + (filter_type & 1)][(mx) - 1]

// Declares `fv`, the vertical counterpart of GET_H_FILTER: keyed on
// `filter_type >> 2` and on the enclosing scope's `h` being greater than 4.
// fv is NULL when my is 0.
#define GET_V_FILTER(my) \
    const int8_t *const fv = !(my) ? NULL : h > 4 ? \
        dav1d_mc_subpel_filters[filter_type >> 2][(my) - 1] : \
        dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][(my) - 1]

// Declares both `fh` and `fv` from the enclosing scope's `mx` and `my`.
// The caller supplies the trailing semicolon.
#define GET_FILTERS() \
    GET_H_FILTER(mx); \
    GET_V_FILTER(my)
128 | | |
129 | | static NOINLINE void |
130 | | put_8tap_c(pixel *dst, ptrdiff_t dst_stride, |
131 | | const pixel *src, ptrdiff_t src_stride, |
132 | | const int w, int h, const int mx, const int my, |
133 | | const int filter_type HIGHBD_DECL_SUFFIX) |
134 | 41.1k | { |
135 | 41.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
136 | 41.1k | const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1); |
137 | | |
138 | 41.1k | GET_FILTERS(); |
139 | 41.1k | dst_stride = PXSTRIDE(dst_stride); |
140 | 41.1k | src_stride = PXSTRIDE(src_stride); |
141 | | |
142 | 41.1k | if (fh) { |
143 | 19.9k | if (fv) { |
144 | 13.0k | int tmp_h = h + 7; |
145 | 13.0k | int16_t mid[128 * 135], *mid_ptr = mid; |
146 | | |
147 | 13.0k | src -= src_stride * 3; |
148 | 223k | do { |
149 | 2.53M | for (int x = 0; x < w; x++) |
150 | 2.31M | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
151 | 223k | 6 - intermediate_bits); |
152 | | |
153 | 223k | mid_ptr += 128; |
154 | 223k | src += src_stride; |
155 | 223k | } while (--tmp_h); |
156 | | |
157 | 13.0k | mid_ptr = mid + 128 * 3; |
158 | 133k | do { |
159 | 1.73M | for (int x = 0; x < w; x++) |
160 | 1.59M | dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, |
161 | 133k | 6 + intermediate_bits); |
162 | | |
163 | 133k | mid_ptr += 128; |
164 | 133k | dst += dst_stride; |
165 | 133k | } while (--h); |
166 | 13.0k | } else { |
167 | 73.5k | do { |
168 | 1.03M | for (int x = 0; x < w; x++) { |
169 | 965k | dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1, |
170 | 965k | intermediate_rnd, 6); |
171 | 965k | } |
172 | | |
173 | 73.5k | dst += dst_stride; |
174 | 73.5k | src += src_stride; |
175 | 73.5k | } while (--h); |
176 | 6.93k | } |
177 | 21.2k | } else if (fv) { |
178 | 71.8k | do { |
179 | 870k | for (int x = 0; x < w; x++) |
180 | 798k | dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6); |
181 | | |
182 | 71.8k | dst += dst_stride; |
183 | 71.8k | src += src_stride; |
184 | 71.8k | } while (--h); |
185 | 7.10k | } else |
186 | 14.1k | put_c(dst, dst_stride, src, src_stride, w, h); |
187 | 41.1k | } |
188 | | |
189 | | static NOINLINE void |
190 | | put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride, |
191 | | const pixel *src, ptrdiff_t src_stride, |
192 | | const int w, int h, const int mx, int my, |
193 | | const int dx, const int dy, const int filter_type |
194 | | HIGHBD_DECL_SUFFIX) |
195 | 62.8k | { |
196 | 62.8k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
197 | 62.8k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
198 | 62.8k | int16_t mid[128 * 8]; |
199 | 62.8k | int16_t *mid_ptrs[8]; |
200 | 62.8k | int in_y = -8; |
201 | 62.8k | src_stride = PXSTRIDE(src_stride); |
202 | | |
203 | 565k | for (int i = 0; i < 8; i++) |
204 | 502k | mid_ptrs[i] = &mid[128 * i]; |
205 | | |
206 | 62.8k | src -= src_stride * 3; |
207 | | |
208 | 830k | for (int y = 0; y < h; y++) { |
209 | 767k | int x; |
210 | 767k | int src_y = my >> 10; |
211 | 767k | GET_V_FILTER((my & 0x3ff) >> 6); |
212 | | |
213 | 1.47M | while (in_y < src_y) { |
214 | 708k | int imx = mx, ioff = 0; |
215 | 708k | int16_t *mid_ptr = mid_ptrs[0]; |
216 | | |
217 | 5.66M | for (int i = 0; i < 7; i++) |
218 | 4.95M | mid_ptrs[i] = mid_ptrs[i + 1]; |
219 | 708k | mid_ptrs[7] = mid_ptr; |
220 | | |
221 | 10.4M | for (x = 0; x < w; x++) { |
222 | 9.73M | GET_H_FILTER(imx >> 6); |
223 | 9.73M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
224 | 9.73M | 6 - intermediate_bits) : |
225 | 9.73M | src[ioff] << intermediate_bits; |
226 | 9.73M | imx += dx; |
227 | 9.73M | ioff += imx >> 10; |
228 | 9.73M | imx &= 0x3ff; |
229 | 9.73M | } |
230 | | |
231 | 708k | src += src_stride; |
232 | 708k | in_y++; |
233 | 708k | } |
234 | | |
235 | 16.8M | for (x = 0; x < w; x++) |
236 | 16.0M | dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP3(mid_ptrs, x, fv, |
237 | 16.0M | 6 + intermediate_bits) : |
238 | 16.0M | iclip_pixel((mid_ptrs[3][x] + intermediate_rnd) >> |
239 | 1.36M | intermediate_bits); |
240 | | |
241 | 767k | my += dy; |
242 | 767k | dst += PXSTRIDE(dst_stride); |
243 | 767k | } |
244 | 62.8k | } |
245 | | |
246 | | static NOINLINE void |
247 | | prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
248 | | const int w, int h, const int mx, const int my, |
249 | | const int filter_type HIGHBD_DECL_SUFFIX) |
250 | 14.9k | { |
251 | 14.9k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
252 | 14.9k | GET_FILTERS(); |
253 | 14.9k | src_stride = PXSTRIDE(src_stride); |
254 | | |
255 | 14.9k | if (fh) { |
256 | 6.86k | if (fv) { |
257 | 4.71k | int tmp_h = h + 7; |
258 | 4.71k | int16_t mid[128 * 135], *mid_ptr = mid; |
259 | | |
260 | 4.71k | src -= src_stride * 3; |
261 | 92.8k | do { |
262 | 1.06M | for (int x = 0; x < w; x++) |
263 | 970k | mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
264 | 92.8k | 6 - intermediate_bits); |
265 | | |
266 | 92.8k | mid_ptr += 128; |
267 | 92.8k | src += src_stride; |
268 | 92.8k | } while (--tmp_h); |
269 | | |
270 | 4.71k | mid_ptr = mid + 128 * 3; |
271 | 59.8k | do { |
272 | 722k | for (int x = 0; x < w; x++) { |
273 | 662k | int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) - |
274 | 662k | PREP_BIAS; |
275 | 662k | assert(t >= INT16_MIN && t <= INT16_MAX); |
276 | 662k | tmp[x] = t; |
277 | 662k | } |
278 | | |
279 | 59.8k | mid_ptr += 128; |
280 | 59.8k | tmp += w; |
281 | 59.8k | } while (--h); |
282 | 4.71k | } else { |
283 | 30.2k | do { |
284 | 391k | for (int x = 0; x < w; x++) |
285 | 361k | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, |
286 | 361k | 6 - intermediate_bits) - |
287 | 361k | PREP_BIAS; |
288 | | |
289 | 30.2k | tmp += w; |
290 | 30.2k | src += src_stride; |
291 | 30.2k | } while (--h); |
292 | 2.15k | } |
293 | 8.05k | } else if (fv) { |
294 | 31.7k | do { |
295 | 332k | for (int x = 0; x < w; x++) |
296 | 300k | tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, |
297 | 300k | 6 - intermediate_bits) - |
298 | 300k | PREP_BIAS; |
299 | | |
300 | 31.7k | tmp += w; |
301 | 31.7k | src += src_stride; |
302 | 31.7k | } while (--h); |
303 | 2.35k | } else |
304 | 5.69k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
305 | 14.9k | } |
306 | | |
307 | | static NOINLINE void |
308 | | prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, |
309 | | const int w, int h, const int mx, int my, |
310 | | const int dx, const int dy, const int filter_type |
311 | | HIGHBD_DECL_SUFFIX) |
312 | 23.6k | { |
313 | 23.6k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
314 | 23.6k | int16_t mid[128 * 8]; |
315 | 23.6k | int16_t *mid_ptrs[8]; |
316 | 23.6k | int in_y = -8; |
317 | 23.6k | src_stride = PXSTRIDE(src_stride); |
318 | | |
319 | 212k | for (int i = 0; i < 8; i++) |
320 | 189k | mid_ptrs[i] = &mid[128 * i]; |
321 | | |
322 | 23.6k | src -= src_stride * 3; |
323 | | |
324 | 501k | for (int y = 0; y < h; y++) { |
325 | 477k | int x; |
326 | 477k | int src_y = my >> 10; |
327 | 477k | GET_V_FILTER((my & 0x3ff) >> 6); |
328 | | |
329 | 788k | while (in_y < src_y) { |
330 | 310k | int imx = mx, ioff = 0; |
331 | 310k | int16_t *mid_ptr = mid_ptrs[0]; |
332 | | |
333 | 2.48M | for (int i = 0; i < 7; i++) |
334 | 2.17M | mid_ptrs[i] = mid_ptrs[i + 1]; |
335 | 310k | mid_ptrs[7] = mid_ptr; |
336 | | |
337 | 6.30M | for (x = 0; x < w; x++) { |
338 | 5.99M | GET_H_FILTER(imx >> 6); |
339 | 5.99M | mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, |
340 | 5.99M | 6 - intermediate_bits) : |
341 | 5.99M | src[ioff] << intermediate_bits; |
342 | 5.99M | imx += dx; |
343 | 5.99M | ioff += imx >> 10; |
344 | 5.99M | imx &= 0x3ff; |
345 | 5.99M | } |
346 | | |
347 | 310k | src += src_stride; |
348 | 310k | in_y++; |
349 | 310k | } |
350 | | |
351 | 11.4M | for (x = 0; x < w; x++) |
352 | 10.9M | tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND3(mid_ptrs, x, fv, 6) |
353 | 10.9M | : mid_ptrs[3][x]) - PREP_BIAS; |
354 | | |
355 | 477k | my += dy; |
356 | 477k | tmp += w; |
357 | 477k | } |
358 | 23.6k | } |
359 | | |
360 | | #define filter_fns(type, type_h, type_v) \ |
361 | | static void put_8tap_##type##_c(pixel *const dst, \ |
362 | | const ptrdiff_t dst_stride, \ |
363 | | const pixel *const src, \ |
364 | | const ptrdiff_t src_stride, \ |
365 | | const int w, const int h, \ |
366 | | const int mx, const int my \ |
367 | 41.1k | HIGHBD_DECL_SUFFIX) \ |
368 | 41.1k | { \ |
369 | 41.1k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ |
370 | 41.1k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
371 | 41.1k | } \ mc_tmpl.c:put_8tap_regular_c Line | Count | Source | 367 | 21.4k | HIGHBD_DECL_SUFFIX) \ | 368 | 21.4k | { \ | 369 | 21.4k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 21.4k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 21.4k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_c Line | Count | Source | 367 | 2.55k | HIGHBD_DECL_SUFFIX) \ | 368 | 2.55k | { \ | 369 | 2.55k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 2.55k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 2.55k | } \ |
mc_tmpl.c:put_8tap_regular_sharp_c Line | Count | Source | 367 | 479 | HIGHBD_DECL_SUFFIX) \ | 368 | 479 | { \ | 369 | 479 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 479 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 479 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_c Line | Count | Source | 367 | 461 | HIGHBD_DECL_SUFFIX) \ | 368 | 461 | { \ | 369 | 461 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 461 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 461 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_c Line | Count | Source | 367 | 153 | HIGHBD_DECL_SUFFIX) \ | 368 | 153 | { \ | 369 | 153 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 153 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 153 | } \ |
mc_tmpl.c:put_8tap_sharp_c Line | Count | Source | 367 | 4.71k | HIGHBD_DECL_SUFFIX) \ | 368 | 4.71k | { \ | 369 | 4.71k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 4.71k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 4.71k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_c Line | Count | Source | 367 | 2.80k | HIGHBD_DECL_SUFFIX) \ | 368 | 2.80k | { \ | 369 | 2.80k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 2.80k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 2.80k | } \ |
mc_tmpl.c:put_8tap_smooth_c Line | Count | Source | 367 | 8.03k | HIGHBD_DECL_SUFFIX) \ | 368 | 8.03k | { \ | 369 | 8.03k | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 8.03k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 8.03k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_c Line | Count | Source | 367 | 484 | HIGHBD_DECL_SUFFIX) \ | 368 | 484 | { \ | 369 | 484 | put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \ | 370 | 484 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 371 | 484 | } \ |
|
372 | | static void put_8tap_##type##_scaled_c(pixel *const dst, \ |
373 | | const ptrdiff_t dst_stride, \ |
374 | | const pixel *const src, \ |
375 | | const ptrdiff_t src_stride, \ |
376 | | const int w, const int h, \ |
377 | | const int mx, const int my, \ |
378 | | const int dx, const int dy \ |
379 | 62.8k | HIGHBD_DECL_SUFFIX) \ |
380 | 62.8k | { \ |
381 | 62.8k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ |
382 | 62.8k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
383 | 62.8k | } \ mc_tmpl.c:put_8tap_regular_scaled_c Line | Count | Source | 379 | 36.3k | HIGHBD_DECL_SUFFIX) \ | 380 | 36.3k | { \ | 381 | 36.3k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 36.3k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 36.3k | } \ |
mc_tmpl.c:put_8tap_regular_smooth_scaled_c Line | Count | Source | 379 | 1.49k | HIGHBD_DECL_SUFFIX) \ | 380 | 1.49k | { \ | 381 | 1.49k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 1.49k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 1.49k | } \ |
mc_tmpl.c:put_8tap_regular_sharp_scaled_c Line | Count | Source | 379 | 456 | HIGHBD_DECL_SUFFIX) \ | 380 | 456 | { \ | 381 | 456 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 456 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 456 | } \ |
mc_tmpl.c:put_8tap_sharp_regular_scaled_c Line | Count | Source | 379 | 715 | HIGHBD_DECL_SUFFIX) \ | 380 | 715 | { \ | 381 | 715 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 715 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 715 | } \ |
mc_tmpl.c:put_8tap_sharp_smooth_scaled_c Line | Count | Source | 379 | 472 | HIGHBD_DECL_SUFFIX) \ | 380 | 472 | { \ | 381 | 472 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 472 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 472 | } \ |
mc_tmpl.c:put_8tap_sharp_scaled_c Line | Count | Source | 379 | 10.8k | HIGHBD_DECL_SUFFIX) \ | 380 | 10.8k | { \ | 381 | 10.8k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 10.8k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 10.8k | } \ |
mc_tmpl.c:put_8tap_smooth_regular_scaled_c Line | Count | Source | 379 | 1.45k | HIGHBD_DECL_SUFFIX) \ | 380 | 1.45k | { \ | 381 | 1.45k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 1.45k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 1.45k | } \ |
mc_tmpl.c:put_8tap_smooth_scaled_c Line | Count | Source | 379 | 10.6k | HIGHBD_DECL_SUFFIX) \ | 380 | 10.6k | { \ | 381 | 10.6k | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 10.6k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 10.6k | } \ |
mc_tmpl.c:put_8tap_smooth_sharp_scaled_c Line | Count | Source | 379 | 335 | HIGHBD_DECL_SUFFIX) \ | 380 | 335 | { \ | 381 | 335 | put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ | 382 | 335 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 383 | 335 | } \ |
|
384 | | static void prep_8tap_##type##_c(int16_t *const tmp, \ |
385 | | const pixel *const src, \ |
386 | | const ptrdiff_t src_stride, \ |
387 | | const int w, const int h, \ |
388 | | const int mx, const int my \ |
389 | 14.9k | HIGHBD_DECL_SUFFIX) \ |
390 | 14.9k | { \ |
391 | 14.9k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ |
392 | 14.9k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
393 | 14.9k | } \ mc_tmpl.c:prep_8tap_regular_c Line | Count | Source | 389 | 6.25k | HIGHBD_DECL_SUFFIX) \ | 390 | 6.25k | { \ | 391 | 6.25k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 6.25k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 6.25k | } \ |
mc_tmpl.c:prep_8tap_regular_smooth_c Line | Count | Source | 389 | 377 | HIGHBD_DECL_SUFFIX) \ | 390 | 377 | { \ | 391 | 377 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 377 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 377 | } \ |
mc_tmpl.c:prep_8tap_regular_sharp_c Line | Count | Source | 389 | 993 | HIGHBD_DECL_SUFFIX) \ | 390 | 993 | { \ | 391 | 993 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 993 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 993 | } \ |
mc_tmpl.c:prep_8tap_sharp_regular_c Line | Count | Source | 389 | 1.01k | HIGHBD_DECL_SUFFIX) \ | 390 | 1.01k | { \ | 391 | 1.01k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 1.01k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 1.01k | } \ |
mc_tmpl.c:prep_8tap_sharp_smooth_c Line | Count | Source | 389 | 328 | HIGHBD_DECL_SUFFIX) \ | 390 | 328 | { \ | 391 | 328 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 328 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 328 | } \ |
mc_tmpl.c:prep_8tap_sharp_c Line | Count | Source | 389 | 2.28k | HIGHBD_DECL_SUFFIX) \ | 390 | 2.28k | { \ | 391 | 2.28k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 2.28k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 2.28k | } \ |
mc_tmpl.c:prep_8tap_smooth_regular_c Line | Count | Source | 389 | 713 | HIGHBD_DECL_SUFFIX) \ | 390 | 713 | { \ | 391 | 713 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 713 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 713 | } \ |
mc_tmpl.c:prep_8tap_smooth_c Line | Count | Source | 389 | 2.37k | HIGHBD_DECL_SUFFIX) \ | 390 | 2.37k | { \ | 391 | 2.37k | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 2.37k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 2.37k | } \ |
mc_tmpl.c:prep_8tap_smooth_sharp_c Line | Count | Source | 389 | 585 | HIGHBD_DECL_SUFFIX) \ | 390 | 585 | { \ | 391 | 585 | prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \ | 392 | 585 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 393 | 585 | } \ |
|
394 | | static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \ |
395 | | const pixel *const src, \ |
396 | | const ptrdiff_t src_stride, \ |
397 | | const int w, const int h, \ |
398 | | const int mx, const int my, \ |
399 | | const int dx, const int dy \ |
400 | 23.6k | HIGHBD_DECL_SUFFIX) \ |
401 | 23.6k | { \ |
402 | 23.6k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ |
403 | 23.6k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ |
404 | 23.6k | } mc_tmpl.c:prep_8tap_regular_scaled_c Line | Count | Source | 400 | 8.00k | HIGHBD_DECL_SUFFIX) \ | 401 | 8.00k | { \ | 402 | 8.00k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 8.00k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 8.00k | } |
mc_tmpl.c:prep_8tap_regular_smooth_scaled_c Line | Count | Source | 400 | 338 | HIGHBD_DECL_SUFFIX) \ | 401 | 338 | { \ | 402 | 338 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 338 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 338 | } |
mc_tmpl.c:prep_8tap_regular_sharp_scaled_c Line | Count | Source | 400 | 1.01k | HIGHBD_DECL_SUFFIX) \ | 401 | 1.01k | { \ | 402 | 1.01k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 1.01k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 1.01k | } |
mc_tmpl.c:prep_8tap_sharp_regular_scaled_c Line | Count | Source | 400 | 2.45k | HIGHBD_DECL_SUFFIX) \ | 401 | 2.45k | { \ | 402 | 2.45k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 2.45k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 2.45k | } |
mc_tmpl.c:prep_8tap_sharp_smooth_scaled_c Line | Count | Source | 400 | 588 | HIGHBD_DECL_SUFFIX) \ | 401 | 588 | { \ | 402 | 588 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 588 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 588 | } |
mc_tmpl.c:prep_8tap_sharp_scaled_c Line | Count | Source | 400 | 2.21k | HIGHBD_DECL_SUFFIX) \ | 401 | 2.21k | { \ | 402 | 2.21k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 2.21k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 2.21k | } |
mc_tmpl.c:prep_8tap_smooth_regular_scaled_c Line | Count | Source | 400 | 691 | HIGHBD_DECL_SUFFIX) \ | 401 | 691 | { \ | 402 | 691 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 691 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 691 | } |
mc_tmpl.c:prep_8tap_smooth_scaled_c Line | Count | Source | 400 | 7.54k | HIGHBD_DECL_SUFFIX) \ | 401 | 7.54k | { \ | 402 | 7.54k | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 7.54k | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 7.54k | } |
mc_tmpl.c:prep_8tap_smooth_sharp_scaled_c Line | Count | Source | 400 | 789 | HIGHBD_DECL_SUFFIX) \ | 401 | 789 | { \ | 402 | 789 | prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \ | 403 | 789 | type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \ | 404 | 789 | } |
|
405 | | |
406 | | filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR) |
407 | | filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP) |
408 | | filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH) |
409 | | filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH) |
410 | | filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR) |
411 | | filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP) |
412 | | filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP) |
413 | | filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR) |
414 | | filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH) |
415 | | |
// Bilinear filter helpers.
//
// Every macro argument is parenthesized in the expansion so that expression
// arguments (a conditional index, a pointer expression such as `row + 1`)
// keep their intended grouping. `src`, `x` and `sh` are expanded more than
// once, so arguments must be free of side effects.

// Blend of src[x] and src[x + stride] with weight mxy out of 16:
// 16 * src[x] + mxy * (src[x + stride] - src[x]). Not rounded or shifted.
#define FILTER_BILIN(src, x, mxy, stride) \
    (16 * (src)[(x)] + ((mxy) * ((src)[(x) + (stride)] - (src)[(x)])))

// FILTER_BILIN followed by a round-to-nearest right shift by sh
// (the rounding term is half of 1 << sh, which is 0 when sh is 0).
#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
    ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh))

// Rounded result passed through iclip_pixel().
#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
    iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))

// Two-row variant: blends src1[x] and src2[x] with weight mxy out of 16.
#define FILTER_BILIN2(src1, src2, x, mxy) \
    (16 * (src1)[(x)] + ((mxy) * ((src2)[(x)] - (src1)[(x)])))

// FILTER_BILIN2 followed by a round-to-nearest right shift by sh.
#define FILTER_BILIN_RND2(src1, src2, x, mxy, sh) \
    ((FILTER_BILIN2(src1, src2, x, mxy) + ((1 << (sh)) >> 1)) >> (sh))

// Rounded two-row result passed through iclip_pixel().
#define FILTER_BILIN_CLIP2(src1, src2, x, mxy, sh) \
    iclip_pixel(FILTER_BILIN_RND2(src1, src2, x, mxy, sh))
433 | | |
434 | | static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride, |
435 | | const pixel *src, ptrdiff_t src_stride, |
436 | | const int w, int h, const int mx, const int my |
437 | | HIGHBD_DECL_SUFFIX) |
438 | 24.4k | { |
439 | 24.4k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
440 | 24.4k | const int intermediate_rnd = (1 << intermediate_bits) >> 1; |
441 | 24.4k | dst_stride = PXSTRIDE(dst_stride); |
442 | 24.4k | src_stride = PXSTRIDE(src_stride); |
443 | | |
444 | 24.4k | if (mx) { |
445 | 6.40k | if (my) { |
446 | 4.20k | int16_t mid[128 * 129], *mid_ptr = mid; |
447 | 4.20k | int tmp_h = h + 1; |
448 | | |
449 | 43.6k | do { |
450 | 418k | for (int x = 0; x < w; x++) |
451 | 374k | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
452 | 43.6k | 4 - intermediate_bits); |
453 | | |
454 | 43.6k | mid_ptr += 128; |
455 | 43.6k | src += src_stride; |
456 | 43.6k | } while (--tmp_h); |
457 | | |
458 | 4.20k | mid_ptr = mid; |
459 | 39.4k | do { |
460 | 384k | for (int x = 0; x < w; x++) |
461 | 344k | dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, |
462 | 39.4k | 4 + intermediate_bits); |
463 | | |
464 | 39.4k | mid_ptr += 128; |
465 | 39.4k | dst += dst_stride; |
466 | 39.4k | } while (--h); |
467 | 4.20k | } else { |
468 | 17.9k | do { |
469 | 202k | for (int x = 0; x < w; x++) { |
470 | 184k | const int px = FILTER_BILIN_RND(src, x, mx, 1, |
471 | 184k | 4 - intermediate_bits); |
472 | 184k | dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits); |
473 | 184k | } |
474 | | |
475 | 17.9k | dst += dst_stride; |
476 | 17.9k | src += src_stride; |
477 | 17.9k | } while (--h); |
478 | 2.20k | } |
479 | 18.0k | } else if (my) { |
480 | 20.5k | do { |
481 | 212k | for (int x = 0; x < w; x++) |
482 | 191k | dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4); |
483 | | |
484 | 20.5k | dst += dst_stride; |
485 | 20.5k | src += src_stride; |
486 | 20.5k | } while (--h); |
487 | 2.51k | } else |
488 | 15.5k | put_c(dst, dst_stride, src, src_stride, w, h); |
489 | 24.4k | } |
490 | | |
491 | | static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, |
492 | | const pixel *src, ptrdiff_t src_stride, |
493 | | const int w, int h, const int mx, int my, |
494 | | const int dx, const int dy |
495 | | HIGHBD_DECL_SUFFIX) |
496 | 23.1k | { |
497 | 23.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
498 | 23.1k | int16_t mid[128 * 2]; |
499 | 23.1k | int in_y = -2; |
500 | | |
501 | 282k | do { |
502 | 282k | int x; |
503 | 282k | int y = my >> 10; |
504 | 282k | int16_t *mid1 = &mid[(y & 1) * 128]; |
505 | 282k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
506 | 282k | int dmy = my & 0x3ff; |
507 | | |
508 | 378k | while (in_y < y) { |
509 | 95.6k | int imx = mx, ioff = 0; |
510 | 95.6k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
511 | | |
512 | 1.77M | for (x = 0; x < w; x++) { |
513 | 1.67M | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
514 | 1.67M | 4 - intermediate_bits); |
515 | 1.67M | imx += dx; |
516 | 1.67M | ioff += imx >> 10; |
517 | 1.67M | imx &= 0x3ff; |
518 | 1.67M | } |
519 | | |
520 | 95.6k | src += PXSTRIDE(src_stride); |
521 | 95.6k | in_y++; |
522 | 95.6k | } |
523 | | |
524 | 5.44M | for (x = 0; x < w; x++) |
525 | 5.15M | dst[x] = FILTER_BILIN_CLIP2(mid1, mid2, x, dmy >> 6, |
526 | 282k | 4 + intermediate_bits); |
527 | | |
528 | 282k | my += dy; |
529 | 282k | dst += PXSTRIDE(dst_stride); |
530 | 282k | } while (--h); |
531 | 23.1k | } |
532 | | |
533 | | static void prep_bilin_c(int16_t *tmp, |
534 | | const pixel *src, ptrdiff_t src_stride, |
535 | | const int w, int h, const int mx, const int my |
536 | | HIGHBD_DECL_SUFFIX) |
537 | 7.14k | { |
538 | 7.14k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
539 | 7.14k | src_stride = PXSTRIDE(src_stride); |
540 | | |
541 | 7.14k | if (mx) { |
542 | 3.00k | if (my) { |
543 | 2.03k | int16_t mid[128 * 129], *mid_ptr = mid; |
544 | 2.03k | int tmp_h = h + 1; |
545 | | |
546 | 21.5k | do { |
547 | 220k | for (int x = 0; x < w; x++) |
548 | 199k | mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1, |
549 | 21.5k | 4 - intermediate_bits); |
550 | | |
551 | 21.5k | mid_ptr += 128; |
552 | 21.5k | src += src_stride; |
553 | 21.5k | } while (--tmp_h); |
554 | | |
555 | 2.03k | mid_ptr = mid; |
556 | 19.5k | do { |
557 | 203k | for (int x = 0; x < w; x++) |
558 | 183k | tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4) - |
559 | 183k | PREP_BIAS; |
560 | | |
561 | 19.5k | mid_ptr += 128; |
562 | 19.5k | tmp += w; |
563 | 19.5k | } while (--h); |
564 | 2.03k | } else { |
565 | 9.56k | do { |
566 | 106k | for (int x = 0; x < w; x++) |
567 | 96.9k | tmp[x] = FILTER_BILIN_RND(src, x, mx, 1, |
568 | 96.9k | 4 - intermediate_bits) - |
569 | 96.9k | PREP_BIAS; |
570 | | |
571 | 9.56k | tmp += w; |
572 | 9.56k | src += src_stride; |
573 | 9.56k | } while (--h); |
574 | 967 | } |
575 | 4.14k | } else if (my) { |
576 | 13.7k | do { |
577 | 152k | for (int x = 0; x < w; x++) |
578 | 139k | tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride, |
579 | 139k | 4 - intermediate_bits) - PREP_BIAS; |
580 | | |
581 | 13.7k | tmp += w; |
582 | 13.7k | src += src_stride; |
583 | 13.7k | } while (--h); |
584 | 1.40k | } else |
585 | 2.73k | prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX); |
586 | 7.14k | } |
587 | | |
588 | | static void prep_bilin_scaled_c(int16_t *tmp, |
589 | | const pixel *src, ptrdiff_t src_stride, |
590 | | const int w, int h, const int mx, int my, |
591 | | const int dx, const int dy HIGHBD_DECL_SUFFIX) |
592 | 4.03k | { |
593 | 4.03k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
594 | 4.03k | int16_t mid[128 * 2]; |
595 | 4.03k | int in_y = -2; |
596 | | |
597 | 76.4k | do { |
598 | 76.4k | int x; |
599 | 76.4k | int y = my >> 10; |
600 | 76.4k | int16_t *mid1 = &mid[(y & 1) * 128]; |
601 | 76.4k | int16_t *mid2 = &mid[((y + 1) & 1) * 128]; |
602 | 76.4k | int dmy = my & 0x3ff; |
603 | | |
604 | 106k | while (in_y < y) { |
605 | 29.6k | int imx = mx, ioff = 0; |
606 | 29.6k | int16_t *mid_ptr = &mid[(in_y & 1) * 128]; |
607 | | |
608 | 654k | for (x = 0; x < w; x++) { |
609 | 624k | mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1, |
610 | 624k | 4 - intermediate_bits); |
611 | 624k | imx += dx; |
612 | 624k | ioff += imx >> 10; |
613 | 624k | imx &= 0x3ff; |
614 | 624k | } |
615 | | |
616 | 29.6k | src += PXSTRIDE(src_stride); |
617 | 29.6k | in_y++; |
618 | 29.6k | } |
619 | | |
620 | 1.80M | for (x = 0; x < w; x++) |
621 | 1.72M | tmp[x] = FILTER_BILIN_RND2(mid1, mid2, x, dmy >> 6, 4) - PREP_BIAS; |
622 | | |
623 | 76.4k | my += dy; |
624 | 76.4k | tmp += w; |
625 | 76.4k | } while (--h); |
626 | 4.03k | } |
627 | | |
628 | | static void avg_c(pixel *dst, const ptrdiff_t dst_stride, |
629 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h |
630 | | HIGHBD_DECL_SUFFIX) |
631 | 16.9k | { |
632 | 16.9k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
633 | 16.9k | const int sh = intermediate_bits + 1; |
634 | 16.9k | const int rnd = (1 << intermediate_bits) + PREP_BIAS * 2; |
635 | 285k | do { |
636 | 6.00M | for (int x = 0; x < w; x++) |
637 | 5.71M | dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh); |
638 | | |
639 | 285k | tmp1 += w; |
640 | 285k | tmp2 += w; |
641 | 285k | dst += PXSTRIDE(dst_stride); |
642 | 285k | } while (--h); |
643 | 16.9k | } |
644 | | |
645 | | static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride, |
646 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
647 | | const int weight HIGHBD_DECL_SUFFIX) |
648 | 3.23k | { |
649 | 3.23k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
650 | 3.23k | const int sh = intermediate_bits + 4; |
651 | 3.23k | const int rnd = (8 << intermediate_bits) + PREP_BIAS * 16; |
652 | 42.6k | do { |
653 | 640k | for (int x = 0; x < w; x++) |
654 | 597k | dst[x] = iclip_pixel((tmp1[x] * weight + |
655 | 597k | tmp2[x] * (16 - weight) + rnd) >> sh); |
656 | | |
657 | 42.6k | tmp1 += w; |
658 | 42.6k | tmp2 += w; |
659 | 42.6k | dst += PXSTRIDE(dst_stride); |
660 | 42.6k | } while (--h); |
661 | 3.23k | } |
662 | | |
663 | | static void mask_c(pixel *dst, const ptrdiff_t dst_stride, |
664 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
665 | | const uint8_t *mask HIGHBD_DECL_SUFFIX) |
666 | 3.41k | { |
667 | 3.41k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
668 | 3.41k | const int sh = intermediate_bits + 6; |
669 | 3.41k | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
670 | 49.0k | do { |
671 | 784k | for (int x = 0; x < w; x++) |
672 | 735k | dst[x] = iclip_pixel((tmp1[x] * mask[x] + |
673 | 735k | tmp2[x] * (64 - mask[x]) + rnd) >> sh); |
674 | | |
675 | 49.0k | tmp1 += w; |
676 | 49.0k | tmp2 += w; |
677 | 49.0k | mask += w; |
678 | 49.0k | dst += PXSTRIDE(dst_stride); |
679 | 49.0k | } while (--h); |
680 | 3.41k | } |
681 | | |
// Blends a and b with weights (64 - m) and m, which sum to 64; the +32
// rounds to nearest before the final >> 6. Every parameter is parenthesized
// so that expression arguments (e.g. `lo + hi`) are evaluated as a unit.
#define blend_px(a, b, m) ((((a) * (64 - (m)) + (b) * (m)) + 32) >> 6)
683 | | static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
684 | | const int w, int h, const uint8_t *mask) |
685 | 4.54k | { |
686 | 52.2k | do { |
687 | 693k | for (int x = 0; x < w; x++) { |
688 | 641k | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
689 | 641k | } |
690 | 52.2k | dst += PXSTRIDE(dst_stride); |
691 | 52.2k | tmp += w; |
692 | 52.2k | mask += w; |
693 | 52.2k | } while (--h); |
694 | 4.54k | } |
695 | | |
696 | | static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
697 | | const int w, int h) |
698 | 9.75k | { |
699 | 9.75k | const uint8_t *const mask = &dav1d_obmc_masks[w]; |
700 | 132k | do { |
701 | 812k | for (int x = 0; x < (w * 3) >> 2; x++) { |
702 | 680k | dst[x] = blend_px(dst[x], tmp[x], mask[x]); |
703 | 680k | } |
704 | 132k | dst += PXSTRIDE(dst_stride); |
705 | 132k | tmp += w; |
706 | 132k | } while (--h); |
707 | 9.75k | } |
708 | | |
709 | | static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp, |
710 | | const int w, int h) |
711 | 13.2k | { |
712 | 13.2k | const uint8_t *mask = &dav1d_obmc_masks[h]; |
713 | 13.2k | h = (h * 3) >> 2; |
714 | 77.3k | do { |
715 | 77.3k | const int m = *mask++; |
716 | 1.01M | for (int x = 0; x < w; x++) { |
717 | 938k | dst[x] = blend_px(dst[x], tmp[x], m); |
718 | 938k | } |
719 | 77.3k | dst += PXSTRIDE(dst_stride); |
720 | 77.3k | tmp += w; |
721 | 77.3k | } while (--h); |
722 | 13.2k | } |
723 | | |
724 | | static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride, |
725 | | const int16_t *tmp1, const int16_t *tmp2, const int w, int h, |
726 | | uint8_t *mask, const int sign, |
727 | | const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX) |
728 | 1.87k | { |
729 | | // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows, |
730 | | // and then load this intermediate to calculate final value for odd rows |
731 | 1.87k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
732 | 1.87k | const int bitdepth = bitdepth_from_max(bitdepth_max); |
733 | 1.87k | const int sh = intermediate_bits + 6; |
734 | 1.87k | const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64; |
735 | 1.87k | const int mask_sh = bitdepth + intermediate_bits - 4; |
736 | 1.87k | const int mask_rnd = 1 << (mask_sh - 5); |
737 | 54.0k | do { |
738 | 1.28M | for (int x = 0; x < w; x++) { |
739 | 1.23M | const int tmpdiff = tmp1[x] - tmp2[x]; |
740 | 1.23M | const int m = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
741 | 1.23M | dst[x] = iclip_pixel((tmpdiff * m + tmp2[x] * 64 + rnd) >> sh); |
742 | | |
743 | 1.23M | if (ss_hor) { |
744 | 128k | x++; |
745 | | |
746 | 128k | const int tmpdiff = tmp1[x] - tmp2[x]; |
747 | 128k | const int n = imin(38 + ((abs(tmpdiff) + mask_rnd) >> mask_sh), 64); |
748 | 128k | dst[x] = iclip_pixel((tmpdiff * n + tmp2[x] * 64 + rnd) >> sh); |
749 | | |
750 | 128k | if (h & ss_ver) { |
751 | 62.8k | mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2; |
752 | 65.4k | } else if (ss_ver) { |
753 | 62.8k | mask[x >> 1] = m + n; |
754 | 62.8k | } else { |
755 | 2.59k | mask[x >> 1] = (m + n + 1 - sign) >> 1; |
756 | 2.59k | } |
757 | 1.10M | } else { |
758 | 1.10M | mask[x] = m; |
759 | 1.10M | } |
760 | 1.23M | } |
761 | | |
762 | 54.0k | tmp1 += w; |
763 | 54.0k | tmp2 += w; |
764 | 54.0k | dst += PXSTRIDE(dst_stride); |
765 | 54.0k | if (!ss_ver || (h & 1)) mask += w >> ss_hor; |
766 | 54.0k | } while (--h); |
767 | 1.87k | } |
768 | | |
769 | | #define w_mask_fns(ssn, ss_hor, ss_ver) \ |
770 | | static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \ |
771 | | const int16_t *const tmp1, const int16_t *const tmp2, \ |
772 | | const int w, const int h, uint8_t *mask, \ |
773 | 1.87k | const int sign HIGHBD_DECL_SUFFIX) \ |
774 | 1.87k | { \ |
775 | 1.87k | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ |
776 | 1.87k | HIGHBD_TAIL_SUFFIX); \ |
777 | 1.87k | } Line | Count | Source | 773 | 1.49k | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 1.49k | { \ | 775 | 1.49k | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 1.49k | HIGHBD_TAIL_SUFFIX); \ | 777 | 1.49k | } |
Line | Count | Source | 773 | 30 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 30 | { \ | 775 | 30 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 30 | HIGHBD_TAIL_SUFFIX); \ | 777 | 30 | } |
Line | Count | Source | 773 | 348 | const int sign HIGHBD_DECL_SUFFIX) \ | 774 | 348 | { \ | 775 | 348 | w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \ | 776 | 348 | HIGHBD_TAIL_SUFFIX); \ | 777 | 348 | } |
|
778 | | |
779 | | w_mask_fns(444, 0, 0); |
780 | | w_mask_fns(422, 1, 0); |
781 | | w_mask_fns(420, 1, 1); |
782 | | |
783 | | #undef w_mask_fns |
784 | | |
// 8-tap filter around src[x]: taps F[0..7] are applied to the samples at
// x - 3 * stride .. x + 4 * stride, then the sum is rounded to nearest and
// shifted right by sh. All parameters are parenthesized so that expression
// arguments (e.g. a computed offset or stride) are evaluated as a unit.
#define FILTER_WARP_RND(src, x, F, stride, sh) \
    (((F)[0] * (src)[(x) - 3 * (stride)] + \
      (F)[1] * (src)[(x) - 2 * (stride)] + \
      (F)[2] * (src)[(x) - 1 * (stride)] + \
      (F)[3] * (src)[(x) + 0 * (stride)] + \
      (F)[4] * (src)[(x) + 1 * (stride)] + \
      (F)[5] * (src)[(x) + 2 * (stride)] + \
      (F)[6] * (src)[(x) + 3 * (stride)] + \
      (F)[7] * (src)[(x) + 4 * (stride)] + \
      ((1 << (sh)) >> 1)) >> (sh))

// Same as FILTER_WARP_RND, with the result passed through iclip_pixel().
#define FILTER_WARP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh))
798 | | |
799 | | static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride, |
800 | | const pixel *src, const ptrdiff_t src_stride, |
801 | | const int16_t *const abcd, int mx, int my |
802 | | HIGHBD_DECL_SUFFIX) |
803 | 19.9k | { |
804 | 19.9k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
805 | 19.9k | int16_t mid[15 * 8], *mid_ptr = mid; |
806 | | |
807 | 19.9k | src -= 3 * PXSTRIDE(src_stride); |
808 | 318k | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
809 | 2.66M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
810 | 2.36M | const int8_t *const filter = |
811 | 2.36M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
812 | | |
813 | 2.36M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
814 | 2.36M | 7 - intermediate_bits); |
815 | 2.36M | } |
816 | 298k | src += PXSTRIDE(src_stride); |
817 | 298k | mid_ptr += 8; |
818 | 298k | } |
819 | | |
820 | 19.9k | mid_ptr = &mid[3 * 8]; |
821 | 179k | for (int y = 0; y < 8; y++, my += abcd[3]) { |
822 | 1.42M | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
823 | 1.26M | const int8_t *const filter = |
824 | 1.26M | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
825 | | |
826 | 1.26M | dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, |
827 | 1.26M | 7 + intermediate_bits); |
828 | 1.26M | } |
829 | 159k | mid_ptr += 8; |
830 | 159k | dst += PXSTRIDE(dst_stride); |
831 | 159k | } |
832 | 19.9k | } |
833 | | |
834 | | static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride, |
835 | | const pixel *src, const ptrdiff_t src_stride, |
836 | | const int16_t *const abcd, int mx, int my |
837 | | HIGHBD_DECL_SUFFIX) |
838 | 11.1k | { |
839 | 11.1k | const int intermediate_bits = get_intermediate_bits(bitdepth_max); |
840 | 11.1k | int16_t mid[15 * 8], *mid_ptr = mid; |
841 | | |
842 | 11.1k | src -= 3 * PXSTRIDE(src_stride); |
843 | 177k | for (int y = 0; y < 15; y++, mx += abcd[1]) { |
844 | 1.49M | for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) { |
845 | 1.33M | const int8_t *const filter = |
846 | 1.33M | dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)]; |
847 | | |
848 | 1.33M | mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, |
849 | 1.33M | 7 - intermediate_bits); |
850 | 1.33M | } |
851 | 166k | src += PXSTRIDE(src_stride); |
852 | 166k | mid_ptr += 8; |
853 | 166k | } |
854 | | |
855 | 11.1k | mid_ptr = &mid[3 * 8]; |
856 | 99.9k | for (int y = 0; y < 8; y++, my += abcd[3]) { |
857 | 799k | for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) { |
858 | 710k | const int8_t *const filter = |
859 | 710k | dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)]; |
860 | | |
861 | 710k | tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7) - PREP_BIAS; |
862 | 710k | } |
863 | 88.8k | mid_ptr += 8; |
864 | 88.8k | tmp += tmp_stride; |
865 | 88.8k | } |
866 | 11.1k | } |
867 | | |
868 | | static void emu_edge_c(const intptr_t bw, const intptr_t bh, |
869 | | const intptr_t iw, const intptr_t ih, |
870 | | const intptr_t x, const intptr_t y, |
871 | | pixel *dst, const ptrdiff_t dst_stride, |
872 | | const pixel *ref, const ptrdiff_t ref_stride) |
873 | 205k | { |
874 | | // find offset in reference of visible block to copy |
875 | 205k | ref += iclip((int) y, 0, (int) ih - 1) * PXSTRIDE(ref_stride) + |
876 | 205k | iclip((int) x, 0, (int) iw - 1); |
877 | | |
878 | | // number of pixels to extend (left, right, top, bottom) |
879 | 205k | const int left_ext = iclip((int) -x, 0, (int) bw - 1); |
880 | 205k | const int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1); |
881 | 205k | assert(left_ext + right_ext < bw); |
882 | 205k | const int top_ext = iclip((int) -y, 0, (int) bh - 1); |
883 | 205k | const int bottom_ext = iclip((int) (y + bh - ih), 0, (int) bh - 1); |
884 | 205k | assert(top_ext + bottom_ext < bh); |
885 | | |
886 | | // copy visible portion first |
887 | 205k | pixel *blk = dst + top_ext * PXSTRIDE(dst_stride); |
888 | 205k | const int center_w = (int) (bw - left_ext - right_ext); |
889 | 205k | const int center_h = (int) (bh - top_ext - bottom_ext); |
890 | 2.11M | for (int y = 0; y < center_h; y++) { |
891 | 1.91M | pixel_copy(blk + left_ext, ref, center_w); |
892 | | // extend left edge for this line |
893 | 1.91M | if (left_ext) |
894 | 766k | pixel_set(blk, blk[left_ext], left_ext); |
895 | | // extend right edge for this line |
896 | 1.91M | if (right_ext) |
897 | 1.39M | pixel_set(blk + left_ext + center_w, blk[left_ext + center_w - 1], |
898 | 1.39M | right_ext); |
899 | 1.91M | ref += PXSTRIDE(ref_stride); |
900 | 1.91M | blk += PXSTRIDE(dst_stride); |
901 | 1.91M | } |
902 | | |
903 | | // copy top |
904 | 205k | blk = dst + top_ext * PXSTRIDE(dst_stride); |
905 | 461k | for (int y = 0; y < top_ext; y++) { |
906 | 255k | pixel_copy(dst, blk, bw); |
907 | 255k | dst += PXSTRIDE(dst_stride); |
908 | 255k | } |
909 | | |
910 | | // copy bottom |
911 | 205k | dst += center_h * PXSTRIDE(dst_stride); |
912 | 820k | for (int y = 0; y < bottom_ext; y++) { |
913 | 614k | pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], bw); |
914 | 614k | dst += PXSTRIDE(dst_stride); |
915 | 614k | } |
916 | 205k | } |
917 | | |
918 | | static void resize_c(pixel *dst, const ptrdiff_t dst_stride, |
919 | | const pixel *src, const ptrdiff_t src_stride, |
920 | | const int dst_w, int h, const int src_w, |
921 | | const int dx, const int mx0 HIGHBD_DECL_SUFFIX) |
922 | 67.4k | { |
923 | 3.19M | do { |
924 | 3.19M | int mx = mx0, src_x = -1; |
925 | 258M | for (int x = 0; x < dst_w; x++) { |
926 | 254M | const int8_t *const F = dav1d_resize_filter[mx >> 8]; |
927 | 254M | dst[x] = iclip_pixel((-(F[0] * src[iclip(src_x - 3, 0, src_w - 1)] + |
928 | 254M | F[1] * src[iclip(src_x - 2, 0, src_w - 1)] + |
929 | 254M | F[2] * src[iclip(src_x - 1, 0, src_w - 1)] + |
930 | 254M | F[3] * src[iclip(src_x + 0, 0, src_w - 1)] + |
931 | 254M | F[4] * src[iclip(src_x + 1, 0, src_w - 1)] + |
932 | 254M | F[5] * src[iclip(src_x + 2, 0, src_w - 1)] + |
933 | 254M | F[6] * src[iclip(src_x + 3, 0, src_w - 1)] + |
934 | 254M | F[7] * src[iclip(src_x + 4, 0, src_w - 1)]) + |
935 | 254M | 64) >> 7); |
936 | 254M | mx += dx; |
937 | 254M | src_x += mx >> 14; |
938 | 254M | mx &= 0x3fff; |
939 | 254M | } |
940 | | |
941 | 3.19M | dst += PXSTRIDE(dst_stride); |
942 | 3.19M | src += PXSTRIDE(src_stride); |
943 | 3.19M | } while (--h); |
944 | 67.4k | } |
945 | | |
946 | | #if HAVE_ASM |
947 | | #if ARCH_AARCH64 || ARCH_ARM |
948 | | #include "src/arm/mc.h" |
949 | | #elif ARCH_LOONGARCH64 |
950 | | #include "src/loongarch/mc.h" |
951 | | #elif ARCH_PPC64LE |
952 | | #include "src/ppc/mc.h" |
953 | | #elif ARCH_RISCV |
954 | | #include "src/riscv/mc.h" |
955 | | #elif ARCH_X86 |
956 | | #include "src/x86/mc.h" |
957 | | #endif |
958 | | #endif |
959 | | |
960 | 38.3k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { |
961 | 383k | #define init_mc_fns(type, name) do { \ |
962 | 383k | c->mc [type] = put_##name##_c; \ |
963 | 383k | c->mc_scaled [type] = put_##name##_scaled_c; \ |
964 | 383k | c->mct [type] = prep_##name##_c; \ |
965 | 383k | c->mct_scaled[type] = prep_##name##_scaled_c; \ |
966 | 383k | } while (0) |
967 | | |
968 | 38.3k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); |
969 | 38.3k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); |
970 | 38.3k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); |
971 | 38.3k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); |
972 | 38.3k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); |
973 | 38.3k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); |
974 | 38.3k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); |
975 | 38.3k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); |
976 | 38.3k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); |
977 | 38.3k | init_mc_fns(FILTER_2D_BILINEAR, bilin); |
978 | | |
979 | 38.3k | c->avg = avg_c; |
980 | 38.3k | c->w_avg = w_avg_c; |
981 | 38.3k | c->mask = mask_c; |
982 | 38.3k | c->blend = blend_c; |
983 | 38.3k | c->blend_v = blend_v_c; |
984 | 38.3k | c->blend_h = blend_h_c; |
985 | 38.3k | c->w_mask[0] = w_mask_444_c; |
986 | 38.3k | c->w_mask[1] = w_mask_422_c; |
987 | 38.3k | c->w_mask[2] = w_mask_420_c; |
988 | 38.3k | c->warp8x8 = warp_affine_8x8_c; |
989 | 38.3k | c->warp8x8t = warp_affine_8x8t_c; |
990 | 38.3k | c->emu_edge = emu_edge_c; |
991 | 38.3k | c->resize = resize_c; |
992 | | |
993 | | #if HAVE_ASM |
994 | | #if ARCH_AARCH64 || ARCH_ARM |
995 | | mc_dsp_init_arm(c); |
996 | | #elif ARCH_LOONGARCH64 |
997 | | mc_dsp_init_loongarch(c); |
998 | | #elif ARCH_PPC64LE |
999 | | mc_dsp_init_ppc(c); |
1000 | | #elif ARCH_RISCV |
1001 | | mc_dsp_init_riscv(c); |
1002 | | #elif ARCH_X86 |
1003 | | mc_dsp_init_x86(c); |
1004 | | #endif |
1005 | | #endif |
1006 | 38.3k | } Line | Count | Source | 960 | 17.2k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 17.2k | #define init_mc_fns(type, name) do { \ | 962 | 17.2k | c->mc [type] = put_##name##_c; \ | 963 | 17.2k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 17.2k | c->mct [type] = prep_##name##_c; \ | 965 | 17.2k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 17.2k | } while (0) | 967 | | | 968 | 17.2k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 17.2k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 17.2k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 17.2k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 17.2k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 17.2k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 17.2k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 17.2k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 17.2k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 17.2k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 17.2k | c->avg = avg_c; | 980 | 17.2k | c->w_avg = w_avg_c; | 981 | 17.2k | c->mask = mask_c; | 982 | 17.2k | c->blend = blend_c; | 983 | 17.2k | c->blend_v = blend_v_c; | 984 | 17.2k | c->blend_h = blend_h_c; | 985 | 17.2k | c->w_mask[0] = w_mask_444_c; | 986 | 17.2k | c->w_mask[1] = w_mask_422_c; | 987 | 17.2k | c->w_mask[2] = w_mask_420_c; | 988 | 17.2k | c->warp8x8 = warp_affine_8x8_c; | 989 | 17.2k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 17.2k | c->emu_edge = emu_edge_c; | 991 | 17.2k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 
1002 | | #elif ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 17.2k | } |
Line | Count | Source | 960 | 21.1k | COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) { | 961 | 21.1k | #define init_mc_fns(type, name) do { \ | 962 | 21.1k | c->mc [type] = put_##name##_c; \ | 963 | 21.1k | c->mc_scaled [type] = put_##name##_scaled_c; \ | 964 | 21.1k | c->mct [type] = prep_##name##_c; \ | 965 | 21.1k | c->mct_scaled[type] = prep_##name##_scaled_c; \ | 966 | 21.1k | } while (0) | 967 | | | 968 | 21.1k | init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular); | 969 | 21.1k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth); | 970 | 21.1k | init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp); | 971 | 21.1k | init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular); | 972 | 21.1k | init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth); | 973 | 21.1k | init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp); | 974 | 21.1k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular); | 975 | 21.1k | init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth); | 976 | 21.1k | init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp); | 977 | 21.1k | init_mc_fns(FILTER_2D_BILINEAR, bilin); | 978 | | | 979 | 21.1k | c->avg = avg_c; | 980 | 21.1k | c->w_avg = w_avg_c; | 981 | 21.1k | c->mask = mask_c; | 982 | 21.1k | c->blend = blend_c; | 983 | 21.1k | c->blend_v = blend_v_c; | 984 | 21.1k | c->blend_h = blend_h_c; | 985 | 21.1k | c->w_mask[0] = w_mask_444_c; | 986 | 21.1k | c->w_mask[1] = w_mask_422_c; | 987 | 21.1k | c->w_mask[2] = w_mask_420_c; | 988 | 21.1k | c->warp8x8 = warp_affine_8x8_c; | 989 | 21.1k | c->warp8x8t = warp_affine_8x8t_c; | 990 | 21.1k | c->emu_edge = emu_edge_c; | 991 | 21.1k | c->resize = resize_c; | 992 | | | 993 | | #if HAVE_ASM | 994 | | #if ARCH_AARCH64 || ARCH_ARM | 995 | | mc_dsp_init_arm(c); | 996 | | #elif ARCH_LOONGARCH64 | 997 | | mc_dsp_init_loongarch(c); | 998 | | #elif ARCH_PPC64LE | 999 | | mc_dsp_init_ppc(c); | 1000 | | #elif ARCH_RISCV | 1001 | | mc_dsp_init_riscv(c); | 1002 | | #elif 
ARCH_X86 | 1003 | | mc_dsp_init_x86(c); | 1004 | | #endif | 1005 | | #endif | 1006 | 21.1k | } |
|