/work/dav1d/src/ipred_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | |
33 | | #include "common/attributes.h" |
34 | | #include "common/intops.h" |
35 | | |
36 | | #include "src/ipred.h" |
37 | | #include "src/tables.h" |
38 | | |
39 | | static NOINLINE void |
40 | | splat_dc(pixel *dst, const ptrdiff_t stride, |
41 | | const int width, const int height, const int dc HIGHBD_DECL_SUFFIX) |
42 | 330k | { |
43 | 330k | #if BITDEPTH == 8 |
44 | 330k | assert(dc <= 0xff); |
45 | 330k | if (width > 4) { |
46 | 136k | const uint64_t dcN = dc * 0x0101010101010101ULL; |
47 | 3.96M | for (int y = 0; y < height; y++) { |
48 | 19.7M | for (int x = 0; x < width; x += sizeof(dcN)) |
49 | 15.8M | *((uint64_t *) &dst[x]) = dcN; |
50 | 3.82M | dst += PXSTRIDE(stride); |
51 | 3.82M | } |
52 | 194k | } else { |
53 | 194k | const unsigned dcN = dc * 0x01010101U; |
54 | 1.00M | for (int y = 0; y < height; y++) { |
55 | 1.61M | for (int x = 0; x < width; x += sizeof(dcN)) |
56 | 807k | *((unsigned *) &dst[x]) = dcN; |
57 | 807k | dst += PXSTRIDE(stride); |
58 | 807k | } |
59 | 194k | } |
60 | | #else |
61 | | assert(dc <= bitdepth_max); |
62 | | const uint64_t dcN = dc * 0x0001000100010001ULL; |
63 | | for (int y = 0; y < height; y++) { |
64 | | for (int x = 0; x < width; x += sizeof(dcN) >> 1) |
65 | | *((uint64_t *) &dst[x]) = dcN; |
66 | | dst += PXSTRIDE(stride); |
67 | | } |
68 | | #endif |
69 | 330k | } |
70 | | |
71 | | static NOINLINE void |
72 | | cfl_pred(pixel *dst, const ptrdiff_t stride, |
73 | | const int width, const int height, const int dc, |
74 | | const int16_t *ac, const int alpha HIGHBD_DECL_SUFFIX) |
75 | 177k | { |
76 | 1.49M | for (int y = 0; y < height; y++) { |
77 | 14.2M | for (int x = 0; x < width; x++) { |
78 | 12.9M | const int diff = alpha * ac[x]; |
79 | 12.9M | dst[x] = iclip_pixel(dc + apply_sign((abs(diff) + 32) >> 6, diff)); |
80 | 12.9M | } |
81 | 1.31M | ac += width; |
82 | 1.31M | dst += PXSTRIDE(stride); |
83 | 1.31M | } |
84 | 177k | } |
85 | | |
86 | 350k | static unsigned dc_gen_top(const pixel *const topleft, const int width) { |
87 | 350k | unsigned dc = width >> 1; |
88 | 5.40M | for (int i = 0; i < width; i++) |
89 | 5.04M | dc += topleft[1 + i]; |
90 | 350k | return dc >> ctz(width); |
91 | 350k | } |
92 | | |
93 | | static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride, |
94 | | const pixel *const topleft, |
95 | | const int width, const int height, const int a, |
96 | | const int max_width, const int max_height |
97 | | HIGHBD_DECL_SUFFIX) |
98 | 275k | { |
99 | 275k | splat_dc(dst, stride, width, height, dc_gen_top(topleft, width) |
100 | 275k | HIGHBD_TAIL_SUFFIX); |
101 | 275k | } |
102 | | |
103 | | static void ipred_cfl_top_c(pixel *dst, const ptrdiff_t stride, |
104 | | const pixel *const topleft, |
105 | | const int width, const int height, |
106 | | const int16_t *ac, const int alpha |
107 | | HIGHBD_DECL_SUFFIX) |
108 | 74.9k | { |
109 | 74.9k | cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha |
110 | 74.9k | HIGHBD_TAIL_SUFFIX); |
111 | 74.9k | } |
112 | | |
113 | 136k | static unsigned dc_gen_left(const pixel *const topleft, const int height) { |
114 | 136k | unsigned dc = height >> 1; |
115 | 2.06M | for (int i = 0; i < height; i++) |
116 | 1.92M | dc += topleft[-(1 + i)]; |
117 | 136k | return dc >> ctz(height); |
118 | 136k | } |
119 | | |
120 | | static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride, |
121 | | const pixel *const topleft, |
122 | | const int width, const int height, const int a, |
123 | | const int max_width, const int max_height |
124 | | HIGHBD_DECL_SUFFIX) |
125 | 100k | { |
126 | 100k | splat_dc(dst, stride, width, height, dc_gen_left(topleft, height) |
127 | 100k | HIGHBD_TAIL_SUFFIX); |
128 | 100k | } |
129 | | |
130 | | static void ipred_cfl_left_c(pixel *dst, const ptrdiff_t stride, |
131 | | const pixel *const topleft, |
132 | | const int width, const int height, |
133 | | const int16_t *ac, const int alpha |
134 | | HIGHBD_DECL_SUFFIX) |
135 | 36.1k | { |
136 | 36.1k | const unsigned dc = dc_gen_left(topleft, height); |
137 | 36.1k | cfl_pred(dst, stride, width, height, dc, ac, alpha HIGHBD_TAIL_SUFFIX); |
138 | 36.1k | } |
139 | | |
/*
 * Fixed-point reciprocals used by dc_gen() for non-square blocks:
 * multiplying by MULTIPLIER_1x2 and shifting right by BASE_SHIFT
 * approximates a division by 3, MULTIPLIER_1x4 a division by 5
 * (0x5556 ~= 2^16 / 3, 0x3334 ~= 2^16 / 5,
 *  0xAAAB ~= 2^17 / 3, 0x6667 ~= 2^17 / 5).
 */
#if BITDEPTH == 8
#define MULTIPLIER_1x2 0x5556
#define MULTIPLIER_1x4 0x3334
#define BASE_SHIFT     16
#else
#define MULTIPLIER_1x2 0xAAAB
#define MULTIPLIER_1x4 0x6667
#define BASE_SHIFT     17
#endif
149 | | |
150 | | static unsigned dc_gen(const pixel *const topleft, |
151 | | const int width, const int height) |
152 | 320k | { |
153 | 320k | unsigned dc = (width + height) >> 1; |
154 | 2.40M | for (int i = 0; i < width; i++) |
155 | 2.08M | dc += topleft[i + 1]; |
156 | 2.39M | for (int i = 0; i < height; i++) |
157 | 2.07M | dc += topleft[-(i + 1)]; |
158 | 320k | dc >>= ctz(width + height); |
159 | | |
160 | 320k | if (width != height) { |
161 | 21.4k | dc *= (width > height * 2 || height > width * 2) ? MULTIPLIER_1x4 : |
162 | 21.4k | MULTIPLIER_1x2; |
163 | 21.4k | dc >>= BASE_SHIFT; |
164 | 21.4k | } |
165 | 320k | return dc; |
166 | 320k | } |
167 | | |
168 | | static void ipred_dc_c(pixel *dst, const ptrdiff_t stride, |
169 | | const pixel *const topleft, |
170 | | const int width, const int height, const int a, |
171 | | const int max_width, const int max_height |
172 | | HIGHBD_DECL_SUFFIX) |
173 | 256k | { |
174 | 256k | splat_dc(dst, stride, width, height, dc_gen(topleft, width, height) |
175 | 256k | HIGHBD_TAIL_SUFFIX); |
176 | 256k | } |
177 | | |
178 | | static void ipred_cfl_c(pixel *dst, const ptrdiff_t stride, |
179 | | const pixel *const topleft, |
180 | | const int width, const int height, |
181 | | const int16_t *ac, const int alpha |
182 | | HIGHBD_DECL_SUFFIX) |
183 | 64.9k | { |
184 | 64.9k | unsigned dc = dc_gen(topleft, width, height); |
185 | 64.9k | cfl_pred(dst, stride, width, height, dc, ac, alpha HIGHBD_TAIL_SUFFIX); |
186 | 64.9k | } |
187 | | |
188 | | #undef MULTIPLIER_1x2 |
189 | | #undef MULTIPLIER_1x4 |
190 | | #undef BASE_SHIFT |
191 | | |
192 | | static void ipred_dc_128_c(pixel *dst, const ptrdiff_t stride, |
193 | | const pixel *const topleft, |
194 | | const int width, const int height, const int a, |
195 | | const int max_width, const int max_height |
196 | | HIGHBD_DECL_SUFFIX) |
197 | 26.1k | { |
198 | | #if BITDEPTH == 16 |
199 | | const int dc = (bitdepth_max + 1) >> 1; |
200 | | #else |
201 | 26.1k | const int dc = 128; |
202 | 26.1k | #endif |
203 | 26.1k | splat_dc(dst, stride, width, height, dc HIGHBD_TAIL_SUFFIX); |
204 | 26.1k | } |
205 | | |
206 | | static void ipred_cfl_128_c(pixel *dst, const ptrdiff_t stride, |
207 | | const pixel *const topleft, |
208 | | const int width, const int height, |
209 | | const int16_t *ac, const int alpha |
210 | | HIGHBD_DECL_SUFFIX) |
211 | 1.34k | { |
212 | | #if BITDEPTH == 16 |
213 | | const int dc = (bitdepth_max + 1) >> 1; |
214 | | #else |
215 | 1.34k | const int dc = 128; |
216 | 1.34k | #endif |
217 | 1.34k | cfl_pred(dst, stride, width, height, dc, ac, alpha HIGHBD_TAIL_SUFFIX); |
218 | 1.34k | } |
219 | | |
220 | | static void ipred_v_c(pixel *dst, const ptrdiff_t stride, |
221 | | const pixel *const topleft, |
222 | | const int width, const int height, const int a, |
223 | | const int max_width, const int max_height |
224 | | HIGHBD_DECL_SUFFIX) |
225 | 99.4k | { |
226 | 1.34M | for (int y = 0; y < height; y++) { |
227 | 1.24M | pixel_copy(dst, topleft + 1, width); |
228 | 1.24M | dst += PXSTRIDE(stride); |
229 | 1.24M | } |
230 | 99.4k | } |
231 | | |
232 | | static void ipred_h_c(pixel *dst, const ptrdiff_t stride, |
233 | | const pixel *const topleft, |
234 | | const int width, const int height, const int a, |
235 | | const int max_width, const int max_height |
236 | | HIGHBD_DECL_SUFFIX) |
237 | 95.1k | { |
238 | 1.26M | for (int y = 0; y < height; y++) { |
239 | 1.16M | pixel_set(dst, topleft[-(1 + y)], width); |
240 | 1.16M | dst += PXSTRIDE(stride); |
241 | 1.16M | } |
242 | 95.1k | } |
243 | | |
244 | | static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride, |
245 | | const pixel *const tl_ptr, |
246 | | const int width, const int height, const int a, |
247 | | const int max_width, const int max_height |
248 | | HIGHBD_DECL_SUFFIX) |
249 | 63.1k | { |
250 | 63.1k | const int topleft = tl_ptr[0]; |
251 | 384k | for (int y = 0; y < height; y++) { |
252 | 320k | const int left = tl_ptr[-(y + 1)]; |
253 | 2.79M | for (int x = 0; x < width; x++) { |
254 | 2.47M | const int top = tl_ptr[1 + x]; |
255 | 2.47M | const int base = left + top - topleft; |
256 | 2.47M | const int ldiff = abs(left - base); |
257 | 2.47M | const int tdiff = abs(top - base); |
258 | 2.47M | const int tldiff = abs(topleft - base); |
259 | | |
260 | 2.47M | dst[x] = ldiff <= tdiff && ldiff <= tldiff ? left : |
261 | 2.47M | tdiff <= tldiff ? top : topleft; |
262 | 2.47M | } |
263 | 320k | dst += PXSTRIDE(stride); |
264 | 320k | } |
265 | 63.1k | } |
266 | | |
267 | | static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride, |
268 | | const pixel *const topleft, |
269 | | const int width, const int height, const int a, |
270 | | const int max_width, const int max_height |
271 | | HIGHBD_DECL_SUFFIX) |
272 | 82.8k | { |
273 | 82.8k | const uint8_t *const weights_hor = &dav1d_sm_weights[width]; |
274 | 82.8k | const uint8_t *const weights_ver = &dav1d_sm_weights[height]; |
275 | 82.8k | const int right = topleft[width], bottom = topleft[-height]; |
276 | | |
277 | 1.13M | for (int y = 0; y < height; y++) { |
278 | 23.5M | for (int x = 0; x < width; x++) { |
279 | 22.4M | const int pred = weights_ver[y] * topleft[1 + x] + |
280 | 22.4M | (256 - weights_ver[y]) * bottom + |
281 | 22.4M | weights_hor[x] * topleft[-(1 + y)] + |
282 | 22.4M | (256 - weights_hor[x]) * right; |
283 | 22.4M | dst[x] = (pred + 256) >> 9; |
284 | 22.4M | } |
285 | 1.05M | dst += PXSTRIDE(stride); |
286 | 1.05M | } |
287 | 82.8k | } |
288 | | |
289 | | static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride, |
290 | | const pixel *const topleft, |
291 | | const int width, const int height, const int a, |
292 | | const int max_width, const int max_height |
293 | | HIGHBD_DECL_SUFFIX) |
294 | 24.9k | { |
295 | 24.9k | const uint8_t *const weights_ver = &dav1d_sm_weights[height]; |
296 | 24.9k | const int bottom = topleft[-height]; |
297 | | |
298 | 339k | for (int y = 0; y < height; y++) { |
299 | 7.26M | for (int x = 0; x < width; x++) { |
300 | 6.94M | const int pred = weights_ver[y] * topleft[1 + x] + |
301 | 6.94M | (256 - weights_ver[y]) * bottom; |
302 | 6.94M | dst[x] = (pred + 128) >> 8; |
303 | 6.94M | } |
304 | 314k | dst += PXSTRIDE(stride); |
305 | 314k | } |
306 | 24.9k | } |
307 | | |
308 | | static void ipred_smooth_h_c(pixel *dst, const ptrdiff_t stride, |
309 | | const pixel *const topleft, |
310 | | const int width, const int height, const int a, |
311 | | const int max_width, const int max_height |
312 | | HIGHBD_DECL_SUFFIX) |
313 | 32.7k | { |
314 | 32.7k | const uint8_t *const weights_hor = &dav1d_sm_weights[width]; |
315 | 32.7k | const int right = topleft[width]; |
316 | | |
317 | 479k | for (int y = 0; y < height; y++) { |
318 | 10.5M | for (int x = 0; x < width; x++) { |
319 | 10.1M | const int pred = weights_hor[x] * topleft[-(y + 1)] + |
320 | 10.1M | (256 - weights_hor[x]) * right; |
321 | 10.1M | dst[x] = (pred + 128) >> 8; |
322 | 10.1M | } |
323 | 446k | dst += PXSTRIDE(stride); |
324 | 446k | } |
325 | 32.7k | } |
326 | | |
327 | | static NOINLINE int get_filter_strength(const int wh, const int angle, |
328 | | const int is_sm) |
329 | 120k | { |
330 | 120k | if (is_sm) { |
331 | 21.5k | if (wh <= 8) { |
332 | 4.73k | if (angle >= 64) return 2; |
333 | 1.99k | if (angle >= 40) return 1; |
334 | 16.8k | } else if (wh <= 16) { |
335 | 6.43k | if (angle >= 48) return 2; |
336 | 4.39k | if (angle >= 20) return 1; |
337 | 10.4k | } else if (wh <= 24) { |
338 | 3.41k | if (angle >= 4) return 3; |
339 | 7.01k | } else { |
340 | 7.01k | return 3; |
341 | 7.01k | } |
342 | 98.5k | } else { |
343 | 98.5k | if (wh <= 8) { |
344 | 22.4k | if (angle >= 56) return 1; |
345 | 76.0k | } else if (wh <= 16) { |
346 | 12.7k | if (angle >= 40) return 1; |
347 | 63.2k | } else if (wh <= 24) { |
348 | 16.1k | if (angle >= 32) return 3; |
349 | 7.51k | if (angle >= 16) return 2; |
350 | 3.80k | if (angle >= 8) return 1; |
351 | 47.1k | } else if (wh <= 32) { |
352 | 9.67k | if (angle >= 32) return 3; |
353 | 4.72k | if (angle >= 4) return 2; |
354 | 973 | return 1; |
355 | 37.4k | } else { |
356 | 37.4k | return 3; |
357 | 37.4k | } |
358 | 98.5k | } |
359 | 13.6k | return 0; |
360 | 120k | } |
361 | | |
362 | | static NOINLINE void filter_edge(pixel *const out, const int sz, |
363 | | const int lim_from, const int lim_to, |
364 | | const pixel *const in, const int from, |
365 | | const int to, const int strength) |
366 | 106k | { |
367 | 106k | static const uint8_t kernel[3][5] = { |
368 | 106k | { 0, 4, 8, 4, 0 }, |
369 | 106k | { 0, 5, 6, 5, 0 }, |
370 | 106k | { 2, 4, 4, 4, 2 } |
371 | 106k | }; |
372 | | |
373 | 106k | assert(strength > 0); |
374 | 106k | int i = 0; |
375 | 193k | for (; i < imin(sz, lim_from); i++) |
376 | 86.8k | out[i] = in[iclip(i, from, to - 1)]; |
377 | 2.18M | for (; i < imin(lim_to, sz); i++) { |
378 | 2.07M | int s = 0; |
379 | 12.4M | for (int j = 0; j < 5; j++) |
380 | 10.3M | s += in[iclip(i - 2 + j, from, to - 1)] * kernel[strength - 1][j]; |
381 | 2.07M | out[i] = (s + 8) >> 4; |
382 | 2.07M | } |
383 | 303k | for (; i < sz; i++) |
384 | 196k | out[i] = in[iclip(i, from, to - 1)]; |
385 | 106k | } |
386 | | |
/*
 * Returns 1 when the edge should be upsampled: the angle is below 40 and
 * the combined size wh does not exceed 16 (8 when is_sm is 1); 0 otherwise.
 */
static inline int get_upsample(const int wh, const int angle, const int is_sm) {
    if (angle >= 40)
        return 0;
    const int max_wh = 16 >> is_sm;
    return wh <= max_wh;
}
390 | | |
391 | | static NOINLINE void upsample_edge(pixel *const out, const int hsz, |
392 | | const pixel *const in, const int from, |
393 | | const int to HIGHBD_DECL_SUFFIX) |
394 | 56.1k | { |
395 | 56.1k | static const int8_t kernel[4] = { -1, 9, 9, -1 }; |
396 | 56.1k | int i; |
397 | 444k | for (i = 0; i < hsz - 1; i++) { |
398 | 388k | out[i * 2] = in[iclip(i, from, to - 1)]; |
399 | | |
400 | 388k | int s = 0; |
401 | 1.93M | for (int j = 0; j < 4; j++) |
402 | 1.55M | s += in[iclip(i + j - 1, from, to - 1)] * kernel[j]; |
403 | 388k | out[i * 2 + 1] = iclip_pixel((s + 8) >> 4); |
404 | 388k | } |
405 | 56.1k | out[i * 2] = in[iclip(i, from, to - 1)]; |
406 | 56.1k | } |
407 | | |
408 | | static void ipred_z1_c(pixel *dst, const ptrdiff_t stride, |
409 | | const pixel *const topleft_in, |
410 | | const int width, const int height, int angle, |
411 | | const int max_width, const int max_height |
412 | | HIGHBD_DECL_SUFFIX) |
413 | 37.4k | { |
414 | 37.4k | const int is_sm = (angle >> 9) & 0x1; |
415 | 37.4k | const int enable_intra_edge_filter = angle >> 10; |
416 | 37.4k | angle &= 511; |
417 | 37.4k | assert(angle < 90); |
418 | 37.4k | int dx = dav1d_dr_intra_derivative[angle >> 1]; |
419 | 37.4k | pixel top_out[64 + 64]; |
420 | 37.4k | const pixel *top; |
421 | 37.4k | int max_base_x; |
422 | 37.4k | const int upsample_above = enable_intra_edge_filter ? |
423 | 29.0k | get_upsample(width + height, 90 - angle, is_sm) : 0; |
424 | 37.4k | if (upsample_above) { |
425 | 11.0k | upsample_edge(top_out, width + height, &topleft_in[1], -1, |
426 | 11.0k | width + imin(width, height) HIGHBD_TAIL_SUFFIX); |
427 | 11.0k | top = top_out; |
428 | 11.0k | max_base_x = 2 * (width + height) - 2; |
429 | 11.0k | dx <<= 1; |
430 | 26.4k | } else { |
431 | 26.4k | const int filter_strength = enable_intra_edge_filter ? |
432 | 17.9k | get_filter_strength(width + height, 90 - angle, is_sm) : 0; |
433 | 26.4k | if (filter_strength) { |
434 | 12.9k | filter_edge(top_out, width + height, 0, width + height, |
435 | 12.9k | &topleft_in[1], -1, width + imin(width, height), |
436 | 12.9k | filter_strength); |
437 | 12.9k | top = top_out; |
438 | 12.9k | max_base_x = width + height - 1; |
439 | 13.4k | } else { |
440 | 13.4k | top = &topleft_in[1]; |
441 | 13.4k | max_base_x = width + imin(width, height) - 1; |
442 | 13.4k | } |
443 | 26.4k | } |
444 | 37.4k | const int base_inc = 1 + upsample_above; |
445 | 526k | for (int y = 0, xpos = dx; y < height; |
446 | 489k | y++, dst += PXSTRIDE(stride), xpos += dx) |
447 | 489k | { |
448 | 489k | const int frac = xpos & 0x3E; |
449 | | |
450 | 11.2M | for (int x = 0, base = xpos >> 6; x < width; x++, base += base_inc) { |
451 | 10.7M | if (base < max_base_x) { |
452 | 10.7M | const int v = top[base] * (64 - frac) + top[base + 1] * frac; |
453 | 10.7M | dst[x] = (v + 32) >> 6; |
454 | 10.7M | } else { |
455 | 27.8k | pixel_set(&dst[x], top[max_base_x], width - x); |
456 | 27.8k | break; |
457 | 27.8k | } |
458 | 10.7M | } |
459 | 489k | } |
460 | 37.4k | } |
461 | | |
462 | | static void ipred_z2_c(pixel *dst, const ptrdiff_t stride, |
463 | | const pixel *const topleft_in, |
464 | | const int width, const int height, int angle, |
465 | | const int max_width, const int max_height |
466 | | HIGHBD_DECL_SUFFIX) |
467 | 77.0k | { |
468 | 77.0k | const int is_sm = (angle >> 9) & 0x1; |
469 | 77.0k | const int enable_intra_edge_filter = angle >> 10; |
470 | 77.0k | angle &= 511; |
471 | 77.0k | assert(angle > 90 && angle < 180); |
472 | 77.0k | int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1]; |
473 | 77.0k | int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1]; |
474 | 77.0k | const int upsample_left = enable_intra_edge_filter ? |
475 | 60.3k | get_upsample(width + height, 180 - angle, is_sm) : 0; |
476 | 77.0k | const int upsample_above = enable_intra_edge_filter ? |
477 | 60.3k | get_upsample(width + height, angle - 90, is_sm) : 0; |
478 | 77.0k | pixel edge[64 + 64 + 1]; |
479 | 77.0k | pixel *const topleft = &edge[64]; |
480 | | |
481 | 77.0k | if (upsample_above) { |
482 | 10.3k | upsample_edge(topleft, width + 1, topleft_in, 0, width + 1 |
483 | 10.3k | HIGHBD_TAIL_SUFFIX); |
484 | 10.3k | dx <<= 1; |
485 | 66.6k | } else { |
486 | 66.6k | const int filter_strength = enable_intra_edge_filter ? |
487 | 49.9k | get_filter_strength(width + height, angle - 90, is_sm) : 0; |
488 | | |
489 | 66.6k | if (filter_strength) { |
490 | 46.2k | filter_edge(&topleft[1], width, 0, max_width, |
491 | 46.2k | &topleft_in[1], -1, width, |
492 | 46.2k | filter_strength); |
493 | 46.2k | } else { |
494 | 20.3k | pixel_copy(&topleft[1], &topleft_in[1], width); |
495 | 20.3k | } |
496 | 66.6k | } |
497 | 77.0k | if (upsample_left) { |
498 | 16.5k | upsample_edge(&topleft[-height * 2], height + 1, &topleft_in[-height], |
499 | 16.5k | 0, height + 1 HIGHBD_TAIL_SUFFIX); |
500 | 16.5k | dy <<= 1; |
501 | 60.4k | } else { |
502 | 60.4k | const int filter_strength = enable_intra_edge_filter ? |
503 | 43.7k | get_filter_strength(width + height, 180 - angle, is_sm) : 0; |
504 | | |
505 | 60.4k | if (filter_strength) { |
506 | 39.8k | filter_edge(&topleft[-height], height, height - max_height, height, |
507 | 39.8k | &topleft_in[-height], |
508 | 39.8k | 0, height + 1, filter_strength); |
509 | 39.8k | } else { |
510 | 20.5k | pixel_copy(&topleft[-height], &topleft_in[-height], height); |
511 | 20.5k | } |
512 | 60.4k | } |
513 | 77.0k | *topleft = *topleft_in; |
514 | | |
515 | 77.0k | const int base_inc_x = 1 + upsample_above; |
516 | 77.0k | const pixel *const left = &topleft[-(1 + upsample_left)]; |
517 | 1.18M | for (int y = 0, xpos = ((1 + upsample_above) << 6) - dx; y < height; |
518 | 1.10M | y++, xpos -= dx, dst += PXSTRIDE(stride)) |
519 | 1.10M | { |
520 | 1.10M | int base_x = xpos >> 6; |
521 | 1.10M | const int frac_x = xpos & 0x3E; |
522 | | |
523 | 26.8M | for (int x = 0, ypos = (y << (6 + upsample_left)) - dy; x < width; |
524 | 25.7M | x++, base_x += base_inc_x, ypos -= dy) |
525 | 25.7M | { |
526 | 25.7M | int v; |
527 | 25.7M | if (base_x >= 0) { |
528 | 11.5M | v = topleft[base_x] * (64 - frac_x) + |
529 | 11.5M | topleft[base_x + 1] * frac_x; |
530 | 14.2M | } else { |
531 | 14.2M | const int base_y = ypos >> 6; |
532 | 14.2M | assert(base_y >= -(1 + upsample_left)); |
533 | 14.2M | const int frac_y = ypos & 0x3E; |
534 | 14.2M | v = left[-base_y] * (64 - frac_y) + |
535 | 14.2M | left[-(base_y + 1)] * frac_y; |
536 | 14.2M | } |
537 | 25.7M | dst[x] = (v + 32) >> 6; |
538 | 25.7M | } |
539 | 1.10M | } |
540 | 77.0k | } |
541 | | |
542 | | static void ipred_z3_c(pixel *dst, const ptrdiff_t stride, |
543 | | const pixel *const topleft_in, |
544 | | const int width, const int height, int angle, |
545 | | const int max_width, const int max_height |
546 | | HIGHBD_DECL_SUFFIX) |
547 | 35.2k | { |
548 | 35.2k | const int is_sm = (angle >> 9) & 0x1; |
549 | 35.2k | const int enable_intra_edge_filter = angle >> 10; |
550 | 35.2k | angle &= 511; |
551 | 35.2k | assert(angle > 180); |
552 | 35.2k | int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1]; |
553 | 35.2k | pixel left_out[64 + 64]; |
554 | 35.2k | const pixel *left; |
555 | 35.2k | int max_base_y; |
556 | 35.2k | const int upsample_left = enable_intra_edge_filter ? |
557 | 26.5k | get_upsample(width + height, angle - 180, is_sm) : 0; |
558 | 35.2k | if (upsample_left) { |
559 | 18.1k | upsample_edge(left_out, width + height, |
560 | 18.1k | &topleft_in[-(width + height)], |
561 | 18.1k | imax(width - height, 0), width + height + 1 |
562 | 18.1k | HIGHBD_TAIL_SUFFIX); |
563 | 18.1k | left = &left_out[2 * (width + height) - 2]; |
564 | 18.1k | max_base_y = 2 * (width + height) - 2; |
565 | 18.1k | dy <<= 1; |
566 | 18.1k | } else { |
567 | 17.0k | const int filter_strength = enable_intra_edge_filter ? |
568 | 8.72k | get_filter_strength(width + height, angle - 180, is_sm) : 0; |
569 | | |
570 | 17.0k | if (filter_strength) { |
571 | 7.39k | filter_edge(left_out, width + height, 0, width + height, |
572 | 7.39k | &topleft_in[-(width + height)], |
573 | 7.39k | imax(width - height, 0), width + height + 1, |
574 | 7.39k | filter_strength); |
575 | 7.39k | left = &left_out[width + height - 1]; |
576 | 7.39k | max_base_y = width + height - 1; |
577 | 9.69k | } else { |
578 | 9.69k | left = &topleft_in[-1]; |
579 | 9.69k | max_base_y = height + imin(width, height) - 1; |
580 | 9.69k | } |
581 | 17.0k | } |
582 | 35.2k | const int base_inc = 1 + upsample_left; |
583 | 418k | for (int x = 0, ypos = dy; x < width; x++, ypos += dy) { |
584 | 383k | const int frac = ypos & 0x3E; |
585 | | |
586 | 7.45M | for (int y = 0, base = ypos >> 6; y < height; y++, base += base_inc) { |
587 | 7.07M | if (base < max_base_y) { |
588 | 7.07M | const int v = left[-base] * (64 - frac) + |
589 | 7.07M | left[-(base + 1)] * frac; |
590 | 7.07M | dst[y * PXSTRIDE(stride) + x] = (v + 32) >> 6; |
591 | 7.07M | } else { |
592 | 6.10k | do { |
593 | 6.10k | dst[y * PXSTRIDE(stride) + x] = left[-max_base_y]; |
594 | 6.10k | } while (++y < height); |
595 | 2.20k | break; |
596 | 2.20k | } |
597 | 7.07M | } |
598 | 383k | } |
599 | 35.2k | } |
600 | | |
/*
 * FILTER() evaluates the 7-tap weighted sum used by ipred_filter_c() for one
 * output pixel: p0..p6 are the neighbouring pixels, flt_ptr points at the
 * first tap for that output position. FLT_INCR is the step from one output
 * position's taps to the next.
 *
 * The two variants differ only in where the seven taps sit relative to
 * flt_ptr (pairs at 0/1, 16/17, 32/33, 48 with a step of 2 on x86; every
 * 8th entry with a step of 1 elsewhere). NOTE(review): presumably
 * dav1d_filter_intra_taps is laid out differently on x86 - confirm in
 * src/tables.c.
 *
 * All arguments and the whole expansion are parenthesized so the macro
 * behaves like a function call when given expression arguments or when used
 * inside a larger expression.
 */
#if ARCH_X86
#define FILTER(flt_ptr, p0, p1, p2, p3, p4, p5, p6) \
    ((flt_ptr)[ 0] * (p0) + (flt_ptr)[ 1] * (p1) + \
     (flt_ptr)[16] * (p2) + (flt_ptr)[17] * (p3) + \
     (flt_ptr)[32] * (p4) + (flt_ptr)[33] * (p5) + \
     (flt_ptr)[48] * (p6))
#define FLT_INCR 2
#else
#define FILTER(flt_ptr, p0, p1, p2, p3, p4, p5, p6) \
    ((flt_ptr)[ 0] * (p0) + (flt_ptr)[ 8] * (p1) + \
     (flt_ptr)[16] * (p2) + (flt_ptr)[24] * (p3) + \
     (flt_ptr)[32] * (p4) + (flt_ptr)[40] * (p5) + \
     (flt_ptr)[48] * (p6))
#define FLT_INCR 1
#endif
616 | | |
617 | | /* Up to 32x32 only */ |
618 | | static void ipred_filter_c(pixel *dst, const ptrdiff_t stride, |
619 | | const pixel *const topleft_in, |
620 | | const int width, const int height, int filt_idx, |
621 | | const int max_width, const int max_height |
622 | | HIGHBD_DECL_SUFFIX) |
623 | 42.9k | { |
624 | 42.9k | filt_idx &= 511; |
625 | 42.9k | assert(filt_idx < 5); |
626 | | |
627 | 42.9k | const int8_t *const filter = dav1d_filter_intra_taps[filt_idx]; |
628 | 42.9k | const pixel *top = &topleft_in[1]; |
629 | 224k | for (int y = 0; y < height; y += 2) { |
630 | 181k | const pixel *topleft = &topleft_in[-y]; |
631 | 181k | const pixel *left = &topleft[-1]; |
632 | 181k | ptrdiff_t left_stride = -1; |
633 | 676k | for (int x = 0; x < width; x += 4) { |
634 | 494k | const int p0 = *topleft; |
635 | 494k | const int p1 = top[0], p2 = top[1], p3 = top[2], p4 = top[3]; |
636 | 494k | const int p5 = left[0 * left_stride], p6 = left[1 * left_stride]; |
637 | 494k | pixel *ptr = &dst[x]; |
638 | 494k | const int8_t *flt_ptr = filter; |
639 | | |
640 | 1.48M | for (int yy = 0; yy < 2; yy++) { |
641 | 4.94M | for (int xx = 0; xx < 4; xx++, flt_ptr += FLT_INCR) { |
642 | 3.95M | const int acc = FILTER(flt_ptr, p0, p1, p2, p3, p4, p5, p6); |
643 | 3.95M | ptr[xx] = iclip_pixel((acc + 8) >> 4); |
644 | 3.95M | } |
645 | 989k | ptr += PXSTRIDE(stride); |
646 | 989k | } |
647 | 494k | left = &dst[x + 4 - 1]; |
648 | 494k | left_stride = PXSTRIDE(stride); |
649 | 494k | top += 4; |
650 | 494k | topleft = &top[-1]; |
651 | 494k | } |
652 | 181k | top = &dst[PXSTRIDE(stride)]; |
653 | 181k | dst = &dst[PXSTRIDE(stride) * 2]; |
654 | 181k | } |
655 | 42.9k | } |
656 | | |
657 | | static NOINLINE void |
658 | | cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride, |
659 | | const int w_pad, const int h_pad, const int width, const int height, |
660 | | const int ss_hor, const int ss_ver) |
661 | 95.1k | { |
662 | 95.1k | int y, x; |
663 | 95.1k | int16_t *const ac_orig = ac; |
664 | | |
665 | 95.1k | assert(w_pad >= 0 && w_pad * 4 < width); |
666 | 95.1k | assert(h_pad >= 0 && h_pad * 4 < height); |
667 | | |
668 | 819k | for (y = 0; y < height - 4 * h_pad; y++) { |
669 | 7.72M | for (x = 0; x < width - 4 * w_pad; x++) { |
670 | 7.00M | int ac_sum = ypx[x << ss_hor]; |
671 | 7.00M | if (ss_hor) ac_sum += ypx[x * 2 + 1]; |
672 | 7.00M | if (ss_ver) { |
673 | 250k | ac_sum += ypx[(x << ss_hor) + PXSTRIDE(stride)]; |
674 | 250k | if (ss_hor) ac_sum += ypx[x * 2 + 1 + PXSTRIDE(stride)]; |
675 | 250k | } |
676 | 7.00M | ac[x] = ac_sum << (1 + !ss_ver + !ss_hor); |
677 | 7.00M | } |
678 | 984k | for (; x < width; x++) |
679 | 259k | ac[x] = ac[x - 1]; |
680 | 724k | ac += width; |
681 | 724k | ypx += PXSTRIDE(stride) << ss_ver; |
682 | 724k | } |
683 | 106k | for (; y < height; y++) { |
684 | 11.1k | memcpy(ac, &ac[-width], width * sizeof(*ac)); |
685 | 11.1k | ac += width; |
686 | 11.1k | } |
687 | | |
688 | 95.1k | const int log2sz = ctz(width) + ctz(height); |
689 | 95.1k | int sum = (1 << log2sz) >> 1; |
690 | 830k | for (ac = ac_orig, y = 0; y < height; y++) { |
691 | 8.23M | for (x = 0; x < width; x++) |
692 | 7.49M | sum += ac[x]; |
693 | 735k | ac += width; |
694 | 735k | } |
695 | 95.1k | sum >>= log2sz; |
696 | | |
697 | | // subtract DC |
698 | 830k | for (ac = ac_orig, y = 0; y < height; y++) { |
699 | 8.23M | for (x = 0; x < width; x++) |
700 | 7.49M | ac[x] -= sum; |
701 | 735k | ac += width; |
702 | 735k | } |
703 | 95.1k | } |
704 | | |
705 | | #define cfl_ac_fn(fmt, ss_hor, ss_ver) \ |
706 | | static void cfl_ac_##fmt##_c(int16_t *const ac, const pixel *const ypx, \ |
707 | | const ptrdiff_t stride, const int w_pad, \ |
708 | 95.1k | const int h_pad, const int cw, const int ch) \ |
709 | 95.1k | { \ |
710 | 95.1k | cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \ |
711 | 95.1k | } ipred_tmpl.c:cfl_ac_420_c Line | Count | Source | 708 | 5.87k | const int h_pad, const int cw, const int ch) \ | 709 | 5.87k | { \ | 710 | 5.87k | cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \ | 711 | 5.87k | } |
ipred_tmpl.c:cfl_ac_422_c Line | Count | Source | 708 | 1.01k | const int h_pad, const int cw, const int ch) \ | 709 | 1.01k | { \ | 710 | 1.01k | cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \ | 711 | 1.01k | } |
ipred_tmpl.c:cfl_ac_444_c Line | Count | Source | 708 | 88.2k | const int h_pad, const int cw, const int ch) \ | 709 | 88.2k | { \ | 710 | 88.2k | cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \ | 711 | 88.2k | } |
|
712 | | |
713 | | cfl_ac_fn(420, 1, 1) |
714 | | cfl_ac_fn(422, 1, 0) |
715 | | cfl_ac_fn(444, 0, 0) |
716 | | |
717 | | static void pal_pred_c(pixel *dst, const ptrdiff_t stride, |
718 | | const pixel *const pal, const uint8_t *idx, |
719 | | const int w, const int h) |
720 | 1.65k | { |
721 | 27.3k | for (int y = 0; y < h; y++) { |
722 | 212k | for (int x = 0; x < w; x += 2) { |
723 | 186k | const int i = *idx++; |
724 | 186k | assert(!(i & 0x88)); |
725 | 186k | dst[x + 0] = pal[i & 7]; |
726 | 186k | dst[x + 1] = pal[i >> 4]; |
727 | 186k | } |
728 | 25.7k | dst += PXSTRIDE(stride); |
729 | 25.7k | } |
730 | 1.65k | } |
731 | | |
732 | | #if HAVE_ASM |
733 | | #if ARCH_AARCH64 || ARCH_ARM |
734 | | #include "src/arm/ipred.h" |
735 | | #elif ARCH_RISCV |
736 | | #include "src/riscv/ipred.h" |
737 | | #elif ARCH_X86 |
738 | | #include "src/x86/ipred.h" |
739 | | #elif ARCH_LOONGARCH64 |
740 | | #include "src/loongarch/ipred.h" |
741 | | #endif |
742 | | #endif |
743 | | |
744 | 15.8k | COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) { |
745 | 15.8k | c->intra_pred[DC_PRED ] = ipred_dc_c; |
746 | 15.8k | c->intra_pred[DC_128_PRED ] = ipred_dc_128_c; |
747 | 15.8k | c->intra_pred[TOP_DC_PRED ] = ipred_dc_top_c; |
748 | 15.8k | c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c; |
749 | 15.8k | c->intra_pred[HOR_PRED ] = ipred_h_c; |
750 | 15.8k | c->intra_pred[VERT_PRED ] = ipred_v_c; |
751 | 15.8k | c->intra_pred[PAETH_PRED ] = ipred_paeth_c; |
752 | 15.8k | c->intra_pred[SMOOTH_PRED ] = ipred_smooth_c; |
753 | 15.8k | c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c; |
754 | 15.8k | c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c; |
755 | 15.8k | c->intra_pred[Z1_PRED ] = ipred_z1_c; |
756 | 15.8k | c->intra_pred[Z2_PRED ] = ipred_z2_c; |
757 | 15.8k | c->intra_pred[Z3_PRED ] = ipred_z3_c; |
758 | 15.8k | c->intra_pred[FILTER_PRED ] = ipred_filter_c; |
759 | | |
760 | 15.8k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c; |
761 | 15.8k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c; |
762 | 15.8k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c; |
763 | | |
764 | 15.8k | c->cfl_pred[DC_PRED ] = ipred_cfl_c; |
765 | 15.8k | c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c; |
766 | 15.8k | c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c; |
767 | 15.8k | c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c; |
768 | | |
769 | 15.8k | c->pal_pred = pal_pred_c; |
770 | | |
771 | | #if HAVE_ASM |
772 | | #if ARCH_AARCH64 || ARCH_ARM |
773 | | intra_pred_dsp_init_arm(c); |
774 | | #elif ARCH_RISCV |
775 | | intra_pred_dsp_init_riscv(c); |
776 | | #elif ARCH_X86 |
777 | | intra_pred_dsp_init_x86(c); |
778 | | #elif ARCH_LOONGARCH64 |
779 | | intra_pred_dsp_init_loongarch(c); |
780 | | #endif |
781 | | #endif |
782 | 15.8k | } dav1d_intra_pred_dsp_init_8bpc Line | Count | Source | 744 | 7.70k | COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) { | 745 | 7.70k | c->intra_pred[DC_PRED ] = ipred_dc_c; | 746 | 7.70k | c->intra_pred[DC_128_PRED ] = ipred_dc_128_c; | 747 | 7.70k | c->intra_pred[TOP_DC_PRED ] = ipred_dc_top_c; | 748 | 7.70k | c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c; | 749 | 7.70k | c->intra_pred[HOR_PRED ] = ipred_h_c; | 750 | 7.70k | c->intra_pred[VERT_PRED ] = ipred_v_c; | 751 | 7.70k | c->intra_pred[PAETH_PRED ] = ipred_paeth_c; | 752 | 7.70k | c->intra_pred[SMOOTH_PRED ] = ipred_smooth_c; | 753 | 7.70k | c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c; | 754 | 7.70k | c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c; | 755 | 7.70k | c->intra_pred[Z1_PRED ] = ipred_z1_c; | 756 | 7.70k | c->intra_pred[Z2_PRED ] = ipred_z2_c; | 757 | 7.70k | c->intra_pred[Z3_PRED ] = ipred_z3_c; | 758 | 7.70k | c->intra_pred[FILTER_PRED ] = ipred_filter_c; | 759 | | | 760 | 7.70k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c; | 761 | 7.70k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c; | 762 | 7.70k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c; | 763 | | | 764 | 7.70k | c->cfl_pred[DC_PRED ] = ipred_cfl_c; | 765 | 7.70k | c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c; | 766 | 7.70k | c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c; | 767 | 7.70k | c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c; | 768 | | | 769 | 7.70k | c->pal_pred = pal_pred_c; | 770 | | | 771 | | #if HAVE_ASM | 772 | | #if ARCH_AARCH64 || ARCH_ARM | 773 | | intra_pred_dsp_init_arm(c); | 774 | | #elif ARCH_RISCV | 775 | | intra_pred_dsp_init_riscv(c); | 776 | | #elif ARCH_X86 | 777 | | intra_pred_dsp_init_x86(c); | 778 | | #elif ARCH_LOONGARCH64 | 779 | | intra_pred_dsp_init_loongarch(c); | 780 | | #endif | 781 | | #endif | 782 | 7.70k | } |
dav1d_intra_pred_dsp_init_16bpc Line | Count | Source | 744 | 8.19k | COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) { | 745 | 8.19k | c->intra_pred[DC_PRED ] = ipred_dc_c; | 746 | 8.19k | c->intra_pred[DC_128_PRED ] = ipred_dc_128_c; | 747 | 8.19k | c->intra_pred[TOP_DC_PRED ] = ipred_dc_top_c; | 748 | 8.19k | c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c; | 749 | 8.19k | c->intra_pred[HOR_PRED ] = ipred_h_c; | 750 | 8.19k | c->intra_pred[VERT_PRED ] = ipred_v_c; | 751 | 8.19k | c->intra_pred[PAETH_PRED ] = ipred_paeth_c; | 752 | 8.19k | c->intra_pred[SMOOTH_PRED ] = ipred_smooth_c; | 753 | 8.19k | c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c; | 754 | 8.19k | c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c; | 755 | 8.19k | c->intra_pred[Z1_PRED ] = ipred_z1_c; | 756 | 8.19k | c->intra_pred[Z2_PRED ] = ipred_z2_c; | 757 | 8.19k | c->intra_pred[Z3_PRED ] = ipred_z3_c; | 758 | 8.19k | c->intra_pred[FILTER_PRED ] = ipred_filter_c; | 759 | | | 760 | 8.19k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c; | 761 | 8.19k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c; | 762 | 8.19k | c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c; | 763 | | | 764 | 8.19k | c->cfl_pred[DC_PRED ] = ipred_cfl_c; | 765 | 8.19k | c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c; | 766 | 8.19k | c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c; | 767 | 8.19k | c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c; | 768 | | | 769 | 8.19k | c->pal_pred = pal_pred_c; | 770 | | | 771 | | #if HAVE_ASM | 772 | | #if ARCH_AARCH64 || ARCH_ARM | 773 | | intra_pred_dsp_init_arm(c); | 774 | | #elif ARCH_RISCV | 775 | | intra_pred_dsp_init_riscv(c); | 776 | | #elif ARCH_X86 | 777 | | intra_pred_dsp_init_x86(c); | 778 | | #elif ARCH_LOONGARCH64 | 779 | | intra_pred_dsp_init_loongarch(c); | 780 | | #endif | 781 | | #endif | 782 | 8.19k | } |
|