/src/ffmpeg/libavcodec/vp8dsp.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2010 David Conrad |
3 | | * Copyright (C) 2010 Ronald S. Bultje |
4 | | * Copyright (C) 2014 Peter Ross |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | /** |
24 | | * @file |
25 | | * VP8 compatible video decoder |
26 | | */ |
27 | | |
28 | | #include "config_components.h" |
29 | | |
30 | | #include "libavutil/common.h" |
31 | | #include "libavutil/intreadwrite.h" |
32 | | |
33 | | #include "mathops.h" |
34 | | #include "vp8dsp.h" |
35 | | |
36 | | #define MK_IDCT_DC_ADD4_C(name) \ |
37 | | static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], \ |
38 | 170k | ptrdiff_t stride) \ |
39 | 170k | { \ |
40 | 170k | name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ |
41 | 170k | name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ |
42 | 170k | name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ |
43 | 170k | name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ |
44 | 170k | } \ vp8dsp.c:vp7_idct_dc_add4uv_c Line | Count | Source | 38 | 22.4k | ptrdiff_t stride) \ | 39 | 22.4k | { \ | 40 | 22.4k | name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ | 41 | 22.4k | name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ | 42 | 22.4k | name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ | 43 | 22.4k | name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ | 44 | 22.4k | } \ |
vp8dsp.c:vp8_idct_dc_add4uv_c Line | Count | Source | 38 | 148k | ptrdiff_t stride) \ | 39 | 148k | { \ | 40 | 148k | name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ | 41 | 148k | name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ | 42 | 148k | name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ | 43 | 148k | name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ | 44 | 148k | } \ |
|
45 | | \ |
46 | | static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], \ |
47 | 643k | ptrdiff_t stride) \ |
48 | 643k | { \ |
49 | 643k | name ## _idct_dc_add_c(dst + 0, block[0], stride); \ |
50 | 643k | name ## _idct_dc_add_c(dst + 4, block[1], stride); \ |
51 | 643k | name ## _idct_dc_add_c(dst + 8, block[2], stride); \ |
52 | 643k | name ## _idct_dc_add_c(dst + 12, block[3], stride); \ |
53 | 643k | } vp8dsp.c:vp7_idct_dc_add4y_c Line | Count | Source | 47 | 427k | ptrdiff_t stride) \ | 48 | 427k | { \ | 49 | 427k | name ## _idct_dc_add_c(dst + 0, block[0], stride); \ | 50 | 427k | name ## _idct_dc_add_c(dst + 4, block[1], stride); \ | 51 | 427k | name ## _idct_dc_add_c(dst + 8, block[2], stride); \ | 52 | 427k | name ## _idct_dc_add_c(dst + 12, block[3], stride); \ | 53 | 427k | } |
vp8dsp.c:vp8_idct_dc_add4y_c Line | Count | Source | 47 | 216k | ptrdiff_t stride) \ | 48 | 216k | { \ | 49 | 216k | name ## _idct_dc_add_c(dst + 0, block[0], stride); \ | 50 | 216k | name ## _idct_dc_add_c(dst + 4, block[1], stride); \ | 51 | 216k | name ## _idct_dc_add_c(dst + 8, block[2], stride); \ | 52 | 216k | name ## _idct_dc_add_c(dst + 12, block[3], stride); \ | 53 | 216k | } |
|
54 | | |
55 | | #if CONFIG_VP7_DECODER |
56 | | static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) |
57 | 37.3k | { |
58 | 37.3k | int i; |
59 | 37.3k | unsigned a1, b1, c1, d1; |
60 | 37.3k | int16_t tmp[16]; |
61 | | |
62 | 186k | for (i = 0; i < 4; i++) { |
63 | 149k | a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170; |
64 | 149k | b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170; |
65 | 149k | c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274; |
66 | 149k | d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540; |
67 | 149k | tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; |
68 | 149k | tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; |
69 | 149k | tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; |
70 | 149k | tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; |
71 | 149k | } |
72 | | |
73 | 186k | for (i = 0; i < 4; i++) { |
74 | 149k | a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; |
75 | 149k | b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; |
76 | 149k | c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; |
77 | 149k | d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; |
78 | 149k | AV_ZERO64(dc + i * 4); |
79 | 149k | block[0][i][0] = (int)(a1 + d1 + 0x20000) >> 18; |
80 | 149k | block[3][i][0] = (int)(a1 - d1 + 0x20000) >> 18; |
81 | 149k | block[1][i][0] = (int)(b1 + c1 + 0x20000) >> 18; |
82 | 149k | block[2][i][0] = (int)(b1 - c1 + 0x20000) >> 18; |
83 | 149k | } |
84 | 37.3k | } |
85 | | |
86 | | static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) |
87 | 88.2k | { |
88 | 88.2k | int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18; |
89 | 88.2k | dc[0] = 0; |
90 | | |
91 | 441k | for (i = 0; i < 4; i++) { |
92 | 353k | block[i][0][0] = val; |
93 | 353k | block[i][1][0] = val; |
94 | 353k | block[i][2][0] = val; |
95 | 353k | block[i][3][0] = val; |
96 | 353k | } |
97 | 88.2k | } |
98 | | |
99 | | static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
100 | 537k | { |
101 | 537k | int i; |
102 | 537k | unsigned a1, b1, c1, d1; |
103 | 537k | int16_t tmp[16]; |
104 | | |
105 | 2.68M | for (i = 0; i < 4; i++) { |
106 | 2.14M | a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170; |
107 | 2.14M | b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170; |
108 | 2.14M | c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274; |
109 | 2.14M | d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540; |
110 | 2.14M | AV_ZERO64(block + i * 4); |
111 | 2.14M | tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; |
112 | 2.14M | tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; |
113 | 2.14M | tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; |
114 | 2.14M | tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; |
115 | 2.14M | } |
116 | | |
117 | 2.68M | for (i = 0; i < 4; i++) { |
118 | 2.14M | a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; |
119 | 2.14M | b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; |
120 | 2.14M | c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; |
121 | 2.14M | d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; |
122 | 2.14M | dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] + |
123 | 2.14M | ((int)(a1 + d1 + 0x20000) >> 18)); |
124 | 2.14M | dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] + |
125 | 2.14M | ((int)(a1 - d1 + 0x20000) >> 18)); |
126 | 2.14M | dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] + |
127 | 2.14M | ((int)(b1 + c1 + 0x20000) >> 18)); |
128 | 2.14M | dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] + |
129 | 2.14M | ((int)(b1 - c1 + 0x20000) >> 18)); |
130 | 2.14M | } |
131 | 537k | } |
132 | | |
133 | | static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
134 | 1.92M | { |
135 | 1.92M | int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18; |
136 | 1.92M | block[0] = 0; |
137 | | |
138 | 9.60M | for (i = 0; i < 4; i++) { |
139 | 7.68M | dst[0] = av_clip_uint8(dst[0] + dc); |
140 | 7.68M | dst[1] = av_clip_uint8(dst[1] + dc); |
141 | 7.68M | dst[2] = av_clip_uint8(dst[2] + dc); |
142 | 7.68M | dst[3] = av_clip_uint8(dst[3] + dc); |
143 | 7.68M | dst += stride; |
144 | 7.68M | } |
145 | 1.92M | } |
146 | | |
147 | | MK_IDCT_DC_ADD4_C(vp7) |
148 | | #endif /* CONFIG_VP7_DECODER */ |
149 | | |
150 | | // TODO: Maybe add dequant |
151 | | #if CONFIG_VP8_DECODER |
152 | | static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) |
153 | 65.8k | { |
154 | 65.8k | int i, t0, t1, t2, t3; |
155 | | |
156 | 329k | for (i = 0; i < 4; i++) { |
157 | 263k | t0 = dc[0 * 4 + i] + dc[3 * 4 + i]; |
158 | 263k | t1 = dc[1 * 4 + i] + dc[2 * 4 + i]; |
159 | 263k | t2 = dc[1 * 4 + i] - dc[2 * 4 + i]; |
160 | 263k | t3 = dc[0 * 4 + i] - dc[3 * 4 + i]; |
161 | | |
162 | 263k | dc[0 * 4 + i] = t0 + t1; |
163 | 263k | dc[1 * 4 + i] = t3 + t2; |
164 | 263k | dc[2 * 4 + i] = t0 - t1; |
165 | 263k | dc[3 * 4 + i] = t3 - t2; |
166 | 263k | } |
167 | | |
168 | 329k | for (i = 0; i < 4; i++) { |
169 | 263k | t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding |
170 | 263k | t1 = dc[i * 4 + 1] + dc[i * 4 + 2]; |
171 | 263k | t2 = dc[i * 4 + 1] - dc[i * 4 + 2]; |
172 | 263k | t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding |
173 | 263k | AV_ZERO64(dc + i * 4); |
174 | | |
175 | 263k | block[i][0][0] = (t0 + t1) >> 3; |
176 | 263k | block[i][1][0] = (t3 + t2) >> 3; |
177 | 263k | block[i][2][0] = (t0 - t1) >> 3; |
178 | 263k | block[i][3][0] = (t3 - t2) >> 3; |
179 | 263k | } |
180 | 65.8k | } |
181 | | |
182 | | static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) |
183 | 28.3k | { |
184 | 28.3k | int i, val = (dc[0] + 3) >> 3; |
185 | 28.3k | dc[0] = 0; |
186 | | |
187 | 141k | for (i = 0; i < 4; i++) { |
188 | 113k | block[i][0][0] = val; |
189 | 113k | block[i][1][0] = val; |
190 | 113k | block[i][2][0] = val; |
191 | 113k | block[i][3][0] = val; |
192 | 113k | } |
193 | 28.3k | } |
194 | | |
195 | 29.8M | #define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) |
196 | 29.8M | #define MUL_35468(a) (((a) * 35468) >> 16) |
197 | | |
198 | | static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
199 | 1.86M | { |
200 | 1.86M | int i, t0, t1, t2, t3; |
201 | 1.86M | int16_t tmp[16]; |
202 | | |
203 | 9.31M | for (i = 0; i < 4; i++) { |
204 | 7.45M | t0 = block[0 * 4 + i] + block[2 * 4 + i]; |
205 | 7.45M | t1 = block[0 * 4 + i] - block[2 * 4 + i]; |
206 | 7.45M | t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]); |
207 | 7.45M | t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]); |
208 | 7.45M | block[0 * 4 + i] = 0; |
209 | 7.45M | block[1 * 4 + i] = 0; |
210 | 7.45M | block[2 * 4 + i] = 0; |
211 | 7.45M | block[3 * 4 + i] = 0; |
212 | | |
213 | 7.45M | tmp[i * 4 + 0] = t0 + t3; |
214 | 7.45M | tmp[i * 4 + 1] = t1 + t2; |
215 | 7.45M | tmp[i * 4 + 2] = t1 - t2; |
216 | 7.45M | tmp[i * 4 + 3] = t0 - t3; |
217 | 7.45M | } |
218 | | |
219 | 9.31M | for (i = 0; i < 4; i++) { |
220 | 7.45M | t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i]; |
221 | 7.45M | t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i]; |
222 | 7.45M | t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]); |
223 | 7.45M | t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]); |
224 | | |
225 | 7.45M | dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); |
226 | 7.45M | dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); |
227 | 7.45M | dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); |
228 | 7.45M | dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); |
229 | 7.45M | dst += stride; |
230 | 7.45M | } |
231 | 1.86M | } |
232 | | |
233 | | static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
234 | 2.06M | { |
235 | 2.06M | int i, dc = (block[0] + 4) >> 3; |
236 | 2.06M | block[0] = 0; |
237 | | |
238 | 10.3M | for (i = 0; i < 4; i++) { |
239 | 8.26M | dst[0] = av_clip_uint8(dst[0] + dc); |
240 | 8.26M | dst[1] = av_clip_uint8(dst[1] + dc); |
241 | 8.26M | dst[2] = av_clip_uint8(dst[2] + dc); |
242 | 8.26M | dst[3] = av_clip_uint8(dst[3] + dc); |
243 | 8.26M | dst += stride; |
244 | 8.26M | } |
245 | 2.06M | } |
246 | | |
247 | | MK_IDCT_DC_ADD4_C(vp8) |
248 | | #endif /* CONFIG_VP8_DECODER */ |
249 | | |
250 | | // because I like only having two parameters to pass functions... |
251 | | #define LOAD_PIXELS \ |
252 | 575M | av_unused int p3 = p[-4 * stride]; \ |
253 | 575M | av_unused int p2 = p[-3 * stride]; \ |
254 | 575M | av_unused int p1 = p[-2 * stride]; \ |
255 | 575M | av_unused int p0 = p[-1 * stride]; \ |
256 | 575M | av_unused int q0 = p[ 0 * stride]; \ |
257 | 575M | av_unused int q1 = p[ 1 * stride]; \ |
258 | 575M | av_unused int q2 = p[ 2 * stride]; \ |
259 | 575M | av_unused int q3 = p[ 3 * stride]; |
260 | | |
261 | 293M | #define clip_int8(n) (cm[(n) + 0x80] - 0x80) |
262 | | |
263 | | static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, |
264 | | int is4tap, int is_vp7) |
265 | 129M | { |
266 | 129M | LOAD_PIXELS |
267 | 129M | int a, f1, f2; |
268 | 129M | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; |
269 | | |
270 | 129M | a = 3 * (q0 - p0); |
271 | | |
272 | 129M | if (is4tap) |
273 | 74.3M | a += clip_int8(p1 - q1); |
274 | | |
275 | 129M | a = clip_int8(a); |
276 | | |
277 | | // We deviate from the spec here with c(a+3) >> 3 |
278 | | // since that's what libvpx does. |
279 | 129M | f1 = FFMIN(a + 4, 127) >> 3; |
280 | | |
281 | 129M | if (is_vp7) |
282 | 88.0M | f2 = f1 - ((a & 7) == 4); |
283 | 41.4M | else |
284 | 41.4M | f2 = FFMIN(a + 3, 127) >> 3; |
285 | | |
286 | | // Despite what the spec says, we do need to clamp here to |
287 | | // be bitexact with libvpx. |
288 | 129M | p[-1 * stride] = cm[p0 + f2]; |
289 | 129M | p[ 0 * stride] = cm[q0 - f1]; |
290 | | |
291 | | // only used for _inner on blocks without high edge variance |
292 | 129M | if (!is4tap) { |
293 | 55.1M | a = (f1 + 1) >> 1; |
294 | 55.1M | p[-2 * stride] = cm[p1 + a]; |
295 | 55.1M | p[ 1 * stride] = cm[q1 - a]; |
296 | 55.1M | } |
297 | 129M | } |
298 | | |
299 | | static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride, |
300 | | int is4tap) |
301 | 88.0M | { |
302 | 88.0M | filter_common(p, stride, is4tap, IS_VP7); |
303 | 88.0M | } |
304 | | |
305 | | static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride, |
306 | | int is4tap) |
307 | 41.4M | { |
308 | 41.4M | filter_common(p, stride, is4tap, IS_VP8); |
309 | 41.4M | } |
310 | | |
311 | | static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride, |
312 | | int flim) |
313 | 105M | { |
314 | 105M | LOAD_PIXELS |
315 | 105M | return FFABS(p0 - q0) <= flim; |
316 | 105M | } |
317 | | |
318 | | static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, |
319 | | int flim) |
320 | 78.2M | { |
321 | 78.2M | LOAD_PIXELS |
322 | 78.2M | return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; |
323 | 78.2M | } |
324 | | |
325 | | /** |
326 | | * E - limit at the macroblock edge |
327 | | * I - limit for interior difference |
328 | | */ |
329 | | #define NORMAL_LIMIT(vpn) \ |
330 | | static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \ |
331 | | ptrdiff_t stride, \ |
332 | 112M | int E, int I) \ |
333 | 112M | { \ |
334 | 112M | LOAD_PIXELS \ |
335 | 112M | return vp ## vpn ## _simple_limit(p, stride, E) && \ |
336 | 112M | FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ |
337 | 112M | FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ |
338 | 112M | FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ |
339 | 112M | } vp8dsp.c:vp7_normal_limit Line | Count | Source | 332 | 47.9M | int E, int I) \ | 333 | 47.9M | { \ | 334 | 47.9M | LOAD_PIXELS \ | 335 | 47.9M | return vp ## vpn ## _simple_limit(p, stride, E) && \ | 336 | 47.9M | FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ | 337 | 47.9M | FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ | 338 | 47.9M | FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ | 339 | 47.9M | } |
vp8dsp.c:vp8_normal_limit Line | Count | Source | 332 | 64.5M | int E, int I) \ | 333 | 64.5M | { \ | 334 | 64.5M | LOAD_PIXELS \ | 335 | 64.5M | return vp ## vpn ## _simple_limit(p, stride, E) && \ | 336 | 64.5M | FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ | 337 | 64.5M | FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ | 338 | 64.5M | FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ | 339 | 64.5M | } |
|
340 | | |
341 | | NORMAL_LIMIT(7) |
342 | | NORMAL_LIMIT(8) |
343 | | |
344 | | // high edge variance |
345 | | static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) |
346 | 104M | { |
347 | 104M | LOAD_PIXELS |
348 | 104M | return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh; |
349 | 104M | } |
350 | | |
351 | | static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) |
352 | 44.7M | { |
353 | 44.7M | int a0, a1, a2, w; |
354 | 44.7M | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; |
355 | | |
356 | 44.7M | LOAD_PIXELS |
357 | | |
358 | 44.7M | w = clip_int8(p1 - q1); |
359 | 44.7M | w = clip_int8(w + 3 * (q0 - p0)); |
360 | | |
361 | 44.7M | a0 = (27 * w + 63) >> 7; |
362 | 44.7M | a1 = (18 * w + 63) >> 7; |
363 | 44.7M | a2 = (9 * w + 63) >> 7; |
364 | | |
365 | 44.7M | p[-3 * stride] = cm[p2 + a2]; |
366 | 44.7M | p[-2 * stride] = cm[p1 + a1]; |
367 | 44.7M | p[-1 * stride] = cm[p0 + a0]; |
368 | 44.7M | p[ 0 * stride] = cm[q0 - a0]; |
369 | 44.7M | p[ 1 * stride] = cm[q1 - a1]; |
370 | 44.7M | p[ 2 * stride] = cm[q2 - a2]; |
371 | 44.7M | } |
372 | | |
373 | | #define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline) \ |
374 | | static maybe_inline \ |
375 | | void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \ |
376 | | ptrdiff_t stride, \ |
377 | | int flim_E, int flim_I, \ |
378 | 4.51M | int hev_thresh) \ |
379 | 4.51M | { \ |
380 | 4.51M | int i; \ |
381 | 52.6M | for (i = 0; i < size; i++) \ |
382 | 48.1M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ |
383 | 48.1M | flim_E, flim_I)) { \ |
384 | 45.8M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ |
385 | 45.8M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ |
386 | 45.8M | else \ |
387 | 45.8M | filter_mbedge(dst + i * stridea, strideb); \ |
388 | 45.8M | } \ |
389 | 4.51M | } \ vp8dsp.c:vp7_v_loop_filter16_c Line | Count | Source | 378 | 228k | int hev_thresh) \ | 379 | 228k | { \ | 380 | 228k | int i; \ | 381 | 3.89M | for (i = 0; i < size; i++) \ | 382 | 3.66M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.66M | flim_E, flim_I)) { \ | 384 | 3.48M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.48M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.48M | else \ | 387 | 3.48M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.48M | } \ | 389 | 228k | } \ |
vp8dsp.c:vp7_h_loop_filter16_c Line | Count | Source | 378 | 233k | int hev_thresh) \ | 379 | 233k | { \ | 380 | 233k | int i; \ | 381 | 3.97M | for (i = 0; i < size; i++) \ | 382 | 3.73M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.73M | flim_E, flim_I)) { \ | 384 | 3.56M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.56M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.56M | else \ | 387 | 3.56M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.56M | } \ | 389 | 233k | } \ |
vp8dsp.c:vp7_v_loop_filter8_c Line | Count | Source | 378 | 457k | int hev_thresh) \ | 379 | 457k | { \ | 380 | 457k | int i; \ | 381 | 4.12M | for (i = 0; i < size; i++) \ | 382 | 3.66M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.66M | flim_E, flim_I)) { \ | 384 | 3.55M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.55M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.55M | else \ | 387 | 3.55M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.55M | } \ | 389 | 457k | } \ |
vp8dsp.c:vp7_h_loop_filter8_c Line | Count | Source | 378 | 467k | int hev_thresh) \ | 379 | 467k | { \ | 380 | 467k | int i; \ | 381 | 4.20M | for (i = 0; i < size; i++) \ | 382 | 3.73M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.73M | flim_E, flim_I)) { \ | 384 | 3.64M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.64M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.64M | else \ | 387 | 3.64M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.64M | } \ | 389 | 467k | } \ |
vp8dsp.c:vp8_v_loop_filter16_c Line | Count | Source | 378 | 514k | int hev_thresh) \ | 379 | 514k | { \ | 380 | 514k | int i; \ | 381 | 8.75M | for (i = 0; i < size; i++) \ | 382 | 8.23M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.23M | flim_E, flim_I)) { \ | 384 | 7.71M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 7.71M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 7.71M | else \ | 387 | 7.71M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 7.71M | } \ | 389 | 514k | } \ |
vp8dsp.c:vp8_h_loop_filter16_c Line | Count | Source | 378 | 527k | int hev_thresh) \ | 379 | 527k | { \ | 380 | 527k | int i; \ | 381 | 8.97M | for (i = 0; i < size; i++) \ | 382 | 8.44M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.44M | flim_E, flim_I)) { \ | 384 | 7.87M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 7.87M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 7.87M | else \ | 387 | 7.87M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 7.87M | } \ | 389 | 527k | } \ |
vp8dsp.c:vp8_v_loop_filter8_c Line | Count | Source | 378 | 1.02M | int hev_thresh) \ | 379 | 1.02M | { \ | 380 | 1.02M | int i; \ | 381 | 9.26M | for (i = 0; i < size; i++) \ | 382 | 8.23M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.23M | flim_E, flim_I)) { \ | 384 | 7.91M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 7.91M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 7.91M | else \ | 387 | 7.91M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 7.91M | } \ | 389 | 1.02M | } \ |
vp8dsp.c:vp8_h_loop_filter8_c Line | Count | Source | 378 | 1.05M | int hev_thresh) \ | 379 | 1.05M | { \ | 380 | 1.05M | int i; \ | 381 | 9.50M | for (i = 0; i < size; i++) \ | 382 | 8.44M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.44M | flim_E, flim_I)) { \ | 384 | 8.11M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 8.11M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 8.11M | else \ | 387 | 8.11M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 8.11M | } \ | 389 | 1.05M | } \ |
|
390 | | \ |
391 | | static maybe_inline \ |
392 | | void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \ |
393 | | ptrdiff_t stride, \ |
394 | | int flim_E, \ |
395 | | int flim_I, \ |
396 | 5.02M | int hev_thresh) \ |
397 | 5.02M | { \ |
398 | 5.02M | int i; \ |
399 | 69.3M | for (i = 0; i < size; i++) \ |
400 | 64.3M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ |
401 | 64.3M | flim_E, flim_I)) { \ |
402 | 58.6M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ |
403 | 58.6M | if (hv) \ |
404 | 58.6M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ |
405 | 58.6M | else \ |
406 | 58.6M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ |
407 | 58.6M | } \ |
408 | 5.02M | } vp8dsp.c:vp7_v_loop_filter16_inner_c Line | Count | Source | 396 | 776k | int hev_thresh) \ | 397 | 776k | { \ | 398 | 776k | int i; \ | 399 | 13.1M | for (i = 0; i < size; i++) \ | 400 | 12.4M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 12.4M | flim_E, flim_I)) { \ | 402 | 11.8M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 11.8M | if (hv) \ | 404 | 11.8M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 11.8M | else \ | 406 | 11.8M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 11.8M | } \ | 408 | 776k | } |
vp8dsp.c:vp7_h_loop_filter16_inner_c Line | Count | Source | 396 | 776k | int hev_thresh) \ | 397 | 776k | { \ | 398 | 776k | int i; \ | 399 | 13.1M | for (i = 0; i < size; i++) \ | 400 | 12.4M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 12.4M | flim_E, flim_I)) { \ | 402 | 11.8M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 11.8M | if (hv) \ | 404 | 11.8M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 11.8M | else \ | 406 | 11.8M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 11.8M | } \ | 408 | 776k | } |
vp8dsp.c:vp7_v_loop_filter8_inner_c Line | Count | Source | 396 | 517k | int hev_thresh) \ | 397 | 517k | { \ | 398 | 517k | int i; \ | 399 | 4.65M | for (i = 0; i < size; i++) \ | 400 | 4.14M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 4.14M | flim_E, flim_I)) { \ | 402 | 4.02M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 4.02M | if (hv) \ | 404 | 4.02M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 4.02M | else \ | 406 | 4.02M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 4.02M | } \ | 408 | 517k | } |
vp8dsp.c:vp7_h_loop_filter8_inner_c Line | Count | Source | 396 | 517k | int hev_thresh) \ | 397 | 517k | { \ | 398 | 517k | int i; \ | 399 | 4.65M | for (i = 0; i < size; i++) \ | 400 | 4.14M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 4.14M | flim_E, flim_I)) { \ | 402 | 4.02M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 4.02M | if (hv) \ | 404 | 4.02M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 4.02M | else \ | 406 | 4.02M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 4.02M | } \ | 408 | 517k | } |
vp8dsp.c:vp8_v_loop_filter16_inner_c Line | Count | Source | 396 | 731k | int hev_thresh) \ | 397 | 731k | { \ | 398 | 731k | int i; \ | 399 | 12.4M | for (i = 0; i < size; i++) \ | 400 | 11.7M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 11.7M | flim_E, flim_I)) { \ | 402 | 9.90M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 9.90M | if (hv) \ | 404 | 9.90M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 9.90M | else \ | 406 | 9.90M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 9.90M | } \ | 408 | 731k | } |
vp8dsp.c:vp8_h_loop_filter16_inner_c Line | Count | Source | 396 | 731k | int hev_thresh) \ | 397 | 731k | { \ | 398 | 731k | int i; \ | 399 | 12.4M | for (i = 0; i < size; i++) \ | 400 | 11.7M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 11.7M | flim_E, flim_I)) { \ | 402 | 9.93M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 9.93M | if (hv) \ | 404 | 9.93M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 9.93M | else \ | 406 | 9.93M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 9.93M | } \ | 408 | 731k | } |
vp8dsp.c:vp8_v_loop_filter8_inner_c Line | Count | Source | 396 | 487k | int hev_thresh) \ | 397 | 487k | { \ | 398 | 487k | int i; \ | 399 | 4.39M | for (i = 0; i < size; i++) \ | 400 | 3.90M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 3.90M | flim_E, flim_I)) { \ | 402 | 3.57M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 3.57M | if (hv) \ | 404 | 3.57M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 3.57M | else \ | 406 | 3.57M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 3.57M | } \ | 408 | 487k | } |
vp8dsp.c:vp8_h_loop_filter8_inner_c Line | Count | Source | 396 | 487k | int hev_thresh) \ | 397 | 487k | { \ | 398 | 487k | int i; \ | 399 | 4.39M | for (i = 0; i < size; i++) \ | 400 | 3.90M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 3.90M | flim_E, flim_I)) { \ | 402 | 3.54M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 3.54M | if (hv) \ | 404 | 3.54M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 3.54M | else \ | 406 | 3.54M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 3.54M | } \ | 408 | 487k | } |
|
409 | | |
410 | | #define UV_LOOP_FILTER(vpn, dir, stridea, strideb) \ |
411 | | LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline) \ |
412 | | static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU, \ |
413 | | uint8_t *dstV, \ |
414 | | ptrdiff_t stride, int fE, \ |
415 | 1.50M | int fI, int hev_thresh) \ |
416 | 1.50M | { \ |
417 | 1.50M | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ |
418 | 1.50M | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ |
419 | 1.50M | } \ vp8dsp.c:vp7_v_loop_filter8uv_c Line | Count | Source | 415 | 228k | int fI, int hev_thresh) \ | 416 | 228k | { \ | 417 | 228k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 228k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 228k | } \ |
vp8dsp.c:vp7_h_loop_filter8uv_c Line | Count | Source | 415 | 233k | int fI, int hev_thresh) \ | 416 | 233k | { \ | 417 | 233k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 233k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 233k | } \ |
vp8dsp.c:vp8_v_loop_filter8uv_c Line | Count | Source | 415 | 514k | int fI, int hev_thresh) \ | 416 | 514k | { \ | 417 | 514k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 514k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 514k | } \ |
vp8dsp.c:vp8_h_loop_filter8uv_c Line | Count | Source | 415 | 527k | int fI, int hev_thresh) \ | 416 | 527k | { \ | 417 | 527k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 527k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 527k | } \ |
|
420 | | \ |
421 | | static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \ |
422 | | uint8_t *dstV, \ |
423 | | ptrdiff_t stride, \ |
424 | | int fE, int fI, \ |
425 | 1.00M | int hev_thresh) \ |
426 | 1.00M | { \ |
427 | 1.00M | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ |
428 | 1.00M | hev_thresh); \ |
429 | 1.00M | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ |
430 | 1.00M | hev_thresh); \ |
431 | 1.00M | } vp8dsp.c:vp7_v_loop_filter8uv_inner_c Line | Count | Source | 425 | 258k | int hev_thresh) \ | 426 | 258k | { \ | 427 | 258k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 258k | hev_thresh); \ | 429 | 258k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 258k | hev_thresh); \ | 431 | 258k | } |
vp8dsp.c:vp7_h_loop_filter8uv_inner_c Line | Count | Source | 425 | 258k | int hev_thresh) \ | 426 | 258k | { \ | 427 | 258k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 258k | hev_thresh); \ | 429 | 258k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 258k | hev_thresh); \ | 431 | 258k | } |
vp8dsp.c:vp8_v_loop_filter8uv_inner_c Line | Count | Source | 425 | 243k | int hev_thresh) \ | 426 | 243k | { \ | 427 | 243k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 243k | hev_thresh); \ | 429 | 243k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 243k | hev_thresh); \ | 431 | 243k | } |
vp8dsp.c:vp8_h_loop_filter8uv_inner_c Line | Count | Source | 425 | 243k | int hev_thresh) \ | 426 | 243k | { \ | 427 | 243k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 243k | hev_thresh); \ | 429 | 243k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 243k | hev_thresh); \ | 431 | 243k | } |
|
432 | | |
433 | | #define LOOP_FILTER_SIMPLE(vpn) \ |
434 | | static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ |
435 | 2.24M | int flim) \ |
436 | 2.24M | { \ |
437 | 2.24M | int i; \ |
438 | 38.0M | for (i = 0; i < 16; i++) \ |
439 | 35.8M | if (vpn ## _simple_limit(dst + i, stride, flim)) \ |
440 | 35.8M | vpn ## _filter_common(dst + i, stride, 1); \ |
441 | 2.24M | } \ vp8dsp.c:vp7_v_loop_filter_simple_c Line | Count | Source | 435 | 1.81M | int flim) \ | 436 | 1.81M | { \ | 437 | 1.81M | int i; \ | 438 | 30.8M | for (i = 0; i < 16; i++) \ | 439 | 29.0M | if (vpn ## _simple_limit(dst + i, stride, flim)) \ | 440 | 29.0M | vpn ## _filter_common(dst + i, stride, 1); \ | 441 | 1.81M | } \ |
vp8dsp.c:vp8_v_loop_filter_simple_c Line | Count | Source | 435 | 425k | int flim) \ | 436 | 425k | { \ | 437 | 425k | int i; \ | 438 | 7.22M | for (i = 0; i < 16; i++) \ | 439 | 6.80M | if (vpn ## _simple_limit(dst + i, stride, flim)) \ | 440 | 6.80M | vpn ## _filter_common(dst + i, stride, 1); \ | 441 | 425k | } \ |
|
442 | | \ |
443 | | static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ |
444 | 2.23M | int flim) \ |
445 | 2.23M | { \ |
446 | 2.23M | int i; \ |
447 | 37.9M | for (i = 0; i < 16; i++) \ |
448 | 35.7M | if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ |
449 | 35.7M | vpn ## _filter_common(dst + i * stride, 1, 1); \ |
450 | 2.23M | } vp8dsp.c:vp7_h_loop_filter_simple_c Line | Count | Source | 444 | 1.80M | int flim) \ | 445 | 1.80M | { \ | 446 | 1.80M | int i; \ | 447 | 30.6M | for (i = 0; i < 16; i++) \ | 448 | 28.8M | if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ | 449 | 28.8M | vpn ## _filter_common(dst + i * stride, 1, 1); \ | 450 | 1.80M | } |
vp8dsp.c:vp8_h_loop_filter_simple_c Line | Count | Source | 444 | 430k | int flim) \ | 445 | 430k | { \ | 446 | 430k | int i; \ | 447 | 7.31M | for (i = 0; i < 16; i++) \ | 448 | 6.88M | if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ | 449 | 6.88M | vpn ## _filter_common(dst + i * stride, 1, 1); \ | 450 | 430k | } |
|
451 | | |
452 | | #define LOOP_FILTERS(vpn) \ |
453 | | LOOP_FILTER(vpn, v, 16, 1, stride, ) \ |
454 | | LOOP_FILTER(vpn, h, 16, stride, 1, ) \ |
455 | | UV_LOOP_FILTER(vpn, v, 1, stride) \ |
456 | | UV_LOOP_FILTER(vpn, h, stride, 1) \ |
457 | | LOOP_FILTER_SIMPLE(vpn) \ |
458 | | |
459 | | static const uint8_t subpel_filters[7][6] = { |
460 | | { 0, 6, 123, 12, 1, 0 }, |
461 | | { 2, 11, 108, 36, 8, 1 }, |
462 | | { 0, 9, 93, 50, 6, 0 }, |
463 | | { 3, 16, 77, 77, 16, 3 }, |
464 | | { 0, 6, 50, 93, 9, 0 }, |
465 | | { 1, 8, 36, 108, 11, 2 }, |
466 | | { 0, 1, 12, 123, 6, 0 }, |
467 | | }; |
468 | | |
469 | | #define PUT_PIXELS(WIDTH) \ |
470 | | static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \ |
471 | | const uint8_t *src, ptrdiff_t srcstride, \ |
472 | 3.45M | int h, int x, int y) \ |
473 | 3.45M | { \ |
474 | 3.45M | int i; \ |
475 | 33.5M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ |
476 | 30.0M | memcpy(dst, src, WIDTH); \ |
477 | 3.45M | } vp8dsp.c:put_vp8_pixels16_c Line | Count | Source | 472 | 806k | int h, int x, int y) \ | 473 | 806k | { \ | 474 | 806k | int i; \ | 475 | 13.4M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ | 476 | 12.6M | memcpy(dst, src, WIDTH); \ | 477 | 806k | } |
vp8dsp.c:put_vp8_pixels8_c Line | Count | Source | 472 | 1.66M | int h, int x, int y) \ | 473 | 1.66M | { \ | 474 | 1.66M | int i; \ | 475 | 14.9M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ | 476 | 13.2M | memcpy(dst, src, WIDTH); \ | 477 | 1.66M | } |
vp8dsp.c:put_vp8_pixels4_c Line | Count | Source | 472 | 989k | int h, int x, int y) \ | 473 | 989k | { \ | 474 | 989k | int i; \ | 475 | 5.19M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ | 476 | 4.20M | memcpy(dst, src, WIDTH); \ | 477 | 989k | } |
|
478 | | |
479 | | PUT_PIXELS(16) |
480 | | PUT_PIXELS(8) |
481 | | PUT_PIXELS(4) |
482 | | |
483 | | #define FILTER_6TAP(src, F, stride) \ |
484 | 197M | cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ |
485 | 197M | F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ |
486 | 197M | F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] |
487 | | |
488 | | #define FILTER_4TAP(src, F, stride) \ |
489 | 48.2M | cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ |
490 | 48.2M | F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] |
491 | | |
492 | | #define VP8_EPEL_H(SIZE, TAPS) \ |
493 | | static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \ |
494 | | ptrdiff_t dststride, \ |
495 | | const uint8_t *src, \ |
496 | | ptrdiff_t srcstride, \ |
497 | 348k | int h, int mx, int my) \ |
498 | 348k | { \ |
499 | 348k | const uint8_t *filter = subpel_filters[mx - 1]; \ |
500 | 348k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
501 | 348k | int x, y; \ |
502 | 3.54M | for (y = 0; y < h; y++) { \ |
503 | 39.3M | for (x = 0; x < SIZE; x++) \ |
504 | 36.1M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ |
505 | 3.19M | dst += dststride; \ |
506 | 3.19M | src += srcstride; \ |
507 | 3.19M | } \ |
508 | 348k | } vp8dsp.c:put_vp8_epel16_h6_c Line | Count | Source | 497 | 96.6k | int h, int mx, int my) \ | 498 | 96.6k | { \ | 499 | 96.6k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 96.6k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 96.6k | int x, y; \ | 502 | 1.61M | for (y = 0; y < h; y++) { \ | 503 | 25.8M | for (x = 0; x < SIZE; x++) \ | 504 | 24.3M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 1.52M | dst += dststride; \ | 506 | 1.52M | src += srcstride; \ | 507 | 1.52M | } \ | 508 | 96.6k | } |
vp8dsp.c:put_vp8_epel8_h4_c Line | Count | Source | 497 | 54.1k | int h, int mx, int my) \ | 498 | 54.1k | { \ | 499 | 54.1k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 54.1k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 54.1k | int x, y; \ | 502 | 478k | for (y = 0; y < h; y++) { \ | 503 | 3.81M | for (x = 0; x < SIZE; x++) \ | 504 | 3.39M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 424k | dst += dststride; \ | 506 | 424k | src += srcstride; \ | 507 | 424k | } \ | 508 | 54.1k | } |
vp8dsp.c:put_vp8_epel8_h6_c Line | Count | Source | 497 | 103k | int h, int mx, int my) \ | 498 | 103k | { \ | 499 | 103k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 103k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 103k | int x, y; \ | 502 | 951k | for (y = 0; y < h; y++) { \ | 503 | 7.63M | for (x = 0; x < SIZE; x++) \ | 504 | 6.78M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 848k | dst += dststride; \ | 506 | 848k | src += srcstride; \ | 507 | 848k | } \ | 508 | 103k | } |
vp8dsp.c:put_vp8_epel4_h4_c Line | Count | Source | 497 | 20.6k | int h, int mx, int my) \ | 498 | 20.6k | { \ | 499 | 20.6k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 20.6k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 20.6k | int x, y; \ | 502 | 115k | for (y = 0; y < h; y++) { \ | 503 | 476k | for (x = 0; x < SIZE; x++) \ | 504 | 381k | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 95.3k | dst += dststride; \ | 506 | 95.3k | src += srcstride; \ | 507 | 95.3k | } \ | 508 | 20.6k | } |
vp8dsp.c:put_vp8_epel4_h6_c Line | Count | Source | 497 | 73.8k | int h, int mx, int my) \ | 498 | 73.8k | { \ | 499 | 73.8k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 73.8k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 73.8k | int x, y; \ | 502 | 380k | for (y = 0; y < h; y++) { \ | 503 | 1.53M | for (x = 0; x < SIZE; x++) \ | 504 | 1.22M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 306k | dst += dststride; \ | 506 | 306k | src += srcstride; \ | 507 | 306k | } \ | 508 | 73.8k | } |
|
509 | | |
510 | | #define VP8_EPEL_V(SIZE, TAPS) \ |
511 | | static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \ |
512 | | ptrdiff_t dststride, \ |
513 | | const uint8_t *src, \ |
514 | | ptrdiff_t srcstride, \ |
515 | 304k | int h, int mx, int my) \ |
516 | 304k | { \ |
517 | 304k | const uint8_t *filter = subpel_filters[my - 1]; \ |
518 | 304k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
519 | 304k | int x, y; \ |
520 | 3.02M | for (y = 0; y < h; y++) { \ |
521 | 32.4M | for (x = 0; x < SIZE; x++) \ |
522 | 29.7M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ |
523 | 2.72M | dst += dststride; \ |
524 | 2.72M | src += srcstride; \ |
525 | 2.72M | } \ |
526 | 304k | } vp8dsp.c:put_vp8_epel16_v6_c Line | Count | Source | 515 | 74.8k | int h, int mx, int my) \ | 516 | 74.8k | { \ | 517 | 74.8k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 74.8k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 74.8k | int x, y; \ | 520 | 1.22M | for (y = 0; y < h; y++) { \ | 521 | 19.5M | for (x = 0; x < SIZE; x++) \ | 522 | 18.4M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 1.15M | dst += dststride; \ | 524 | 1.15M | src += srcstride; \ | 525 | 1.15M | } \ | 526 | 74.8k | } |
vp8dsp.c:put_vp8_epel8_v4_c Line | Count | Source | 515 | 60.7k | int h, int mx, int my) \ | 516 | 60.7k | { \ | 517 | 60.7k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 60.7k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 60.7k | int x, y; \ | 520 | 534k | for (y = 0; y < h; y++) { \ | 521 | 4.26M | for (x = 0; x < SIZE; x++) \ | 522 | 3.79M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 474k | dst += dststride; \ | 524 | 474k | src += srcstride; \ | 525 | 474k | } \ | 526 | 60.7k | } |
vp8dsp.c:put_vp8_epel8_v6_c Line | Count | Source | 515 | 95.0k | int h, int mx, int my) \ | 516 | 95.0k | { \ | 517 | 95.0k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 95.0k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 95.0k | int x, y; \ | 520 | 872k | for (y = 0; y < h; y++) { \ | 521 | 7.00M | for (x = 0; x < SIZE; x++) \ | 522 | 6.22M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 777k | dst += dststride; \ | 524 | 777k | src += srcstride; \ | 525 | 777k | } \ | 526 | 95.0k | } |
vp8dsp.c:put_vp8_epel4_v4_c Line | Count | Source | 515 | 21.1k | int h, int mx, int my) \ | 516 | 21.1k | { \ | 517 | 21.1k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 21.1k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 21.1k | int x, y; \ | 520 | 124k | for (y = 0; y < h; y++) { \ | 521 | 517k | for (x = 0; x < SIZE; x++) \ | 522 | 413k | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 103k | dst += dststride; \ | 524 | 103k | src += srcstride; \ | 525 | 103k | } \ | 526 | 21.1k | } |
vp8dsp.c:put_vp8_epel4_v6_c Line | Count | Source | 515 | 52.4k | int h, int mx, int my) \ | 516 | 52.4k | { \ | 517 | 52.4k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 52.4k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 52.4k | int x, y; \ | 520 | 271k | for (y = 0; y < h; y++) { \ | 521 | 1.09M | for (x = 0; x < SIZE; x++) \ | 522 | 877k | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 219k | dst += dststride; \ | 524 | 219k | src += srcstride; \ | 525 | 219k | } \ | 526 | 52.4k | } |
|
527 | | |
528 | | #define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ |
529 | | static void \ |
530 | | put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \ |
531 | | ptrdiff_t dststride, \ |
532 | | const uint8_t *src, \ |
533 | | ptrdiff_t srcstride, \ |
534 | | int h, int mx, \ |
535 | 949k | int my) \ |
536 | 949k | { \ |
537 | 949k | const uint8_t *filter = subpel_filters[mx - 1]; \ |
538 | 949k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
539 | 949k | int x, y; \ |
540 | 949k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ |
541 | 949k | uint8_t *tmp = tmp_array; \ |
542 | 949k | src -= (2 - (VTAPS == 4)) * srcstride; \ |
543 | 949k | \ |
544 | 12.9M | for (y = 0; y < h + VTAPS - 1; y++) { \ |
545 | 118M | for (x = 0; x < SIZE; x++) \ |
546 | 106M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ |
547 | 12.0M | tmp += SIZE; \ |
548 | 12.0M | src += srcstride; \ |
549 | 12.0M | } \ |
550 | 949k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ |
551 | 949k | filter = subpel_filters[my - 1]; \ |
552 | 949k | \ |
553 | 8.89M | for (y = 0; y < h; y++) { \ |
554 | 81.6M | for (x = 0; x < SIZE; x++) \ |
555 | 73.7M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ |
556 | 7.94M | dst += dststride; \ |
557 | 7.94M | tmp += SIZE; \ |
558 | 7.94M | } \ |
559 | 949k | } vp8dsp.c:put_vp8_epel16_h6v6_c Line | Count | Source | 535 | 135k | int my) \ | 536 | 135k | { \ | 537 | 135k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 135k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 135k | int x, y; \ | 540 | 135k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 135k | uint8_t *tmp = tmp_array; \ | 542 | 135k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 135k | \ | 544 | 2.93M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 47.5M | for (x = 0; x < SIZE; x++) \ | 546 | 44.7M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 2.79M | tmp += SIZE; \ | 548 | 2.79M | src += srcstride; \ | 549 | 2.79M | } \ | 550 | 135k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 135k | filter = subpel_filters[my - 1]; \ | 552 | 135k | \ | 553 | 2.25M | for (y = 0; y < h; y++) { \ | 554 | 36.0M | for (x = 0; x < SIZE; x++) \ | 555 | 33.9M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 2.12M | dst += dststride; \ | 557 | 2.12M | tmp += SIZE; \ | 558 | 2.12M | } \ | 559 | 135k | } |
vp8dsp.c:put_vp8_epel8_h4v4_c Line | Count | Source | 535 | 97.9k | int my) \ | 536 | 97.9k | { \ | 537 | 97.9k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 97.9k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 97.9k | int x, y; \ | 540 | 97.9k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 97.9k | uint8_t *tmp = tmp_array; \ | 542 | 97.9k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 97.9k | \ | 544 | 1.15M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 9.51M | for (x = 0; x < SIZE; x++) \ | 546 | 8.45M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.05M | tmp += SIZE; \ | 548 | 1.05M | src += srcstride; \ | 549 | 1.05M | } \ | 550 | 97.9k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 97.9k | filter = subpel_filters[my - 1]; \ | 552 | 97.9k | \ | 553 | 861k | for (y = 0; y < h; y++) { \ | 554 | 6.87M | for (x = 0; x < SIZE; x++) \ | 555 | 6.10M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 763k | dst += dststride; \ | 557 | 763k | tmp += SIZE; \ | 558 | 763k | } \ | 559 | 97.9k | } |
vp8dsp.c:put_vp8_epel8_h6v4_c Line | Count | Source | 535 | 95.6k | int my) \ | 536 | 95.6k | { \ | 537 | 95.6k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 95.6k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 95.6k | int x, y; \ | 540 | 95.6k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 95.6k | uint8_t *tmp = tmp_array; \ | 542 | 95.6k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 95.6k | \ | 544 | 1.12M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 9.30M | for (x = 0; x < SIZE; x++) \ | 546 | 8.26M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.03M | tmp += SIZE; \ | 548 | 1.03M | src += srcstride; \ | 549 | 1.03M | } \ | 550 | 95.6k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 95.6k | filter = subpel_filters[my - 1]; \ | 552 | 95.6k | \ | 553 | 842k | for (y = 0; y < h; y++) { \ | 554 | 6.72M | for (x = 0; x < SIZE; x++) \ | 555 | 5.97M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 746k | dst += dststride; \ | 557 | 746k | tmp += SIZE; \ | 558 | 746k | } \ | 559 | 95.6k | } |
vp8dsp.c:put_vp8_epel8_h4v6_c Line | Count | Source | 535 | 104k | int my) \ | 536 | 104k | { \ | 537 | 104k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 104k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 104k | int x, y; \ | 540 | 104k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 104k | uint8_t *tmp = tmp_array; \ | 542 | 104k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 104k | \ | 544 | 1.44M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 12.0M | for (x = 0; x < SIZE; x++) \ | 546 | 10.7M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.33M | tmp += SIZE; \ | 548 | 1.33M | src += srcstride; \ | 549 | 1.33M | } \ | 550 | 104k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 104k | filter = subpel_filters[my - 1]; \ | 552 | 104k | \ | 553 | 921k | for (y = 0; y < h; y++) { \ | 554 | 7.35M | for (x = 0; x < SIZE; x++) \ | 555 | 6.53M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 817k | dst += dststride; \ | 557 | 817k | tmp += SIZE; \ | 558 | 817k | } \ | 559 | 104k | } |
vp8dsp.c:put_vp8_epel8_h6v6_c Line | Count | Source | 535 | 186k | int my) \ | 536 | 186k | { \ | 537 | 186k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 186k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 186k | int x, y; \ | 540 | 186k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 186k | uint8_t *tmp = tmp_array; \ | 542 | 186k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 186k | \ | 544 | 2.92M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 24.6M | for (x = 0; x < SIZE; x++) \ | 546 | 21.8M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 2.73M | tmp += SIZE; \ | 548 | 2.73M | src += srcstride; \ | 549 | 2.73M | } \ | 550 | 186k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 186k | filter = subpel_filters[my - 1]; \ | 552 | 186k | \ | 553 | 1.99M | for (y = 0; y < h; y++) { \ | 554 | 16.2M | for (x = 0; x < SIZE; x++) \ | 555 | 14.4M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 1.80M | dst += dststride; \ | 557 | 1.80M | tmp += SIZE; \ | 558 | 1.80M | } \ | 559 | 186k | } |
vp8dsp.c:put_vp8_epel4_h4v4_c Line | Count | Source | 535 | 100k | int my) \ | 536 | 100k | { \ | 537 | 100k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 100k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 100k | int x, y; \ | 540 | 100k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 100k | uint8_t *tmp = tmp_array; \ | 542 | 100k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 100k | \ | 544 | 1.04M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 4.71M | for (x = 0; x < SIZE; x++) \ | 546 | 3.77M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 942k | tmp += SIZE; \ | 548 | 942k | src += srcstride; \ | 549 | 942k | } \ | 550 | 100k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 100k | filter = subpel_filters[my - 1]; \ | 552 | 100k | \ | 553 | 741k | for (y = 0; y < h; y++) { \ | 554 | 3.20M | for (x = 0; x < SIZE; x++) \ | 555 | 2.56M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 641k | dst += dststride; \ | 557 | 641k | tmp += SIZE; \ | 558 | 641k | } \ | 559 | 100k | } |
vp8dsp.c:put_vp8_epel4_h6v4_c Line | Count | Source | 535 | 43.7k | int my) \ | 536 | 43.7k | { \ | 537 | 43.7k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 43.7k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 43.7k | int x, y; \ | 540 | 43.7k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 43.7k | uint8_t *tmp = tmp_array; \ | 542 | 43.7k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 43.7k | \ | 544 | 397k | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 1.76M | for (x = 0; x < SIZE; x++) \ | 546 | 1.41M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 353k | tmp += SIZE; \ | 548 | 353k | src += srcstride; \ | 549 | 353k | } \ | 550 | 43.7k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 43.7k | filter = subpel_filters[my - 1]; \ | 552 | 43.7k | \ | 553 | 265k | for (y = 0; y < h; y++) { \ | 554 | 1.10M | for (x = 0; x < SIZE; x++) \ | 555 | 887k | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 221k | dst += dststride; \ | 557 | 221k | tmp += SIZE; \ | 558 | 221k | } \ | 559 | 43.7k | } |
vp8dsp.c:put_vp8_epel4_h4v6_c Line | Count | Source | 535 | 47.9k | int my) \ | 536 | 47.9k | { \ | 537 | 47.9k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 47.9k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 47.9k | int x, y; \ | 540 | 47.9k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 47.9k | uint8_t *tmp = tmp_array; \ | 542 | 47.9k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 47.9k | \ | 544 | 507k | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 2.29M | for (x = 0; x < SIZE; x++) \ | 546 | 1.83M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 459k | tmp += SIZE; \ | 548 | 459k | src += srcstride; \ | 549 | 459k | } \ | 550 | 47.9k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 47.9k | filter = subpel_filters[my - 1]; \ | 552 | 47.9k | \ | 553 | 267k | for (y = 0; y < h; y++) { \ | 554 | 1.09M | for (x = 0; x < SIZE; x++) \ | 555 | 877k | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 219k | dst += dststride; \ | 557 | 219k | tmp += SIZE; \ | 558 | 219k | } \ | 559 | 47.9k | } |
vp8dsp.c:put_vp8_epel4_h6v6_c Line | Count | Source | 535 | 137k | int my) \ | 536 | 137k | { \ | 537 | 137k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 137k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 137k | int x, y; \ | 540 | 137k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 137k | uint8_t *tmp = tmp_array; \ | 542 | 137k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 137k | \ | 544 | 1.42M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 6.45M | for (x = 0; x < SIZE; x++) \ | 546 | 5.16M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.29M | tmp += SIZE; \ | 548 | 1.29M | src += srcstride; \ | 549 | 1.29M | } \ | 550 | 137k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 137k | filter = subpel_filters[my - 1]; \ | 552 | 137k | \ | 553 | 743k | for (y = 0; y < h; y++) { \ | 554 | 3.03M | for (x = 0; x < SIZE; x++) \ | 555 | 2.42M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 606k | dst += dststride; \ | 557 | 606k | tmp += SIZE; \ | 558 | 606k | } \ | 559 | 137k | } |
|
560 | | |
561 | | VP8_EPEL_H(8, 4) |
562 | | VP8_EPEL_H(4, 4) |
563 | | VP8_EPEL_H(16, 6) |
564 | | VP8_EPEL_H(8, 6) |
565 | | VP8_EPEL_H(4, 6) |
566 | | VP8_EPEL_V(8, 4) |
567 | | VP8_EPEL_V(4, 4) |
568 | | VP8_EPEL_V(16, 6) |
569 | | VP8_EPEL_V(8, 6) |
570 | | VP8_EPEL_V(4, 6) |
571 | | |
572 | | VP8_EPEL_HV(8, 4, 4) |
573 | | VP8_EPEL_HV(4, 4, 4) |
574 | | VP8_EPEL_HV(8, 4, 6) |
575 | | VP8_EPEL_HV(4, 4, 6) |
576 | | VP8_EPEL_HV(8, 6, 4) |
577 | | VP8_EPEL_HV(4, 6, 4) |
578 | | VP8_EPEL_HV(16, 6, 6) |
579 | | VP8_EPEL_HV(8, 6, 6) |
580 | | VP8_EPEL_HV(4, 6, 6) |
581 | | |
582 | | #define VP8_BILINEAR(SIZE) \ |
583 | | static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \ |
584 | | const uint8_t *src, ptrdiff_t sstride, \ |
585 | 285k | int h, int mx, int my) \ |
586 | 285k | { \ |
587 | 285k | int a = 8 - mx, b = mx; \ |
588 | 285k | int x, y; \ |
589 | 2.08M | for (y = 0; y < h; y++) { \ |
590 | 17.2M | for (x = 0; x < SIZE; x++) \ |
591 | 15.4M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ |
592 | 1.79M | dst += dstride; \ |
593 | 1.79M | src += sstride; \ |
594 | 1.79M | } \ |
595 | 285k | } \ vp8dsp.c:put_vp8_bilinear16_h_c Line | Count | Source | 585 | 33.9k | int h, int mx, int my) \ | 586 | 33.9k | { \ | 587 | 33.9k | int a = 8 - mx, b = mx; \ | 588 | 33.9k | int x, y; \ | 589 | 539k | for (y = 0; y < h; y++) { \ | 590 | 8.58M | for (x = 0; x < SIZE; x++) \ | 591 | 8.08M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 592 | 505k | dst += dstride; \ | 593 | 505k | src += sstride; \ | 594 | 505k | } \ | 595 | 33.9k | } \ |
vp8dsp.c:put_vp8_bilinear8_h_c Line | Count | Source | 585 | 69.9k | int h, int mx, int my) \ | 586 | 69.9k | { \ | 587 | 69.9k | int a = 8 - mx, b = mx; \ | 588 | 69.9k | int x, y; \ | 589 | 616k | for (y = 0; y < h; y++) { \ | 590 | 4.92M | for (x = 0; x < SIZE; x++) \ | 591 | 4.37M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 592 | 547k | dst += dstride; \ | 593 | 547k | src += sstride; \ | 594 | 547k | } \ | 595 | 69.9k | } \ |
vp8dsp.c:put_vp8_bilinear4_h_c Line | Count | Source | 585 | 181k | int h, int mx, int my) \ | 586 | 181k | { \ | 587 | 181k | int a = 8 - mx, b = mx; \ | 588 | 181k | int x, y; \ | 589 | 929k | for (y = 0; y < h; y++) { \ | 590 | 3.73M | for (x = 0; x < SIZE; x++) \ | 591 | 2.99M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 592 | 747k | dst += dstride; \ | 593 | 747k | src += sstride; \ | 594 | 747k | } \ | 595 | 181k | } \ |
|
596 | | \ |
597 | | static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \ |
598 | | const uint8_t *src, ptrdiff_t sstride, \ |
599 | 289k | int h, int mx, int my) \ |
600 | 289k | { \ |
601 | 289k | int c = 8 - my, d = my; \ |
602 | 289k | int x, y; \ |
603 | 2.15M | for (y = 0; y < h; y++) { \ |
604 | 17.6M | for (x = 0; x < SIZE; x++) \ |
605 | 15.8M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ |
606 | 1.86M | dst += dstride; \ |
607 | 1.86M | src += sstride; \ |
608 | 1.86M | } \ |
609 | 289k | } \ vp8dsp.c:put_vp8_bilinear16_v_c Line | Count | Source | 599 | 32.6k | int h, int mx, int my) \ | 600 | 32.6k | { \ | 601 | 32.6k | int c = 8 - my, d = my; \ | 602 | 32.6k | int x, y; \ | 603 | 516k | for (y = 0; y < h; y++) { \ | 604 | 8.21M | for (x = 0; x < SIZE; x++) \ | 605 | 7.73M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ | 606 | 483k | dst += dstride; \ | 607 | 483k | src += sstride; \ | 608 | 483k | } \ | 609 | 32.6k | } \ |
vp8dsp.c:put_vp8_bilinear8_v_c Line | Count | Source | 599 | 82.0k | int h, int mx, int my) \ | 600 | 82.0k | { \ | 601 | 82.0k | int c = 8 - my, d = my; \ | 602 | 82.0k | int x, y; \ | 603 | 729k | for (y = 0; y < h; y++) { \ | 604 | 5.82M | for (x = 0; x < SIZE; x++) \ | 605 | 5.17M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ | 606 | 647k | dst += dstride; \ | 607 | 647k | src += sstride; \ | 608 | 647k | } \ | 609 | 82.0k | } \ |
vp8dsp.c:put_vp8_bilinear4_v_c Line | Count | Source | 599 | 175k | int h, int mx, int my) \ | 600 | 175k | { \ | 601 | 175k | int c = 8 - my, d = my; \ | 602 | 175k | int x, y; \ | 603 | 905k | for (y = 0; y < h; y++) { \ | 604 | 3.65M | for (x = 0; x < SIZE; x++) \ | 605 | 2.92M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ | 606 | 730k | dst += dstride; \ | 607 | 730k | src += sstride; \ | 608 | 730k | } \ | 609 | 175k | } \ |
|
610 | | \ |
611 | | static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \ |
612 | | ptrdiff_t dstride, \ |
613 | | const uint8_t *src, \ |
614 | | ptrdiff_t sstride, \ |
615 | 238k | int h, int mx, int my) \ |
616 | 238k | { \ |
617 | 238k | int a = 8 - mx, b = mx; \ |
618 | 238k | int c = 8 - my, d = my; \ |
619 | 238k | int x, y; \ |
620 | 238k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ |
621 | 238k | uint8_t *tmp = tmp_array; \ |
622 | 2.02M | for (y = 0; y < h + 1; y++) { \ |
623 | 15.9M | for (x = 0; x < SIZE; x++) \ |
624 | 14.1M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ |
625 | 1.78M | tmp += SIZE; \ |
626 | 1.78M | src += sstride; \ |
627 | 1.78M | } \ |
628 | 238k | tmp = tmp_array; \ |
629 | 1.78M | for (y = 0; y < h; y++) { \ |
630 | 14.1M | for (x = 0; x < SIZE; x++) \ |
631 | 12.6M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ |
632 | 1.54M | dst += dstride; \ |
633 | 1.54M | tmp += SIZE; \ |
634 | 1.54M | } \ |
635 | 238k | } vp8dsp.c:put_vp8_bilinear16_hv_c Line | Count | Source | 615 | 23.8k | int h, int mx, int my) \ | 616 | 23.8k | { \ | 617 | 23.8k | int a = 8 - mx, b = mx; \ | 618 | 23.8k | int c = 8 - my, d = my; \ | 619 | 23.8k | int x, y; \ | 620 | 23.8k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ | 621 | 23.8k | uint8_t *tmp = tmp_array; \ | 622 | 392k | for (y = 0; y < h + 1; y++) { \ | 623 | 6.26M | for (x = 0; x < SIZE; x++) \ | 624 | 5.89M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 625 | 368k | tmp += SIZE; \ | 626 | 368k | src += sstride; \ | 627 | 368k | } \ | 628 | 23.8k | tmp = tmp_array; \ | 629 | 368k | for (y = 0; y < h; y++) { \ | 630 | 5.86M | for (x = 0; x < SIZE; x++) \ | 631 | 5.51M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ | 632 | 344k | dst += dstride; \ | 633 | 344k | tmp += SIZE; \ | 634 | 344k | } \ | 635 | 23.8k | } |
vp8dsp.c:put_vp8_bilinear8_hv_c Line | Count | Source | 615 | 70.0k | int h, int mx, int my) \ | 616 | 70.0k | { \ | 617 | 70.0k | int a = 8 - mx, b = mx; \ | 618 | 70.0k | int c = 8 - my, d = my; \ | 619 | 70.0k | int x, y; \ | 620 | 70.0k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ | 621 | 70.0k | uint8_t *tmp = tmp_array; \ | 622 | 716k | for (y = 0; y < h + 1; y++) { \ | 623 | 5.81M | for (x = 0; x < SIZE; x++) \ | 624 | 5.17M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 625 | 646k | tmp += SIZE; \ | 626 | 646k | src += sstride; \ | 627 | 646k | } \ | 628 | 70.0k | tmp = tmp_array; \ | 629 | 646k | for (y = 0; y < h; y++) { \ | 630 | 5.18M | for (x = 0; x < SIZE; x++) \ | 631 | 4.61M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ | 632 | 576k | dst += dstride; \ | 633 | 576k | tmp += SIZE; \ | 634 | 576k | } \ | 635 | 70.0k | } |
vp8dsp.c:put_vp8_bilinear4_hv_c Line | Count | Source | 615 | 144k | int h, int mx, int my) \ | 616 | 144k | { \ | 617 | 144k | int a = 8 - mx, b = mx; \ | 618 | 144k | int c = 8 - my, d = my; \ | 619 | 144k | int x, y; \ | 620 | 144k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ | 621 | 144k | uint8_t *tmp = tmp_array; \ | 622 | 911k | for (y = 0; y < h + 1; y++) { \ | 623 | 3.83M | for (x = 0; x < SIZE; x++) \ | 624 | 3.06M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 625 | 766k | tmp += SIZE; \ | 626 | 766k | src += sstride; \ | 627 | 766k | } \ | 628 | 144k | tmp = tmp_array; \ | 629 | 766k | for (y = 0; y < h; y++) { \ | 630 | 3.10M | for (x = 0; x < SIZE; x++) \ | 631 | 2.48M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ | 632 | 621k | dst += dstride; \ | 633 | 621k | tmp += SIZE; \ | 634 | 621k | } \ | 635 | 144k | } |
|
636 | | |
637 | | VP8_BILINEAR(16) |
638 | | VP8_BILINEAR(8) |
639 | | VP8_BILINEAR(4) |
640 | | |
641 | | #define VP78_MC_FUNC(IDX, SIZE) \ |
642 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ |
643 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ |
644 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ |
645 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ |
646 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ |
647 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ |
648 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ |
649 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \ |
650 | 36.2k | dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c |
651 | | |
652 | | #define VP78_BILINEAR_MC_FUNC(IDX, SIZE) \ |
653 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ |
654 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \ |
655 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \ |
656 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \ |
657 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
658 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
659 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \ |
660 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
661 | 54.4k | dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c |
662 | | |
663 | | av_cold void ff_vp78dsp_init(VP8DSPContext *dsp) |
664 | 18.1k | { |
665 | 18.1k | dsp->put_vp8_epel_pixels_tab[0][0][0] = put_vp8_pixels16_c; |
666 | 18.1k | dsp->put_vp8_epel_pixels_tab[0][0][2] = put_vp8_epel16_h6_c; |
667 | 18.1k | dsp->put_vp8_epel_pixels_tab[0][2][0] = put_vp8_epel16_v6_c; |
668 | 18.1k | dsp->put_vp8_epel_pixels_tab[0][2][2] = put_vp8_epel16_h6v6_c; |
669 | | |
670 | 18.1k | VP78_MC_FUNC(1, 8); |
671 | 18.1k | VP78_MC_FUNC(2, 4); |
672 | | |
673 | 18.1k | VP78_BILINEAR_MC_FUNC(0, 16); |
674 | 18.1k | VP78_BILINEAR_MC_FUNC(1, 8); |
675 | 18.1k | VP78_BILINEAR_MC_FUNC(2, 4); |
676 | | |
677 | | #if ARCH_AARCH64 |
678 | | ff_vp78dsp_init_aarch64(dsp); |
679 | | #elif ARCH_ARM |
680 | | ff_vp78dsp_init_arm(dsp); |
681 | | #elif ARCH_PPC |
682 | | ff_vp78dsp_init_ppc(dsp); |
683 | | #elif ARCH_RISCV |
684 | | ff_vp78dsp_init_riscv(dsp); |
685 | | #elif ARCH_X86 && HAVE_X86ASM |
686 | | ff_vp78dsp_init_x86(dsp); |
687 | | #endif |
688 | 18.1k | } |
689 | | |
690 | | #if CONFIG_VP7_DECODER |
691 | | LOOP_FILTERS(vp7) |
692 | | |
693 | | av_cold void ff_vp7dsp_init(VP8DSPContext *dsp) |
694 | 7.21k | { |
695 | 7.21k | dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c; |
696 | 7.21k | dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c; |
697 | 7.21k | dsp->vp8_idct_add = vp7_idct_add_c; |
698 | 7.21k | dsp->vp8_idct_dc_add = vp7_idct_dc_add_c; |
699 | 7.21k | dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c; |
700 | 7.21k | dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c; |
701 | | |
702 | 7.21k | dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c; |
703 | 7.21k | dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c; |
704 | 7.21k | dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c; |
705 | 7.21k | dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c; |
706 | | |
707 | 7.21k | dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c; |
708 | 7.21k | dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c; |
709 | 7.21k | dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c; |
710 | 7.21k | dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c; |
711 | | |
712 | 7.21k | dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c; |
713 | 7.21k | dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c; |
714 | | |
715 | | #if ARCH_RISCV |
716 | | ff_vp7dsp_init_riscv(dsp); |
717 | | #endif |
718 | 7.21k | } |
719 | | #endif /* CONFIG_VP7_DECODER */ |
720 | | |
721 | | #if CONFIG_VP8_DECODER |
722 | | LOOP_FILTERS(vp8) |
723 | | |
724 | | av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
725 | 10.9k | { |
726 | 10.9k | dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
727 | 10.9k | dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c; |
728 | 10.9k | dsp->vp8_idct_add = vp8_idct_add_c; |
729 | 10.9k | dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
730 | 10.9k | dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; |
731 | 10.9k | dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; |
732 | | |
733 | 10.9k | dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
734 | 10.9k | dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
735 | 10.9k | dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
736 | 10.9k | dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
737 | | |
738 | 10.9k | dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
739 | 10.9k | dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
740 | 10.9k | dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
741 | 10.9k | dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
742 | | |
743 | 10.9k | dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; |
744 | 10.9k | dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; |
745 | | |
746 | | #if ARCH_AARCH64 |
747 | | ff_vp8dsp_init_aarch64(dsp); |
748 | | #elif ARCH_ARM |
749 | | ff_vp8dsp_init_arm(dsp); |
750 | | #elif ARCH_RISCV |
751 | | ff_vp8dsp_init_riscv(dsp); |
752 | | #elif ARCH_X86 && HAVE_X86ASM |
753 | | ff_vp8dsp_init_x86(dsp); |
754 | | #elif ARCH_MIPS |
755 | | ff_vp8dsp_init_mips(dsp); |
756 | | #elif ARCH_LOONGARCH |
757 | | ff_vp8dsp_init_loongarch(dsp); |
758 | | #endif |
759 | 10.9k | } |
760 | | #endif /* CONFIG_VP8_DECODER */ |