/src/ffmpeg/libavcodec/vp8dsp.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2010 David Conrad |
3 | | * Copyright (C) 2010 Ronald S. Bultje |
4 | | * Copyright (C) 2014 Peter Ross |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | /** |
24 | | * @file |
25 | | * VP8 compatible video decoder |
26 | | */ |
27 | | |
28 | | #include "config_components.h" |
29 | | |
30 | | #include "libavutil/common.h" |
31 | | #include "libavutil/intreadwrite.h" |
32 | | |
33 | | #include "mathops.h" |
34 | | #include "vp8dsp.h" |
35 | | |
36 | | #define MK_IDCT_DC_ADD4_C(name) \ |
37 | | static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], \ |
38 | 170k | ptrdiff_t stride) \ |
39 | 170k | { \ |
40 | 170k | name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ |
41 | 170k | name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ |
42 | 170k | name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ |
43 | 170k | name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ |
44 | 170k | } \ vp8dsp.c:vp7_idct_dc_add4uv_c Line | Count | Source | 38 | 23.0k | ptrdiff_t stride) \ | 39 | 23.0k | { \ | 40 | 23.0k | name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ | 41 | 23.0k | name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ | 42 | 23.0k | name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ | 43 | 23.0k | name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ | 44 | 23.0k | } \ |
vp8dsp.c:vp8_idct_dc_add4uv_c Line | Count | Source | 38 | 147k | ptrdiff_t stride) \ | 39 | 147k | { \ | 40 | 147k | name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ | 41 | 147k | name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ | 42 | 147k | name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ | 43 | 147k | name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ | 44 | 147k | } \ |
|
45 | | \ |
46 | | static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], \ |
47 | 718k | ptrdiff_t stride) \ |
48 | 718k | { \ |
49 | 718k | name ## _idct_dc_add_c(dst + 0, block[0], stride); \ |
50 | 718k | name ## _idct_dc_add_c(dst + 4, block[1], stride); \ |
51 | 718k | name ## _idct_dc_add_c(dst + 8, block[2], stride); \ |
52 | 718k | name ## _idct_dc_add_c(dst + 12, block[3], stride); \ |
53 | 718k | } vp8dsp.c:vp7_idct_dc_add4y_c Line | Count | Source | 47 | 518k | ptrdiff_t stride) \ | 48 | 518k | { \ | 49 | 518k | name ## _idct_dc_add_c(dst + 0, block[0], stride); \ | 50 | 518k | name ## _idct_dc_add_c(dst + 4, block[1], stride); \ | 51 | 518k | name ## _idct_dc_add_c(dst + 8, block[2], stride); \ | 52 | 518k | name ## _idct_dc_add_c(dst + 12, block[3], stride); \ | 53 | 518k | } |
vp8dsp.c:vp8_idct_dc_add4y_c Line | Count | Source | 47 | 199k | ptrdiff_t stride) \ | 48 | 199k | { \ | 49 | 199k | name ## _idct_dc_add_c(dst + 0, block[0], stride); \ | 50 | 199k | name ## _idct_dc_add_c(dst + 4, block[1], stride); \ | 51 | 199k | name ## _idct_dc_add_c(dst + 8, block[2], stride); \ | 52 | 199k | name ## _idct_dc_add_c(dst + 12, block[3], stride); \ | 53 | 199k | } |
|
54 | | |
55 | | #if CONFIG_VP7_DECODER |
56 | | static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) |
57 | 35.4k | { |
58 | 35.4k | int i; |
59 | 35.4k | unsigned a1, b1, c1, d1; |
60 | 35.4k | int16_t tmp[16]; |
61 | | |
62 | 177k | for (i = 0; i < 4; i++) { |
63 | 141k | a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170; |
64 | 141k | b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170; |
65 | 141k | c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274; |
66 | 141k | d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540; |
67 | 141k | tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; |
68 | 141k | tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; |
69 | 141k | tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; |
70 | 141k | tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; |
71 | 141k | } |
72 | | |
73 | 177k | for (i = 0; i < 4; i++) { |
74 | 141k | a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; |
75 | 141k | b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; |
76 | 141k | c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; |
77 | 141k | d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; |
78 | 141k | AV_ZERO64(dc + i * 4); |
79 | 141k | block[0][i][0] = (int)(a1 + d1 + 0x20000) >> 18; |
80 | 141k | block[3][i][0] = (int)(a1 - d1 + 0x20000) >> 18; |
81 | 141k | block[1][i][0] = (int)(b1 + c1 + 0x20000) >> 18; |
82 | 141k | block[2][i][0] = (int)(b1 - c1 + 0x20000) >> 18; |
83 | 141k | } |
84 | 35.4k | } |
85 | | |
86 | | static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) |
87 | 112k | { |
88 | 112k | int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18; |
89 | 112k | dc[0] = 0; |
90 | | |
91 | 561k | for (i = 0; i < 4; i++) { |
92 | 449k | block[i][0][0] = val; |
93 | 449k | block[i][1][0] = val; |
94 | 449k | block[i][2][0] = val; |
95 | 449k | block[i][3][0] = val; |
96 | 449k | } |
97 | 112k | } |
98 | | |
99 | | static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
100 | 527k | { |
101 | 527k | int i; |
102 | 527k | unsigned a1, b1, c1, d1; |
103 | 527k | int16_t tmp[16]; |
104 | | |
105 | 2.63M | for (i = 0; i < 4; i++) { |
106 | 2.10M | a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170; |
107 | 2.10M | b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170; |
108 | 2.10M | c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274; |
109 | 2.10M | d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540; |
110 | 2.10M | AV_ZERO64(block + i * 4); |
111 | 2.10M | tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; |
112 | 2.10M | tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; |
113 | 2.10M | tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; |
114 | 2.10M | tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; |
115 | 2.10M | } |
116 | | |
117 | 2.63M | for (i = 0; i < 4; i++) { |
118 | 2.10M | a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; |
119 | 2.10M | b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; |
120 | 2.10M | c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; |
121 | 2.10M | d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; |
122 | 2.10M | dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] + |
123 | 2.10M | ((int)(a1 + d1 + 0x20000) >> 18)); |
124 | 2.10M | dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] + |
125 | 2.10M | ((int)(a1 - d1 + 0x20000) >> 18)); |
126 | 2.10M | dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] + |
127 | 2.10M | ((int)(b1 + c1 + 0x20000) >> 18)); |
128 | 2.10M | dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] + |
129 | 2.10M | ((int)(b1 - c1 + 0x20000) >> 18)); |
130 | 2.10M | } |
131 | 527k | } |
132 | | |
133 | | static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
134 | 2.29M | { |
135 | 2.29M | int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18; |
136 | 2.29M | block[0] = 0; |
137 | | |
138 | 11.4M | for (i = 0; i < 4; i++) { |
139 | 9.19M | dst[0] = av_clip_uint8(dst[0] + dc); |
140 | 9.19M | dst[1] = av_clip_uint8(dst[1] + dc); |
141 | 9.19M | dst[2] = av_clip_uint8(dst[2] + dc); |
142 | 9.19M | dst[3] = av_clip_uint8(dst[3] + dc); |
143 | 9.19M | dst += stride; |
144 | 9.19M | } |
145 | 2.29M | } |
146 | | |
147 | | MK_IDCT_DC_ADD4_C(vp7) |
148 | | #endif /* CONFIG_VP7_DECODER */ |
149 | | |
150 | | // TODO: Maybe add dequant |
151 | | #if CONFIG_VP8_DECODER |
152 | | static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) |
153 | 61.4k | { |
154 | 61.4k | int i, t0, t1, t2, t3; |
155 | | |
156 | 307k | for (i = 0; i < 4; i++) { |
157 | 245k | t0 = dc[0 * 4 + i] + dc[3 * 4 + i]; |
158 | 245k | t1 = dc[1 * 4 + i] + dc[2 * 4 + i]; |
159 | 245k | t2 = dc[1 * 4 + i] - dc[2 * 4 + i]; |
160 | 245k | t3 = dc[0 * 4 + i] - dc[3 * 4 + i]; |
161 | | |
162 | 245k | dc[0 * 4 + i] = t0 + t1; |
163 | 245k | dc[1 * 4 + i] = t3 + t2; |
164 | 245k | dc[2 * 4 + i] = t0 - t1; |
165 | 245k | dc[3 * 4 + i] = t3 - t2; |
166 | 245k | } |
167 | | |
168 | 307k | for (i = 0; i < 4; i++) { |
169 | 245k | t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding |
170 | 245k | t1 = dc[i * 4 + 1] + dc[i * 4 + 2]; |
171 | 245k | t2 = dc[i * 4 + 1] - dc[i * 4 + 2]; |
172 | 245k | t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding |
173 | 245k | AV_ZERO64(dc + i * 4); |
174 | | |
175 | 245k | block[i][0][0] = (t0 + t1) >> 3; |
176 | 245k | block[i][1][0] = (t3 + t2) >> 3; |
177 | 245k | block[i][2][0] = (t0 - t1) >> 3; |
178 | 245k | block[i][3][0] = (t3 - t2) >> 3; |
179 | 245k | } |
180 | 61.4k | } |
181 | | |
182 | | static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) |
183 | 26.1k | { |
184 | 26.1k | int i, val = (dc[0] + 3) >> 3; |
185 | 26.1k | dc[0] = 0; |
186 | | |
187 | 130k | for (i = 0; i < 4; i++) { |
188 | 104k | block[i][0][0] = val; |
189 | 104k | block[i][1][0] = val; |
190 | 104k | block[i][2][0] = val; |
191 | 104k | block[i][3][0] = val; |
192 | 104k | } |
193 | 26.1k | } |
194 | | |
195 | 29.0M | #define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) |
196 | 29.0M | #define MUL_35468(a) (((a) * 35468) >> 16) |
197 | | |
198 | | static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
199 | 1.81M | { |
200 | 1.81M | int i, t0, t1, t2, t3; |
201 | 1.81M | int16_t tmp[16]; |
202 | | |
203 | 9.07M | for (i = 0; i < 4; i++) { |
204 | 7.25M | t0 = block[0 * 4 + i] + block[2 * 4 + i]; |
205 | 7.25M | t1 = block[0 * 4 + i] - block[2 * 4 + i]; |
206 | 7.25M | t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]); |
207 | 7.25M | t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]); |
208 | 7.25M | block[0 * 4 + i] = 0; |
209 | 7.25M | block[1 * 4 + i] = 0; |
210 | 7.25M | block[2 * 4 + i] = 0; |
211 | 7.25M | block[3 * 4 + i] = 0; |
212 | | |
213 | 7.25M | tmp[i * 4 + 0] = t0 + t3; |
214 | 7.25M | tmp[i * 4 + 1] = t1 + t2; |
215 | 7.25M | tmp[i * 4 + 2] = t1 - t2; |
216 | 7.25M | tmp[i * 4 + 3] = t0 - t3; |
217 | 7.25M | } |
218 | | |
219 | 9.07M | for (i = 0; i < 4; i++) { |
220 | 7.25M | t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i]; |
221 | 7.25M | t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i]; |
222 | 7.25M | t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]); |
223 | 7.25M | t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]); |
224 | | |
225 | 7.25M | dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); |
226 | 7.25M | dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); |
227 | 7.25M | dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); |
228 | 7.25M | dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); |
229 | 7.25M | dst += stride; |
230 | 7.25M | } |
231 | 1.81M | } |
232 | | |
233 | | static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) |
234 | 1.99M | { |
235 | 1.99M | int i, dc = (block[0] + 4) >> 3; |
236 | 1.99M | block[0] = 0; |
237 | | |
238 | 9.97M | for (i = 0; i < 4; i++) { |
239 | 7.98M | dst[0] = av_clip_uint8(dst[0] + dc); |
240 | 7.98M | dst[1] = av_clip_uint8(dst[1] + dc); |
241 | 7.98M | dst[2] = av_clip_uint8(dst[2] + dc); |
242 | 7.98M | dst[3] = av_clip_uint8(dst[3] + dc); |
243 | 7.98M | dst += stride; |
244 | 7.98M | } |
245 | 1.99M | } |
246 | | |
247 | | MK_IDCT_DC_ADD4_C(vp8) |
248 | | #endif /* CONFIG_VP8_DECODER */ |
249 | | |
250 | | // because I like only having two parameters to pass functions... |
251 | | #define LOAD_PIXELS \ |
252 | 589M | av_unused int p3 = p[-4 * stride]; \ |
253 | 589M | av_unused int p2 = p[-3 * stride]; \ |
254 | 589M | av_unused int p1 = p[-2 * stride]; \ |
255 | 589M | av_unused int p0 = p[-1 * stride]; \ |
256 | 589M | av_unused int q0 = p[ 0 * stride]; \ |
257 | 589M | av_unused int q1 = p[ 1 * stride]; \ |
258 | 589M | av_unused int q2 = p[ 2 * stride]; \ |
259 | 589M | av_unused int q3 = p[ 3 * stride]; |
260 | | |
261 | 304M | #define clip_int8(n) (cm[(n) + 0x80] - 0x80) |
262 | | |
263 | | static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, |
264 | | int is4tap, int is_vp7) |
265 | 135M | { |
266 | 135M | LOAD_PIXELS |
267 | 135M | int a, f1, f2; |
268 | 135M | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; |
269 | | |
270 | 135M | a = 3 * (q0 - p0); |
271 | | |
272 | 135M | if (is4tap) |
273 | 79.4M | a += clip_int8(p1 - q1); |
274 | | |
275 | 135M | a = clip_int8(a); |
276 | | |
277 | | // We deviate from the spec here with c(a+3) >> 3 |
278 | | // since that's what libvpx does. |
279 | 135M | f1 = FFMIN(a + 4, 127) >> 3; |
280 | | |
281 | 135M | if (is_vp7) |
282 | 85.8M | f2 = f1 - ((a & 7) == 4); |
283 | 49.1M | else |
284 | 49.1M | f2 = FFMIN(a + 3, 127) >> 3; |
285 | | |
286 | | // Despite what the spec says, we do need to clamp here to |
287 | | // be bitexact with libvpx. |
288 | 135M | p[-1 * stride] = cm[p0 + f2]; |
289 | 135M | p[ 0 * stride] = cm[q0 - f1]; |
290 | | |
291 | | // only used for _inner on blocks without high edge variance |
292 | 135M | if (!is4tap) { |
293 | 55.6M | a = (f1 + 1) >> 1; |
294 | 55.6M | p[-2 * stride] = cm[p1 + a]; |
295 | 55.6M | p[ 1 * stride] = cm[q1 - a]; |
296 | 55.6M | } |
297 | 135M | } |
298 | | |
299 | | static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride, |
300 | | int is4tap) |
301 | 85.8M | { |
302 | 85.8M | filter_common(p, stride, is4tap, IS_VP7); |
303 | 85.8M | } |
304 | | |
305 | | static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride, |
306 | | int is4tap) |
307 | 49.1M | { |
308 | 49.1M | filter_common(p, stride, is4tap, IS_VP8); |
309 | 49.1M | } |
310 | | |
311 | | static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride, |
312 | | int flim) |
313 | 104M | { |
314 | 104M | LOAD_PIXELS |
315 | 104M | return FFABS(p0 - q0) <= flim; |
316 | 104M | } |
317 | | |
318 | | static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, |
319 | | int flim) |
320 | 85.4M | { |
321 | 85.4M | LOAD_PIXELS |
322 | 85.4M | return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; |
323 | 85.4M | } |
324 | | |
325 | | /** |
326 | | * E - limit at the macroblock edge |
327 | | * I - limit for interior difference |
328 | | */ |
329 | | #define NORMAL_LIMIT(vpn) \ |
330 | | static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \ |
331 | | ptrdiff_t stride, \ |
332 | 113M | int E, int I) \ |
333 | 113M | { \ |
334 | 113M | LOAD_PIXELS \ |
335 | 113M | return vp ## vpn ## _simple_limit(p, stride, E) && \ |
336 | 113M | FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ |
337 | 113M | FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ |
338 | 113M | FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ |
339 | 113M | } vp8dsp.c:vp7_normal_limit Line | Count | Source | 332 | 50.0M | int E, int I) \ | 333 | 50.0M | { \ | 334 | 50.0M | LOAD_PIXELS \ | 335 | 50.0M | return vp ## vpn ## _simple_limit(p, stride, E) && \ | 336 | 50.0M | FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ | 337 | 50.0M | FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ | 338 | 50.0M | FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ | 339 | 50.0M | } |
vp8dsp.c:vp8_normal_limit Line | Count | Source | 332 | 63.7M | int E, int I) \ | 333 | 63.7M | { \ | 334 | 63.7M | LOAD_PIXELS \ | 335 | 63.7M | return vp ## vpn ## _simple_limit(p, stride, E) && \ | 336 | 63.7M | FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ | 337 | 63.7M | FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ | 338 | 63.7M | FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ | 339 | 63.7M | } |
|
340 | | |
341 | | NORMAL_LIMIT(7) |
342 | | NORMAL_LIMIT(8) |
343 | | |
344 | | // high edge variance |
345 | | static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) |
346 | 105M | { |
347 | 105M | LOAD_PIXELS |
348 | 105M | return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh; |
349 | 105M | } |
350 | | |
351 | | static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) |
352 | 44.7M | { |
353 | 44.7M | int a0, a1, a2, w; |
354 | 44.7M | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; |
355 | | |
356 | 44.7M | LOAD_PIXELS |
357 | | |
358 | 44.7M | w = clip_int8(p1 - q1); |
359 | 44.7M | w = clip_int8(w + 3 * (q0 - p0)); |
360 | | |
361 | 44.7M | a0 = (27 * w + 63) >> 7; |
362 | 44.7M | a1 = (18 * w + 63) >> 7; |
363 | 44.7M | a2 = (9 * w + 63) >> 7; |
364 | | |
365 | 44.7M | p[-3 * stride] = cm[p2 + a2]; |
366 | 44.7M | p[-2 * stride] = cm[p1 + a1]; |
367 | 44.7M | p[-1 * stride] = cm[p0 + a0]; |
368 | 44.7M | p[ 0 * stride] = cm[q0 - a0]; |
369 | 44.7M | p[ 1 * stride] = cm[q1 - a1]; |
370 | 44.7M | p[ 2 * stride] = cm[q2 - a2]; |
371 | 44.7M | } |
372 | | |
373 | | #define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline) \ |
374 | | static maybe_inline \ |
375 | | void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \ |
376 | | ptrdiff_t stride, \ |
377 | | int flim_E, int flim_I, \ |
378 | 4.53M | int hev_thresh) \ |
379 | 4.53M | { \ |
380 | 4.53M | int i; \ |
381 | 52.9M | for (i = 0; i < size; i++) \ |
382 | 48.3M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ |
383 | 48.3M | flim_E, flim_I)) { \ |
384 | 46.0M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ |
385 | 46.0M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ |
386 | 46.0M | else \ |
387 | 46.0M | filter_mbedge(dst + i * stridea, strideb); \ |
388 | 46.0M | } \ |
389 | 4.53M | } \ vp8dsp.c:vp7_v_loop_filter16_c Line | Count | Source | 378 | 238k | int hev_thresh) \ | 379 | 238k | { \ | 380 | 238k | int i; \ | 381 | 4.06M | for (i = 0; i < size; i++) \ | 382 | 3.82M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.82M | flim_E, flim_I)) { \ | 384 | 3.61M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.61M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.61M | else \ | 387 | 3.61M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.61M | } \ | 389 | 238k | } \ |
vp8dsp.c:vp7_h_loop_filter16_c Line | Count | Source | 378 | 244k | int hev_thresh) \ | 379 | 244k | { \ | 380 | 244k | int i; \ | 381 | 4.16M | for (i = 0; i < size; i++) \ | 382 | 3.91M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.91M | flim_E, flim_I)) { \ | 384 | 3.70M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.70M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.70M | else \ | 387 | 3.70M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.70M | } \ | 389 | 244k | } \ |
vp8dsp.c:vp7_v_loop_filter8_c Line | Count | Source | 378 | 477k | int hev_thresh) \ | 379 | 477k | { \ | 380 | 477k | int i; \ | 381 | 4.30M | for (i = 0; i < size; i++) \ | 382 | 3.82M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.82M | flim_E, flim_I)) { \ | 384 | 3.71M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.71M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.71M | else \ | 387 | 3.71M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.71M | } \ | 389 | 477k | } \ |
vp8dsp.c:vp7_h_loop_filter8_c Line | Count | Source | 378 | 489k | int hev_thresh) \ | 379 | 489k | { \ | 380 | 489k | int i; \ | 381 | 4.40M | for (i = 0; i < size; i++) \ | 382 | 3.91M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 3.91M | flim_E, flim_I)) { \ | 384 | 3.80M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 3.80M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 3.80M | else \ | 387 | 3.80M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 3.80M | } \ | 389 | 489k | } \ |
vp8dsp.c:vp8_v_loop_filter16_c Line | Count | Source | 378 | 503k | int hev_thresh) \ | 379 | 503k | { \ | 380 | 503k | int i; \ | 381 | 8.55M | for (i = 0; i < size; i++) \ | 382 | 8.05M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.05M | flim_E, flim_I)) { \ | 384 | 7.52M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 7.52M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 7.52M | else \ | 387 | 7.52M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 7.52M | } \ | 389 | 503k | } \ |
vp8dsp.c:vp8_h_loop_filter16_c Line | Count | Source | 378 | 525k | int hev_thresh) \ | 379 | 525k | { \ | 380 | 525k | int i; \ | 381 | 8.92M | for (i = 0; i < size; i++) \ | 382 | 8.40M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.40M | flim_E, flim_I)) { \ | 384 | 7.81M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 7.81M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 7.81M | else \ | 387 | 7.81M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 7.81M | } \ | 389 | 525k | } \ |
vp8dsp.c:vp8_v_loop_filter8_c Line | Count | Source | 378 | 1.00M | int hev_thresh) \ | 379 | 1.00M | { \ | 380 | 1.00M | int i; \ | 381 | 9.06M | for (i = 0; i < size; i++) \ | 382 | 8.05M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.05M | flim_E, flim_I)) { \ | 384 | 7.75M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 7.75M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 7.75M | else \ | 387 | 7.75M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 7.75M | } \ | 389 | 1.00M | } \ |
vp8dsp.c:vp8_h_loop_filter8_c Line | Count | Source | 378 | 1.05M | int hev_thresh) \ | 379 | 1.05M | { \ | 380 | 1.05M | int i; \ | 381 | 9.45M | for (i = 0; i < size; i++) \ | 382 | 8.40M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 383 | 8.40M | flim_E, flim_I)) { \ | 384 | 8.08M | if (hev(dst + i * stridea, strideb, hev_thresh)) \ | 385 | 8.08M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 386 | 8.08M | else \ | 387 | 8.08M | filter_mbedge(dst + i * stridea, strideb); \ | 388 | 8.08M | } \ | 389 | 1.05M | } \ |
|
390 | | \ |
391 | | static maybe_inline \ |
392 | | void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \ |
393 | | ptrdiff_t stride, \ |
394 | | int flim_E, \ |
395 | | int flim_I, \ |
396 | 5.10M | int hev_thresh) \ |
397 | 5.10M | { \ |
398 | 5.10M | int i; \ |
399 | 70.4M | for (i = 0; i < size; i++) \ |
400 | 65.3M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ |
401 | 65.3M | flim_E, flim_I)) { \ |
402 | 59.4M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ |
403 | 59.4M | if (hv) \ |
404 | 59.4M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ |
405 | 59.4M | else \ |
406 | 59.4M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ |
407 | 59.4M | } \ |
408 | 5.10M | } vp8dsp.c:vp7_v_loop_filter16_inner_c Line | Count | Source | 396 | 809k | int hev_thresh) \ | 397 | 809k | { \ | 398 | 809k | int i; \ | 399 | 13.7M | for (i = 0; i < size; i++) \ | 400 | 12.9M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 12.9M | flim_E, flim_I)) { \ | 402 | 12.3M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 12.3M | if (hv) \ | 404 | 12.3M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 12.3M | else \ | 406 | 12.3M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 12.3M | } \ | 408 | 809k | } |
vp8dsp.c:vp7_h_loop_filter16_inner_c Line | Count | Source | 396 | 809k | int hev_thresh) \ | 397 | 809k | { \ | 398 | 809k | int i; \ | 399 | 13.7M | for (i = 0; i < size; i++) \ | 400 | 12.9M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 12.9M | flim_E, flim_I)) { \ | 402 | 12.2M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 12.2M | if (hv) \ | 404 | 12.2M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 12.2M | else \ | 406 | 12.2M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 12.2M | } \ | 408 | 809k | } |
vp8dsp.c:vp7_v_loop_filter8_inner_c Line | Count | Source | 396 | 539k | int hev_thresh) \ | 397 | 539k | { \ | 398 | 539k | int i; \ | 399 | 4.85M | for (i = 0; i < size; i++) \ | 400 | 4.31M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 4.31M | flim_E, flim_I)) { \ | 402 | 4.19M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 4.19M | if (hv) \ | 404 | 4.19M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 4.19M | else \ | 406 | 4.19M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 4.19M | } \ | 408 | 539k | } |
vp8dsp.c:vp7_h_loop_filter8_inner_c Line | Count | Source | 396 | 539k | int hev_thresh) \ | 397 | 539k | { \ | 398 | 539k | int i; \ | 399 | 4.85M | for (i = 0; i < size; i++) \ | 400 | 4.31M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 4.31M | flim_E, flim_I)) { \ | 402 | 4.19M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 4.19M | if (hv) \ | 404 | 4.19M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 4.19M | else \ | 406 | 4.19M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 4.19M | } \ | 408 | 539k | } |
vp8dsp.c:vp8_v_loop_filter16_inner_c Line | Count | Source | 396 | 722k | int hev_thresh) \ | 397 | 722k | { \ | 398 | 722k | int i; \ | 399 | 12.2M | for (i = 0; i < size; i++) \ | 400 | 11.5M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 11.5M | flim_E, flim_I)) { \ | 402 | 9.73M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 9.73M | if (hv) \ | 404 | 9.73M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 9.73M | else \ | 406 | 9.73M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 9.73M | } \ | 408 | 722k | } |
vp8dsp.c:vp8_h_loop_filter16_inner_c Line | Count | Source | 396 | 722k | int hev_thresh) \ | 397 | 722k | { \ | 398 | 722k | int i; \ | 399 | 12.2M | for (i = 0; i < size; i++) \ | 400 | 11.5M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 11.5M | flim_E, flim_I)) { \ | 402 | 9.73M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 9.73M | if (hv) \ | 404 | 9.73M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 9.73M | else \ | 406 | 9.73M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 9.73M | } \ | 408 | 722k | } |
vp8dsp.c:vp8_v_loop_filter8_inner_c Line | Count | Source | 396 | 481k | int hev_thresh) \ | 397 | 481k | { \ | 398 | 481k | int i; \ | 399 | 4.33M | for (i = 0; i < size; i++) \ | 400 | 3.85M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 3.85M | flim_E, flim_I)) { \ | 402 | 3.53M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 3.53M | if (hv) \ | 404 | 3.53M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 3.53M | else \ | 406 | 3.53M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 3.53M | } \ | 408 | 481k | } |
vp8dsp.c:vp8_h_loop_filter8_inner_c Line | Count | Source | 396 | 481k | int hev_thresh) \ | 397 | 481k | { \ | 398 | 481k | int i; \ | 399 | 4.33M | for (i = 0; i < size; i++) \ | 400 | 3.85M | if (vpn ## _normal_limit(dst + i * stridea, strideb, \ | 401 | 3.85M | flim_E, flim_I)) { \ | 402 | 3.51M | int hv = hev(dst + i * stridea, strideb, hev_thresh); \ | 403 | 3.51M | if (hv) \ | 404 | 3.51M | vpn ## _filter_common(dst + i * stridea, strideb, 1); \ | 405 | 3.51M | else \ | 406 | 3.51M | vpn ## _filter_common(dst + i * stridea, strideb, 0); \ | 407 | 3.51M | } \ | 408 | 481k | } |
|
409 | | |
410 | | #define UV_LOOP_FILTER(vpn, dir, stridea, strideb) \ |
411 | | LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline) \ |
412 | | static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU, \ |
413 | | uint8_t *dstV, \ |
414 | | ptrdiff_t stride, int fE, \ |
415 | 1.51M | int fI, int hev_thresh) \ |
416 | 1.51M | { \ |
417 | 1.51M | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ |
418 | 1.51M | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ |
419 | 1.51M | } \ vp8dsp.c:vp7_v_loop_filter8uv_c Line | Count | Source | 415 | 238k | int fI, int hev_thresh) \ | 416 | 238k | { \ | 417 | 238k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 238k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 238k | } \ |
vp8dsp.c:vp7_h_loop_filter8uv_c Line | Count | Source | 415 | 244k | int fI, int hev_thresh) \ | 416 | 244k | { \ | 417 | 244k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 244k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 244k | } \ |
vp8dsp.c:vp8_v_loop_filter8uv_c Line | Count | Source | 415 | 503k | int fI, int hev_thresh) \ | 416 | 503k | { \ | 417 | 503k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 503k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 503k | } \ |
vp8dsp.c:vp8_h_loop_filter8uv_c Line | Count | Source | 415 | 525k | int fI, int hev_thresh) \ | 416 | 525k | { \ | 417 | 525k | vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ | 418 | 525k | vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ | 419 | 525k | } \ |
|
420 | | \ |
421 | | static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \ |
422 | | uint8_t *dstV, \ |
423 | | ptrdiff_t stride, \ |
424 | | int fE, int fI, \ |
425 | 1.02M | int hev_thresh) \ |
426 | 1.02M | { \ |
427 | 1.02M | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ |
428 | 1.02M | hev_thresh); \ |
429 | 1.02M | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ |
430 | 1.02M | hev_thresh); \ |
431 | 1.02M | } vp8dsp.c:vp7_v_loop_filter8uv_inner_c Line | Count | Source | 425 | 269k | int hev_thresh) \ | 426 | 269k | { \ | 427 | 269k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 269k | hev_thresh); \ | 429 | 269k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 269k | hev_thresh); \ | 431 | 269k | } |
vp8dsp.c:vp7_h_loop_filter8uv_inner_c Line | Count | Source | 425 | 269k | int hev_thresh) \ | 426 | 269k | { \ | 427 | 269k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 269k | hev_thresh); \ | 429 | 269k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 269k | hev_thresh); \ | 431 | 269k | } |
vp8dsp.c:vp8_v_loop_filter8uv_inner_c Line | Count | Source | 425 | 240k | int hev_thresh) \ | 426 | 240k | { \ | 427 | 240k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 240k | hev_thresh); \ | 429 | 240k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 240k | hev_thresh); \ | 431 | 240k | } |
vp8dsp.c:vp8_h_loop_filter8uv_inner_c Line | Count | Source | 425 | 240k | int hev_thresh) \ | 426 | 240k | { \ | 427 | 240k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ | 428 | 240k | hev_thresh); \ | 429 | 240k | vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ | 430 | 240k | hev_thresh); \ | 431 | 240k | } |
|
432 | | |
433 | | #define LOOP_FILTER_SIMPLE(vpn) \ |
434 | | static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ |
435 | 2.39M | int flim) \ |
436 | 2.39M | { \ |
437 | 2.39M | int i; \ |
438 | 40.7M | for (i = 0; i < 16; i++) \ |
439 | 38.3M | if (vpn ## _simple_limit(dst + i, stride, flim)) \ |
440 | 38.3M | vpn ## _filter_common(dst + i, stride, 1); \ |
441 | 2.39M | } \ vp8dsp.c:vp7_v_loop_filter_simple_c Line | Count | Source | 435 | 1.71M | int flim) \ | 436 | 1.71M | { \ | 437 | 1.71M | int i; \ | 438 | 29.2M | for (i = 0; i < 16; i++) \ | 439 | 27.4M | if (vpn ## _simple_limit(dst + i, stride, flim)) \ | 440 | 27.4M | vpn ## _filter_common(dst + i, stride, 1); \ | 441 | 1.71M | } \ |
vp8dsp.c:vp8_v_loop_filter_simple_c Line | Count | Source | 435 | 676k | int flim) \ | 436 | 676k | { \ | 437 | 676k | int i; \ | 438 | 11.5M | for (i = 0; i < 16; i++) \ | 439 | 10.8M | if (vpn ## _simple_limit(dst + i, stride, flim)) \ | 440 | 10.8M | vpn ## _filter_common(dst + i, stride, 1); \ | 441 | 676k | } \ |
|
442 | | \ |
443 | | static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ |
444 | 2.38M | int flim) \ |
445 | 2.38M | { \ |
446 | 2.38M | int i; \ |
447 | 40.6M | for (i = 0; i < 16; i++) \ |
448 | 38.2M | if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ |
449 | 38.2M | vpn ## _filter_common(dst + i * stride, 1, 1); \ |
450 | 2.38M | } vp8dsp.c:vp7_h_loop_filter_simple_c Line | Count | Source | 444 | 1.70M | int flim) \ | 445 | 1.70M | { \ | 446 | 1.70M | int i; \ | 447 | 29.0M | for (i = 0; i < 16; i++) \ | 448 | 27.3M | if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ | 449 | 27.3M | vpn ## _filter_common(dst + i * stride, 1, 1); \ | 450 | 1.70M | } |
vp8dsp.c:vp8_h_loop_filter_simple_c Line | Count | Source | 444 | 681k | int flim) \ | 445 | 681k | { \ | 446 | 681k | int i; \ | 447 | 11.5M | for (i = 0; i < 16; i++) \ | 448 | 10.8M | if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ | 449 | 10.8M | vpn ## _filter_common(dst + i * stride, 1, 1); \ | 450 | 681k | } |
|
451 | | |
452 | | #define LOOP_FILTERS(vpn) \ |
453 | | LOOP_FILTER(vpn, v, 16, 1, stride, ) \ |
454 | | LOOP_FILTER(vpn, h, 16, stride, 1, ) \ |
455 | | UV_LOOP_FILTER(vpn, v, 1, stride) \ |
456 | | UV_LOOP_FILTER(vpn, h, stride, 1) \ |
457 | | LOOP_FILTER_SIMPLE(vpn) \ |
458 | | |
459 | | static const uint8_t subpel_filters[7][6] = { |
460 | | { 0, 6, 123, 12, 1, 0 }, |
461 | | { 2, 11, 108, 36, 8, 1 }, |
462 | | { 0, 9, 93, 50, 6, 0 }, |
463 | | { 3, 16, 77, 77, 16, 3 }, |
464 | | { 0, 6, 50, 93, 9, 0 }, |
465 | | { 1, 8, 36, 108, 11, 2 }, |
466 | | { 0, 1, 12, 123, 6, 0 }, |
467 | | }; |
468 | | |
469 | | #define PUT_PIXELS(WIDTH) \ |
470 | | static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \ |
471 | | const uint8_t *src, ptrdiff_t srcstride, \ |
472 | 3.78M | int h, int x, int y) \ |
473 | 3.78M | { \ |
474 | 3.78M | int i; \ |
475 | 36.0M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ |
476 | 32.2M | memcpy(dst, src, WIDTH); \ |
477 | 3.78M | } vp8dsp.c:put_vp8_pixels16_c Line | Count | Source | 472 | 845k | int h, int x, int y) \ | 473 | 845k | { \ | 474 | 845k | int i; \ | 475 | 14.0M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ | 476 | 13.1M | memcpy(dst, src, WIDTH); \ | 477 | 845k | } |
vp8dsp.c:put_vp8_pixels8_c Line | Count | Source | 472 | 1.75M | int h, int x, int y) \ | 473 | 1.75M | { \ | 474 | 1.75M | int i; \ | 475 | 15.7M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ | 476 | 14.0M | memcpy(dst, src, WIDTH); \ | 477 | 1.75M | } |
vp8dsp.c:put_vp8_pixels4_c Line | Count | Source | 472 | 1.18M | int h, int x, int y) \ | 473 | 1.18M | { \ | 474 | 1.18M | int i; \ | 475 | 6.19M | for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ | 476 | 5.01M | memcpy(dst, src, WIDTH); \ | 477 | 1.18M | } |
|
478 | | |
479 | | PUT_PIXELS(16) |
480 | | PUT_PIXELS(8) |
481 | | PUT_PIXELS(4) |
482 | | |
483 | | #define FILTER_6TAP(src, F, stride) \ |
484 | 190M | cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ |
485 | 190M | F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ |
486 | 190M | F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] |
487 | | |
488 | | #define FILTER_4TAP(src, F, stride) \ |
489 | 46.7M | cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ |
490 | 46.7M | F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] |
491 | | |
492 | | #define VP8_EPEL_H(SIZE, TAPS) \ |
493 | | static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \ |
494 | | ptrdiff_t dststride, \ |
495 | | const uint8_t *src, \ |
496 | | ptrdiff_t srcstride, \ |
497 | 329k | int h, int mx, int my) \ |
498 | 329k | { \ |
499 | 329k | const uint8_t *filter = subpel_filters[mx - 1]; \ |
500 | 329k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
501 | 329k | int x, y; \ |
502 | 3.36M | for (y = 0; y < h; y++) { \ |
503 | 37.5M | for (x = 0; x < SIZE; x++) \ |
504 | 34.5M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ |
505 | 3.03M | dst += dststride; \ |
506 | 3.03M | src += srcstride; \ |
507 | 3.03M | } \ |
508 | 329k | } vp8dsp.c:put_vp8_epel16_h6_c Line | Count | Source | 497 | 93.5k | int h, int mx, int my) \ | 498 | 93.5k | { \ | 499 | 93.5k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 93.5k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 93.5k | int x, y; \ | 502 | 1.56M | for (y = 0; y < h; y++) { \ | 503 | 25.0M | for (x = 0; x < SIZE; x++) \ | 504 | 23.5M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 1.47M | dst += dststride; \ | 506 | 1.47M | src += srcstride; \ | 507 | 1.47M | } \ | 508 | 93.5k | } |
vp8dsp.c:put_vp8_epel8_h4_c Line | Count | Source | 497 | 51.1k | int h, int mx, int my) \ | 498 | 51.1k | { \ | 499 | 51.1k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 51.1k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 51.1k | int x, y; \ | 502 | 451k | for (y = 0; y < h; y++) { \ | 503 | 3.60M | for (x = 0; x < SIZE; x++) \ | 504 | 3.20M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 400k | dst += dststride; \ | 506 | 400k | src += srcstride; \ | 507 | 400k | } \ | 508 | 51.1k | } |
vp8dsp.c:put_vp8_epel8_h6_c Line | Count | Source | 497 | 94.5k | int h, int mx, int my) \ | 498 | 94.5k | { \ | 499 | 94.5k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 94.5k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 94.5k | int x, y; \ | 502 | 875k | for (y = 0; y < h; y++) { \ | 503 | 7.02M | for (x = 0; x < SIZE; x++) \ | 504 | 6.24M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 780k | dst += dststride; \ | 506 | 780k | src += srcstride; \ | 507 | 780k | } \ | 508 | 94.5k | } |
vp8dsp.c:put_vp8_epel4_h4_c Line | Count | Source | 497 | 19.0k | int h, int mx, int my) \ | 498 | 19.0k | { \ | 499 | 19.0k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 19.0k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 19.0k | int x, y; \ | 502 | 107k | for (y = 0; y < h; y++) { \ | 503 | 440k | for (x = 0; x < SIZE; x++) \ | 504 | 352k | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 88.1k | dst += dststride; \ | 506 | 88.1k | src += srcstride; \ | 507 | 88.1k | } \ | 508 | 19.0k | } |
vp8dsp.c:put_vp8_epel4_h6_c Line | Count | Source | 497 | 70.9k | int h, int mx, int my) \ | 498 | 70.9k | { \ | 499 | 70.9k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 500 | 70.9k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 501 | 70.9k | int x, y; \ | 502 | 365k | for (y = 0; y < h; y++) { \ | 503 | 1.47M | for (x = 0; x < SIZE; x++) \ | 504 | 1.17M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ | 505 | 294k | dst += dststride; \ | 506 | 294k | src += srcstride; \ | 507 | 294k | } \ | 508 | 70.9k | } |
|
509 | | |
510 | | #define VP8_EPEL_V(SIZE, TAPS) \ |
511 | | static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \ |
512 | | ptrdiff_t dststride, \ |
513 | | const uint8_t *src, \ |
514 | | ptrdiff_t srcstride, \ |
515 | 292k | int h, int mx, int my) \ |
516 | 292k | { \ |
517 | 292k | const uint8_t *filter = subpel_filters[my - 1]; \ |
518 | 292k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
519 | 292k | int x, y; \ |
520 | 2.91M | for (y = 0; y < h; y++) { \ |
521 | 31.2M | for (x = 0; x < SIZE; x++) \ |
522 | 28.5M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ |
523 | 2.62M | dst += dststride; \ |
524 | 2.62M | src += srcstride; \ |
525 | 2.62M | } \ |
526 | 292k | } vp8dsp.c:put_vp8_epel16_v6_c Line | Count | Source | 515 | 71.7k | int h, int mx, int my) \ | 516 | 71.7k | { \ | 517 | 71.7k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 71.7k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 71.7k | int x, y; \ | 520 | 1.17M | for (y = 0; y < h; y++) { \ | 521 | 18.7M | for (x = 0; x < SIZE; x++) \ | 522 | 17.6M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 1.10M | dst += dststride; \ | 524 | 1.10M | src += srcstride; \ | 525 | 1.10M | } \ | 526 | 71.7k | } |
vp8dsp.c:put_vp8_epel8_v4_c Line | Count | Source | 515 | 59.7k | int h, int mx, int my) \ | 516 | 59.7k | { \ | 517 | 59.7k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 59.7k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 59.7k | int x, y; \ | 520 | 527k | for (y = 0; y < h; y++) { \ | 521 | 4.20M | for (x = 0; x < SIZE; x++) \ | 522 | 3.74M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 467k | dst += dststride; \ | 524 | 467k | src += srcstride; \ | 525 | 467k | } \ | 526 | 59.7k | } |
vp8dsp.c:put_vp8_epel8_v6_c Line | Count | Source | 515 | 90.5k | int h, int mx, int my) \ | 516 | 90.5k | { \ | 517 | 90.5k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 90.5k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 90.5k | int x, y; \ | 520 | 832k | for (y = 0; y < h; y++) { \ | 521 | 6.68M | for (x = 0; x < SIZE; x++) \ | 522 | 5.93M | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 742k | dst += dststride; \ | 524 | 742k | src += srcstride; \ | 525 | 742k | } \ | 526 | 90.5k | } |
vp8dsp.c:put_vp8_epel4_v4_c Line | Count | Source | 515 | 21.3k | int h, int mx, int my) \ | 516 | 21.3k | { \ | 517 | 21.3k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 21.3k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 21.3k | int x, y; \ | 520 | 126k | for (y = 0; y < h; y++) { \ | 521 | 523k | for (x = 0; x < SIZE; x++) \ | 522 | 418k | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 104k | dst += dststride; \ | 524 | 104k | src += srcstride; \ | 525 | 104k | } \ | 526 | 21.3k | } |
vp8dsp.c:put_vp8_epel4_v6_c Line | Count | Source | 515 | 49.4k | int h, int mx, int my) \ | 516 | 49.4k | { \ | 517 | 49.4k | const uint8_t *filter = subpel_filters[my - 1]; \ | 518 | 49.4k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 519 | 49.4k | int x, y; \ | 520 | 256k | for (y = 0; y < h; y++) { \ | 521 | 1.03M | for (x = 0; x < SIZE; x++) \ | 522 | 830k | dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ | 523 | 207k | dst += dststride; \ | 524 | 207k | src += srcstride; \ | 525 | 207k | } \ | 526 | 49.4k | } |
|
527 | | |
528 | | #define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ |
529 | | static void \ |
530 | | put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \ |
531 | | ptrdiff_t dststride, \ |
532 | | const uint8_t *src, \ |
533 | | ptrdiff_t srcstride, \ |
534 | | int h, int mx, \ |
535 | 920k | int my) \ |
536 | 920k | { \ |
537 | 920k | const uint8_t *filter = subpel_filters[mx - 1]; \ |
538 | 920k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ |
539 | 920k | int x, y; \ |
540 | 920k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ |
541 | 920k | uint8_t *tmp = tmp_array; \ |
542 | 920k | src -= (2 - (VTAPS == 4)) * srcstride; \ |
543 | 920k | \ |
544 | 12.5M | for (y = 0; y < h + VTAPS - 1; y++) { \ |
545 | 114M | for (x = 0; x < SIZE; x++) \ |
546 | 103M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ |
547 | 11.6M | tmp += SIZE; \ |
548 | 11.6M | src += srcstride; \ |
549 | 11.6M | } \ |
550 | 920k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ |
551 | 920k | filter = subpel_filters[my - 1]; \ |
552 | 920k | \ |
553 | 8.62M | for (y = 0; y < h; y++) { \ |
554 | 79.1M | for (x = 0; x < SIZE; x++) \ |
555 | 71.4M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ |
556 | 7.70M | dst += dststride; \ |
557 | 7.70M | tmp += SIZE; \ |
558 | 7.70M | } \ |
559 | 920k | } vp8dsp.c:put_vp8_epel16_h6v6_c Line | Count | Source | 535 | 130k | int my) \ | 536 | 130k | { \ | 537 | 130k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 130k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 130k | int x, y; \ | 540 | 130k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 130k | uint8_t *tmp = tmp_array; \ | 542 | 130k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 130k | \ | 544 | 2.83M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 45.9M | for (x = 0; x < SIZE; x++) \ | 546 | 43.2M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 2.70M | tmp += SIZE; \ | 548 | 2.70M | src += srcstride; \ | 549 | 2.70M | } \ | 550 | 130k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 130k | filter = subpel_filters[my - 1]; \ | 552 | 130k | \ | 553 | 2.17M | for (y = 0; y < h; y++) { \ | 554 | 34.8M | for (x = 0; x < SIZE; x++) \ | 555 | 32.7M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 2.04M | dst += dststride; \ | 557 | 2.04M | tmp += SIZE; \ | 558 | 2.04M | } \ | 559 | 130k | } |
vp8dsp.c:put_vp8_epel8_h4v4_c Line | Count | Source | 535 | 94.0k | int my) \ | 536 | 94.0k | { \ | 537 | 94.0k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 94.0k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 94.0k | int x, y; \ | 540 | 94.0k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 94.0k | uint8_t *tmp = tmp_array; \ | 542 | 94.0k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 94.0k | \ | 544 | 1.10M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 9.13M | for (x = 0; x < SIZE; x++) \ | 546 | 8.12M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.01M | tmp += SIZE; \ | 548 | 1.01M | src += srcstride; \ | 549 | 1.01M | } \ | 550 | 94.0k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 94.0k | filter = subpel_filters[my - 1]; \ | 552 | 94.0k | \ | 553 | 827k | for (y = 0; y < h; y++) { \ | 554 | 6.60M | for (x = 0; x < SIZE; x++) \ | 555 | 5.86M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 733k | dst += dststride; \ | 557 | 733k | tmp += SIZE; \ | 558 | 733k | } \ | 559 | 94.0k | } |
vp8dsp.c:put_vp8_epel8_h6v4_c Line | Count | Source | 535 | 89.9k | int my) \ | 536 | 89.9k | { \ | 537 | 89.9k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 89.9k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 89.9k | int x, y; \ | 540 | 89.9k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 89.9k | uint8_t *tmp = tmp_array; \ | 542 | 89.9k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 89.9k | \ | 544 | 1.06M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 8.75M | for (x = 0; x < SIZE; x++) \ | 546 | 7.77M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 972k | tmp += SIZE; \ | 548 | 972k | src += srcstride; \ | 549 | 972k | } \ | 550 | 89.9k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 89.9k | filter = subpel_filters[my - 1]; \ | 552 | 89.9k | \ | 553 | 792k | for (y = 0; y < h; y++) { \ | 554 | 6.32M | for (x = 0; x < SIZE; x++) \ | 555 | 5.61M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 702k | dst += dststride; \ | 557 | 702k | tmp += SIZE; \ | 558 | 702k | } \ | 559 | 89.9k | } |
vp8dsp.c:put_vp8_epel8_h4v6_c Line | Count | Source | 535 | 103k | int my) \ | 536 | 103k | { \ | 537 | 103k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 103k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 103k | int x, y; \ | 540 | 103k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 103k | uint8_t *tmp = tmp_array; \ | 542 | 103k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 103k | \ | 544 | 1.42M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 11.9M | for (x = 0; x < SIZE; x++) \ | 546 | 10.5M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.32M | tmp += SIZE; \ | 548 | 1.32M | src += srcstride; \ | 549 | 1.32M | } \ | 550 | 103k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 103k | filter = subpel_filters[my - 1]; \ | 552 | 103k | \ | 553 | 911k | for (y = 0; y < h; y++) { \ | 554 | 7.27M | for (x = 0; x < SIZE; x++) \ | 555 | 6.46M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 808k | dst += dststride; \ | 557 | 808k | tmp += SIZE; \ | 558 | 808k | } \ | 559 | 103k | } |
vp8dsp.c:put_vp8_epel8_h6v6_c Line | Count | Source | 535 | 183k | int my) \ | 536 | 183k | { \ | 537 | 183k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 183k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 183k | int x, y; \ | 540 | 183k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 183k | uint8_t *tmp = tmp_array; \ | 542 | 183k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 183k | \ | 544 | 2.87M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 24.2M | for (x = 0; x < SIZE; x++) \ | 546 | 21.5M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 2.69M | tmp += SIZE; \ | 548 | 2.69M | src += srcstride; \ | 549 | 2.69M | } \ | 550 | 183k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 183k | filter = subpel_filters[my - 1]; \ | 552 | 183k | \ | 553 | 1.95M | for (y = 0; y < h; y++) { \ | 554 | 15.9M | for (x = 0; x < SIZE; x++) \ | 555 | 14.1M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 1.77M | dst += dststride; \ | 557 | 1.77M | tmp += SIZE; \ | 558 | 1.77M | } \ | 559 | 183k | } |
vp8dsp.c:put_vp8_epel4_h4v4_c Line | Count | Source | 535 | 94.4k | int my) \ | 536 | 94.4k | { \ | 537 | 94.4k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 94.4k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 94.4k | int x, y; \ | 540 | 94.4k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 94.4k | uint8_t *tmp = tmp_array; \ | 542 | 94.4k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 94.4k | \ | 544 | 987k | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 4.46M | for (x = 0; x < SIZE; x++) \ | 546 | 3.57M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 892k | tmp += SIZE; \ | 548 | 892k | src += srcstride; \ | 549 | 892k | } \ | 550 | 94.4k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 94.4k | filter = subpel_filters[my - 1]; \ | 552 | 94.4k | \ | 553 | 704k | for (y = 0; y < h; y++) { \ | 554 | 3.04M | for (x = 0; x < SIZE; x++) \ | 555 | 2.43M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 609k | dst += dststride; \ | 557 | 609k | tmp += SIZE; \ | 558 | 609k | } \ | 559 | 94.4k | } |
vp8dsp.c:put_vp8_epel4_h6v4_c Line | Count | Source | 535 | 41.2k | int my) \ | 536 | 41.2k | { \ | 537 | 41.2k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 41.2k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 41.2k | int x, y; \ | 540 | 41.2k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 41.2k | uint8_t *tmp = tmp_array; \ | 542 | 41.2k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 41.2k | \ | 544 | 376k | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 1.67M | for (x = 0; x < SIZE; x++) \ | 546 | 1.34M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 335k | tmp += SIZE; \ | 548 | 335k | src += srcstride; \ | 549 | 335k | } \ | 550 | 41.2k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 41.2k | filter = subpel_filters[my - 1]; \ | 552 | 41.2k | \ | 553 | 252k | for (y = 0; y < h; y++) { \ | 554 | 1.05M | for (x = 0; x < SIZE; x++) \ | 555 | 846k | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 211k | dst += dststride; \ | 557 | 211k | tmp += SIZE; \ | 558 | 211k | } \ | 559 | 41.2k | } |
vp8dsp.c:put_vp8_epel4_h4v6_c Line | Count | Source | 535 | 52.5k | int my) \ | 536 | 52.5k | { \ | 537 | 52.5k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 52.5k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 52.5k | int x, y; \ | 540 | 52.5k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 52.5k | uint8_t *tmp = tmp_array; \ | 542 | 52.5k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 52.5k | \ | 544 | 553k | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 2.50M | for (x = 0; x < SIZE; x++) \ | 546 | 2.00M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 500k | tmp += SIZE; \ | 548 | 500k | src += srcstride; \ | 549 | 500k | } \ | 550 | 52.5k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 52.5k | filter = subpel_filters[my - 1]; \ | 552 | 52.5k | \ | 553 | 290k | for (y = 0; y < h; y++) { \ | 554 | 1.18M | for (x = 0; x < SIZE; x++) \ | 555 | 951k | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 237k | dst += dststride; \ | 557 | 237k | tmp += SIZE; \ | 558 | 237k | } \ | 559 | 52.5k | } |
vp8dsp.c:put_vp8_epel4_h6v6_c Line | Count | Source | 535 | 130k | int my) \ | 536 | 130k | { \ | 537 | 130k | const uint8_t *filter = subpel_filters[mx - 1]; \ | 538 | 130k | const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ | 539 | 130k | int x, y; \ | 540 | 130k | uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ | 541 | 130k | uint8_t *tmp = tmp_array; \ | 542 | 130k | src -= (2 - (VTAPS == 4)) * srcstride; \ | 543 | 130k | \ | 544 | 1.36M | for (y = 0; y < h + VTAPS - 1; y++) { \ | 545 | 6.15M | for (x = 0; x < SIZE; x++) \ | 546 | 4.92M | tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ | 547 | 1.23M | tmp += SIZE; \ | 548 | 1.23M | src += srcstride; \ | 549 | 1.23M | } \ | 550 | 130k | tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ | 551 | 130k | filter = subpel_filters[my - 1]; \ | 552 | 130k | \ | 553 | 710k | for (y = 0; y < h; y++) { \ | 554 | 2.90M | for (x = 0; x < SIZE; x++) \ | 555 | 2.32M | dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ | 556 | 580k | dst += dststride; \ | 557 | 580k | tmp += SIZE; \ | 558 | 580k | } \ | 559 | 130k | } |
|
560 | | |
561 | | VP8_EPEL_H(8, 4) |
562 | | VP8_EPEL_H(4, 4) |
563 | | VP8_EPEL_H(16, 6) |
564 | | VP8_EPEL_H(8, 6) |
565 | | VP8_EPEL_H(4, 6) |
566 | | VP8_EPEL_V(8, 4) |
567 | | VP8_EPEL_V(4, 4) |
568 | | VP8_EPEL_V(16, 6) |
569 | | VP8_EPEL_V(8, 6) |
570 | | VP8_EPEL_V(4, 6) |
571 | | |
572 | | VP8_EPEL_HV(8, 4, 4) |
573 | | VP8_EPEL_HV(4, 4, 4) |
574 | | VP8_EPEL_HV(8, 4, 6) |
575 | | VP8_EPEL_HV(4, 4, 6) |
576 | | VP8_EPEL_HV(8, 6, 4) |
577 | | VP8_EPEL_HV(4, 6, 4) |
578 | | VP8_EPEL_HV(16, 6, 6) |
579 | | VP8_EPEL_HV(8, 6, 6) |
580 | | VP8_EPEL_HV(4, 6, 6) |
581 | | |
582 | | #define VP8_BILINEAR(SIZE) \ |
583 | | static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \ |
584 | | const uint8_t *src, ptrdiff_t sstride, \ |
585 | 501k | int h, int mx, int my) \ |
586 | 501k | { \ |
587 | 501k | int a = 8 - mx, b = mx; \ |
588 | 501k | int x, y; \ |
589 | 3.61M | for (y = 0; y < h; y++) { \ |
590 | 29.2M | for (x = 0; x < SIZE; x++) \ |
591 | 26.1M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ |
592 | 3.11M | dst += dstride; \ |
593 | 3.11M | src += sstride; \ |
594 | 3.11M | } \ |
595 | 501k | } \ vp8dsp.c:put_vp8_bilinear16_h_c Line | Count | Source | 585 | 55.4k | int h, int mx, int my) \ | 586 | 55.4k | { \ | 587 | 55.4k | int a = 8 - mx, b = mx; \ | 588 | 55.4k | int x, y; \ | 589 | 875k | for (y = 0; y < h; y++) { \ | 590 | 13.9M | for (x = 0; x < SIZE; x++) \ | 591 | 13.1M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 592 | 819k | dst += dstride; \ | 593 | 819k | src += sstride; \ | 594 | 819k | } \ | 595 | 55.4k | } \ |
vp8dsp.c:put_vp8_bilinear8_h_c Line | Count | Source | 585 | 122k | int h, int mx, int my) \ | 586 | 122k | { \ | 587 | 122k | int a = 8 - mx, b = mx; \ | 588 | 122k | int x, y; \ | 589 | 1.08M | for (y = 0; y < h; y++) { \ | 590 | 8.67M | for (x = 0; x < SIZE; x++) \ | 591 | 7.70M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 592 | 963k | dst += dstride; \ | 593 | 963k | src += sstride; \ | 594 | 963k | } \ | 595 | 122k | } \ |
vp8dsp.c:put_vp8_bilinear4_h_c Line | Count | Source | 585 | 323k | int h, int mx, int my) \ | 586 | 323k | { \ | 587 | 323k | int a = 8 - mx, b = mx; \ | 588 | 323k | int x, y; \ | 589 | 1.65M | for (y = 0; y < h; y++) { \ | 590 | 6.66M | for (x = 0; x < SIZE; x++) \ | 591 | 5.32M | dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 592 | 1.33M | dst += dstride; \ | 593 | 1.33M | src += sstride; \ | 594 | 1.33M | } \ | 595 | 323k | } \ |
|
596 | | \ |
597 | | static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \ |
598 | | const uint8_t *src, ptrdiff_t sstride, \ |
599 | 332k | int h, int mx, int my) \ |
600 | 332k | { \ |
601 | 332k | int c = 8 - my, d = my; \ |
602 | 332k | int x, y; \ |
603 | 2.46M | for (y = 0; y < h; y++) { \ |
604 | 20.1M | for (x = 0; x < SIZE; x++) \ |
605 | 18.0M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ |
606 | 2.12M | dst += dstride; \ |
607 | 2.12M | src += sstride; \ |
608 | 2.12M | } \ |
609 | 332k | } \ vp8dsp.c:put_vp8_bilinear16_v_c Line | Count | Source | 599 | 37.0k | int h, int mx, int my) \ | 600 | 37.0k | { \ | 601 | 37.0k | int c = 8 - my, d = my; \ | 602 | 37.0k | int x, y; \ | 603 | 584k | for (y = 0; y < h; y++) { \ | 604 | 9.30M | for (x = 0; x < SIZE; x++) \ | 605 | 8.75M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ | 606 | 547k | dst += dstride; \ | 607 | 547k | src += sstride; \ | 608 | 547k | } \ | 609 | 37.0k | } \ |
vp8dsp.c:put_vp8_bilinear8_v_c Line | Count | Source | 599 | 94.3k | int h, int mx, int my) \ | 600 | 94.3k | { \ | 601 | 94.3k | int c = 8 - my, d = my; \ | 602 | 94.3k | int x, y; \ | 603 | 838k | for (y = 0; y < h; y++) { \ | 604 | 6.69M | for (x = 0; x < SIZE; x++) \ | 605 | 5.95M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ | 606 | 743k | dst += dstride; \ | 607 | 743k | src += sstride; \ | 608 | 743k | } \ | 609 | 94.3k | } \ |
vp8dsp.c:put_vp8_bilinear4_v_c Line | Count | Source | 599 | 200k | int h, int mx, int my) \ | 600 | 200k | { \ | 601 | 200k | int c = 8 - my, d = my; \ | 602 | 200k | int x, y; \ | 603 | 1.03M | for (y = 0; y < h; y++) { \ | 604 | 4.18M | for (x = 0; x < SIZE; x++) \ | 605 | 3.34M | dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ | 606 | 837k | dst += dstride; \ | 607 | 837k | src += sstride; \ | 608 | 837k | } \ | 609 | 200k | } \ |
|
610 | | \ |
611 | | static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \ |
612 | | ptrdiff_t dstride, \ |
613 | | const uint8_t *src, \ |
614 | | ptrdiff_t sstride, \ |
615 | 322k | int h, int mx, int my) \ |
616 | 322k | { \ |
617 | 322k | int a = 8 - mx, b = mx; \ |
618 | 322k | int c = 8 - my, d = my; \ |
619 | 322k | int x, y; \ |
620 | 322k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ |
621 | 322k | uint8_t *tmp = tmp_array; \ |
622 | 2.68M | for (y = 0; y < h + 1; y++) { \ |
623 | 20.7M | for (x = 0; x < SIZE; x++) \ |
624 | 18.4M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ |
625 | 2.36M | tmp += SIZE; \ |
626 | 2.36M | src += sstride; \ |
627 | 2.36M | } \ |
628 | 322k | tmp = tmp_array; \ |
629 | 2.36M | for (y = 0; y < h; y++) { \ |
630 | 18.4M | for (x = 0; x < SIZE; x++) \ |
631 | 16.4M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ |
632 | 2.03M | dst += dstride; \ |
633 | 2.03M | tmp += SIZE; \ |
634 | 2.03M | } \ |
635 | 322k | } vp8dsp.c:put_vp8_bilinear16_hv_c Line | Count | Source | 615 | 30.2k | int h, int mx, int my) \ | 616 | 30.2k | { \ | 617 | 30.2k | int a = 8 - mx, b = mx; \ | 618 | 30.2k | int c = 8 - my, d = my; \ | 619 | 30.2k | int x, y; \ | 620 | 30.2k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ | 621 | 30.2k | uint8_t *tmp = tmp_array; \ | 622 | 500k | for (y = 0; y < h + 1; y++) { \ | 623 | 7.99M | for (x = 0; x < SIZE; x++) \ | 624 | 7.52M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 625 | 470k | tmp += SIZE; \ | 626 | 470k | src += sstride; \ | 627 | 470k | } \ | 628 | 30.2k | tmp = tmp_array; \ | 629 | 470k | for (y = 0; y < h; y++) { \ | 630 | 7.48M | for (x = 0; x < SIZE; x++) \ | 631 | 7.04M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ | 632 | 440k | dst += dstride; \ | 633 | 440k | tmp += SIZE; \ | 634 | 440k | } \ | 635 | 30.2k | } |
vp8dsp.c:put_vp8_bilinear8_hv_c Line | Count | Source | 615 | 91.8k | int h, int mx, int my) \ | 616 | 91.8k | { \ | 617 | 91.8k | int a = 8 - mx, b = mx; \ | 618 | 91.8k | int c = 8 - my, d = my; \ | 619 | 91.8k | int x, y; \ | 620 | 91.8k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ | 621 | 91.8k | uint8_t *tmp = tmp_array; \ | 622 | 928k | for (y = 0; y < h + 1; y++) { \ | 623 | 7.53M | for (x = 0; x < SIZE; x++) \ | 624 | 6.69M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 625 | 837k | tmp += SIZE; \ | 626 | 837k | src += sstride; \ | 627 | 837k | } \ | 628 | 91.8k | tmp = tmp_array; \ | 629 | 837k | for (y = 0; y < h; y++) { \ | 630 | 6.70M | for (x = 0; x < SIZE; x++) \ | 631 | 5.96M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ | 632 | 745k | dst += dstride; \ | 633 | 745k | tmp += SIZE; \ | 634 | 745k | } \ | 635 | 91.8k | } |
vp8dsp.c:put_vp8_bilinear4_hv_c Line | Count | Source | 615 | 200k | int h, int mx, int my) \ | 616 | 200k | { \ | 617 | 200k | int a = 8 - mx, b = mx; \ | 618 | 200k | int c = 8 - my, d = my; \ | 619 | 200k | int x, y; \ | 620 | 200k | uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ | 621 | 200k | uint8_t *tmp = tmp_array; \ | 622 | 1.25M | for (y = 0; y < h + 1; y++) { \ | 623 | 5.26M | for (x = 0; x < SIZE; x++) \ | 624 | 4.21M | tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ | 625 | 1.05M | tmp += SIZE; \ | 626 | 1.05M | src += sstride; \ | 627 | 1.05M | } \ | 628 | 200k | tmp = tmp_array; \ | 629 | 1.05M | for (y = 0; y < h; y++) { \ | 630 | 4.26M | for (x = 0; x < SIZE; x++) \ | 631 | 3.41M | dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ | 632 | 852k | dst += dstride; \ | 633 | 852k | tmp += SIZE; \ | 634 | 852k | } \ | 635 | 200k | } |
|
636 | | |
637 | | VP8_BILINEAR(16) |
638 | | VP8_BILINEAR(8) |
639 | | VP8_BILINEAR(4) |
640 | | |
641 | | #define VP78_MC_FUNC(IDX, SIZE) \ |
642 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ |
643 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ |
644 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ |
645 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ |
646 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ |
647 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ |
648 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ |
649 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \ |
650 | 36.4k | dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c |
651 | | |
652 | | #define VP78_BILINEAR_MC_FUNC(IDX, SIZE) \ |
653 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ |
654 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \ |
655 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \ |
656 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \ |
657 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
658 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
659 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \ |
660 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
661 | 54.6k | dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c |
662 | | |
663 | | av_cold void ff_vp78dsp_init(VP8DSPContext *dsp) |
664 | 18.2k | { |
665 | 18.2k | dsp->put_vp8_epel_pixels_tab[0][0][0] = put_vp8_pixels16_c; |
666 | 18.2k | dsp->put_vp8_epel_pixels_tab[0][0][2] = put_vp8_epel16_h6_c; |
667 | 18.2k | dsp->put_vp8_epel_pixels_tab[0][2][0] = put_vp8_epel16_v6_c; |
668 | 18.2k | dsp->put_vp8_epel_pixels_tab[0][2][2] = put_vp8_epel16_h6v6_c; |
669 | | |
670 | 18.2k | VP78_MC_FUNC(1, 8); |
671 | 18.2k | VP78_MC_FUNC(2, 4); |
672 | | |
673 | 18.2k | VP78_BILINEAR_MC_FUNC(0, 16); |
674 | 18.2k | VP78_BILINEAR_MC_FUNC(1, 8); |
675 | 18.2k | VP78_BILINEAR_MC_FUNC(2, 4); |
676 | | |
677 | | #if ARCH_AARCH64 |
678 | | ff_vp78dsp_init_aarch64(dsp); |
679 | | #elif ARCH_ARM |
680 | | ff_vp78dsp_init_arm(dsp); |
681 | | #elif ARCH_PPC |
682 | | ff_vp78dsp_init_ppc(dsp); |
683 | | #elif ARCH_RISCV |
684 | | ff_vp78dsp_init_riscv(dsp); |
685 | | #elif ARCH_X86 && HAVE_X86ASM |
686 | | ff_vp78dsp_init_x86(dsp); |
687 | | #endif |
688 | 18.2k | } |
689 | | |
690 | | #if CONFIG_VP7_DECODER |
691 | | LOOP_FILTERS(vp7) |
692 | | |
693 | | av_cold void ff_vp7dsp_init(VP8DSPContext *dsp) |
694 | 7.32k | { |
695 | 7.32k | dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c; |
696 | 7.32k | dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c; |
697 | 7.32k | dsp->vp8_idct_add = vp7_idct_add_c; |
698 | 7.32k | dsp->vp8_idct_dc_add = vp7_idct_dc_add_c; |
699 | 7.32k | dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c; |
700 | 7.32k | dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c; |
701 | | |
702 | 7.32k | dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c; |
703 | 7.32k | dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c; |
704 | 7.32k | dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c; |
705 | 7.32k | dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c; |
706 | | |
707 | 7.32k | dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c; |
708 | 7.32k | dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c; |
709 | 7.32k | dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c; |
710 | 7.32k | dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c; |
711 | | |
712 | 7.32k | dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c; |
713 | 7.32k | dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c; |
714 | | |
715 | | #if ARCH_RISCV |
716 | | ff_vp7dsp_init_riscv(dsp); |
717 | | #endif |
718 | 7.32k | } |
719 | | #endif /* CONFIG_VP7_DECODER */ |
720 | | |
721 | | #if CONFIG_VP8_DECODER |
722 | | LOOP_FILTERS(vp8) |
723 | | |
724 | | av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
725 | 10.8k | { |
726 | 10.8k | dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
727 | 10.8k | dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c; |
728 | 10.8k | dsp->vp8_idct_add = vp8_idct_add_c; |
729 | 10.8k | dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
730 | 10.8k | dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; |
731 | 10.8k | dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; |
732 | | |
733 | 10.8k | dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
734 | 10.8k | dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
735 | 10.8k | dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
736 | 10.8k | dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
737 | | |
738 | 10.8k | dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
739 | 10.8k | dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
740 | 10.8k | dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
741 | 10.8k | dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
742 | | |
743 | 10.8k | dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; |
744 | 10.8k | dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; |
745 | | |
746 | | #if ARCH_AARCH64 |
747 | | ff_vp8dsp_init_aarch64(dsp); |
748 | | #elif ARCH_ARM |
749 | | ff_vp8dsp_init_arm(dsp); |
750 | | #elif ARCH_RISCV |
751 | | ff_vp8dsp_init_riscv(dsp); |
752 | | #elif ARCH_X86 && HAVE_X86ASM |
753 | | ff_vp8dsp_init_x86(dsp); |
754 | | #elif ARCH_MIPS |
755 | | ff_vp8dsp_init_mips(dsp); |
756 | | #elif ARCH_LOONGARCH |
757 | | ff_vp8dsp_init_loongarch(dsp); |
758 | | #endif |
759 | 10.8k | } |
760 | | #endif /* CONFIG_VP8_DECODER */ |