/src/ffmpeg/libavcodec/x86/vp9dsp_init.c
Line | Count | Source |
1 | | /* |
2 | | * VP9 SIMD optimizations |
3 | | * |
4 | | * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | #include "libavutil/attributes.h" |
24 | | #include "libavutil/cpu.h" |
25 | | #include "libavutil/x86/cpu.h" |
26 | | #include "libavcodec/vp9dsp.h" |
27 | | #include "libavcodec/x86/vp9dsp_init.h" |
28 | | |
29 | | #if HAVE_X86ASM |
30 | | /* decl_fpel_func is not defined in this file (presumably it comes from vp9dsp_init.h); the lines below presumably declare the full-pel put/avg kernels - TODO confirm. */ |
31 | | decl_fpel_func(put, 4, , mmx); |
32 | | decl_fpel_func(put, 8, , mmx); |
33 | | decl_fpel_func(put, 16, , sse); |
34 | | decl_fpel_func(put, 32, , sse); |
35 | | decl_fpel_func(put, 64, , sse); |
36 | | decl_fpel_func(avg, 4, _8, mmxext); |
37 | | decl_fpel_func(avg, 8, _8, mmxext); |
38 | | decl_fpel_func(avg, 16, _8, sse2); |
39 | | decl_fpel_func(avg, 32, _8, sse2); |
40 | | decl_fpel_func(avg, 64, _8, sse2); |
41 | | decl_fpel_func(put, 32, , avx); |
42 | | decl_fpel_func(put, 64, , avx); |
43 | | decl_fpel_func(avg, 32, _8, avx2); |
44 | | decl_fpel_func(avg, 64, _8, avx2); |
45 | | /* decl_mc_funcs is likewise defined elsewhere. The 16 ssse3 and 32 avx2 invocations are only emitted when ARCH_X86_64 is set. */ |
46 | | decl_mc_funcs(4, mmxext, int16_t, 8, 8); |
47 | | decl_mc_funcs(8, sse2, int16_t, 8, 8); |
48 | | decl_mc_funcs(4, ssse3, int8_t, 32, 8); |
49 | | decl_mc_funcs(8, ssse3, int8_t, 32, 8); |
50 | | #if ARCH_X86_64 |
51 | | decl_mc_funcs(16, ssse3, int8_t, 32, 8); |
52 | | decl_mc_funcs(32, avx2, int8_t, 32, 8); |
53 | | #endif |
54 | | /* mc_rep_funcs is defined elsewhere (presumably it builds a wider kernel from narrower ones - TODO confirm). On x86-32 the 16 ssse3 variant comes from mc_rep_funcs rather than decl_mc_funcs. */ |
55 | | mc_rep_funcs(16, 8, 8, sse2, int16_t, 8, 8) |
56 | | #if ARCH_X86_32 |
57 | | mc_rep_funcs(16, 8, 8, ssse3, int8_t, 32, 8) |
58 | | #endif |
59 | | mc_rep_funcs(32, 16, 16, sse2, int16_t, 8, 8) |
60 | | mc_rep_funcs(32, 16, 16, ssse3, int8_t, 32, 8) |
61 | | mc_rep_funcs(64, 32, 32, sse2, int16_t, 8, 8) |
62 | | mc_rep_funcs(64, 32, 32, ssse3, int8_t, 32, 8) |
63 | | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL |
64 | | mc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8) |
65 | | #endif |
66 | | /* Filter tables defined outside this file; only their element types and dimensions are visible here. */ |
67 | | extern const int8_t ff_filters_ssse3[3][15][4][32]; |
68 | | extern const int16_t ff_filters_sse2[3][15][8][8]; |
69 | | /* filters_8tap_2d_fn and filters_8tap_2d_fn2 are macros defined elsewhere (presumably they emit the 2-D 8-tap wrappers - TODO confirm). The avx2 invocations need ARCH_X86_64 and HAVE_AVX2_EXTERNAL. */ |
70 | | filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2) |
71 | | filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2) |
72 | | filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3) |
73 | | filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3) |
74 | | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL |
75 | | filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3) |
76 | | filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3) |
77 | | filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3) |
78 | | filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3) |
79 | | #endif |
80 | | /* Same layout for the filters_8tap_1d_fn2 and filters_8tap_1d_fn3 macros (also defined elsewhere), with the same guard on the avx2 invocations. */ |
81 | | filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2) |
82 | | filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2) |
83 | | filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3) |
84 | | filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3) |
85 | | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL |
86 | | filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3) |
87 | | filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3) |
88 | | filters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3) |
89 | | filters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3) |
90 | | #endif |
91 | | /* itxfm_func() expands to one prototype ff_vp9_<typea>_<typeb>_<size>x<size>_add_<opt>(dst, stride, block, eob); itxfm_funcs() emits the four idct/iadst combinations for one size and opt. */ |
92 | | #define itxfm_func(typea, typeb, size, opt) \ |
93 | | void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \ |
94 | | int16_t *block, int eob) |
95 | | #define itxfm_funcs(size, opt) \ |
96 | | itxfm_func(idct, idct, size, opt); \ |
97 | | itxfm_func(iadst, idct, size, opt); \ |
98 | | itxfm_func(idct, iadst, size, opt); \ |
99 | | itxfm_func(iadst, iadst, size, opt) |
100 | | /* Prototypes only. Size 4 mmxext and every size 32 variant declare idct_idct alone; iwht is declared for size 4 mmx only. */ |
101 | | itxfm_func(idct, idct, 4, mmxext); |
102 | | itxfm_func(idct, iadst, 4, sse2); |
103 | | itxfm_func(iadst, idct, 4, sse2); |
104 | | itxfm_func(iadst, iadst, 4, sse2); |
105 | | itxfm_funcs(4, ssse3); |
106 | | itxfm_funcs(8, sse2); |
107 | | itxfm_funcs(8, ssse3); |
108 | | itxfm_funcs(8, avx); |
109 | | itxfm_funcs(16, sse2); |
110 | | itxfm_funcs(16, ssse3); |
111 | | itxfm_funcs(16, avx); |
112 | | itxfm_func(idct, idct, 32, sse2); |
113 | | itxfm_func(idct, idct, 32, ssse3); |
114 | | itxfm_func(idct, idct, 32, avx); |
115 | | itxfm_func(iwht, iwht, 4, mmx); |
116 | | itxfm_funcs(16, avx2); |
117 | | itxfm_func(idct, idct, 32, avx2); |
118 | | /* Both helper macros are dropped once the prototypes above have been emitted. */ |
119 | | #undef itxfm_func |
120 | | #undef itxfm_funcs |
121 | | /* lpf_funcs() declares two prototypes for one size1/size2/opt combination: ff_vp9_loop_filter_v_... and ff_vp9_loop_filter_h_..., both taking (dst, stride, E, I, H). */ |
122 | | #define lpf_funcs(size1, size2, opt) \ |
123 | | void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ |
124 | | int E, int I, int H); \ |
125 | | void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ |
126 | | int E, int I, int H) |
127 | | /* mmxext covers 4_8 and 8_8; sse2, ssse3 and avx each cover 16_16, 44_16, 84_16, 48_16 and 88_16. */ |
128 | | lpf_funcs(4, 8, mmxext); |
129 | | lpf_funcs(8, 8, mmxext); |
130 | | lpf_funcs(16, 16, sse2); |
131 | | lpf_funcs(16, 16, ssse3); |
132 | | lpf_funcs(16, 16, avx); |
133 | | lpf_funcs(44, 16, sse2); |
134 | | lpf_funcs(44, 16, ssse3); |
135 | | lpf_funcs(44, 16, avx); |
136 | | lpf_funcs(84, 16, sse2); |
137 | | lpf_funcs(84, 16, ssse3); |
138 | | lpf_funcs(84, 16, avx); |
139 | | lpf_funcs(48, 16, sse2); |
140 | | lpf_funcs(48, 16, ssse3); |
141 | | lpf_funcs(48, 16, avx); |
142 | | lpf_funcs(88, 16, sse2); |
143 | | lpf_funcs(88, 16, ssse3); |
144 | | lpf_funcs(88, 16, avx); |
145 | | |
146 | | #undef lpf_funcs |
147 | | /* ipred_func() expands to one prototype ff_vp9_ipred_<type>_<size>x<size>_<opt>(dst, stride, l, a). */ |
148 | | #define ipred_func(size, type, opt) \ |
149 | | void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \ |
150 | | const uint8_t *l, const uint8_t *a) |
151 | | |
152 | | ipred_func(8, v, mmx); |
153 | | /* dc, dc_left and dc_top prototypes for one size and opt. */ |
154 | | #define ipred_dc_funcs(size, opt) \ |
155 | | ipred_func(size, dc, opt); \ |
156 | | ipred_func(size, dc_left, opt); \ |
157 | | ipred_func(size, dc_top, opt) |
158 | | |
159 | | ipred_dc_funcs(4, mmxext); |
160 | | ipred_dc_funcs(8, mmxext); |
161 | | /* tm, dl, dr, hd, hu, vl and vr prototypes for one size and opt. */ |
162 | | #define ipred_dir_tm_funcs(size, opt) \ |
163 | | ipred_func(size, tm, opt); \ |
164 | | ipred_func(size, dl, opt); \ |
165 | | ipred_func(size, dr, opt); \ |
166 | | ipred_func(size, hd, opt); \ |
167 | | ipred_func(size, hu, opt); \ |
168 | | ipred_func(size, vl, opt); \ |
169 | | ipred_func(size, vr, opt) |
170 | | |
171 | | ipred_dir_tm_funcs(4, mmxext); |
172 | | |
173 | | ipred_func(16, v, sse); |
174 | | ipred_func(32, v, sse); |
175 | | |
176 | | ipred_dc_funcs(16, sse2); |
177 | | ipred_dc_funcs(32, sse2); |
178 | | /* Everything ipred_dir_tm_funcs() declares, plus the h prototype. */ |
179 | | #define ipred_dir_tm_h_funcs(size, opt) \ |
180 | | ipred_dir_tm_funcs(size, opt); \ |
181 | | ipred_func(size, h, opt) |
182 | | |
183 | | ipred_dir_tm_h_funcs(8, sse2); |
184 | | ipred_dir_tm_h_funcs(16, sse2); |
185 | | ipred_dir_tm_h_funcs(32, sse2); |
186 | | |
187 | | ipred_func(4, h, sse2); |
188 | | /* The dc set and the dir/tm/h set together for one size and opt. */ |
189 | | #define ipred_all_funcs(size, opt) \ |
190 | | ipred_dc_funcs(size, opt); \ |
191 | | ipred_dir_tm_h_funcs(size, opt) |
192 | | |
193 | | // FIXME hd/vl_4x4_ssse3 do not exist; ff_vp9dsp_init_x86() #defines both names to the mmxext versions |
194 | | ipred_all_funcs(4, ssse3); |
195 | | ipred_all_funcs(8, ssse3); |
196 | | ipred_all_funcs(16, ssse3); |
197 | | ipred_all_funcs(32, ssse3); |
198 | | |
199 | | ipred_dir_tm_h_funcs(8, avx); |
200 | | ipred_dir_tm_h_funcs(16, avx); |
201 | | ipred_dir_tm_h_funcs(32, avx); |
202 | | |
203 | | ipred_func(32, v, avx); |
204 | | |
205 | | ipred_dc_funcs(32, avx2); |
206 | | ipred_func(32, h, avx2); |
207 | | ipred_func(32, tm, avx2); |
208 | | /* NOTE(review): ipred_all_funcs is not #undef'd here, unlike the other four ipred helper macros. */ |
209 | | #undef ipred_func |
210 | | #undef ipred_dir_tm_h_funcs |
211 | | #undef ipred_dir_tm_funcs |
212 | | #undef ipred_dc_funcs |
213 | | |
214 | | #endif /* HAVE_X86ASM */ |
215 | | /* Fill dsp with x86 SIMD function pointers chosen from the runtime CPU flags. bpp 10 and 12 are handed to their own init functions; any other bpp falls through to the table setup below. The body is empty unless HAVE_X86ASM is set. */ |
216 | | av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) |
217 | 11.7k | { |
218 | 11.7k | #if HAVE_X86ASM |
219 | 11.7k | int cpu_flags; |
220 | | /* 10 and 12 bpp return early through dedicated init functions; bitexact is forwarded unchanged. */ |
221 | 11.7k | if (bpp == 10) { |
222 | 3.51k | ff_vp9dsp_init_10bpp_x86(dsp, bitexact); |
223 | 3.51k | return; |
224 | 8.20k | } else if (bpp == 12) { |
225 | 3.35k | ff_vp9dsp_init_12bpp_x86(dsp, bitexact); |
226 | 3.35k | return; |
227 | 3.35k | } |
228 | | /* Read the CPU flags once; every EXTERNAL_*() test below takes them as its argument. */ |
229 | 4.84k | cpu_flags = av_get_cpu_flags(); |
230 | | /* init_lpf(opt): set both loop_filter_16 entries and all eight loop_filter_mix2 entries to the <opt> functions; last index 0 gets the _h_ function, last index 1 the _v_ one. */ |
231 | 7.99k | #define init_lpf(opt) do { \ |
232 | 7.99k | dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \ |
233 | 7.99k | dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \ |
234 | 7.99k | dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \ |
235 | 7.99k | dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \ |
236 | 7.99k | dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ |
237 | 7.99k | dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \ |
238 | 7.99k | dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ |
239 | 7.99k | dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \ |
240 | 7.99k | dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ |
241 | 7.99k | dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \ |
242 | 7.99k | } while (0) |
243 | | /* init_ipred(sz, opt, t, e): point intra_pred[TX_<sz>X<sz>][<e>_PRED] at ff_vp9_ipred_<t>_<sz>x<sz>_<opt>. */ |
244 | 4.84k | #define init_ipred(sz, opt, t, e) \ |
245 | 322k | dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt |
246 | | /* The two aliases below map the hd/vl 4x4 ssse3 names onto the mmxext functions, so that init_all_ipred(4, ssse3) can expand with a uniform opt suffix. */ |
247 | 4.84k | #define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext |
248 | 4.84k | #define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext |
249 | 29.3k | #define init_dir_tm_ipred(sz, opt) do { \ |
250 | 29.3k | init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \ |
251 | 29.3k | init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \ |
252 | 29.3k | init_ipred(sz, opt, hd, HOR_DOWN); \ |
253 | 29.3k | init_ipred(sz, opt, vl, VERT_LEFT); \ |
254 | 29.3k | init_ipred(sz, opt, hu, HOR_UP); \ |
255 | 29.3k | init_ipred(sz, opt, tm, TM_VP8); \ |
256 | 29.3k | init_ipred(sz, opt, vr, VERT_RIGHT); \ |
257 | 29.3k | } while (0) |
258 | 26.6k | #define init_dir_tm_h_ipred(sz, opt) do { \ |
259 | 26.6k | init_dir_tm_ipred(sz, opt); \ |
260 | 26.6k | init_ipred(sz, opt, h, HOR); \ |
261 | 26.6k | } while (0) |
262 | 23.9k | #define init_dc_ipred(sz, opt) do { \ |
263 | 23.9k | init_ipred(sz, opt, dc, DC); \ |
264 | 23.9k | init_ipred(sz, opt, dc_left, LEFT_DC); \ |
265 | 23.9k | init_ipred(sz, opt, dc_top, TOP_DC); \ |
266 | 23.9k | } while (0) |
267 | 10.6k | #define init_all_ipred(sz, opt) do { \ |
268 | 10.6k | init_dc_ipred(sz, opt); \ |
269 | 10.6k | init_dir_tm_h_ipred(sz, opt); \ |
270 | 10.6k | } while (0) |
271 | | /* The blocks run in the order MMX, MMXEXT, SSE, SSE2, SSSE3, AVX, AVX2, so a later block overwrites any entry an earlier one also set. In the MMX block the four index-4 (lossless) itxfm_add entries are only installed when !bitexact. */ |
272 | 4.84k | if (EXTERNAL_MMX(cpu_flags)) { |
273 | 2.66k | init_fpel_func(4, 0, 4, put, , mmx); |
274 | 2.66k | init_fpel_func(3, 0, 8, put, , mmx); |
275 | 2.66k | if (!bitexact) { |
276 | 2.66k | dsp->itxfm_add[4 /* lossless */][DCT_DCT] = |
277 | 2.66k | dsp->itxfm_add[4 /* lossless */][ADST_DCT] = |
278 | 2.66k | dsp->itxfm_add[4 /* lossless */][DCT_ADST] = |
279 | 2.66k | dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx; |
280 | 2.66k | } |
281 | 2.66k | init_ipred(8, mmx, v, VERT); |
282 | 2.66k | } |
283 | | /* MMXEXT: the four loop_filter_8 entries, the TX_4X4 DCT_DCT transform, dc predictors for 4x4 and 8x8, dir/tm predictors for 4x4, plus init_subpel2/init_fpel_func entries (macros defined elsewhere). */ |
284 | 4.84k | if (EXTERNAL_MMXEXT(cpu_flags)) { |
285 | 2.66k | dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext; |
286 | 2.66k | dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext; |
287 | 2.66k | dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext; |
288 | 2.66k | dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext; |
289 | 2.66k | init_subpel2(4, 0, 4, put, 8, mmxext); |
290 | 2.66k | init_subpel2(4, 1, 4, avg, 8, mmxext); |
291 | 2.66k | init_fpel_func(4, 1, 4, avg, _8, mmxext); |
292 | 2.66k | init_fpel_func(3, 1, 8, avg, _8, mmxext); |
293 | 2.66k | dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; |
294 | 2.66k | init_dc_ipred(4, mmxext); |
295 | 2.66k | init_dc_ipred(8, mmxext); |
296 | 2.66k | init_dir_tm_ipred(4, mmxext); |
297 | 2.66k | } |
298 | | /* SSE: put fpel entries for sizes 16, 32 and 64, and the v predictor for 16x16 and 32x32. */ |
299 | 4.84k | if (EXTERNAL_SSE(cpu_flags)) { |
300 | 2.66k | init_fpel_func(2, 0, 16, put, , sse); |
301 | 2.66k | init_fpel_func(1, 0, 32, put, , sse); |
302 | 2.66k | init_fpel_func(0, 0, 64, put, , sse); |
303 | 2.66k | init_ipred(16, sse, v, VERT); |
304 | 2.66k | init_ipred(32, sse, v, VERT); |
305 | 2.66k | } |
306 | | /* SSE2: note that the ADST_DCT slot receives the idct_iadst function and DCT_ADST the iadst_idct one (function name order is reversed relative to the index name). All four TX_32X32 slots share the idct_idct function. */ |
307 | 4.84k | if (EXTERNAL_SSE2(cpu_flags)) { |
308 | 2.66k | init_subpel3_8to64(0, put, 8, sse2); |
309 | 2.66k | init_subpel3_8to64(1, avg, 8, sse2); |
310 | 2.66k | init_fpel_func(2, 1, 16, avg, _8, sse2); |
311 | 2.66k | init_fpel_func(1, 1, 32, avg, _8, sse2); |
312 | 2.66k | init_fpel_func(0, 1, 64, avg, _8, sse2); |
313 | 2.66k | init_lpf(sse2); |
314 | 2.66k | dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2; |
315 | 2.66k | dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2; |
316 | 2.66k | dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2; |
317 | 2.66k | dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2; |
318 | 2.66k | dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2; |
319 | 2.66k | dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2; |
320 | 2.66k | dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2; |
321 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2; |
322 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2; |
323 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2; |
324 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2; |
325 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_ADST] = |
326 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_DCT] = |
327 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_ADST] = |
328 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2; |
329 | 2.66k | init_dc_ipred(16, sse2); |
330 | 2.66k | init_dc_ipred(32, sse2); |
331 | 2.66k | init_dir_tm_h_ipred(8, sse2); |
332 | 2.66k | init_dir_tm_h_ipred(16, sse2); |
333 | 2.66k | init_dir_tm_h_ipred(32, sse2); |
334 | 2.66k | init_ipred(4, sse2, h, HOR); |
335 | 2.66k | } |
336 | | /* SSSE3: replaces every TX_4X4 through TX_32X32 itxfm_add entry, the init_lpf set, and the dc plus dir/tm/h predictors for 4x4 through 32x32 (the VERT entries are not touched here). */ |
337 | 4.84k | if (EXTERNAL_SSSE3(cpu_flags)) { |
338 | 2.66k | init_subpel3(0, put, 8, ssse3); |
339 | 2.66k | init_subpel3(1, avg, 8, ssse3); |
340 | 2.66k | dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3; |
341 | 2.66k | dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3; |
342 | 2.66k | dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3; |
343 | 2.66k | dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3; |
344 | 2.66k | dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; |
345 | 2.66k | dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3; |
346 | 2.66k | dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3; |
347 | 2.66k | dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3; |
348 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; |
349 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3; |
350 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3; |
351 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3; |
352 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_ADST] = |
353 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_DCT] = |
354 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_ADST] = |
355 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3; |
356 | 2.66k | init_lpf(ssse3); |
357 | 2.66k | init_all_ipred(4, ssse3); |
358 | 2.66k | init_all_ipred(8, ssse3); |
359 | 2.66k | init_all_ipred(16, ssse3); |
360 | 2.66k | init_all_ipred(32, ssse3); |
361 | 2.66k | } |
362 | | /* AVX: TX_8X8, TX_16X16 and TX_32X32 itxfm_add entries, the init_lpf set, and dir/tm/h predictors for 8x8 through 32x32. The EXTERNAL_AVX_FAST block that follows adds the put fpel entries for sizes 32 and 64 and the 32x32 v predictor. */ |
363 | 4.84k | if (EXTERNAL_AVX(cpu_flags)) { |
364 | 2.66k | dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; |
365 | 2.66k | dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx; |
366 | 2.66k | dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx; |
367 | 2.66k | dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx; |
368 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; |
369 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx; |
370 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx; |
371 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx; |
372 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_ADST] = |
373 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_DCT] = |
374 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_ADST] = |
375 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; |
376 | 2.66k | init_lpf(avx); |
377 | 2.66k | init_dir_tm_h_ipred(8, avx); |
378 | 2.66k | init_dir_tm_h_ipred(16, avx); |
379 | 2.66k | init_dir_tm_h_ipred(32, avx); |
380 | 2.66k | } |
381 | 4.84k | if (EXTERNAL_AVX_FAST(cpu_flags)) { |
382 | 2.66k | init_fpel_func(1, 0, 32, put, , avx); |
383 | 2.66k | init_fpel_func(0, 0, 64, put, , avx); |
384 | 2.66k | init_ipred(32, avx, v, VERT); |
385 | 2.66k | } |
386 | | /* AVX2: avg fpel entries for sizes 32 and 64 and the 32x32 dc/h/tm predictors are unconditional within this block; the itxfm_add and init_subpel3_32_64 lines additionally need ARCH_X86_64 and HAVE_AVX2_EXTERNAL. NOTE(review): the if (ARCH_X86_64) wraps an #if that repeats the same condition. */ |
387 | 4.84k | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
388 | 2.66k | init_fpel_func(1, 1, 32, avg, _8, avx2); |
389 | 2.66k | init_fpel_func(0, 1, 64, avg, _8, avx2); |
390 | 2.66k | if (ARCH_X86_64) { |
391 | 2.66k | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL |
392 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2; |
393 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx2; |
394 | 2.66k | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx2; |
395 | 2.66k | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx2; |
396 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_ADST] = |
397 | 2.66k | dsp->itxfm_add[TX_32X32][ADST_DCT] = |
398 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_ADST] = |
399 | 2.66k | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx2; |
400 | 2.66k | init_subpel3_32_64(0, put, 8, avx2); |
401 | 2.66k | init_subpel3_32_64(1, avg, 8, avx2); |
402 | 2.66k | #endif |
403 | 2.66k | } |
404 | 2.66k | init_dc_ipred(32, avx2); |
405 | 2.66k | init_ipred(32, avx2, h, HOR); |
406 | 2.66k | init_ipred(32, avx2, tm, TM_VP8); |
407 | 2.66k | } |
408 | | /* NOTE(review): none of the four names undefined below is #defined in this file (the code above uses init_fpel_func, not init_fpel); presumably they come from vp9dsp_init.h - confirm. The init_lpf/init_*ipred macros and the two ssse3 aliases defined in this function stay defined. */ |
409 | 4.84k | #undef init_fpel |
410 | 4.84k | #undef init_subpel1 |
411 | 4.84k | #undef init_subpel2 |
412 | 4.84k | #undef init_subpel3 |
413 | | |
414 | 4.84k | #endif /* HAVE_X86ASM */ |
415 | 4.84k | } |