/src/ffmpeg/libavcodec/x86/hpeldsp_init.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * SIMD-optimized halfpel functions |
3 | | * Copyright (c) 2000, 2001 Fabrice Bellard |
4 | | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | * |
22 | | * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
23 | | */ |
24 | | |
25 | | #include "libavutil/attributes.h" |
26 | | #include "libavutil/cpu.h" |
27 | | #include "libavutil/x86/cpu.h" |
28 | | #include "libavcodec/avcodec.h" |
29 | | #include "libavcodec/hpeldsp.h" |
30 | | #include "libavcodec/pixels.h" |
31 | | #include "fpel.h" |
32 | | #include "hpeldsp.h" |
33 | | |
34 | | /* Prototypes only: none of the functions in this list is defined in this file (presumably they come from the external x86 assembly -- confirm). All take (block, pixels, line_size, h). */ void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, |
35 | | ptrdiff_t line_size, int h); |
36 | | void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, |
37 | | ptrdiff_t line_size, int h); |
38 | | void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, |
39 | | ptrdiff_t line_size, int h); |
40 | | void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, |
41 | | ptrdiff_t line_size, int h); |
42 | | void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, |
43 | | ptrdiff_t line_size, int h); |
44 | | void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, |
45 | | ptrdiff_t line_size, int h); |
46 | | void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, |
47 | | ptrdiff_t line_size, int h); |
48 | | void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, |
49 | | const uint8_t *pixels, |
50 | | ptrdiff_t line_size, int h); /* _exact variant: installed unconditionally by hpeldsp_init_mmxext(); the non-exact one replaces it only when AV_CODEC_FLAG_BITEXACT is not set */ |
51 | | void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, |
52 | | ptrdiff_t line_size, int h); |
53 | | void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, |
54 | | ptrdiff_t line_size, int h); |
55 | | void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, |
56 | | const uint8_t *pixels, |
57 | | ptrdiff_t line_size, int h); /* _exact variant, same treatment as the x2 one above */ |
58 | | void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, |
59 | | ptrdiff_t line_size, int h); |
60 | | void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, |
61 | | ptrdiff_t line_size, int h); |
62 | | void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, |
63 | | ptrdiff_t line_size, int h); |
64 | | |
65 | 51.2k | #define put_pixels8_mmx ff_put_pixels8_mmx /* short aliases: the SET_HPEL_FUNCS* macros paste names without the ff_ prefix, and these map them onto the ff_-prefixed functions */ |
66 | 51.2k | #define put_pixels16_mmx ff_put_pixels16_mmx |
67 | 51.2k | #define put_pixels8_xy2_mmx ff_put_pixels8_xy2_mmx |
68 | 51.2k | #define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx /* the no_rnd full-pel names resolve to the same functions as the rounding names above */ |
69 | 51.2k | #define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx |
70 | | |
71 | | #if HAVE_INLINE_ASM |
72 | | |
73 | | /***********************************/ |
74 | | /* MMX no rounding */ |
75 | | #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx /* generated names have the form <x>_no_rnd_<y>_mmx */ |
76 | 0 | #define SET_RND MOVQ_WONE |
77 | | #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) |
78 | | #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) |
79 | | #define STATIC static /* presumably prefixed to the template's function definitions -- confirm in the templates */ |
80 | | /* Both template files are included while the five macros above are in effect. */ |
81 | | #include "rnd_template.c" |
82 | | #include "hpeldsp_rnd_template.c" |
83 | | |
84 | | #undef DEF |
85 | | #undef SET_RND |
86 | | #undef PAVGBP |
87 | | #undef PAVGB |
88 | | #undef STATIC |
89 | | /* CALL_2X_PIXELS is defined outside this file; presumably it defines the 16-wide function (first argument) by running the 8-wide one (second argument) twice, 8 bytes apart -- TODO confirm. */ |
90 | | #if HAVE_MMX |
91 | | CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8) |
92 | | CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8) |
93 | | |
94 | | CALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8) |
95 | | CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8) |
96 | | #endif |
97 | | |
98 | | /***********************************/ |
99 | | /* MMX rounding */ |
100 | | /* Second pass: names become ff_<x>_<y>_mmx, STATIC expands to nothing, NO_AVG is defined, and only rnd_template.c is included. */ |
101 | 0 | #define SET_RND MOVQ_WTWO |
102 | | #define DEF(x, y) ff_ ## x ## _ ## y ## _mmx |
103 | | #define STATIC |
104 | | #define NO_AVG |
105 | | |
106 | | #include "rnd_template.c" |
107 | | /* NO_AVG, DEF and SET_RND are undefined again below; STATIC is left defined. */ |
108 | | #undef NO_AVG |
109 | | #undef DEF |
110 | | #undef SET_RND |
111 | | |
112 | | #if HAVE_MMX |
113 | | CALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8) |
114 | | #endif |
115 | | |
116 | | #endif /* HAVE_INLINE_ASM */ |
117 | | |
118 | | |
119 | | #if HAVE_X86ASM |
120 | | /* HPELDSP_AVG_PIXELS16(CPUEXT): expands to one CALL_2X_PIXELS line per 16-wide name carrying the CPUEXT suffix, each paired with the ff_-prefixed 8-wide function of the same suffix and the value 8. */ |
121 | | #define HPELDSP_AVG_PIXELS16(CPUEXT) \ |
122 | | CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \ |
123 | | CALL_2X_PIXELS(put_pixels16_y2 ## CPUEXT, ff_put_pixels8_y2 ## CPUEXT, 8) \ |
124 | | CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \ |
125 | | CALL_2X_PIXELS(avg_pixels16 ## CPUEXT, ff_avg_pixels8 ## CPUEXT, 8) \ |
126 | | CALL_2X_PIXELS(avg_pixels16_x2 ## CPUEXT, ff_avg_pixels8_x2 ## CPUEXT, 8) \ |
127 | | CALL_2X_PIXELS(avg_pixels16_y2 ## CPUEXT, ff_avg_pixels8_y2 ## CPUEXT, 8) \ |
128 | | CALL_2X_PIXELS(avg_pixels16_xy2 ## CPUEXT, ff_avg_pixels8_xy2 ## CPUEXT, 8) \ |
129 | | CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8) |
130 | | /* Instantiated once, for the _mmxext suffix; hpeldsp_init_mmxext() installs the resulting 16-wide names. */ |
131 | | HPELDSP_AVG_PIXELS16(_mmxext) |
132 | | |
133 | | #endif /* HAVE_X86ASM */ |
134 | | |
135 | | #define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) /* slot 0 of c-><PFX>_pixels_tab IDX, assigned only if HAVE_MMX_EXTERNAL; expands to a bare `if` (no do/while), so use it only as a full statement inside braces */ \ |
136 | 205k | if (HAVE_MMX_EXTERNAL) \ |
137 | 205k | c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU |
138 | | |
139 | | #define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) /* slots 0 (via SET_HPEL_FUNCS_EXT) and 3 (the _xy2_ function) */ \ |
140 | 205k | do { \ |
141 | 205k | SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU); \ |
142 | 205k | c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \ |
143 | 205k | } while (0) |
144 | | #define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) /* slots 1 (the _x2_ function) and 2 (the _y2_ function) */ \ |
145 | 153k | do { \ |
146 | 153k | c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ |
147 | 153k | c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ |
148 | 153k | } while (0) |
149 | | #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) /* all four slots: SET_HPEL_FUNCS03 followed by SET_HPEL_FUNCS12 */ \ |
150 | 102k | do { \ |
151 | 102k | SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU); \ |
152 | 102k | SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU); \ |
153 | 102k | } while (0) |
154 | | |
155 | | static void hpeldsp_init_mmx(HpelDSPContext *c, int flags) /* Installs the *_mmx functions into c's put, put_no_rnd and avg_no_rnd tables. The body is compiled only under HAVE_MMX_INLINE; `flags` is not read here. */ |
156 | 51.2k | { |
157 | 51.2k | #if HAVE_MMX_INLINE |
158 | 51.2k | SET_HPEL_FUNCS03(put, [0], 16, mmx); /* put_pixels_tab[0]: slot 0 (only if HAVE_MMX_EXTERNAL) and slot 3 */ |
159 | 51.2k | SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx); /* put_no_rnd_pixels_tab[0]: slots 0..3 */ |
160 | 51.2k | SET_HPEL_FUNCS12(avg_no_rnd, , 16, mmx); /* empty IDX: avg_no_rnd_pixels_tab is indexed with a single subscript; sets slots 1 and 2 */ |
161 | 51.2k | c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_mmx; |
162 | 51.2k | SET_HPEL_FUNCS03(put, [1], 8, mmx); /* same pattern for the [1] tables with the 8-wide functions */ |
163 | 51.2k | SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx); |
164 | 51.2k | #endif |
165 | 51.2k | } |
166 | | |
167 | | static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) /* Installs the mmxext functions into c's tables. The body is compiled only under HAVE_MMXEXT_EXTERNAL; `flags` is tested for AV_CODEC_FLAG_BITEXACT. */ |
168 | 51.2k | { |
169 | 51.2k | #if HAVE_MMXEXT_EXTERNAL |
170 | 51.2k | c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext; |
171 | 51.2k | c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext; |
172 | | /* avg table [0]: the 16-wide names without the ff_ prefix are the ones generated by HPELDSP_AVG_PIXELS16(_mmxext) */ |
173 | 51.2k | c->avg_pixels_tab[0][0] = avg_pixels16_mmxext; |
174 | 51.2k | c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext; |
175 | 51.2k | c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext; |
176 | 51.2k | c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; |
177 | | /* put/avg tables [1]: 8-wide functions */ |
178 | 51.2k | c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext; |
179 | 51.2k | c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext; |
180 | | |
181 | 51.2k | c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext; |
182 | 51.2k | c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext; |
183 | 51.2k | c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext; |
184 | 51.2k | c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext; |
185 | | /* the _exact no_rnd variants are installed first; they are replaced further down when bit-exact output is not requested */ |
186 | 51.2k | c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; |
187 | 51.2k | c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; |
188 | | |
189 | 51.2k | c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_mmxext; |
190 | | /* without AV_CODEC_FLAG_BITEXACT: overwrite the slots set above with the non-exact no_rnd and the avg_approx xy2 variants (presumably faster but not bit-identical -- confirm) */ |
191 | 51.2k | if (!(flags & AV_CODEC_FLAG_BITEXACT)) { |
192 | 51.2k | c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext; |
193 | 51.2k | c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext; |
194 | 51.2k | c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext; |
195 | 51.2k | c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext; |
196 | | |
197 | 51.2k | c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext; |
198 | 51.2k | c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext; |
199 | 51.2k | } |
200 | 51.2k | #endif /* HAVE_MMXEXT_EXTERNAL */ |
201 | 51.2k | } |
202 | | |
203 | | static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags) /* Installs the sse2 functions into the [0] put/avg tables, put_no_rnd_pixels_tab[0][0] and avg_no_rnd_pixels_tab[0]. The body is compiled only under HAVE_SSE2_EXTERNAL; `flags` is not read here. */ |
204 | 51.2k | { |
205 | 51.2k | #if HAVE_SSE2_EXTERNAL |
206 | 51.2k | c->put_pixels_tab[0][0] = ff_put_pixels16_sse2; |
207 | 51.2k | c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2; /* same function as put_pixels_tab[0][0] */ |
208 | 51.2k | c->put_pixels_tab[0][1] = ff_put_pixels16_x2_sse2; |
209 | 51.2k | c->put_pixels_tab[0][2] = ff_put_pixels16_y2_sse2; |
210 | 51.2k | c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_sse2; |
211 | 51.2k | c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2; |
212 | 51.2k | c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2; |
213 | 51.2k | c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2; |
214 | 51.2k | c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2; |
215 | 51.2k | c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_sse2; /* same function as avg_pixels_tab[0][0] */ |
216 | 51.2k | #endif /* HAVE_SSE2_EXTERNAL */ |
217 | 51.2k | } |
218 | | |
219 | | static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags) /* Installs the ssse3 xy2 functions into slot 3 of the put and avg tables, for both [0] (16-wide) and [1] (8-wide). The body is compiled only under HAVE_SSSE3_EXTERNAL; `flags` is not read here. */ |
220 | 51.2k | { |
221 | 51.2k | #if HAVE_SSSE3_EXTERNAL |
222 | 51.2k | c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_ssse3; |
223 | 51.2k | c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_ssse3; |
224 | 51.2k | c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_ssse3; |
225 | 51.2k | c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_ssse3; |
226 | 51.2k | #endif |
227 | 51.2k | } |
228 | | |
229 | | av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) /* x86 entry point: reads the CPU flags once and runs each per-extension initializer whose flag check passes, handing `c` and `flags` through unchanged. */ |
230 | 197k | { |
231 | 197k | int cpu_flags = av_get_cpu_flags(); |
232 | | /* The calls run in this fixed order (mmx, mmxext, sse2_fast, ssse3), so a later initializer overwrites any table slot an earlier one also set, e.g. put_pixels_tab[0][3]. */ |
233 | 197k | if (INLINE_MMX(cpu_flags)) |
234 | 51.2k | hpeldsp_init_mmx(c, flags); |
235 | | |
236 | 197k | if (EXTERNAL_MMXEXT(cpu_flags)) |
237 | 51.2k | hpeldsp_init_mmxext(c, flags); |
238 | | |
239 | 197k | if (EXTERNAL_SSE2_FAST(cpu_flags)) |
240 | 51.2k | hpeldsp_init_sse2_fast(c, flags); |
241 | | |
242 | 197k | if (EXTERNAL_SSSE3(cpu_flags)) |
243 | 51.2k | hpeldsp_init_ssse3(c, flags); |
244 | 197k | } |