/src/ffmpeg/libavcodec/x86/flacdsp_init.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2014 James Almer |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/attributes.h" |
22 | | #include "libavcodec/flacdsp.h" |
23 | | #include "libavutil/x86/cpu.h" |
24 | | #include "config.h" |
25 | | |
/*
 * Prototypes for the LPC and wasted-bits routines that ff_flacdsp_init_x86()
 * installs into the DSP context. None of them is defined in this file; the
 * name suffix matches the CPU-feature check under which each one is selected.
 */
void ff_flac_lpc_16_sse4(int32_t *samples, const int coeffs[32],
                         int order, int qlevel, int len);
void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32],
                         int order, int qlevel, int len);
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32],
                        int order, int qlevel, int len);

void ff_flac_wasted_32_sse2(int32_t *decoded,
                            int wasted, int len);
void ff_flac_wasted_33_sse4(int64_t *decoded, const int32_t *residual,
                            int wasted, int len);

35 | | |
/*
 * Declare the ls, rs and ms decorrelation routines for one (fmt, opt) pair,
 * e.g. DECORRELATE_FUNCS(16, sse2) declares ff_flac_decorrelate_ls_16_sse2
 * and its rs/ms siblings. All three share one signature.
 * NOTE(review): ls/rs/ms presumably name FLAC's left-side, right-side and
 * mid-side stereo modes -- confirm against the generic flacdsp code.
 * The expansion ends without a semicolon; the invocation supplies it.
 */
#define DECORRELATE_FUNCS(fmt, opt)                                          \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                          int channels, int len, int shift); \
void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                          int channels, int len, int shift); \
void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                          int channels, int len, int shift)
43 | | |
/*
 * Declare the indep2/indep4/indep6/indep8 decorrelation routines for one
 * (fmt, opt) pair, e.g. DECORRELATE_IFUNCS(32, avx) declares
 * ff_flac_decorrelate_indep2_32_avx through ff_flac_decorrelate_indep8_32_avx.
 * The digit in the name is the channel count the routine is selected for in
 * ff_flacdsp_init_x86(). All four share one signature.
 * The expansion ends without a semicolon; the invocation supplies it.
 */
#define DECORRELATE_IFUNCS(fmt, opt)                                             \
void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                              int channels, int len, int shift); \
void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                              int channels, int len, int shift); \
void ff_flac_decorrelate_indep6_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                              int channels, int len, int shift); \
void ff_flac_decorrelate_indep8_##fmt##_##opt(uint8_t **out, int32_t **in,       \
                                              int channels, int len, int shift)
53 | | |
54 | | DECORRELATE_FUNCS(16, sse2); |
55 | | DECORRELATE_FUNCS(16, avx); |
56 | | DECORRELATE_FUNCS(32, sse2); |
57 | | DECORRELATE_FUNCS(32, avx); |
58 | | DECORRELATE_IFUNCS(16, ssse3); |
59 | | DECORRELATE_IFUNCS(16, avx); |
60 | | DECORRELATE_IFUNCS(32, ssse3); |
61 | | DECORRELATE_IFUNCS(32, avx); |
62 | | |
63 | | av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels) |
64 | 932k | { |
65 | 932k | #if HAVE_X86ASM |
66 | 932k | int cpu_flags = av_get_cpu_flags(); |
67 | | |
68 | 932k | if (EXTERNAL_SSE2(cpu_flags)) { |
69 | 6.00k | c->wasted32 = ff_flac_wasted_32_sse2; |
70 | 6.00k | if (fmt == AV_SAMPLE_FMT_S16) { |
71 | 3.09k | c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2; |
72 | 3.09k | c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2; |
73 | 3.09k | c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2; |
74 | 3.09k | } else if (fmt == AV_SAMPLE_FMT_S32) { |
75 | 2.91k | c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2; |
76 | 2.91k | c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2; |
77 | 2.91k | c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2; |
78 | 2.91k | } |
79 | 6.00k | } |
80 | 932k | if (EXTERNAL_SSSE3(cpu_flags)) { |
81 | 6.00k | if (fmt == AV_SAMPLE_FMT_S16) { |
82 | 3.09k | if (channels == 2) |
83 | 846 | c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3; |
84 | 2.24k | else if (channels == 4) |
85 | 266 | c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3; |
86 | 1.97k | else if (channels == 6) |
87 | 59 | c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3; |
88 | 1.92k | else if (ARCH_X86_64 && channels == 8) |
89 | 116 | c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3; |
90 | 3.09k | } else if (fmt == AV_SAMPLE_FMT_S32) { |
91 | 2.91k | if (channels == 2) |
92 | 1.66k | c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3; |
93 | 1.25k | else if (channels == 4) |
94 | 24 | c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3; |
95 | 1.22k | else if (channels == 6) |
96 | 39 | c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3; |
97 | 1.19k | else if (ARCH_X86_64 && channels == 8) |
98 | 261 | c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3; |
99 | 2.91k | } |
100 | 6.00k | } |
101 | 932k | if (EXTERNAL_SSE4(cpu_flags)) { |
102 | 6.00k | c->lpc16 = ff_flac_lpc_16_sse4; |
103 | 6.00k | c->lpc32 = ff_flac_lpc_32_sse4; |
104 | 6.00k | c->wasted33 = ff_flac_wasted_33_sse4; |
105 | 6.00k | } |
106 | 932k | if (EXTERNAL_AVX(cpu_flags)) { |
107 | 6.00k | if (fmt == AV_SAMPLE_FMT_S16) { |
108 | 3.09k | if (ARCH_X86_64 && channels == 8) |
109 | 116 | c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx; |
110 | 3.09k | } else if (fmt == AV_SAMPLE_FMT_S32) { |
111 | 2.91k | if (channels == 4) |
112 | 24 | c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx; |
113 | 2.89k | else if (channels == 6) |
114 | 39 | c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx; |
115 | 2.85k | else if (ARCH_X86_64 && channels == 8) |
116 | 261 | c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx; |
117 | 2.91k | } |
118 | 6.00k | } |
119 | 932k | if (EXTERNAL_XOP(cpu_flags)) { |
120 | 0 | c->lpc32 = ff_flac_lpc_32_xop; |
121 | 0 | } |
122 | 932k | #endif /* HAVE_X86ASM */ |
123 | 932k | } |