/src/opus/celt/kiss_fft.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*Copyright (c) 2003-2004, Mark Borgerding |
2 | | Lots of modifications by Jean-Marc Valin |
3 | | Copyright (c) 2005-2007, Xiph.Org Foundation |
4 | | Copyright (c) 2008, Xiph.Org Foundation, CSIRO |
5 | | |
6 | | All rights reserved. |
7 | | |
8 | | Redistribution and use in source and binary forms, with or without |
9 | | modification, are permitted provided that the following conditions are met: |
10 | | |
11 | | * Redistributions of source code must retain the above copyright notice, |
12 | | this list of conditions and the following disclaimer. |
13 | | * Redistributions in binary form must reproduce the above copyright notice, |
14 | | this list of conditions and the following disclaimer in the |
15 | | documentation and/or other materials provided with the distribution. |
16 | | |
17 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
18 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
21 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
22 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
23 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
24 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
25 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
26 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
27 | | POSSIBILITY OF SUCH DAMAGE.*/ |
28 | | |
29 | | /* This code is originally from Mark Borgerding's KISS-FFT but has been |
30 | | heavily modified to better suit Opus */ |
31 | | |
32 | | #ifndef SKIP_CONFIG_H |
33 | | # ifdef HAVE_CONFIG_H |
34 | | # include "config.h" |
35 | | # endif |
36 | | #endif |
37 | | |
38 | | #include "_kiss_fft_guts.h" |
39 | | #include "arch.h" |
40 | | #include "os_support.h" |
41 | | #include "mathops.h" |
42 | | #include "stack_alloc.h" |
43 | | |
44 | | #ifndef M_PI |
45 | | #define M_PI 3.141592653 |
46 | | #endif |
47 | | |
48 | | /* The guts header contains all the multiplication and addition macros that are defined for |
49 | | complex numbers. It also delares the kf_ internal functions. |
50 | | */ |
51 | | |
52 | | static void kf_bfly2( |
53 | | kiss_fft_cpx * Fout, |
54 | | int m, |
55 | | int N |
56 | | ) |
57 | 1.22M | { |
58 | 1.22M | kiss_fft_cpx * Fout2; |
59 | 1.22M | int i; |
60 | 1.22M | (void)m; |
61 | | #ifdef CUSTOM_MODES |
62 | | if (m==1) |
63 | | { |
64 | | celt_assert(m==1); |
65 | | for (i=0;i<N;i++) |
66 | | { |
67 | | kiss_fft_cpx t; |
68 | | Fout2 = Fout + 1; |
69 | | t = *Fout2; |
70 | | C_SUB( *Fout2 , *Fout , t ); |
71 | | C_ADDTO( *Fout , t ); |
72 | | Fout += 2; |
73 | | } |
74 | | } else |
75 | | #endif |
76 | 1.22M | { |
77 | 1.22M | celt_coef tw; |
78 | 1.22M | tw = QCONST32(0.7071067812f, COEF_SHIFT-1); |
79 | | /* We know that m==4 here because the radix-2 is just after a radix-4 */ |
80 | 1.22M | celt_assert(m==4); |
81 | 58.0M | for (i=0;i<N;i++) |
82 | 56.8M | { |
83 | 56.8M | kiss_fft_cpx t; |
84 | 56.8M | Fout2 = Fout + 4; |
85 | 56.8M | t = Fout2[0]; |
86 | 56.8M | C_SUB( Fout2[0] , Fout[0] , t ); |
87 | 56.8M | C_ADDTO( Fout[0] , t ); |
88 | | |
89 | 56.8M | t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); |
90 | 56.8M | t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); |
91 | 56.8M | C_SUB( Fout2[1] , Fout[1] , t ); |
92 | 56.8M | C_ADDTO( Fout[1] , t ); |
93 | | |
94 | 56.8M | t.r = Fout2[2].i; |
95 | 56.8M | t.i = NEG32_ovflw(Fout2[2].r); |
96 | 56.8M | C_SUB( Fout2[2] , Fout[2] , t ); |
97 | 56.8M | C_ADDTO( Fout[2] , t ); |
98 | | |
99 | 56.8M | t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); |
100 | 56.8M | t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); |
101 | 56.8M | C_SUB( Fout2[3] , Fout[3] , t ); |
102 | 56.8M | C_ADDTO( Fout[3] , t ); |
103 | 56.8M | Fout += 8; |
104 | 56.8M | } |
105 | 1.22M | } |
106 | 1.22M | } Line | Count | Source | 57 | 613k | { | 58 | 613k | kiss_fft_cpx * Fout2; | 59 | 613k | int i; | 60 | 613k | (void)m; | 61 | | #ifdef CUSTOM_MODES | 62 | | if (m==1) | 63 | | { | 64 | | celt_assert(m==1); | 65 | | for (i=0;i<N;i++) | 66 | | { | 67 | | kiss_fft_cpx t; | 68 | | Fout2 = Fout + 1; | 69 | | t = *Fout2; | 70 | | C_SUB( *Fout2 , *Fout , t ); | 71 | | C_ADDTO( *Fout , t ); | 72 | | Fout += 2; | 73 | | } | 74 | | } else | 75 | | #endif | 76 | 613k | { | 77 | 613k | celt_coef tw; | 78 | 613k | tw = QCONST32(0.7071067812f, COEF_SHIFT-1); | 79 | | /* We know that m==4 here because the radix-2 is just after a radix-4 */ | 80 | 613k | celt_assert(m==4); | 81 | 29.0M | for (i=0;i<N;i++) | 82 | 28.4M | { | 83 | 28.4M | kiss_fft_cpx t; | 84 | 28.4M | Fout2 = Fout + 4; | 85 | 28.4M | t = Fout2[0]; | 86 | 28.4M | C_SUB( Fout2[0] , Fout[0] , t ); | 87 | 28.4M | C_ADDTO( Fout[0] , t ); | 88 | | | 89 | 28.4M | t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); | 90 | 28.4M | t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); | 91 | 28.4M | C_SUB( Fout2[1] , Fout[1] , t ); | 92 | 28.4M | C_ADDTO( Fout[1] , t ); | 93 | | | 94 | 28.4M | t.r = Fout2[2].i; | 95 | 28.4M | t.i = NEG32_ovflw(Fout2[2].r); | 96 | 28.4M | C_SUB( Fout2[2] , Fout[2] , t ); | 97 | 28.4M | C_ADDTO( Fout[2] , t ); | 98 | | | 99 | 28.4M | t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); | 100 | 28.4M | t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); | 101 | 28.4M | C_SUB( Fout2[3] , Fout[3] , t ); | 102 | 28.4M | C_ADDTO( Fout[3] , t ); | 103 | 28.4M | Fout += 8; | 104 | 28.4M | } | 105 | 613k | } | 106 | 613k | } |
Line | Count | Source | 57 | 613k | { | 58 | 613k | kiss_fft_cpx * Fout2; | 59 | 613k | int i; | 60 | 613k | (void)m; | 61 | | #ifdef CUSTOM_MODES | 62 | | if (m==1) | 63 | | { | 64 | | celt_assert(m==1); | 65 | | for (i=0;i<N;i++) | 66 | | { | 67 | | kiss_fft_cpx t; | 68 | | Fout2 = Fout + 1; | 69 | | t = *Fout2; | 70 | | C_SUB( *Fout2 , *Fout , t ); | 71 | | C_ADDTO( *Fout , t ); | 72 | | Fout += 2; | 73 | | } | 74 | | } else | 75 | | #endif | 76 | 613k | { | 77 | 613k | celt_coef tw; | 78 | 613k | tw = QCONST32(0.7071067812f, COEF_SHIFT-1); | 79 | | /* We know that m==4 here because the radix-2 is just after a radix-4 */ | 80 | 613k | celt_assert(m==4); | 81 | 29.0M | for (i=0;i<N;i++) | 82 | 28.4M | { | 83 | 28.4M | kiss_fft_cpx t; | 84 | 28.4M | Fout2 = Fout + 4; | 85 | 28.4M | t = Fout2[0]; | 86 | 28.4M | C_SUB( Fout2[0] , Fout[0] , t ); | 87 | 28.4M | C_ADDTO( Fout[0] , t ); | 88 | | | 89 | 28.4M | t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); | 90 | 28.4M | t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); | 91 | 28.4M | C_SUB( Fout2[1] , Fout[1] , t ); | 92 | 28.4M | C_ADDTO( Fout[1] , t ); | 93 | | | 94 | 28.4M | t.r = Fout2[2].i; | 95 | 28.4M | t.i = NEG32_ovflw(Fout2[2].r); | 96 | 28.4M | C_SUB( Fout2[2] , Fout[2] , t ); | 97 | 28.4M | C_ADDTO( Fout[2] , t ); | 98 | | | 99 | 28.4M | t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); | 100 | 28.4M | t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); | 101 | 28.4M | C_SUB( Fout2[3] , Fout[3] , t ); | 102 | 28.4M | C_ADDTO( Fout[3] , t ); | 103 | 28.4M | Fout += 8; | 104 | 28.4M | } | 105 | 613k | } | 106 | 613k | } |
|
107 | | |
108 | | static void kf_bfly4( |
109 | | kiss_fft_cpx * Fout, |
110 | | const size_t fstride, |
111 | | const kiss_fft_state *st, |
112 | | int m, |
113 | | int N, |
114 | | int mm |
115 | | ) |
116 | 5.77M | { |
117 | 5.77M | int i; |
118 | | |
119 | 5.77M | if (m==1) |
120 | 4.61M | { |
121 | | /* Degenerate case where all the twiddles are 1. */ |
122 | 184M | for (i=0;i<N;i++) |
123 | 179M | { |
124 | 179M | kiss_fft_cpx scratch0, scratch1; |
125 | | |
126 | 179M | C_SUB( scratch0 , *Fout, Fout[2] ); |
127 | 179M | C_ADDTO(*Fout, Fout[2]); |
128 | 179M | C_ADD( scratch1 , Fout[1] , Fout[3] ); |
129 | 179M | C_SUB( Fout[2], *Fout, scratch1 ); |
130 | 179M | C_ADDTO( *Fout , scratch1 ); |
131 | 179M | C_SUB( scratch1 , Fout[1] , Fout[3] ); |
132 | | |
133 | 179M | Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); |
134 | 179M | Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); |
135 | 179M | Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); |
136 | 179M | Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); |
137 | 179M | Fout+=4; |
138 | 179M | } |
139 | 4.61M | } else { |
140 | 1.15M | int j; |
141 | 1.15M | kiss_fft_cpx scratch[6]; |
142 | 1.15M | const kiss_twiddle_cpx *tw1,*tw2,*tw3; |
143 | 1.15M | const int m2=2*m; |
144 | 1.15M | const int m3=3*m; |
145 | 1.15M | kiss_fft_cpx * Fout_beg = Fout; |
146 | 19.0M | for (i=0;i<N;i++) |
147 | 17.8M | { |
148 | 17.8M | Fout = Fout_beg + i*mm; |
149 | 17.8M | tw3 = tw2 = tw1 = st->twiddles; |
150 | | /* m is guaranteed to be a multiple of 4. */ |
151 | 142M | for (j=0;j<m;j++) |
152 | 124M | { |
153 | 124M | C_MUL(scratch[0],Fout[m] , *tw1 ); |
154 | 124M | C_MUL(scratch[1],Fout[m2] , *tw2 ); |
155 | 124M | C_MUL(scratch[2],Fout[m3] , *tw3 ); |
156 | | |
157 | 124M | C_SUB( scratch[5] , *Fout, scratch[1] ); |
158 | 124M | C_ADDTO(*Fout, scratch[1]); |
159 | 124M | C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
160 | 124M | C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
161 | 124M | C_SUB( Fout[m2], *Fout, scratch[3] ); |
162 | 124M | tw1 += fstride; |
163 | 124M | tw2 += fstride*2; |
164 | 124M | tw3 += fstride*3; |
165 | 124M | C_ADDTO( *Fout , scratch[3] ); |
166 | | |
167 | 124M | Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); |
168 | 124M | Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); |
169 | 124M | Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); |
170 | 124M | Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); |
171 | 124M | ++Fout; |
172 | 124M | } |
173 | 17.8M | } |
174 | 1.15M | } |
175 | 5.77M | } Line | Count | Source | 116 | 2.88M | { | 117 | 2.88M | int i; | 118 | | | 119 | 2.88M | if (m==1) | 120 | 2.30M | { | 121 | | /* Degenerate case where all the twiddles are 1. */ | 122 | 92.1M | for (i=0;i<N;i++) | 123 | 89.8M | { | 124 | 89.8M | kiss_fft_cpx scratch0, scratch1; | 125 | | | 126 | 89.8M | C_SUB( scratch0 , *Fout, Fout[2] ); | 127 | 89.8M | C_ADDTO(*Fout, Fout[2]); | 128 | 89.8M | C_ADD( scratch1 , Fout[1] , Fout[3] ); | 129 | 89.8M | C_SUB( Fout[2], *Fout, scratch1 ); | 130 | 89.8M | C_ADDTO( *Fout , scratch1 ); | 131 | 89.8M | C_SUB( scratch1 , Fout[1] , Fout[3] ); | 132 | | | 133 | 89.8M | Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); | 134 | 89.8M | Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); | 135 | 89.8M | Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); | 136 | 89.8M | Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); | 137 | 89.8M | Fout+=4; | 138 | 89.8M | } | 139 | 2.30M | } else { | 140 | 579k | int j; | 141 | 579k | kiss_fft_cpx scratch[6]; | 142 | 579k | const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 143 | 579k | const int m2=2*m; | 144 | 579k | const int m3=3*m; | 145 | 579k | kiss_fft_cpx * Fout_beg = Fout; | 146 | 9.51M | for (i=0;i<N;i++) | 147 | 8.93M | { | 148 | 8.93M | Fout = Fout_beg + i*mm; | 149 | 8.93M | tw3 = tw2 = tw1 = st->twiddles; | 150 | | /* m is guaranteed to be a multiple of 4. 
*/ | 151 | 71.2M | for (j=0;j<m;j++) | 152 | 62.3M | { | 153 | 62.3M | C_MUL(scratch[0],Fout[m] , *tw1 ); | 154 | 62.3M | C_MUL(scratch[1],Fout[m2] , *tw2 ); | 155 | 62.3M | C_MUL(scratch[2],Fout[m3] , *tw3 ); | 156 | | | 157 | 62.3M | C_SUB( scratch[5] , *Fout, scratch[1] ); | 158 | 62.3M | C_ADDTO(*Fout, scratch[1]); | 159 | 62.3M | C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 160 | 62.3M | C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 161 | 62.3M | C_SUB( Fout[m2], *Fout, scratch[3] ); | 162 | 62.3M | tw1 += fstride; | 163 | 62.3M | tw2 += fstride*2; | 164 | 62.3M | tw3 += fstride*3; | 165 | 62.3M | C_ADDTO( *Fout , scratch[3] ); | 166 | | | 167 | 62.3M | Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); | 168 | 62.3M | Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); | 169 | 62.3M | Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); | 170 | 62.3M | Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); | 171 | 62.3M | ++Fout; | 172 | 62.3M | } | 173 | 8.93M | } | 174 | 579k | } | 175 | 2.88M | } |
Line | Count | Source | 116 | 2.88M | { | 117 | 2.88M | int i; | 118 | | | 119 | 2.88M | if (m==1) | 120 | 2.30M | { | 121 | | /* Degenerate case where all the twiddles are 1. */ | 122 | 92.1M | for (i=0;i<N;i++) | 123 | 89.8M | { | 124 | 89.8M | kiss_fft_cpx scratch0, scratch1; | 125 | | | 126 | 89.8M | C_SUB( scratch0 , *Fout, Fout[2] ); | 127 | 89.8M | C_ADDTO(*Fout, Fout[2]); | 128 | 89.8M | C_ADD( scratch1 , Fout[1] , Fout[3] ); | 129 | 89.8M | C_SUB( Fout[2], *Fout, scratch1 ); | 130 | 89.8M | C_ADDTO( *Fout , scratch1 ); | 131 | 89.8M | C_SUB( scratch1 , Fout[1] , Fout[3] ); | 132 | | | 133 | 89.8M | Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); | 134 | 89.8M | Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); | 135 | 89.8M | Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); | 136 | 89.8M | Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); | 137 | 89.8M | Fout+=4; | 138 | 89.8M | } | 139 | 2.30M | } else { | 140 | 579k | int j; | 141 | 579k | kiss_fft_cpx scratch[6]; | 142 | 579k | const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 143 | 579k | const int m2=2*m; | 144 | 579k | const int m3=3*m; | 145 | 579k | kiss_fft_cpx * Fout_beg = Fout; | 146 | 9.51M | for (i=0;i<N;i++) | 147 | 8.93M | { | 148 | 8.93M | Fout = Fout_beg + i*mm; | 149 | 8.93M | tw3 = tw2 = tw1 = st->twiddles; | 150 | | /* m is guaranteed to be a multiple of 4. 
*/ | 151 | 71.2M | for (j=0;j<m;j++) | 152 | 62.3M | { | 153 | 62.3M | C_MUL(scratch[0],Fout[m] , *tw1 ); | 154 | 62.3M | C_MUL(scratch[1],Fout[m2] , *tw2 ); | 155 | 62.3M | C_MUL(scratch[2],Fout[m3] , *tw3 ); | 156 | | | 157 | 62.3M | C_SUB( scratch[5] , *Fout, scratch[1] ); | 158 | 62.3M | C_ADDTO(*Fout, scratch[1]); | 159 | 62.3M | C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 160 | 62.3M | C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 161 | 62.3M | C_SUB( Fout[m2], *Fout, scratch[3] ); | 162 | 62.3M | tw1 += fstride; | 163 | 62.3M | tw2 += fstride*2; | 164 | 62.3M | tw3 += fstride*3; | 165 | 62.3M | C_ADDTO( *Fout , scratch[3] ); | 166 | | | 167 | 62.3M | Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); | 168 | 62.3M | Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); | 169 | 62.3M | Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); | 170 | 62.3M | Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); | 171 | 62.3M | ++Fout; | 172 | 62.3M | } | 173 | 8.93M | } | 174 | 579k | } | 175 | 2.88M | } |
|
176 | | |
177 | | |
178 | | #ifndef RADIX_TWO_ONLY |
179 | | |
180 | | static void kf_bfly3( |
181 | | kiss_fft_cpx * Fout, |
182 | | const size_t fstride, |
183 | | const kiss_fft_state *st, |
184 | | int m, |
185 | | int N, |
186 | | int mm |
187 | | ) |
188 | 4.61M | { |
189 | 4.61M | int i; |
190 | 4.61M | size_t k; |
191 | 4.61M | const size_t m2 = 2*m; |
192 | 4.61M | const kiss_twiddle_cpx *tw1,*tw2; |
193 | 4.61M | kiss_fft_cpx scratch[5]; |
194 | 4.61M | kiss_twiddle_cpx epi3; |
195 | | |
196 | 4.61M | kiss_fft_cpx * Fout_beg = Fout; |
197 | | #ifdef FIXED_POINT |
198 | | /*epi3.r = -16384;*/ /* Unused */ |
199 | 2.30M | epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1); |
200 | | #else |
201 | | epi3 = st->twiddles[fstride*m]; |
202 | | #endif |
203 | 27.6M | for (i=0;i<N;i++) |
204 | 23.0M | { |
205 | 23.0M | Fout = Fout_beg + i*mm; |
206 | 23.0M | tw1=tw2=st->twiddles; |
207 | | /* For non-custom modes, m is guaranteed to be a multiple of 4. */ |
208 | 23.0M | k=m; |
209 | 239M | do { |
210 | | |
211 | 239M | C_MUL(scratch[1],Fout[m] , *tw1); |
212 | 239M | C_MUL(scratch[2],Fout[m2] , *tw2); |
213 | | |
214 | 239M | C_ADD(scratch[3],scratch[1],scratch[2]); |
215 | 239M | C_SUB(scratch[0],scratch[1],scratch[2]); |
216 | 239M | tw1 += fstride; |
217 | 239M | tw2 += fstride*2; |
218 | | |
219 | 239M | Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); |
220 | 239M | Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); |
221 | | |
222 | 239M | C_MULBYSCALAR( scratch[0] , epi3.i ); |
223 | | |
224 | 239M | C_ADDTO(*Fout,scratch[3]); |
225 | | |
226 | 239M | Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); |
227 | 239M | Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); |
228 | | |
229 | 239M | Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); |
230 | 239M | Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); |
231 | | |
232 | 239M | ++Fout; |
233 | 239M | } while(--k); |
234 | 23.0M | } |
235 | 4.61M | } Line | Count | Source | 188 | 2.30M | { | 189 | 2.30M | int i; | 190 | 2.30M | size_t k; | 191 | 2.30M | const size_t m2 = 2*m; | 192 | 2.30M | const kiss_twiddle_cpx *tw1,*tw2; | 193 | 2.30M | kiss_fft_cpx scratch[5]; | 194 | 2.30M | kiss_twiddle_cpx epi3; | 195 | | | 196 | 2.30M | kiss_fft_cpx * Fout_beg = Fout; | 197 | 2.30M | #ifdef FIXED_POINT | 198 | | /*epi3.r = -16384;*/ /* Unused */ | 199 | 2.30M | epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1); | 200 | | #else | 201 | | epi3 = st->twiddles[fstride*m]; | 202 | | #endif | 203 | 13.8M | for (i=0;i<N;i++) | 204 | 11.5M | { | 205 | 11.5M | Fout = Fout_beg + i*mm; | 206 | 11.5M | tw1=tw2=st->twiddles; | 207 | | /* For non-custom modes, m is guaranteed to be a multiple of 4. */ | 208 | 11.5M | k=m; | 209 | 119M | do { | 210 | | | 211 | 119M | C_MUL(scratch[1],Fout[m] , *tw1); | 212 | 119M | C_MUL(scratch[2],Fout[m2] , *tw2); | 213 | | | 214 | 119M | C_ADD(scratch[3],scratch[1],scratch[2]); | 215 | 119M | C_SUB(scratch[0],scratch[1],scratch[2]); | 216 | 119M | tw1 += fstride; | 217 | 119M | tw2 += fstride*2; | 218 | | | 219 | 119M | Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); | 220 | 119M | Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); | 221 | | | 222 | 119M | C_MULBYSCALAR( scratch[0] , epi3.i ); | 223 | | | 224 | 119M | C_ADDTO(*Fout,scratch[3]); | 225 | | | 226 | 119M | Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); | 227 | 119M | Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); | 228 | | | 229 | 119M | Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); | 230 | 119M | Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); | 231 | | | 232 | 119M | ++Fout; | 233 | 119M | } while(--k); | 234 | 11.5M | } | 235 | 2.30M | } |
Line | Count | Source | 188 | 2.30M | { | 189 | 2.30M | int i; | 190 | 2.30M | size_t k; | 191 | 2.30M | const size_t m2 = 2*m; | 192 | 2.30M | const kiss_twiddle_cpx *tw1,*tw2; | 193 | 2.30M | kiss_fft_cpx scratch[5]; | 194 | 2.30M | kiss_twiddle_cpx epi3; | 195 | | | 196 | 2.30M | kiss_fft_cpx * Fout_beg = Fout; | 197 | | #ifdef FIXED_POINT | 198 | | /*epi3.r = -16384;*/ /* Unused */ | 199 | | epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1); | 200 | | #else | 201 | 2.30M | epi3 = st->twiddles[fstride*m]; | 202 | 2.30M | #endif | 203 | 13.8M | for (i=0;i<N;i++) | 204 | 11.5M | { | 205 | 11.5M | Fout = Fout_beg + i*mm; | 206 | 11.5M | tw1=tw2=st->twiddles; | 207 | | /* For non-custom modes, m is guaranteed to be a multiple of 4. */ | 208 | 11.5M | k=m; | 209 | 119M | do { | 210 | | | 211 | 119M | C_MUL(scratch[1],Fout[m] , *tw1); | 212 | 119M | C_MUL(scratch[2],Fout[m2] , *tw2); | 213 | | | 214 | 119M | C_ADD(scratch[3],scratch[1],scratch[2]); | 215 | 119M | C_SUB(scratch[0],scratch[1],scratch[2]); | 216 | 119M | tw1 += fstride; | 217 | 119M | tw2 += fstride*2; | 218 | | | 219 | 119M | Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); | 220 | 119M | Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); | 221 | | | 222 | 119M | C_MULBYSCALAR( scratch[0] , epi3.i ); | 223 | | | 224 | 119M | C_ADDTO(*Fout,scratch[3]); | 225 | | | 226 | 119M | Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); | 227 | 119M | Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); | 228 | | | 229 | 119M | Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); | 230 | 119M | Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); | 231 | | | 232 | 119M | ++Fout; | 233 | 119M | } while(--k); | 234 | 11.5M | } | 235 | 2.30M | } |
|
236 | | |
237 | | |
238 | | #ifndef OVERRIDE_kf_bfly5 |
239 | | static void kf_bfly5( |
240 | | kiss_fft_cpx * Fout, |
241 | | const size_t fstride, |
242 | | const kiss_fft_state *st, |
243 | | int m, |
244 | | int N, |
245 | | int mm |
246 | | ) |
247 | 4.61M | { |
248 | 4.61M | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
249 | 4.61M | int i, u; |
250 | 4.61M | kiss_fft_cpx scratch[13]; |
251 | 4.61M | const kiss_twiddle_cpx *tw; |
252 | 4.61M | kiss_twiddle_cpx ya,yb; |
253 | 4.61M | kiss_fft_cpx * Fout_beg = Fout; |
254 | | |
255 | | #ifdef FIXED_POINT |
256 | 2.30M | ya.r = QCONST32(0.30901699f, COEF_SHIFT-1); |
257 | 2.30M | ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1); |
258 | 2.30M | yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1); |
259 | 2.30M | yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1); |
260 | | #else |
261 | | ya = st->twiddles[fstride*m]; |
262 | | yb = st->twiddles[fstride*2*m]; |
263 | | #endif |
264 | 4.61M | tw=st->twiddles; |
265 | | |
266 | 9.23M | for (i=0;i<N;i++) |
267 | 4.61M | { |
268 | 4.61M | Fout = Fout_beg + i*mm; |
269 | 4.61M | Fout0=Fout; |
270 | 4.61M | Fout1=Fout0+m; |
271 | 4.61M | Fout2=Fout0+2*m; |
272 | 4.61M | Fout3=Fout0+3*m; |
273 | 4.61M | Fout4=Fout0+4*m; |
274 | | |
275 | | /* For non-custom modes, m is guaranteed to be a multiple of 4. */ |
276 | 148M | for ( u=0; u<m; ++u ) { |
277 | 143M | scratch[0] = *Fout0; |
278 | | |
279 | 143M | C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
280 | 143M | C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
281 | 143M | C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); |
282 | 143M | C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); |
283 | | |
284 | 143M | C_ADD( scratch[7],scratch[1],scratch[4]); |
285 | 143M | C_SUB( scratch[10],scratch[1],scratch[4]); |
286 | 143M | C_ADD( scratch[8],scratch[2],scratch[3]); |
287 | 143M | C_SUB( scratch[9],scratch[2],scratch[3]); |
288 | | |
289 | 143M | Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); |
290 | 143M | Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); |
291 | | |
292 | 143M | scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); |
293 | 143M | scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); |
294 | | |
295 | 143M | scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); |
296 | 143M | scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); |
297 | | |
298 | 143M | C_SUB(*Fout1,scratch[5],scratch[6]); |
299 | 143M | C_ADD(*Fout4,scratch[5],scratch[6]); |
300 | | |
301 | 143M | scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); |
302 | 143M | scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); |
303 | 143M | scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); |
304 | 143M | scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); |
305 | | |
306 | 143M | C_ADD(*Fout2,scratch[11],scratch[12]); |
307 | 143M | C_SUB(*Fout3,scratch[11],scratch[12]); |
308 | | |
309 | 143M | ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; |
310 | 143M | } |
311 | 4.61M | } |
312 | 4.61M | } Line | Count | Source | 247 | 2.30M | { | 248 | 2.30M | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 249 | 2.30M | int i, u; | 250 | 2.30M | kiss_fft_cpx scratch[13]; | 251 | 2.30M | const kiss_twiddle_cpx *tw; | 252 | 2.30M | kiss_twiddle_cpx ya,yb; | 253 | 2.30M | kiss_fft_cpx * Fout_beg = Fout; | 254 | | | 255 | 2.30M | #ifdef FIXED_POINT | 256 | 2.30M | ya.r = QCONST32(0.30901699f, COEF_SHIFT-1); | 257 | 2.30M | ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1); | 258 | 2.30M | yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1); | 259 | 2.30M | yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1); | 260 | | #else | 261 | | ya = st->twiddles[fstride*m]; | 262 | | yb = st->twiddles[fstride*2*m]; | 263 | | #endif | 264 | 2.30M | tw=st->twiddles; | 265 | | | 266 | 4.61M | for (i=0;i<N;i++) | 267 | 2.30M | { | 268 | 2.30M | Fout = Fout_beg + i*mm; | 269 | 2.30M | Fout0=Fout; | 270 | 2.30M | Fout1=Fout0+m; | 271 | 2.30M | Fout2=Fout0+2*m; | 272 | 2.30M | Fout3=Fout0+3*m; | 273 | 2.30M | Fout4=Fout0+4*m; | 274 | | | 275 | | /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ | 276 | 74.1M | for ( u=0; u<m; ++u ) { | 277 | 71.8M | scratch[0] = *Fout0; | 278 | | | 279 | 71.8M | C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 280 | 71.8M | C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 281 | 71.8M | C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | 282 | 71.8M | C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | 283 | | | 284 | 71.8M | C_ADD( scratch[7],scratch[1],scratch[4]); | 285 | 71.8M | C_SUB( scratch[10],scratch[1],scratch[4]); | 286 | 71.8M | C_ADD( scratch[8],scratch[2],scratch[3]); | 287 | 71.8M | C_SUB( scratch[9],scratch[2],scratch[3]); | 288 | | | 289 | 71.8M | Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); | 290 | 71.8M | Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); | 291 | | | 292 | 71.8M | scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); | 293 | 71.8M | scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); | 294 | | | 295 | 71.8M | scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); | 296 | 71.8M | scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); | 297 | | | 298 | 71.8M | C_SUB(*Fout1,scratch[5],scratch[6]); | 299 | 71.8M | C_ADD(*Fout4,scratch[5],scratch[6]); | 300 | | | 301 | 71.8M | scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); | 302 | 71.8M | scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); | 303 | 71.8M | scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); | 304 | 71.8M | scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); | 305 | | | 306 | 71.8M | C_ADD(*Fout2,scratch[11],scratch[12]); | 307 | 71.8M | C_SUB(*Fout3,scratch[11],scratch[12]); | 308 | | | 309 | 71.8M | ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | 310 
| 71.8M | } | 311 | 2.30M | } | 312 | 2.30M | } |
Line | Count | Source | 247 | 2.30M | { | 248 | 2.30M | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 249 | 2.30M | int i, u; | 250 | 2.30M | kiss_fft_cpx scratch[13]; | 251 | 2.30M | const kiss_twiddle_cpx *tw; | 252 | 2.30M | kiss_twiddle_cpx ya,yb; | 253 | 2.30M | kiss_fft_cpx * Fout_beg = Fout; | 254 | | | 255 | | #ifdef FIXED_POINT | 256 | | ya.r = QCONST32(0.30901699f, COEF_SHIFT-1); | 257 | | ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1); | 258 | | yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1); | 259 | | yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1); | 260 | | #else | 261 | 2.30M | ya = st->twiddles[fstride*m]; | 262 | 2.30M | yb = st->twiddles[fstride*2*m]; | 263 | 2.30M | #endif | 264 | 2.30M | tw=st->twiddles; | 265 | | | 266 | 4.61M | for (i=0;i<N;i++) | 267 | 2.30M | { | 268 | 2.30M | Fout = Fout_beg + i*mm; | 269 | 2.30M | Fout0=Fout; | 270 | 2.30M | Fout1=Fout0+m; | 271 | 2.30M | Fout2=Fout0+2*m; | 272 | 2.30M | Fout3=Fout0+3*m; | 273 | 2.30M | Fout4=Fout0+4*m; | 274 | | | 275 | | /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ | 276 | 74.1M | for ( u=0; u<m; ++u ) { | 277 | 71.8M | scratch[0] = *Fout0; | 278 | | | 279 | 71.8M | C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 280 | 71.8M | C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 281 | 71.8M | C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | 282 | 71.8M | C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | 283 | | | 284 | 71.8M | C_ADD( scratch[7],scratch[1],scratch[4]); | 285 | 71.8M | C_SUB( scratch[10],scratch[1],scratch[4]); | 286 | 71.8M | C_ADD( scratch[8],scratch[2],scratch[3]); | 287 | 71.8M | C_SUB( scratch[9],scratch[2],scratch[3]); | 288 | | | 289 | 71.8M | Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); | 290 | 71.8M | Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); | 291 | | | 292 | 71.8M | scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); | 293 | 71.8M | scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); | 294 | | | 295 | 71.8M | scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); | 296 | 71.8M | scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); | 297 | | | 298 | 71.8M | C_SUB(*Fout1,scratch[5],scratch[6]); | 299 | 71.8M | C_ADD(*Fout4,scratch[5],scratch[6]); | 300 | | | 301 | 71.8M | scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); | 302 | 71.8M | scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); | 303 | 71.8M | scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); | 304 | 71.8M | scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); | 305 | | | 306 | 71.8M | C_ADD(*Fout2,scratch[11],scratch[12]); | 307 | 71.8M | C_SUB(*Fout3,scratch[11],scratch[12]); | 308 | | | 309 | 71.8M | ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | 310 
| 71.8M | } | 311 | 2.30M | } | 312 | 2.30M | } |
|
313 | | #endif /* OVERRIDE_kf_bfly5 */ |
314 | | |
315 | | |
316 | | #endif |
317 | | |
318 | | |
319 | | #ifdef CUSTOM_MODES |
320 | | |
321 | | static |
322 | | void compute_bitrev_table( |
323 | | int Fout, |
324 | | opus_int16 *f, |
325 | | const size_t fstride, |
326 | | int in_stride, |
327 | | opus_int16 * factors, |
328 | | const kiss_fft_state *st |
329 | | ) |
330 | | { |
331 | | const int p=*factors++; /* the radix */ |
332 | | const int m=*factors++; /* stage's fft length/p */ |
333 | | |
334 | | /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ |
335 | | if (m==1) |
336 | | { |
337 | | int j; |
338 | | for (j=0;j<p;j++) |
339 | | { |
340 | | *f = Fout+j; |
341 | | f += fstride*in_stride; |
342 | | } |
343 | | } else { |
344 | | int j; |
345 | | for (j=0;j<p;j++) |
346 | | { |
347 | | compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st); |
348 | | f += fstride*in_stride; |
349 | | Fout += m; |
350 | | } |
351 | | } |
352 | | } |
353 | | |
354 | | /* facbuf is populated by p1,m1,p2,m2, ... |
355 | | where |
356 | | p[i] * m[i] = m[i-1] |
357 | | m0 = n */ |
/* Factors n into the radices the butterfly kernels support (2 and 4,
   plus 3 and 5 unless RADIX_TWO_ONLY), writing (p,m) pairs into facbuf
   as described above.
   Returns 1 on success, 0 if n contains an unsupported prime factor. */
static
int kf_factor(int n,opus_int16 * facbuf)
{
   int p=4;
   int i;
   int stages=0;
   int nbak = n;

   /*factor out powers of 4, powers of 2, then any remaining primes */
   do {
      /* Advance p to the smallest factor of n, trying 4, 2, 3, 5, 7, ... */
      while (n % p) {
         switch (p) {
            case 4: p = 2; break;
            case 2: p = 3; break;
            default: p += 2; break;
         }
         /* p*p > n means the remaining n is prime; the 32000 bound keeps
            p within range of the opus_int16 facbuf entries. */
         if (p>32000 || (opus_int32)p*(opus_int32)p > n)
            p = n;            /* no more factors, skip to end */
      }
      n /= p;
#ifdef RADIX_TWO_ONLY
      if (p!=2 && p != 4)
#else
      if (p>5)
#endif
      {
         /* Radix not supported by the kernels: reject this FFT size. */
         return 0;
      }
      facbuf[2*stages] = p;
      if (p==2 && stages > 1)
      {
         /* At most one factor of 2 can remain after the powers of 4 are
            pulled out, and with stages > 1 the entry at stage 1 must be a 4.
            Swap the two (the product of the factors is unchanged) so the
            lone radix-2 stage does not end up first after the reversal
            below. */
         facbuf[2*stages] = 4;
         facbuf[2] = 2;
      }
      stages++;
   } while (n > 1);
   n = nbak;
   /* Reverse the order to get the radix 4 at the end, so we can use the
      fast degenerate case. It turns out that reversing the order also
      improves the noise behaviour. */
   for (i=0;i<stages/2;i++)
   {
      int tmp;
      tmp = facbuf[2*i];
      facbuf[2*i] = facbuf[2*(stages-i-1)];
      facbuf[2*(stages-i-1)] = tmp;
   }
   /* Fill in m[i] = n/(p[0]*...*p[i]), the sub-FFT length remaining after
      each stage; the final stage gets m == 1. */
   for (i=0;i<stages;i++)
   {
      n /= facbuf[2*i];
      facbuf[2*i+1] = n;
   }
   return 1;
}
412 | | |
/* Fills twiddles[i] with e^(-2*pi*j*i/nfft) for i in [0, nfft), in the
   numeric format selected by the build (fixed or floating point). */
static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
{
   int i;
#ifdef FIXED_POINT
   for (i=0;i<nfft;++i) {
      opus_val32 phase = -i;
#ifdef ENABLE_QEXT
      /* 32-bit twiddles: round to nearest (floor(.5+x)) and saturate the
         cos/sin peak of +1.0 (which would be 2^31) to 2^31-1. */
      twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
      twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
#else
      /* Fixed-point complex exponential; the phase is scaled by 2^17/nfft
         (see kf_cexp2 for the exact argument format). */
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
#endif
   }
#else
   /* Float build: direct evaluation in double, stored via kf_cexp. */
   for (i=0;i<nfft;++i) {
      const double pi=3.14159265358979323846264338327;
      double phase = ( -2*pi /nfft ) * i;
      kf_cexp(twiddles+i, phase );
   }
#endif
}
434 | | |
/* Generic C build: no architecture-specific FFT state to set up.
   Returns 0 (success); a nonzero return would make the caller fail. */
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
   (void)st;   /* unused in the generic build */
   return 0;
}
439 | | |
/*
 * Allocates all storage needed for the fft and ifft.
 * The state is assembled from several separate allocations (the state
 * struct, the twiddle table and the bit-reversal table), so release it
 * with opus_fft_free() rather than a plain free().
 * */
446 | | kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, |
447 | | const kiss_fft_state *base, int arch) |
448 | | { |
449 | | kiss_fft_state *st=NULL; |
450 | | size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ |
451 | | |
452 | | if ( lenmem==NULL ) { |
453 | | st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded ); |
454 | | }else{ |
455 | | if (mem != NULL && *lenmem >= memneeded) |
456 | | st = (kiss_fft_state*)mem; |
457 | | *lenmem = memneeded; |
458 | | } |
459 | | if (st) { |
460 | | opus_int16 *bitrev; |
461 | | kiss_twiddle_cpx *twiddles; |
462 | | |
463 | | st->nfft=nfft; |
464 | | #ifdef FIXED_POINT |
465 | | st->scale_shift = celt_ilog2(st->nfft); |
466 | | # ifdef ENABLE_QEXT |
467 | | if (st->nfft == 1<<st->scale_shift) |
468 | | st->scale = QCONST32(1.0f, 30); |
469 | | else |
470 | | st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft; |
471 | | # else |
472 | | if (st->nfft == 1<<st->scale_shift) |
473 | | st->scale = Q15ONE; |
474 | | else |
475 | | st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); |
476 | | # endif |
477 | | #else |
478 | | st->scale = 1.f/nfft; |
479 | | #endif |
480 | | if (base != NULL) |
481 | | { |
482 | | st->twiddles = base->twiddles; |
483 | | st->shift = 0; |
484 | | while (st->shift < 32 && nfft<<st->shift != base->nfft) |
485 | | st->shift++; |
486 | | if (st->shift>=32) |
487 | | goto fail; |
488 | | } else { |
489 | | st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); |
490 | | compute_twiddles(twiddles, nfft); |
491 | | st->shift = -1; |
492 | | } |
493 | | if (!kf_factor(nfft,st->factors)) |
494 | | { |
495 | | goto fail; |
496 | | } |
497 | | |
498 | | /* bitrev */ |
499 | | st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); |
500 | | if (st->bitrev==NULL) |
501 | | goto fail; |
502 | | compute_bitrev_table(0, bitrev, 1,1, st->factors,st); |
503 | | |
504 | | /* Initialize architecture specific fft parameters */ |
505 | | if (opus_fft_alloc_arch(st, arch)) |
506 | | goto fail; |
507 | | } |
508 | | return st; |
509 | | fail: |
510 | | opus_fft_free(st, arch); |
511 | | return NULL; |
512 | | } |
513 | | |
/* Convenience wrapper around opus_fft_alloc_twiddles() with no shared base
   state: the resulting FFT allocates and owns its own twiddle table. */
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
}
518 | | |
/* Generic C build: no architecture-specific FFT state to release. */
void opus_fft_free_arch_c(kiss_fft_state *st) {
   (void)st;   /* unused in the generic build */
}
522 | | |
523 | | void opus_fft_free(const kiss_fft_state *cfg, int arch) |
524 | | { |
525 | | if (cfg) |
526 | | { |
527 | | opus_fft_free_arch((kiss_fft_state *)cfg, arch); |
528 | | opus_free((opus_int16*)cfg->bitrev); |
529 | | if (cfg->shift < 0) |
530 | | opus_free((kiss_twiddle_cpx*)cfg->twiddles); |
531 | | opus_free((kiss_fft_state*)cfg); |
532 | | } |
533 | | } |
534 | | |
535 | | #endif /* CUSTOM_MODES */ |
536 | | |
537 | | #ifdef FIXED_POINT |
538 | 5.52M | static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { |
539 | 5.52M | int shift; |
540 | 5.52M | shift = IMIN(step, *total); |
541 | 5.52M | *total -= shift; |
542 | 5.52M | if (shift == 1) { |
543 | 399k | int i; |
544 | 106M | for (i=0;i<N;i++) { |
545 | 105M | x[i].r = SHR32(x[i].r, 1); |
546 | 105M | x[i].i = SHR32(x[i].i, 1); |
547 | 105M | } |
548 | 5.12M | } else if (shift>0) { |
549 | 973k | int i; |
550 | 268M | for (i=0;i<N;i++) { |
551 | 267M | x[i].r = PSHR32(x[i].r, shift); |
552 | 267M | x[i].i = PSHR32(x[i].i, shift); |
553 | 267M | } |
554 | 973k | } |
555 | 5.52M | } |
556 | | #else |
557 | | #define fft_downshift(x, N, total, step) |
558 | | #endif |
559 | | |
/* In-place FFT kernel: fout must already hold the (scaled) input in
   bit-reversed order.  Runs the butterfly for each factor recorded in
   st->factors, iterating from the last recorded stage back to the first.
   In fixed-point builds, `downshift` (added by ARG_FIXED) is the total
   number of right-shifts still owed for output scaling; each stage takes
   at most what its radix can absorb via fft_downshift(). */
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
{
   int m2, m;
   int p;
   int L;
   int fstride[MAXFACTORS];
   int i;
   int shift;

   /* st->shift can be -1 */
   shift = st->shift>0 ? st->shift : 0;

   /* fstride[i] is the product of the radices of stages 0..i-1; it is the
      stride used to index the (possibly shared) twiddle table. */
   fstride[0] = 1;
   L=0;
   do {
      p = st->factors[2*L];
      m = st->factors[2*L+1];
      fstride[L+1] = fstride[L]*p;
      L++;
   } while(m!=1);
   m = st->factors[2*L-1];
   for (i=L-1;i>=0;i--)
   {
      /* m2 is the sub-FFT length of the next (earlier) stage; 1 for the
         final iteration. */
      if (i!=0)
         m2 = st->factors[2*i-1];
      else
         m2 = 1;
      /* Dispatch on the stage's radix.  Each radix consumes a different
         number of downshift bits (its worst-case bit growth). */
      switch (st->factors[2*i])
      {
      case 2:
         fft_downshift(fout, st->nfft, &downshift, 1);
         kf_bfly2(fout, m, fstride[i]);
         break;
      case 4:
         fft_downshift(fout, st->nfft, &downshift, 2);
         kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
         break;
#ifndef RADIX_TWO_ONLY
      case 3:
         fft_downshift(fout, st->nfft, &downshift, 2);
         kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
         break;
      case 5:
         fft_downshift(fout, st->nfft, &downshift, 3);
         kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
         break;
#endif
      }
      m = m2;
   }
   /* Apply whatever scaling budget remains in one final pass. */
   fft_downshift(fout, st->nfft, &downshift, downshift);
}
612 | | |
613 | | void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) |
614 | 349k | { |
615 | 349k | int i; |
616 | 349k | celt_coef scale; |
617 | | #ifdef FIXED_POINT |
618 | | /* Allows us to scale with MULT16_32_Q16(), which is faster than |
619 | | MULT16_32_Q15() on ARM. */ |
620 | | int scale_shift = st->scale_shift-1; |
621 | | #endif |
622 | 349k | scale = st->scale; |
623 | | |
624 | 349k | celt_assert2 (fin != fout, "In-place FFT not supported"); |
625 | | /* Bit-reverse the input */ |
626 | 168M | for (i=0;i<st->nfft;i++) |
627 | 167M | { |
628 | 167M | kiss_fft_cpx x = fin[i]; |
629 | 167M | fout[st->bitrev[i]].r = S_MUL2(x.r, scale); |
630 | 167M | fout[st->bitrev[i]].i = S_MUL2(x.i, scale); |
631 | 167M | } |
632 | 349k | opus_fft_impl(st, fout ARG_FIXED(scale_shift)); |
633 | 349k | } Line | Count | Source | 614 | 174k | { | 615 | 174k | int i; | 616 | 174k | celt_coef scale; | 617 | 174k | #ifdef FIXED_POINT | 618 | | /* Allows us to scale with MULT16_32_Q16(), which is faster than | 619 | | MULT16_32_Q15() on ARM. */ | 620 | 174k | int scale_shift = st->scale_shift-1; | 621 | 174k | #endif | 622 | 174k | scale = st->scale; | 623 | | | 624 | 174k | celt_assert2 (fin != fout, "In-place FFT not supported"); | 625 | | /* Bit-reverse the input */ | 626 | 84.0M | for (i=0;i<st->nfft;i++) | 627 | 83.9M | { | 628 | 83.9M | kiss_fft_cpx x = fin[i]; | 629 | 83.9M | fout[st->bitrev[i]].r = S_MUL2(x.r, scale); | 630 | 83.9M | fout[st->bitrev[i]].i = S_MUL2(x.i, scale); | 631 | 83.9M | } | 632 | 174k | opus_fft_impl(st, fout ARG_FIXED(scale_shift)); | 633 | 174k | } |
Line | Count | Source | 614 | 174k | { | 615 | 174k | int i; | 616 | 174k | celt_coef scale; | 617 | | #ifdef FIXED_POINT | 618 | | /* Allows us to scale with MULT16_32_Q16(), which is faster than | 619 | | MULT16_32_Q15() on ARM. */ | 620 | | int scale_shift = st->scale_shift-1; | 621 | | #endif | 622 | 174k | scale = st->scale; | 623 | | | 624 | 174k | celt_assert2 (fin != fout, "In-place FFT not supported"); | 625 | | /* Bit-reverse the input */ | 626 | 84.0M | for (i=0;i<st->nfft;i++) | 627 | 83.9M | { | 628 | 83.9M | kiss_fft_cpx x = fin[i]; | 629 | 83.9M | fout[st->bitrev[i]].r = S_MUL2(x.r, scale); | 630 | 83.9M | fout[st->bitrev[i]].i = S_MUL2(x.i, scale); | 631 | 83.9M | } | 632 | 174k | opus_fft_impl(st, fout ARG_FIXED(scale_shift)); | 633 | 174k | } |
|
634 | | |
635 | | |
636 | | void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) |
637 | 0 | { |
638 | 0 | int i; |
639 | 0 | celt_assert2 (fin != fout, "In-place FFT not supported"); |
640 | | /* Bit-reverse the input */ |
641 | 0 | for (i=0;i<st->nfft;i++) |
642 | 0 | fout[st->bitrev[i]] = fin[i]; |
643 | 0 | for (i=0;i<st->nfft;i++) |
644 | 0 | fout[i].i = -fout[i].i; |
645 | 0 | opus_fft_impl(st, fout ARG_FIXED(0)); |
646 | 0 | for (i=0;i<st->nfft;i++) |
647 | 0 | fout[i].i = -fout[i].i; |
648 | 0 | } Unexecuted instantiation: opus_ifft_c Unexecuted instantiation: opus_ifft_c |