Coverage Report

Created: 2025-07-23 07:59

/src/opus/celt/kiss_fft.c
Line
Count
Source (jump to first uncovered line)
1
/*Copyright (c) 2003-2004, Mark Borgerding
2
  Lots of modifications by Jean-Marc Valin
3
  Copyright (c) 2005-2007, Xiph.Org Foundation
4
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
5
6
  All rights reserved.
7
8
  Redistribution and use in source and binary forms, with or without
9
   modification, are permitted provided that the following conditions are met:
10
11
    * Redistributions of source code must retain the above copyright notice,
12
       this list of conditions and the following disclaimer.
13
    * Redistributions in binary form must reproduce the above copyright notice,
14
       this list of conditions and the following disclaimer in the
15
       documentation and/or other materials provided with the distribution.
16
17
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
  POSSIBILITY OF SUCH DAMAGE.*/
28
29
/* This code is originally from Mark Borgerding's KISS-FFT but has been
30
   heavily modified to better suit Opus */
31
32
#ifndef SKIP_CONFIG_H
33
#  ifdef HAVE_CONFIG_H
34
#    include "config.h"
35
#  endif
36
#endif
37
38
#include "_kiss_fft_guts.h"
39
#include "arch.h"
40
#include "os_support.h"
41
#include "mathops.h"
42
#include "stack_alloc.h"
43
44
#ifndef M_PI
45
#define M_PI 3.141592653
46
#endif
47
48
/* The guts header contains all the multiplication and addition macros that are defined for
49
   complex numbers.  It also delares the kf_ internal functions.
50
*/
51
52
static void kf_bfly2(
53
                     kiss_fft_cpx * Fout,
54
                     int m,
55
                     int N
56
                    )
57
1.22M
{
58
1.22M
   kiss_fft_cpx * Fout2;
59
1.22M
   int i;
60
1.22M
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
1.22M
   {
77
1.22M
      celt_coef tw;
78
1.22M
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
1.22M
      celt_assert(m==4);
81
58.0M
      for (i=0;i<N;i++)
82
56.8M
      {
83
56.8M
         kiss_fft_cpx t;
84
56.8M
         Fout2 = Fout + 4;
85
56.8M
         t = Fout2[0];
86
56.8M
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
56.8M
         C_ADDTO( Fout[0] ,  t );
88
89
56.8M
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
56.8M
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
56.8M
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
56.8M
         C_ADDTO( Fout[1] ,  t );
93
94
56.8M
         t.r = Fout2[2].i;
95
56.8M
         t.i = NEG32_ovflw(Fout2[2].r);
96
56.8M
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
56.8M
         C_ADDTO( Fout[2] ,  t );
98
99
56.8M
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
56.8M
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
56.8M
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
56.8M
         C_ADDTO( Fout[3] ,  t );
103
56.8M
         Fout += 8;
104
56.8M
      }
105
1.22M
   }
106
1.22M
}
kiss_fft.c:kf_bfly2
Line
Count
Source
57
613k
{
58
613k
   kiss_fft_cpx * Fout2;
59
613k
   int i;
60
613k
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
613k
   {
77
613k
      celt_coef tw;
78
613k
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
613k
      celt_assert(m==4);
81
29.0M
      for (i=0;i<N;i++)
82
28.4M
      {
83
28.4M
         kiss_fft_cpx t;
84
28.4M
         Fout2 = Fout + 4;
85
28.4M
         t = Fout2[0];
86
28.4M
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
28.4M
         C_ADDTO( Fout[0] ,  t );
88
89
28.4M
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
28.4M
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
28.4M
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
28.4M
         C_ADDTO( Fout[1] ,  t );
93
94
28.4M
         t.r = Fout2[2].i;
95
28.4M
         t.i = NEG32_ovflw(Fout2[2].r);
96
28.4M
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
28.4M
         C_ADDTO( Fout[2] ,  t );
98
99
28.4M
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
28.4M
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
28.4M
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
28.4M
         C_ADDTO( Fout[3] ,  t );
103
28.4M
         Fout += 8;
104
28.4M
      }
105
613k
   }
106
613k
}
kiss_fft.c:kf_bfly2
Line
Count
Source
57
613k
{
58
613k
   kiss_fft_cpx * Fout2;
59
613k
   int i;
60
613k
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
613k
   {
77
613k
      celt_coef tw;
78
613k
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
613k
      celt_assert(m==4);
81
29.0M
      for (i=0;i<N;i++)
82
28.4M
      {
83
28.4M
         kiss_fft_cpx t;
84
28.4M
         Fout2 = Fout + 4;
85
28.4M
         t = Fout2[0];
86
28.4M
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
28.4M
         C_ADDTO( Fout[0] ,  t );
88
89
28.4M
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
28.4M
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
28.4M
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
28.4M
         C_ADDTO( Fout[1] ,  t );
93
94
28.4M
         t.r = Fout2[2].i;
95
28.4M
         t.i = NEG32_ovflw(Fout2[2].r);
96
28.4M
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
28.4M
         C_ADDTO( Fout[2] ,  t );
98
99
28.4M
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
28.4M
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
28.4M
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
28.4M
         C_ADDTO( Fout[3] ,  t );
103
28.4M
         Fout += 8;
104
28.4M
      }
105
613k
   }
106
613k
}
107
108
static void kf_bfly4(
109
                     kiss_fft_cpx * Fout,
110
                     const size_t fstride,
111
                     const kiss_fft_state *st,
112
                     int m,
113
                     int N,
114
                     int mm
115
                    )
116
5.77M
{
117
5.77M
   int i;
118
119
5.77M
   if (m==1)
120
4.61M
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
184M
      for (i=0;i<N;i++)
123
179M
      {
124
179M
         kiss_fft_cpx scratch0, scratch1;
125
126
179M
         C_SUB( scratch0 , *Fout, Fout[2] );
127
179M
         C_ADDTO(*Fout, Fout[2]);
128
179M
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
179M
         C_SUB( Fout[2], *Fout, scratch1 );
130
179M
         C_ADDTO( *Fout , scratch1 );
131
179M
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
179M
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
179M
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
179M
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
179M
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
179M
         Fout+=4;
138
179M
      }
139
4.61M
   } else {
140
1.15M
      int j;
141
1.15M
      kiss_fft_cpx scratch[6];
142
1.15M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
1.15M
      const int m2=2*m;
144
1.15M
      const int m3=3*m;
145
1.15M
      kiss_fft_cpx * Fout_beg = Fout;
146
19.0M
      for (i=0;i<N;i++)
147
17.8M
      {
148
17.8M
         Fout = Fout_beg + i*mm;
149
17.8M
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
142M
         for (j=0;j<m;j++)
152
124M
         {
153
124M
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
124M
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
124M
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
124M
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
124M
            C_ADDTO(*Fout, scratch[1]);
159
124M
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
124M
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
124M
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
124M
            tw1 += fstride;
163
124M
            tw2 += fstride*2;
164
124M
            tw3 += fstride*3;
165
124M
            C_ADDTO( *Fout , scratch[3] );
166
167
124M
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
124M
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
124M
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
124M
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
124M
            ++Fout;
172
124M
         }
173
17.8M
      }
174
1.15M
   }
175
5.77M
}
kiss_fft.c:kf_bfly4
Line
Count
Source
116
2.88M
{
117
2.88M
   int i;
118
119
2.88M
   if (m==1)
120
2.30M
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
92.1M
      for (i=0;i<N;i++)
123
89.8M
      {
124
89.8M
         kiss_fft_cpx scratch0, scratch1;
125
126
89.8M
         C_SUB( scratch0 , *Fout, Fout[2] );
127
89.8M
         C_ADDTO(*Fout, Fout[2]);
128
89.8M
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
89.8M
         C_SUB( Fout[2], *Fout, scratch1 );
130
89.8M
         C_ADDTO( *Fout , scratch1 );
131
89.8M
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
89.8M
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
89.8M
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
89.8M
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
89.8M
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
89.8M
         Fout+=4;
138
89.8M
      }
139
2.30M
   } else {
140
579k
      int j;
141
579k
      kiss_fft_cpx scratch[6];
142
579k
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
579k
      const int m2=2*m;
144
579k
      const int m3=3*m;
145
579k
      kiss_fft_cpx * Fout_beg = Fout;
146
9.51M
      for (i=0;i<N;i++)
147
8.93M
      {
148
8.93M
         Fout = Fout_beg + i*mm;
149
8.93M
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
71.2M
         for (j=0;j<m;j++)
152
62.3M
         {
153
62.3M
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
62.3M
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
62.3M
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
62.3M
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
62.3M
            C_ADDTO(*Fout, scratch[1]);
159
62.3M
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
62.3M
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
62.3M
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
62.3M
            tw1 += fstride;
163
62.3M
            tw2 += fstride*2;
164
62.3M
            tw3 += fstride*3;
165
62.3M
            C_ADDTO( *Fout , scratch[3] );
166
167
62.3M
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
62.3M
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
62.3M
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
62.3M
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
62.3M
            ++Fout;
172
62.3M
         }
173
8.93M
      }
174
579k
   }
175
2.88M
}
kiss_fft.c:kf_bfly4
Line
Count
Source
116
2.88M
{
117
2.88M
   int i;
118
119
2.88M
   if (m==1)
120
2.30M
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
92.1M
      for (i=0;i<N;i++)
123
89.8M
      {
124
89.8M
         kiss_fft_cpx scratch0, scratch1;
125
126
89.8M
         C_SUB( scratch0 , *Fout, Fout[2] );
127
89.8M
         C_ADDTO(*Fout, Fout[2]);
128
89.8M
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
89.8M
         C_SUB( Fout[2], *Fout, scratch1 );
130
89.8M
         C_ADDTO( *Fout , scratch1 );
131
89.8M
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
89.8M
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
89.8M
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
89.8M
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
89.8M
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
89.8M
         Fout+=4;
138
89.8M
      }
139
2.30M
   } else {
140
579k
      int j;
141
579k
      kiss_fft_cpx scratch[6];
142
579k
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
579k
      const int m2=2*m;
144
579k
      const int m3=3*m;
145
579k
      kiss_fft_cpx * Fout_beg = Fout;
146
9.51M
      for (i=0;i<N;i++)
147
8.93M
      {
148
8.93M
         Fout = Fout_beg + i*mm;
149
8.93M
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
71.2M
         for (j=0;j<m;j++)
152
62.3M
         {
153
62.3M
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
62.3M
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
62.3M
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
62.3M
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
62.3M
            C_ADDTO(*Fout, scratch[1]);
159
62.3M
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
62.3M
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
62.3M
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
62.3M
            tw1 += fstride;
163
62.3M
            tw2 += fstride*2;
164
62.3M
            tw3 += fstride*3;
165
62.3M
            C_ADDTO( *Fout , scratch[3] );
166
167
62.3M
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
62.3M
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
62.3M
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
62.3M
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
62.3M
            ++Fout;
172
62.3M
         }
173
8.93M
      }
174
579k
   }
175
2.88M
}
176
177
178
#ifndef RADIX_TWO_ONLY
179
180
static void kf_bfly3(
181
                     kiss_fft_cpx * Fout,
182
                     const size_t fstride,
183
                     const kiss_fft_state *st,
184
                     int m,
185
                     int N,
186
                     int mm
187
                    )
188
4.61M
{
189
4.61M
   int i;
190
4.61M
   size_t k;
191
4.61M
   const size_t m2 = 2*m;
192
4.61M
   const kiss_twiddle_cpx *tw1,*tw2;
193
4.61M
   kiss_fft_cpx scratch[5];
194
4.61M
   kiss_twiddle_cpx epi3;
195
196
4.61M
   kiss_fft_cpx * Fout_beg = Fout;
197
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
2.30M
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
   epi3 = st->twiddles[fstride*m];
202
#endif
203
27.6M
   for (i=0;i<N;i++)
204
23.0M
   {
205
23.0M
      Fout = Fout_beg + i*mm;
206
23.0M
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
23.0M
      k=m;
209
239M
      do {
210
211
239M
         C_MUL(scratch[1],Fout[m] , *tw1);
212
239M
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
239M
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
239M
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
239M
         tw1 += fstride;
217
239M
         tw2 += fstride*2;
218
219
239M
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
239M
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
239M
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
239M
         C_ADDTO(*Fout,scratch[3]);
225
226
239M
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
239M
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
239M
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
239M
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
239M
         ++Fout;
233
239M
      } while(--k);
234
23.0M
   }
235
4.61M
}
kiss_fft.c:kf_bfly3
Line
Count
Source
188
2.30M
{
189
2.30M
   int i;
190
2.30M
   size_t k;
191
2.30M
   const size_t m2 = 2*m;
192
2.30M
   const kiss_twiddle_cpx *tw1,*tw2;
193
2.30M
   kiss_fft_cpx scratch[5];
194
2.30M
   kiss_twiddle_cpx epi3;
195
196
2.30M
   kiss_fft_cpx * Fout_beg = Fout;
197
2.30M
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
2.30M
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
   epi3 = st->twiddles[fstride*m];
202
#endif
203
13.8M
   for (i=0;i<N;i++)
204
11.5M
   {
205
11.5M
      Fout = Fout_beg + i*mm;
206
11.5M
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
11.5M
      k=m;
209
119M
      do {
210
211
119M
         C_MUL(scratch[1],Fout[m] , *tw1);
212
119M
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
119M
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
119M
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
119M
         tw1 += fstride;
217
119M
         tw2 += fstride*2;
218
219
119M
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
119M
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
119M
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
119M
         C_ADDTO(*Fout,scratch[3]);
225
226
119M
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
119M
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
119M
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
119M
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
119M
         ++Fout;
233
119M
      } while(--k);
234
11.5M
   }
235
2.30M
}
kiss_fft.c:kf_bfly3
Line
Count
Source
188
2.30M
{
189
2.30M
   int i;
190
2.30M
   size_t k;
191
2.30M
   const size_t m2 = 2*m;
192
2.30M
   const kiss_twiddle_cpx *tw1,*tw2;
193
2.30M
   kiss_fft_cpx scratch[5];
194
2.30M
   kiss_twiddle_cpx epi3;
195
196
2.30M
   kiss_fft_cpx * Fout_beg = Fout;
197
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
2.30M
   epi3 = st->twiddles[fstride*m];
202
2.30M
#endif
203
13.8M
   for (i=0;i<N;i++)
204
11.5M
   {
205
11.5M
      Fout = Fout_beg + i*mm;
206
11.5M
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
11.5M
      k=m;
209
119M
      do {
210
211
119M
         C_MUL(scratch[1],Fout[m] , *tw1);
212
119M
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
119M
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
119M
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
119M
         tw1 += fstride;
217
119M
         tw2 += fstride*2;
218
219
119M
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
119M
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
119M
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
119M
         C_ADDTO(*Fout,scratch[3]);
225
226
119M
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
119M
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
119M
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
119M
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
119M
         ++Fout;
233
119M
      } while(--k);
234
11.5M
   }
235
2.30M
}
236
237
238
#ifndef OVERRIDE_kf_bfly5
239
static void kf_bfly5(
240
                     kiss_fft_cpx * Fout,
241
                     const size_t fstride,
242
                     const kiss_fft_state *st,
243
                     int m,
244
                     int N,
245
                     int mm
246
                    )
247
4.61M
{
248
4.61M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
4.61M
   int i, u;
250
4.61M
   kiss_fft_cpx scratch[13];
251
4.61M
   const kiss_twiddle_cpx *tw;
252
4.61M
   kiss_twiddle_cpx ya,yb;
253
4.61M
   kiss_fft_cpx * Fout_beg = Fout;
254
255
#ifdef FIXED_POINT
256
2.30M
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
2.30M
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
2.30M
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
2.30M
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
   ya = st->twiddles[fstride*m];
262
   yb = st->twiddles[fstride*2*m];
263
#endif
264
4.61M
   tw=st->twiddles;
265
266
9.23M
   for (i=0;i<N;i++)
267
4.61M
   {
268
4.61M
      Fout = Fout_beg + i*mm;
269
4.61M
      Fout0=Fout;
270
4.61M
      Fout1=Fout0+m;
271
4.61M
      Fout2=Fout0+2*m;
272
4.61M
      Fout3=Fout0+3*m;
273
4.61M
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
148M
      for ( u=0; u<m; ++u ) {
277
143M
         scratch[0] = *Fout0;
278
279
143M
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
143M
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
143M
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
143M
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
143M
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
143M
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
143M
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
143M
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
143M
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
143M
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
143M
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
143M
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
143M
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
143M
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
143M
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
143M
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
143M
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
143M
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
143M
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
143M
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
143M
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
143M
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
143M
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
143M
      }
311
4.61M
   }
312
4.61M
}
kiss_fft.c:kf_bfly5
Line
Count
Source
247
2.30M
{
248
2.30M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
2.30M
   int i, u;
250
2.30M
   kiss_fft_cpx scratch[13];
251
2.30M
   const kiss_twiddle_cpx *tw;
252
2.30M
   kiss_twiddle_cpx ya,yb;
253
2.30M
   kiss_fft_cpx * Fout_beg = Fout;
254
255
2.30M
#ifdef FIXED_POINT
256
2.30M
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
2.30M
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
2.30M
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
2.30M
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
   ya = st->twiddles[fstride*m];
262
   yb = st->twiddles[fstride*2*m];
263
#endif
264
2.30M
   tw=st->twiddles;
265
266
4.61M
   for (i=0;i<N;i++)
267
2.30M
   {
268
2.30M
      Fout = Fout_beg + i*mm;
269
2.30M
      Fout0=Fout;
270
2.30M
      Fout1=Fout0+m;
271
2.30M
      Fout2=Fout0+2*m;
272
2.30M
      Fout3=Fout0+3*m;
273
2.30M
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
74.1M
      for ( u=0; u<m; ++u ) {
277
71.8M
         scratch[0] = *Fout0;
278
279
71.8M
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
71.8M
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
71.8M
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
71.8M
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
71.8M
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
71.8M
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
71.8M
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
71.8M
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
71.8M
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
71.8M
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
71.8M
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
71.8M
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
71.8M
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
71.8M
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
71.8M
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
71.8M
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
71.8M
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
71.8M
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
71.8M
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
71.8M
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
71.8M
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
71.8M
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
71.8M
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
71.8M
      }
311
2.30M
   }
312
2.30M
}
kiss_fft.c:kf_bfly5
Line
Count
Source
247
2.30M
{
248
2.30M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
2.30M
   int i, u;
250
2.30M
   kiss_fft_cpx scratch[13];
251
2.30M
   const kiss_twiddle_cpx *tw;
252
2.30M
   kiss_twiddle_cpx ya,yb;
253
2.30M
   kiss_fft_cpx * Fout_beg = Fout;
254
255
#ifdef FIXED_POINT
256
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
2.30M
   ya = st->twiddles[fstride*m];
262
2.30M
   yb = st->twiddles[fstride*2*m];
263
2.30M
#endif
264
2.30M
   tw=st->twiddles;
265
266
4.61M
   for (i=0;i<N;i++)
267
2.30M
   {
268
2.30M
      Fout = Fout_beg + i*mm;
269
2.30M
      Fout0=Fout;
270
2.30M
      Fout1=Fout0+m;
271
2.30M
      Fout2=Fout0+2*m;
272
2.30M
      Fout3=Fout0+3*m;
273
2.30M
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
74.1M
      for ( u=0; u<m; ++u ) {
277
71.8M
         scratch[0] = *Fout0;
278
279
71.8M
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
71.8M
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
71.8M
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
71.8M
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
71.8M
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
71.8M
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
71.8M
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
71.8M
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
71.8M
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
71.8M
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
71.8M
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
71.8M
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
71.8M
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
71.8M
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
71.8M
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
71.8M
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
71.8M
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
71.8M
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
71.8M
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
71.8M
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
71.8M
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
71.8M
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
71.8M
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
71.8M
      }
311
2.30M
   }
312
2.30M
}
313
#endif /* OVERRIDE_kf_bfly5 */
314
315
316
#endif
317
318
319
#ifdef CUSTOM_MODES
320
321
static
322
void compute_bitrev_table(
323
         int Fout,
324
         opus_int16 *f,
325
         const size_t fstride,
326
         int in_stride,
327
         opus_int16 * factors,
328
         const kiss_fft_state *st
329
            )
330
{
331
   const int p=*factors++; /* the radix  */
332
   const int m=*factors++; /* stage's fft length/p */
333
334
    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
335
   if (m==1)
336
   {
337
      int j;
338
      for (j=0;j<p;j++)
339
      {
340
         *f = Fout+j;
341
         f += fstride*in_stride;
342
      }
343
   } else {
344
      int j;
345
      for (j=0;j<p;j++)
346
      {
347
         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
348
         f += fstride*in_stride;
349
         Fout += m;
350
      }
351
   }
352
}
353
354
/*  facbuf is populated by p1,m1,p2,m2, ...
355
    where
356
    p[i] * m[i] = m[i-1]
357
    m0 = n                  */
358
static
359
int kf_factor(int n,opus_int16 * facbuf)
360
{
361
    int p=4;
362
    int i;
363
    int stages=0;
364
    int nbak = n;
365
366
    /*factor out powers of 4, powers of 2, then any remaining primes */
367
    do {
368
        while (n % p) {
369
            switch (p) {
370
                case 4: p = 2; break;
371
                case 2: p = 3; break;
372
                default: p += 2; break;
373
            }
374
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
375
                p = n;          /* no more factors, skip to end */
376
        }
377
        n /= p;
378
#ifdef RADIX_TWO_ONLY
379
        if (p!=2 && p != 4)
380
#else
381
        if (p>5)
382
#endif
383
        {
384
           return 0;
385
        }
386
        facbuf[2*stages] = p;
387
        if (p==2 && stages > 1)
388
        {
389
           facbuf[2*stages] = 4;
390
           facbuf[2] = 2;
391
        }
392
        stages++;
393
    } while (n > 1);
394
    n = nbak;
395
    /* Reverse the order to get the radix 4 at the end, so we can use the
396
       fast degenerate case. It turns out that reversing the order also
397
       improves the noise behaviour. */
398
    for (i=0;i<stages/2;i++)
399
    {
400
       int tmp;
401
       tmp = facbuf[2*i];
402
       facbuf[2*i] = facbuf[2*(stages-i-1)];
403
       facbuf[2*(stages-i-1)] = tmp;
404
    }
405
    for (i=0;i<stages;i++)
406
    {
407
        n /= facbuf[2*i];
408
        facbuf[2*i+1] = n;
409
    }
410
    return 1;
411
}
412
413
static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
414
{
415
   int i;
416
#ifdef FIXED_POINT
417
   for (i=0;i<nfft;++i) {
418
      opus_val32 phase = -i;
419
#ifdef ENABLE_QEXT
420
      twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
421
      twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
422
#else
423
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
424
#endif
425
   }
426
#else
427
   for (i=0;i<nfft;++i) {
428
      const double pi=3.14159265358979323846264338327;
429
      double phase = ( -2*pi /nfft ) * i;
430
      kf_cexp(twiddles+i, phase );
431
   }
432
#endif
433
}
434
435
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
436
   (void)st;
437
   return 0;
438
}
439
440
/*
441
 *
442
 * Allocates all necessary storage space for the fft and ifft.
443
 * The return value is a contiguous block of memory.  As such,
444
 * It can be freed with free().
445
 * */
446
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
447
                                        const kiss_fft_state *base, int arch)
448
{
449
    kiss_fft_state *st=NULL;
450
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
451
452
    if ( lenmem==NULL ) {
453
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
454
    }else{
455
        if (mem != NULL && *lenmem >= memneeded)
456
            st = (kiss_fft_state*)mem;
457
        *lenmem = memneeded;
458
    }
459
    if (st) {
460
        opus_int16 *bitrev;
461
        kiss_twiddle_cpx *twiddles;
462
463
        st->nfft=nfft;
464
#ifdef FIXED_POINT
465
        st->scale_shift = celt_ilog2(st->nfft);
466
# ifdef ENABLE_QEXT
467
        if (st->nfft == 1<<st->scale_shift)
468
           st->scale = QCONST32(1.0f, 30);
469
        else
470
           st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
471
# else
472
        if (st->nfft == 1<<st->scale_shift)
473
           st->scale = Q15ONE;
474
        else
475
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
476
# endif
477
#else
478
        st->scale = 1.f/nfft;
479
#endif
480
        if (base != NULL)
481
        {
482
           st->twiddles = base->twiddles;
483
           st->shift = 0;
484
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
485
              st->shift++;
486
           if (st->shift>=32)
487
              goto fail;
488
        } else {
489
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
490
           compute_twiddles(twiddles, nfft);
491
           st->shift = -1;
492
        }
493
        if (!kf_factor(nfft,st->factors))
494
        {
495
           goto fail;
496
        }
497
498
        /* bitrev */
499
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
500
        if (st->bitrev==NULL)
501
            goto fail;
502
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
503
504
        /* Initialize architecture specific fft parameters */
505
        if (opus_fft_alloc_arch(st, arch))
506
            goto fail;
507
    }
508
    return st;
509
fail:
510
    opus_fft_free(st, arch);
511
    return NULL;
512
}
513
514
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
515
{
516
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
517
}
518
519
void opus_fft_free_arch_c(kiss_fft_state *st) {
520
   (void)st;
521
}
522
523
void opus_fft_free(const kiss_fft_state *cfg, int arch)
524
{
525
   if (cfg)
526
   {
527
      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
528
      opus_free((opus_int16*)cfg->bitrev);
529
      if (cfg->shift < 0)
530
         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
531
      opus_free((kiss_fft_state*)cfg);
532
   }
533
}
534
535
#endif /* CUSTOM_MODES */
536
537
#ifdef FIXED_POINT
538
5.52M
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
539
5.52M
   int shift;
540
5.52M
   shift = IMIN(step, *total);
541
5.52M
   *total -= shift;
542
5.52M
   if (shift == 1) {
543
399k
      int i;
544
106M
      for (i=0;i<N;i++) {
545
105M
         x[i].r = SHR32(x[i].r, 1);
546
105M
         x[i].i = SHR32(x[i].i, 1);
547
105M
      }
548
5.12M
   } else if (shift>0) {
549
973k
      int i;
550
268M
      for (i=0;i<N;i++) {
551
267M
         x[i].r = PSHR32(x[i].r, shift);
552
267M
         x[i].i = PSHR32(x[i].i, shift);
553
267M
      }
554
973k
   }
555
5.52M
}
556
#else
557
#define fft_downshift(x, N, total, step)
558
#endif
559
560
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
561
2.30M
{
562
2.30M
    int m2, m;
563
2.30M
    int p;
564
2.30M
    int L;
565
2.30M
    int fstride[MAXFACTORS];
566
2.30M
    int i;
567
2.30M
    int shift;
568
569
    /* st->shift can be -1 */
570
2.30M
    shift = st->shift>0 ? st->shift : 0;
571
572
2.30M
    fstride[0] = 1;
573
2.30M
    L=0;
574
8.11M
    do {
575
8.11M
       p = st->factors[2*L];
576
8.11M
       m = st->factors[2*L+1];
577
8.11M
       fstride[L+1] = fstride[L]*p;
578
8.11M
       L++;
579
8.11M
    } while(m!=1);
580
2.30M
    m = st->factors[2*L-1];
581
10.4M
    for (i=L-1;i>=0;i--)
582
8.11M
    {
583
8.11M
       if (i!=0)
584
5.80M
          m2 = st->factors[2*i-1];
585
2.30M
       else
586
2.30M
          m2 = 1;
587
8.11M
       switch (st->factors[2*i])
588
8.11M
       {
589
613k
       case 2:
590
613k
          fft_downshift(fout, st->nfft, &downshift, 1);
591
613k
          kf_bfly2(fout, m, fstride[i]);
592
613k
          break;
593
2.88M
       case 4:
594
2.88M
          fft_downshift(fout, st->nfft, &downshift, 2);
595
2.88M
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
596
2.88M
          break;
597
0
 #ifndef RADIX_TWO_ONLY
598
2.30M
       case 3:
599
2.30M
          fft_downshift(fout, st->nfft, &downshift, 2);
600
2.30M
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
601
2.30M
          break;
602
2.30M
       case 5:
603
2.30M
          fft_downshift(fout, st->nfft, &downshift, 3);
604
2.30M
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
605
2.30M
          break;
606
8.11M
 #endif
607
8.11M
       }
608
8.11M
       m = m2;
609
8.11M
    }
610
2.30M
    fft_downshift(fout, st->nfft, &downshift, downshift);
611
2.30M
}
612
613
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
614
349k
{
615
349k
   int i;
616
349k
   celt_coef scale;
617
#ifdef FIXED_POINT
618
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
619
      MULT16_32_Q15() on ARM. */
620
   int scale_shift = st->scale_shift-1;
621
#endif
622
349k
   scale = st->scale;
623
624
349k
   celt_assert2 (fin != fout, "In-place FFT not supported");
625
   /* Bit-reverse the input */
626
168M
   for (i=0;i<st->nfft;i++)
627
167M
   {
628
167M
      kiss_fft_cpx x = fin[i];
629
167M
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
630
167M
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
631
167M
   }
632
349k
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
633
349k
}
opus_fft_c
Line
Count
Source
614
174k
{
615
174k
   int i;
616
174k
   celt_coef scale;
617
174k
#ifdef FIXED_POINT
618
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
619
      MULT16_32_Q15() on ARM. */
620
174k
   int scale_shift = st->scale_shift-1;
621
174k
#endif
622
174k
   scale = st->scale;
623
624
174k
   celt_assert2 (fin != fout, "In-place FFT not supported");
625
   /* Bit-reverse the input */
626
84.0M
   for (i=0;i<st->nfft;i++)
627
83.9M
   {
628
83.9M
      kiss_fft_cpx x = fin[i];
629
83.9M
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
630
83.9M
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
631
83.9M
   }
632
174k
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
633
174k
}
opus_fft_c
Line
Count
Source
614
174k
{
615
174k
   int i;
616
174k
   celt_coef scale;
617
#ifdef FIXED_POINT
618
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
619
      MULT16_32_Q15() on ARM. */
620
   int scale_shift = st->scale_shift-1;
621
#endif
622
174k
   scale = st->scale;
623
624
174k
   celt_assert2 (fin != fout, "In-place FFT not supported");
625
   /* Bit-reverse the input */
626
84.0M
   for (i=0;i<st->nfft;i++)
627
83.9M
   {
628
83.9M
      kiss_fft_cpx x = fin[i];
629
83.9M
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
630
83.9M
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
631
83.9M
   }
632
174k
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
633
174k
}
634
635
636
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
637
0
{
638
0
   int i;
639
0
   celt_assert2 (fin != fout, "In-place FFT not supported");
640
   /* Bit-reverse the input */
641
0
   for (i=0;i<st->nfft;i++)
642
0
      fout[st->bitrev[i]] = fin[i];
643
0
   for (i=0;i<st->nfft;i++)
644
0
      fout[i].i = -fout[i].i;
645
0
   opus_fft_impl(st, fout ARG_FIXED(0));
646
0
   for (i=0;i<st->nfft;i++)
647
0
      fout[i].i = -fout[i].i;
648
0
}
Unexecuted instantiation: opus_ifft_c
Unexecuted instantiation: opus_ifft_c