Coverage Report

Created: 2023-09-25 08:12

/src/opus/celt/kiss_fft.c
Line
Count
Source (jump to first uncovered line)
1
/*Copyright (c) 2003-2004, Mark Borgerding
2
  Lots of modifications by Jean-Marc Valin
3
  Copyright (c) 2005-2007, Xiph.Org Foundation
4
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
5
6
  All rights reserved.
7
8
  Redistribution and use in source and binary forms, with or without
9
   modification, are permitted provided that the following conditions are met:
10
11
    * Redistributions of source code must retain the above copyright notice,
12
       this list of conditions and the following disclaimer.
13
    * Redistributions in binary form must reproduce the above copyright notice,
14
       this list of conditions and the following disclaimer in the
15
       documentation and/or other materials provided with the distribution.
16
17
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
  POSSIBILITY OF SUCH DAMAGE.*/
28
29
/* This code is originally from Mark Borgerding's KISS-FFT but has been
30
   heavily modified to better suit Opus */
31
32
#ifndef SKIP_CONFIG_H
33
#  ifdef HAVE_CONFIG_H
34
#    include "config.h"
35
#  endif
36
#endif
37
38
#include "_kiss_fft_guts.h"
39
#include "arch.h"
40
#include "os_support.h"
41
#include "mathops.h"
42
#include "stack_alloc.h"
43
44
/* The guts header contains all the multiplication and addition macros that are defined for
45
   complex numbers.  It also delares the kf_ internal functions.
46
*/
47
48
static void kf_bfly2(
49
                     kiss_fft_cpx * Fout,
50
                     int m,
51
                     int N
52
                    )
53
49.1M
{
54
49.1M
   kiss_fft_cpx * Fout2;
55
49.1M
   int i;
56
49.1M
   (void)m;
57
#ifdef CUSTOM_MODES
58
   if (m==1)
59
   {
60
      celt_assert(m==1);
61
      for (i=0;i<N;i++)
62
      {
63
         kiss_fft_cpx t;
64
         Fout2 = Fout + 1;
65
         t = *Fout2;
66
         C_SUB( *Fout2 ,  *Fout , t );
67
         C_ADDTO( *Fout ,  t );
68
         Fout += 2;
69
      }
70
   } else
71
#endif
72
49.1M
   {
73
49.1M
      opus_val16 tw;
74
49.1M
      tw = QCONST16(0.7071067812f, 15);
75
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
76
49.1M
      celt_assert(m==4);
77
1.66G
      for (i=0;i<N;i++)
78
1.61G
      {
79
1.61G
         kiss_fft_cpx t;
80
1.61G
         Fout2 = Fout + 4;
81
1.61G
         t = Fout2[0];
82
1.61G
         C_SUB( Fout2[0] ,  Fout[0] , t );
83
1.61G
         C_ADDTO( Fout[0] ,  t );
84
85
1.61G
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
86
1.61G
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
87
1.61G
         C_SUB( Fout2[1] ,  Fout[1] , t );
88
1.61G
         C_ADDTO( Fout[1] ,  t );
89
90
1.61G
         t.r = Fout2[2].i;
91
1.61G
         t.i = -Fout2[2].r;
92
1.61G
         C_SUB( Fout2[2] ,  Fout[2] , t );
93
1.61G
         C_ADDTO( Fout[2] ,  t );
94
95
1.61G
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
96
1.61G
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
97
1.61G
         C_SUB( Fout2[3] ,  Fout[3] , t );
98
1.61G
         C_ADDTO( Fout[3] ,  t );
99
1.61G
         Fout += 8;
100
1.61G
      }
101
49.1M
   }
102
49.1M
}
kiss_fft.c:kf_bfly2
Line
Count
Source
53
24.5M
{
54
24.5M
   kiss_fft_cpx * Fout2;
55
24.5M
   int i;
56
24.5M
   (void)m;
57
#ifdef CUSTOM_MODES
58
   if (m==1)
59
   {
60
      celt_assert(m==1);
61
      for (i=0;i<N;i++)
62
      {
63
         kiss_fft_cpx t;
64
         Fout2 = Fout + 1;
65
         t = *Fout2;
66
         C_SUB( *Fout2 ,  *Fout , t );
67
         C_ADDTO( *Fout ,  t );
68
         Fout += 2;
69
      }
70
   } else
71
#endif
72
24.5M
   {
73
24.5M
      opus_val16 tw;
74
24.5M
      tw = QCONST16(0.7071067812f, 15);
75
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
76
24.5M
      celt_assert(m==4);
77
833M
      for (i=0;i<N;i++)
78
808M
      {
79
808M
         kiss_fft_cpx t;
80
808M
         Fout2 = Fout + 4;
81
808M
         t = Fout2[0];
82
808M
         C_SUB( Fout2[0] ,  Fout[0] , t );
83
808M
         C_ADDTO( Fout[0] ,  t );
84
85
808M
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
86
808M
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
87
808M
         C_SUB( Fout2[1] ,  Fout[1] , t );
88
808M
         C_ADDTO( Fout[1] ,  t );
89
90
808M
         t.r = Fout2[2].i;
91
808M
         t.i = -Fout2[2].r;
92
808M
         C_SUB( Fout2[2] ,  Fout[2] , t );
93
808M
         C_ADDTO( Fout[2] ,  t );
94
95
808M
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
96
808M
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
97
808M
         C_SUB( Fout2[3] ,  Fout[3] , t );
98
808M
         C_ADDTO( Fout[3] ,  t );
99
808M
         Fout += 8;
100
808M
      }
101
24.5M
   }
102
24.5M
}
kiss_fft.c:kf_bfly2
Line
Count
Source
53
24.5M
{
54
24.5M
   kiss_fft_cpx * Fout2;
55
24.5M
   int i;
56
24.5M
   (void)m;
57
#ifdef CUSTOM_MODES
58
   if (m==1)
59
   {
60
      celt_assert(m==1);
61
      for (i=0;i<N;i++)
62
      {
63
         kiss_fft_cpx t;
64
         Fout2 = Fout + 1;
65
         t = *Fout2;
66
         C_SUB( *Fout2 ,  *Fout , t );
67
         C_ADDTO( *Fout ,  t );
68
         Fout += 2;
69
      }
70
   } else
71
#endif
72
24.5M
   {
73
24.5M
      opus_val16 tw;
74
24.5M
      tw = QCONST16(0.7071067812f, 15);
75
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
76
24.5M
      celt_assert(m==4);
77
833M
      for (i=0;i<N;i++)
78
808M
      {
79
808M
         kiss_fft_cpx t;
80
808M
         Fout2 = Fout + 4;
81
808M
         t = Fout2[0];
82
808M
         C_SUB( Fout2[0] ,  Fout[0] , t );
83
808M
         C_ADDTO( Fout[0] ,  t );
84
85
808M
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
86
808M
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
87
808M
         C_SUB( Fout2[1] ,  Fout[1] , t );
88
808M
         C_ADDTO( Fout[1] ,  t );
89
90
808M
         t.r = Fout2[2].i;
91
808M
         t.i = -Fout2[2].r;
92
808M
         C_SUB( Fout2[2] ,  Fout[2] , t );
93
808M
         C_ADDTO( Fout[2] ,  t );
94
95
808M
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
96
808M
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
97
808M
         C_SUB( Fout2[3] ,  Fout[3] , t );
98
808M
         C_ADDTO( Fout[3] ,  t );
99
808M
         Fout += 8;
100
808M
      }
101
24.5M
   }
102
24.5M
}
103
104
static void kf_bfly4(
105
                     kiss_fft_cpx * Fout,
106
                     const size_t fstride,
107
                     const kiss_fft_state *st,
108
                     int m,
109
                     int N,
110
                     int mm
111
                    )
112
184M
{
113
184M
   int i;
114
115
184M
   if (m==1)
116
155M
   {
117
      /* Degenerate case where all the twiddles are 1. */
118
5.38G
      for (i=0;i<N;i++)
119
5.22G
      {
120
5.22G
         kiss_fft_cpx scratch0, scratch1;
121
122
5.22G
         C_SUB( scratch0 , *Fout, Fout[2] );
123
5.22G
         C_ADDTO(*Fout, Fout[2]);
124
5.22G
         C_ADD( scratch1 , Fout[1] , Fout[3] );
125
5.22G
         C_SUB( Fout[2], *Fout, scratch1 );
126
5.22G
         C_ADDTO( *Fout , scratch1 );
127
5.22G
         C_SUB( scratch1 , Fout[1] , Fout[3] );
128
129
5.22G
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
130
5.22G
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
131
5.22G
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
132
5.22G
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
133
5.22G
         Fout+=4;
134
5.22G
      }
135
155M
   } else {
136
28.2M
      int j;
137
28.2M
      kiss_fft_cpx scratch[6];
138
28.2M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
139
28.2M
      const int m2=2*m;
140
28.2M
      const int m3=3*m;
141
28.2M
      kiss_fft_cpx * Fout_beg = Fout;
142
452M
      for (i=0;i<N;i++)
143
424M
      {
144
424M
         Fout = Fout_beg + i*mm;
145
424M
         tw3 = tw2 = tw1 = st->twiddles;
146
         /* m is guaranteed to be a multiple of 4. */
147
3.29G
         for (j=0;j<m;j++)
148
2.87G
         {
149
2.87G
            C_MUL(scratch[0],Fout[m] , *tw1 );
150
2.87G
            C_MUL(scratch[1],Fout[m2] , *tw2 );
151
2.87G
            C_MUL(scratch[2],Fout[m3] , *tw3 );
152
153
2.87G
            C_SUB( scratch[5] , *Fout, scratch[1] );
154
2.87G
            C_ADDTO(*Fout, scratch[1]);
155
2.87G
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
156
2.87G
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
157
2.87G
            C_SUB( Fout[m2], *Fout, scratch[3] );
158
2.87G
            tw1 += fstride;
159
2.87G
            tw2 += fstride*2;
160
2.87G
            tw3 += fstride*3;
161
2.87G
            C_ADDTO( *Fout , scratch[3] );
162
163
2.87G
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
164
2.87G
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
165
2.87G
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
166
2.87G
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
167
2.87G
            ++Fout;
168
2.87G
         }
169
424M
      }
170
28.2M
   }
171
184M
}
kiss_fft.c:kf_bfly4
Line
Count
Source
112
92.0M
{
113
92.0M
   int i;
114
115
92.0M
   if (m==1)
116
77.9M
   {
117
      /* Degenerate case where all the twiddles are 1. */
118
2.69G
      for (i=0;i<N;i++)
119
2.61G
      {
120
2.61G
         kiss_fft_cpx scratch0, scratch1;
121
122
2.61G
         C_SUB( scratch0 , *Fout, Fout[2] );
123
2.61G
         C_ADDTO(*Fout, Fout[2]);
124
2.61G
         C_ADD( scratch1 , Fout[1] , Fout[3] );
125
2.61G
         C_SUB( Fout[2], *Fout, scratch1 );
126
2.61G
         C_ADDTO( *Fout , scratch1 );
127
2.61G
         C_SUB( scratch1 , Fout[1] , Fout[3] );
128
129
2.61G
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
130
2.61G
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
131
2.61G
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
132
2.61G
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
133
2.61G
         Fout+=4;
134
2.61G
      }
135
77.9M
   } else {
136
14.1M
      int j;
137
14.1M
      kiss_fft_cpx scratch[6];
138
14.1M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
139
14.1M
      const int m2=2*m;
140
14.1M
      const int m3=3*m;
141
14.1M
      kiss_fft_cpx * Fout_beg = Fout;
142
226M
      for (i=0;i<N;i++)
143
212M
      {
144
212M
         Fout = Fout_beg + i*mm;
145
212M
         tw3 = tw2 = tw1 = st->twiddles;
146
         /* m is guaranteed to be a multiple of 4. */
147
1.64G
         for (j=0;j<m;j++)
148
1.43G
         {
149
1.43G
            C_MUL(scratch[0],Fout[m] , *tw1 );
150
1.43G
            C_MUL(scratch[1],Fout[m2] , *tw2 );
151
1.43G
            C_MUL(scratch[2],Fout[m3] , *tw3 );
152
153
1.43G
            C_SUB( scratch[5] , *Fout, scratch[1] );
154
1.43G
            C_ADDTO(*Fout, scratch[1]);
155
1.43G
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
156
1.43G
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
157
1.43G
            C_SUB( Fout[m2], *Fout, scratch[3] );
158
1.43G
            tw1 += fstride;
159
1.43G
            tw2 += fstride*2;
160
1.43G
            tw3 += fstride*3;
161
1.43G
            C_ADDTO( *Fout , scratch[3] );
162
163
1.43G
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
164
1.43G
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
165
1.43G
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
166
1.43G
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
167
1.43G
            ++Fout;
168
1.43G
         }
169
212M
      }
170
14.1M
   }
171
92.0M
}
kiss_fft.c:kf_bfly4
Line
Count
Source
112
92.0M
{
113
92.0M
   int i;
114
115
92.0M
   if (m==1)
116
77.9M
   {
117
      /* Degenerate case where all the twiddles are 1. */
118
2.69G
      for (i=0;i<N;i++)
119
2.61G
      {
120
2.61G
         kiss_fft_cpx scratch0, scratch1;
121
122
2.61G
         C_SUB( scratch0 , *Fout, Fout[2] );
123
2.61G
         C_ADDTO(*Fout, Fout[2]);
124
2.61G
         C_ADD( scratch1 , Fout[1] , Fout[3] );
125
2.61G
         C_SUB( Fout[2], *Fout, scratch1 );
126
2.61G
         C_ADDTO( *Fout , scratch1 );
127
2.61G
         C_SUB( scratch1 , Fout[1] , Fout[3] );
128
129
2.61G
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
130
2.61G
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
131
2.61G
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
132
2.61G
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
133
2.61G
         Fout+=4;
134
2.61G
      }
135
77.9M
   } else {
136
14.1M
      int j;
137
14.1M
      kiss_fft_cpx scratch[6];
138
14.1M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
139
14.1M
      const int m2=2*m;
140
14.1M
      const int m3=3*m;
141
14.1M
      kiss_fft_cpx * Fout_beg = Fout;
142
226M
      for (i=0;i<N;i++)
143
212M
      {
144
212M
         Fout = Fout_beg + i*mm;
145
212M
         tw3 = tw2 = tw1 = st->twiddles;
146
         /* m is guaranteed to be a multiple of 4. */
147
1.64G
         for (j=0;j<m;j++)
148
1.43G
         {
149
1.43G
            C_MUL(scratch[0],Fout[m] , *tw1 );
150
1.43G
            C_MUL(scratch[1],Fout[m2] , *tw2 );
151
1.43G
            C_MUL(scratch[2],Fout[m3] , *tw3 );
152
153
1.43G
            C_SUB( scratch[5] , *Fout, scratch[1] );
154
1.43G
            C_ADDTO(*Fout, scratch[1]);
155
1.43G
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
156
1.43G
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
157
1.43G
            C_SUB( Fout[m2], *Fout, scratch[3] );
158
1.43G
            tw1 += fstride;
159
1.43G
            tw2 += fstride*2;
160
1.43G
            tw3 += fstride*3;
161
1.43G
            C_ADDTO( *Fout , scratch[3] );
162
163
1.43G
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
164
1.43G
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
165
1.43G
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
166
1.43G
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
167
1.43G
            ++Fout;
168
1.43G
         }
169
212M
      }
170
14.1M
   }
171
92.0M
}
172
173
174
#ifndef RADIX_TWO_ONLY
175
176
static void kf_bfly3(
177
                     kiss_fft_cpx * Fout,
178
                     const size_t fstride,
179
                     const kiss_fft_state *st,
180
                     int m,
181
                     int N,
182
                     int mm
183
                    )
184
155M
{
185
155M
   int i;
186
155M
   size_t k;
187
155M
   const size_t m2 = 2*m;
188
155M
   const kiss_twiddle_cpx *tw1,*tw2;
189
155M
   kiss_fft_cpx scratch[5];
190
155M
   kiss_twiddle_cpx epi3;
191
192
155M
   kiss_fft_cpx * Fout_beg = Fout;
193
#ifdef FIXED_POINT
194
   /*epi3.r = -16384;*/ /* Unused */
195
   epi3.i = -28378;
196
#else
197
   epi3 = st->twiddles[fstride*m];
198
#endif
199
935M
   for (i=0;i<N;i++)
200
779M
   {
201
779M
      Fout = Fout_beg + i*mm;
202
779M
      tw1=tw2=st->twiddles;
203
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
204
779M
      k=m;
205
6.97G
      do {
206
207
6.97G
         C_MUL(scratch[1],Fout[m] , *tw1);
208
6.97G
         C_MUL(scratch[2],Fout[m2] , *tw2);
209
210
6.97G
         C_ADD(scratch[3],scratch[1],scratch[2]);
211
6.97G
         C_SUB(scratch[0],scratch[1],scratch[2]);
212
6.97G
         tw1 += fstride;
213
6.97G
         tw2 += fstride*2;
214
215
6.97G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
216
6.97G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
217
218
6.97G
         C_MULBYSCALAR( scratch[0] , epi3.i );
219
220
6.97G
         C_ADDTO(*Fout,scratch[3]);
221
222
6.97G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
223
6.97G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
224
225
6.97G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
226
6.97G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
227
228
6.97G
         ++Fout;
229
6.97G
      } while(--k);
230
779M
   }
231
155M
}
kiss_fft.c:kf_bfly3
Line
Count
Source
184
77.9M
{
185
77.9M
   int i;
186
77.9M
   size_t k;
187
77.9M
   const size_t m2 = 2*m;
188
77.9M
   const kiss_twiddle_cpx *tw1,*tw2;
189
77.9M
   kiss_fft_cpx scratch[5];
190
77.9M
   kiss_twiddle_cpx epi3;
191
192
77.9M
   kiss_fft_cpx * Fout_beg = Fout;
193
#ifdef FIXED_POINT
194
   /*epi3.r = -16384;*/ /* Unused */
195
   epi3.i = -28378;
196
#else
197
77.9M
   epi3 = st->twiddles[fstride*m];
198
77.9M
#endif
199
467M
   for (i=0;i<N;i++)
200
389M
   {
201
389M
      Fout = Fout_beg + i*mm;
202
389M
      tw1=tw2=st->twiddles;
203
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
204
389M
      k=m;
205
3.48G
      do {
206
207
3.48G
         C_MUL(scratch[1],Fout[m] , *tw1);
208
3.48G
         C_MUL(scratch[2],Fout[m2] , *tw2);
209
210
3.48G
         C_ADD(scratch[3],scratch[1],scratch[2]);
211
3.48G
         C_SUB(scratch[0],scratch[1],scratch[2]);
212
3.48G
         tw1 += fstride;
213
3.48G
         tw2 += fstride*2;
214
215
3.48G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
216
3.48G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
217
218
3.48G
         C_MULBYSCALAR( scratch[0] , epi3.i );
219
220
3.48G
         C_ADDTO(*Fout,scratch[3]);
221
222
3.48G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
223
3.48G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
224
225
3.48G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
226
3.48G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
227
228
3.48G
         ++Fout;
229
3.48G
      } while(--k);
230
389M
   }
231
77.9M
}
kiss_fft.c:kf_bfly3
Line
Count
Source
184
77.9M
{
185
77.9M
   int i;
186
77.9M
   size_t k;
187
77.9M
   const size_t m2 = 2*m;
188
77.9M
   const kiss_twiddle_cpx *tw1,*tw2;
189
77.9M
   kiss_fft_cpx scratch[5];
190
77.9M
   kiss_twiddle_cpx epi3;
191
192
77.9M
   kiss_fft_cpx * Fout_beg = Fout;
193
77.9M
#ifdef FIXED_POINT
194
   /*epi3.r = -16384;*/ /* Unused */
195
77.9M
   epi3.i = -28378;
196
#else
197
   epi3 = st->twiddles[fstride*m];
198
#endif
199
467M
   for (i=0;i<N;i++)
200
389M
   {
201
389M
      Fout = Fout_beg + i*mm;
202
389M
      tw1=tw2=st->twiddles;
203
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
204
389M
      k=m;
205
3.48G
      do {
206
207
3.48G
         C_MUL(scratch[1],Fout[m] , *tw1);
208
3.48G
         C_MUL(scratch[2],Fout[m2] , *tw2);
209
210
3.48G
         C_ADD(scratch[3],scratch[1],scratch[2]);
211
3.48G
         C_SUB(scratch[0],scratch[1],scratch[2]);
212
3.48G
         tw1 += fstride;
213
3.48G
         tw2 += fstride*2;
214
215
3.48G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
216
3.48G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
217
218
3.48G
         C_MULBYSCALAR( scratch[0] , epi3.i );
219
220
3.48G
         C_ADDTO(*Fout,scratch[3]);
221
222
3.48G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
223
3.48G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
224
225
3.48G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
226
3.48G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
227
228
3.48G
         ++Fout;
229
3.48G
      } while(--k);
230
389M
   }
231
77.9M
}
232
233
234
#ifndef OVERRIDE_kf_bfly5
235
static void kf_bfly5(
236
                     kiss_fft_cpx * Fout,
237
                     const size_t fstride,
238
                     const kiss_fft_state *st,
239
                     int m,
240
                     int N,
241
                     int mm
242
                    )
243
155M
{
244
155M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
245
155M
   int i, u;
246
155M
   kiss_fft_cpx scratch[13];
247
155M
   const kiss_twiddle_cpx *tw;
248
155M
   kiss_twiddle_cpx ya,yb;
249
155M
   kiss_fft_cpx * Fout_beg = Fout;
250
251
#ifdef FIXED_POINT
252
   ya.r = 10126;
253
   ya.i = -31164;
254
   yb.r = -26510;
255
   yb.i = -19261;
256
#else
257
   ya = st->twiddles[fstride*m];
258
   yb = st->twiddles[fstride*2*m];
259
#endif
260
155M
   tw=st->twiddles;
261
262
311M
   for (i=0;i<N;i++)
263
155M
   {
264
155M
      Fout = Fout_beg + i*mm;
265
155M
      Fout0=Fout;
266
155M
      Fout1=Fout0+m;
267
155M
      Fout2=Fout0+2*m;
268
155M
      Fout3=Fout0+3*m;
269
155M
      Fout4=Fout0+4*m;
270
271
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
272
4.33G
      for ( u=0; u<m; ++u ) {
273
4.18G
         scratch[0] = *Fout0;
274
275
4.18G
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
276
4.18G
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
277
4.18G
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
278
4.18G
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
279
280
4.18G
         C_ADD( scratch[7],scratch[1],scratch[4]);
281
4.18G
         C_SUB( scratch[10],scratch[1],scratch[4]);
282
4.18G
         C_ADD( scratch[8],scratch[2],scratch[3]);
283
4.18G
         C_SUB( scratch[9],scratch[2],scratch[3]);
284
285
4.18G
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
286
4.18G
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
287
288
4.18G
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
289
4.18G
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
290
291
4.18G
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
292
4.18G
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
293
294
4.18G
         C_SUB(*Fout1,scratch[5],scratch[6]);
295
4.18G
         C_ADD(*Fout4,scratch[5],scratch[6]);
296
297
4.18G
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
298
4.18G
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
299
4.18G
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
300
4.18G
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
301
302
4.18G
         C_ADD(*Fout2,scratch[11],scratch[12]);
303
4.18G
         C_SUB(*Fout3,scratch[11],scratch[12]);
304
305
4.18G
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
306
4.18G
      }
307
155M
   }
308
155M
}
kiss_fft.c:kf_bfly5
Line
Count
Source
243
77.9M
{
244
77.9M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
245
77.9M
   int i, u;
246
77.9M
   kiss_fft_cpx scratch[13];
247
77.9M
   const kiss_twiddle_cpx *tw;
248
77.9M
   kiss_twiddle_cpx ya,yb;
249
77.9M
   kiss_fft_cpx * Fout_beg = Fout;
250
251
#ifdef FIXED_POINT
252
   ya.r = 10126;
253
   ya.i = -31164;
254
   yb.r = -26510;
255
   yb.i = -19261;
256
#else
257
77.9M
   ya = st->twiddles[fstride*m];
258
77.9M
   yb = st->twiddles[fstride*2*m];
259
77.9M
#endif
260
77.9M
   tw=st->twiddles;
261
262
155M
   for (i=0;i<N;i++)
263
77.9M
   {
264
77.9M
      Fout = Fout_beg + i*mm;
265
77.9M
      Fout0=Fout;
266
77.9M
      Fout1=Fout0+m;
267
77.9M
      Fout2=Fout0+2*m;
268
77.9M
      Fout3=Fout0+3*m;
269
77.9M
      Fout4=Fout0+4*m;
270
271
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
272
2.16G
      for ( u=0; u<m; ++u ) {
273
2.09G
         scratch[0] = *Fout0;
274
275
2.09G
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
276
2.09G
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
277
2.09G
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
278
2.09G
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
279
280
2.09G
         C_ADD( scratch[7],scratch[1],scratch[4]);
281
2.09G
         C_SUB( scratch[10],scratch[1],scratch[4]);
282
2.09G
         C_ADD( scratch[8],scratch[2],scratch[3]);
283
2.09G
         C_SUB( scratch[9],scratch[2],scratch[3]);
284
285
2.09G
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
286
2.09G
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
287
288
2.09G
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
289
2.09G
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
290
291
2.09G
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
292
2.09G
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
293
294
2.09G
         C_SUB(*Fout1,scratch[5],scratch[6]);
295
2.09G
         C_ADD(*Fout4,scratch[5],scratch[6]);
296
297
2.09G
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
298
2.09G
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
299
2.09G
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
300
2.09G
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
301
302
2.09G
         C_ADD(*Fout2,scratch[11],scratch[12]);
303
2.09G
         C_SUB(*Fout3,scratch[11],scratch[12]);
304
305
2.09G
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
306
2.09G
      }
307
77.9M
   }
308
77.9M
}
kiss_fft.c:kf_bfly5
Line
Count
Source
243
77.9M
{
244
77.9M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
245
77.9M
   int i, u;
246
77.9M
   kiss_fft_cpx scratch[13];
247
77.9M
   const kiss_twiddle_cpx *tw;
248
77.9M
   kiss_twiddle_cpx ya,yb;
249
77.9M
   kiss_fft_cpx * Fout_beg = Fout;
250
251
77.9M
#ifdef FIXED_POINT
252
77.9M
   ya.r = 10126;
253
77.9M
   ya.i = -31164;
254
77.9M
   yb.r = -26510;
255
77.9M
   yb.i = -19261;
256
#else
257
   ya = st->twiddles[fstride*m];
258
   yb = st->twiddles[fstride*2*m];
259
#endif
260
77.9M
   tw=st->twiddles;
261
262
155M
   for (i=0;i<N;i++)
263
77.9M
   {
264
77.9M
      Fout = Fout_beg + i*mm;
265
77.9M
      Fout0=Fout;
266
77.9M
      Fout1=Fout0+m;
267
77.9M
      Fout2=Fout0+2*m;
268
77.9M
      Fout3=Fout0+3*m;
269
77.9M
      Fout4=Fout0+4*m;
270
271
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
272
2.16G
      for ( u=0; u<m; ++u ) {
273
2.09G
         scratch[0] = *Fout0;
274
275
2.09G
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
276
2.09G
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
277
2.09G
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
278
2.09G
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
279
280
2.09G
         C_ADD( scratch[7],scratch[1],scratch[4]);
281
2.09G
         C_SUB( scratch[10],scratch[1],scratch[4]);
282
2.09G
         C_ADD( scratch[8],scratch[2],scratch[3]);
283
2.09G
         C_SUB( scratch[9],scratch[2],scratch[3]);
284
285
2.09G
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
286
2.09G
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
287
288
2.09G
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
289
2.09G
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
290
291
2.09G
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
292
2.09G
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
293
294
2.09G
         C_SUB(*Fout1,scratch[5],scratch[6]);
295
2.09G
         C_ADD(*Fout4,scratch[5],scratch[6]);
296
297
2.09G
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
298
2.09G
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
299
2.09G
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
300
2.09G
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
301
302
2.09G
         C_ADD(*Fout2,scratch[11],scratch[12]);
303
2.09G
         C_SUB(*Fout3,scratch[11],scratch[12]);
304
305
2.09G
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
306
2.09G
      }
307
77.9M
   }
308
77.9M
}
309
#endif /* OVERRIDE_kf_bfly5 */
310
311
312
#endif
313
314
315
#ifdef CUSTOM_MODES
316
317
static
318
void compute_bitrev_table(
319
         int Fout,
320
         opus_int16 *f,
321
         const size_t fstride,
322
         int in_stride,
323
         opus_int16 * factors,
324
         const kiss_fft_state *st
325
            )
326
{
327
   const int p=*factors++; /* the radix  */
328
   const int m=*factors++; /* stage's fft length/p */
329
330
    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
331
   if (m==1)
332
   {
333
      int j;
334
      for (j=0;j<p;j++)
335
      {
336
         *f = Fout+j;
337
         f += fstride*in_stride;
338
      }
339
   } else {
340
      int j;
341
      for (j=0;j<p;j++)
342
      {
343
         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
344
         f += fstride*in_stride;
345
         Fout += m;
346
      }
347
   }
348
}
349
350
/*  facbuf is populated by p1,m1,p2,m2, ...
351
    where
352
    p[i] * m[i] = m[i-1]
353
    m0 = n                  */
354
static
355
int kf_factor(int n,opus_int16 * facbuf)
356
{
357
    int p=4;
358
    int i;
359
    int stages=0;
360
    int nbak = n;
361
362
    /*factor out powers of 4, powers of 2, then any remaining primes */
363
    do {
364
        while (n % p) {
365
            switch (p) {
366
                case 4: p = 2; break;
367
                case 2: p = 3; break;
368
                default: p += 2; break;
369
            }
370
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
371
                p = n;          /* no more factors, skip to end */
372
        }
373
        n /= p;
374
#ifdef RADIX_TWO_ONLY
375
        if (p!=2 && p != 4)
376
#else
377
        if (p>5)
378
#endif
379
        {
380
           return 0;
381
        }
382
        facbuf[2*stages] = p;
383
        if (p==2 && stages > 1)
384
        {
385
           facbuf[2*stages] = 4;
386
           facbuf[2] = 2;
387
        }
388
        stages++;
389
    } while (n > 1);
390
    n = nbak;
391
    /* Reverse the order to get the radix 4 at the end, so we can use the
392
       fast degenerate case. It turns out that reversing the order also
393
       improves the noise behaviour. */
394
    for (i=0;i<stages/2;i++)
395
    {
396
       int tmp;
397
       tmp = facbuf[2*i];
398
       facbuf[2*i] = facbuf[2*(stages-i-1)];
399
       facbuf[2*(stages-i-1)] = tmp;
400
    }
401
    for (i=0;i<stages;i++)
402
    {
403
        n /= facbuf[2*i];
404
        facbuf[2*i+1] = n;
405
    }
406
    return 1;
407
}
408
409
static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
410
{
411
   int i;
412
#ifdef FIXED_POINT
413
   for (i=0;i<nfft;++i) {
414
      opus_val32 phase = -i;
415
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
416
   }
417
#else
418
   for (i=0;i<nfft;++i) {
419
      const double pi=3.14159265358979323846264338327;
420
      double phase = ( -2*pi /nfft ) * i;
421
      kf_cexp(twiddles+i, phase );
422
   }
423
#endif
424
}
425
426
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
427
   (void)st;
428
   return 0;
429
}
430
431
/*
432
 *
433
 * Allocates all necessary storage space for the fft and ifft.
434
 * The return value is a contiguous block of memory.  As such,
435
 * It can be freed with free().
436
 * */
437
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
438
                                        const kiss_fft_state *base, int arch)
439
{
440
    kiss_fft_state *st=NULL;
441
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
442
443
    if ( lenmem==NULL ) {
444
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
445
    }else{
446
        if (mem != NULL && *lenmem >= memneeded)
447
            st = (kiss_fft_state*)mem;
448
        *lenmem = memneeded;
449
    }
450
    if (st) {
451
        opus_int16 *bitrev;
452
        kiss_twiddle_cpx *twiddles;
453
454
        st->nfft=nfft;
455
#ifdef FIXED_POINT
456
        st->scale_shift = celt_ilog2(st->nfft);
457
        if (st->nfft == 1<<st->scale_shift)
458
           st->scale = Q15ONE;
459
        else
460
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
461
#else
462
        st->scale = 1.f/nfft;
463
#endif
464
        if (base != NULL)
465
        {
466
           st->twiddles = base->twiddles;
467
           st->shift = 0;
468
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
469
              st->shift++;
470
           if (st->shift>=32)
471
              goto fail;
472
        } else {
473
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
474
           compute_twiddles(twiddles, nfft);
475
           st->shift = -1;
476
        }
477
        if (!kf_factor(nfft,st->factors))
478
        {
479
           goto fail;
480
        }
481
482
        /* bitrev */
483
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
484
        if (st->bitrev==NULL)
485
            goto fail;
486
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
487
488
        /* Initialize architecture specific fft parameters */
489
        if (opus_fft_alloc_arch(st, arch))
490
            goto fail;
491
    }
492
    return st;
493
fail:
494
    opus_fft_free(st, arch);
495
    return NULL;
496
}
497
498
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
499
{
500
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
501
}
502
503
void opus_fft_free_arch_c(kiss_fft_state *st) {
504
   (void)st;
505
}
506
507
void opus_fft_free(const kiss_fft_state *cfg, int arch)
508
{
509
   if (cfg)
510
   {
511
      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
512
      opus_free((opus_int16*)cfg->bitrev);
513
      if (cfg->shift < 0)
514
         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
515
      opus_free((kiss_fft_state*)cfg);
516
   }
517
}
518
519
#endif /* CUSTOM_MODES */
520
521
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
522
77.9M
{
523
77.9M
    int m2, m;
524
77.9M
    int p;
525
77.9M
    int L;
526
77.9M
    int fstride[MAXFACTORS];
527
77.9M
    int i;
528
77.9M
    int shift;
529
530
    /* st->shift can be -1 */
531
77.9M
    shift = st->shift>0 ? st->shift : 0;
532
533
77.9M
    fstride[0] = 1;
534
77.9M
    L=0;
535
272M
    do {
536
272M
       p = st->factors[2*L];
537
272M
       m = st->factors[2*L+1];
538
272M
       fstride[L+1] = fstride[L]*p;
539
272M
       L++;
540
272M
    } while(m!=1);
541
77.9M
    m = st->factors[2*L-1];
542
350M
    for (i=L-1;i>=0;i--)
543
272M
    {
544
272M
       if (i!=0)
545
194M
          m2 = st->factors[2*i-1];
546
77.9M
       else
547
77.9M
          m2 = 1;
548
272M
       switch (st->factors[2*i])
549
272M
       {
550
24.5M
       case 2:
551
24.5M
          kf_bfly2(fout, m, fstride[i]);
552
24.5M
          break;
553
92.0M
       case 4:
554
92.0M
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
555
92.0M
          break;
556
0
 #ifndef RADIX_TWO_ONLY
557
77.9M
       case 3:
558
77.9M
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
559
77.9M
          break;
560
77.9M
       case 5:
561
77.9M
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
562
77.9M
          break;
563
272M
 #endif
564
272M
       }
565
272M
       m = m2;
566
272M
    }
567
77.9M
}
568
569
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
570
680k
{
571
680k
   int i;
572
680k
   opus_val16 scale;
573
#ifdef FIXED_POINT
574
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
575
      MULT16_32_Q15() on ARM. */
576
   int scale_shift = st->scale_shift-1;
577
#endif
578
680k
   scale = st->scale;
579
580
680k
   celt_assert2 (fin != fout, "In-place FFT not supported");
581
   /* Bit-reverse the input */
582
327M
   for (i=0;i<st->nfft;i++)
583
326M
   {
584
326M
      kiss_fft_cpx x = fin[i];
585
326M
      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
586
326M
      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
587
326M
   }
588
680k
   opus_fft_impl(st, fout);
589
680k
}
opus_fft_c
Line
Count
Source
570
340k
{
571
340k
   int i;
572
340k
   opus_val16 scale;
573
#ifdef FIXED_POINT
574
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
575
      MULT16_32_Q15() on ARM. */
576
   int scale_shift = st->scale_shift-1;
577
#endif
578
340k
   scale = st->scale;
579
580
340k
   celt_assert2 (fin != fout, "In-place FFT not supported");
581
   /* Bit-reverse the input */
582
163M
   for (i=0;i<st->nfft;i++)
583
163M
   {
584
163M
      kiss_fft_cpx x = fin[i];
585
163M
      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
586
163M
      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
587
163M
   }
588
340k
   opus_fft_impl(st, fout);
589
340k
}
opus_fft_c
Line
Count
Source
570
340k
{
571
340k
   int i;
572
340k
   opus_val16 scale;
573
340k
#ifdef FIXED_POINT
574
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
575
      MULT16_32_Q15() on ARM. */
576
340k
   int scale_shift = st->scale_shift-1;
577
340k
#endif
578
340k
   scale = st->scale;
579
580
340k
   celt_assert2 (fin != fout, "In-place FFT not supported");
581
   /* Bit-reverse the input */
582
163M
   for (i=0;i<st->nfft;i++)
583
163M
   {
584
163M
      kiss_fft_cpx x = fin[i];
585
163M
      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
586
163M
      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
587
163M
   }
588
340k
   opus_fft_impl(st, fout);
589
340k
}
590
591
592
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
593
0
{
594
0
   int i;
595
0
   celt_assert2 (fin != fout, "In-place FFT not supported");
596
   /* Bit-reverse the input */
597
0
   for (i=0;i<st->nfft;i++)
598
0
      fout[st->bitrev[i]] = fin[i];
599
0
   for (i=0;i<st->nfft;i++)
600
0
      fout[i].i = -fout[i].i;
601
0
   opus_fft_impl(st, fout);
602
0
   for (i=0;i<st->nfft;i++)
603
0
      fout[i].i = -fout[i].i;
604
0
}