Coverage Report

Created: 2026-06-30 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opus/celt/kiss_fft.c
Line
Count
Source
1
/*Copyright (c) 2003-2004, Mark Borgerding
2
  Lots of modifications by Jean-Marc Valin
3
  Copyright (c) 2005-2007, Xiph.Org Foundation
4
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
5
6
  All rights reserved.
7
8
  Redistribution and use in source and binary forms, with or without
9
   modification, are permitted provided that the following conditions are met:
10
11
    * Redistributions of source code must retain the above copyright notice,
12
       this list of conditions and the following disclaimer.
13
    * Redistributions in binary form must reproduce the above copyright notice,
14
       this list of conditions and the following disclaimer in the
15
       documentation and/or other materials provided with the distribution.
16
17
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
  POSSIBILITY OF SUCH DAMAGE.*/
28
29
/* This code is originally from Mark Borgerding's KISS-FFT but has been
30
   heavily modified to better suit Opus */
31
32
#ifndef SKIP_CONFIG_H
33
#  ifdef HAVE_CONFIG_H
34
#    include "config.h"
35
#  endif
36
#endif
37
38
#include "_kiss_fft_guts.h"
39
#include "arch.h"
40
#include "os_support.h"
41
#include "mathops.h"
42
#include "stack_alloc.h"
43
44
#ifndef M_PI
45
#define M_PI 3.141592653
46
#endif
47
48
/* The guts header contains all the multiplication and addition macros that are defined for
49
   complex numbers.  It also declares the kf_ internal functions.
50
*/
51
52
static void kf_bfly2(
53
                     kiss_fft_cpx * Fout,
54
                     int m,
55
                     int N
56
                    )
57
391M
{
58
391M
   kiss_fft_cpx * Fout2;
59
391M
   int i;
60
391M
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
391M
   {
77
391M
      celt_coef tw;
78
391M
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
391M
      celt_assert(m==4);
81
14.6G
      for (i=0;i<N;i++)
82
14.3G
      {
83
14.3G
         kiss_fft_cpx t;
84
14.3G
         Fout2 = Fout + 4;
85
14.3G
         t = Fout2[0];
86
14.3G
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
14.3G
         C_ADDTO( Fout[0] ,  t );
88
89
14.3G
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
14.3G
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
14.3G
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
14.3G
         C_ADDTO( Fout[1] ,  t );
93
94
14.3G
         t.r = Fout2[2].i;
95
14.3G
         t.i = NEG32_ovflw(Fout2[2].r);
96
14.3G
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
14.3G
         C_ADDTO( Fout[2] ,  t );
98
99
14.3G
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
14.3G
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
14.3G
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
14.3G
         C_ADDTO( Fout[3] ,  t );
103
14.3G
         Fout += 8;
104
14.3G
      }
105
391M
   }
106
391M
}
kiss_fft.c:kf_bfly2
Line
Count
Source
57
130M
{
58
130M
   kiss_fft_cpx * Fout2;
59
130M
   int i;
60
130M
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
130M
   {
77
130M
      celt_coef tw;
78
130M
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
130M
      celt_assert(m==4);
81
4.89G
      for (i=0;i<N;i++)
82
4.76G
      {
83
4.76G
         kiss_fft_cpx t;
84
4.76G
         Fout2 = Fout + 4;
85
4.76G
         t = Fout2[0];
86
4.76G
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
4.76G
         C_ADDTO( Fout[0] ,  t );
88
89
4.76G
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
4.76G
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
4.76G
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
4.76G
         C_ADDTO( Fout[1] ,  t );
93
94
4.76G
         t.r = Fout2[2].i;
95
4.76G
         t.i = NEG32_ovflw(Fout2[2].r);
96
4.76G
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
4.76G
         C_ADDTO( Fout[2] ,  t );
98
99
4.76G
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
4.76G
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
4.76G
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
4.76G
         C_ADDTO( Fout[3] ,  t );
103
4.76G
         Fout += 8;
104
4.76G
      }
105
130M
   }
106
130M
}
kiss_fft.c:kf_bfly2
Line
Count
Source
57
130M
{
58
130M
   kiss_fft_cpx * Fout2;
59
130M
   int i;
60
130M
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
130M
   {
77
130M
      celt_coef tw;
78
130M
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
130M
      celt_assert(m==4);
81
4.89G
      for (i=0;i<N;i++)
82
4.76G
      {
83
4.76G
         kiss_fft_cpx t;
84
4.76G
         Fout2 = Fout + 4;
85
4.76G
         t = Fout2[0];
86
4.76G
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
4.76G
         C_ADDTO( Fout[0] ,  t );
88
89
4.76G
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
4.76G
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
4.76G
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
4.76G
         C_ADDTO( Fout[1] ,  t );
93
94
4.76G
         t.r = Fout2[2].i;
95
4.76G
         t.i = NEG32_ovflw(Fout2[2].r);
96
4.76G
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
4.76G
         C_ADDTO( Fout[2] ,  t );
98
99
4.76G
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
4.76G
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
4.76G
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
4.76G
         C_ADDTO( Fout[3] ,  t );
103
4.76G
         Fout += 8;
104
4.76G
      }
105
130M
   }
106
130M
}
kiss_fft.c:kf_bfly2
Line
Count
Source
57
130M
{
58
130M
   kiss_fft_cpx * Fout2;
59
130M
   int i;
60
130M
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
130M
   {
77
130M
      celt_coef tw;
78
130M
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
130M
      celt_assert(m==4);
81
4.89G
      for (i=0;i<N;i++)
82
4.76G
      {
83
4.76G
         kiss_fft_cpx t;
84
4.76G
         Fout2 = Fout + 4;
85
4.76G
         t = Fout2[0];
86
4.76G
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
4.76G
         C_ADDTO( Fout[0] ,  t );
88
89
4.76G
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
4.76G
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
4.76G
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
4.76G
         C_ADDTO( Fout[1] ,  t );
93
94
4.76G
         t.r = Fout2[2].i;
95
4.76G
         t.i = NEG32_ovflw(Fout2[2].r);
96
4.76G
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
4.76G
         C_ADDTO( Fout[2] ,  t );
98
99
4.76G
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
4.76G
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
4.76G
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
4.76G
         C_ADDTO( Fout[3] ,  t );
103
4.76G
         Fout += 8;
104
4.76G
      }
105
130M
   }
106
130M
}
107
108
static void kf_bfly4(
109
                     kiss_fft_cpx * Fout,
110
                     const size_t fstride,
111
                     const kiss_fft_state *st,
112
                     int m,
113
                     int N,
114
                     int mm
115
                    )
116
893M
{
117
893M
   int i;
118
119
893M
   if (m==1)
120
689M
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
31.5G
      for (i=0;i<N;i++)
123
30.8G
      {
124
30.8G
         kiss_fft_cpx scratch0, scratch1;
125
126
30.8G
         C_SUB( scratch0 , *Fout, Fout[2] );
127
30.8G
         C_ADDTO(*Fout, Fout[2]);
128
30.8G
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
30.8G
         C_SUB( Fout[2], *Fout, scratch1 );
130
30.8G
         C_ADDTO( *Fout , scratch1 );
131
30.8G
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
30.8G
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
30.8G
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
30.8G
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
30.8G
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
30.8G
         Fout+=4;
138
30.8G
      }
139
689M
   } else {
140
203M
      int j;
141
203M
      kiss_fft_cpx scratch[6];
142
203M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
203M
      const int m2=2*m;
144
203M
      const int m3=3*m;
145
203M
      kiss_fft_cpx * Fout_beg = Fout;
146
3.85G
      for (i=0;i<N;i++)
147
3.64G
      {
148
3.64G
         Fout = Fout_beg + i*mm;
149
3.64G
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
28.0G
         for (j=0;j<m;j++)
152
24.4G
         {
153
24.4G
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
24.4G
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
24.4G
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
24.4G
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
24.4G
            C_ADDTO(*Fout, scratch[1]);
159
24.4G
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
24.4G
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
24.4G
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
24.4G
            tw1 += fstride;
163
24.4G
            tw2 += fstride*2;
164
24.4G
            tw3 += fstride*3;
165
24.4G
            C_ADDTO( *Fout , scratch[3] );
166
167
24.4G
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
24.4G
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
24.4G
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
24.4G
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
24.4G
            ++Fout;
172
24.4G
         }
173
3.64G
      }
174
203M
   }
175
893M
}
kiss_fft.c:kf_bfly4
Line
Count
Source
116
446M
{
117
446M
   int i;
118
119
446M
   if (m==1)
120
344M
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
15.7G
      for (i=0;i<N;i++)
123
15.4G
      {
124
15.4G
         kiss_fft_cpx scratch0, scratch1;
125
126
15.4G
         C_SUB( scratch0 , *Fout, Fout[2] );
127
15.4G
         C_ADDTO(*Fout, Fout[2]);
128
15.4G
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
15.4G
         C_SUB( Fout[2], *Fout, scratch1 );
130
15.4G
         C_ADDTO( *Fout , scratch1 );
131
15.4G
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
15.4G
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
15.4G
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
15.4G
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
15.4G
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
15.4G
         Fout+=4;
138
15.4G
      }
139
344M
   } else {
140
101M
      int j;
141
101M
      kiss_fft_cpx scratch[6];
142
101M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
101M
      const int m2=2*m;
144
101M
      const int m3=3*m;
145
101M
      kiss_fft_cpx * Fout_beg = Fout;
146
1.92G
      for (i=0;i<N;i++)
147
1.82G
      {
148
1.82G
         Fout = Fout_beg + i*mm;
149
1.82G
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
14.0G
         for (j=0;j<m;j++)
152
12.2G
         {
153
12.2G
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
12.2G
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
12.2G
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
12.2G
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
12.2G
            C_ADDTO(*Fout, scratch[1]);
159
12.2G
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
12.2G
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
12.2G
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
12.2G
            tw1 += fstride;
163
12.2G
            tw2 += fstride*2;
164
12.2G
            tw3 += fstride*3;
165
12.2G
            C_ADDTO( *Fout , scratch[3] );
166
167
12.2G
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
12.2G
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
12.2G
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
12.2G
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
12.2G
            ++Fout;
172
12.2G
         }
173
1.82G
      }
174
101M
   }
175
446M
}
kiss_fft.c:kf_bfly4
Line
Count
Source
116
446M
{
117
446M
   int i;
118
119
446M
   if (m==1)
120
344M
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
15.7G
      for (i=0;i<N;i++)
123
15.4G
      {
124
15.4G
         kiss_fft_cpx scratch0, scratch1;
125
126
15.4G
         C_SUB( scratch0 , *Fout, Fout[2] );
127
15.4G
         C_ADDTO(*Fout, Fout[2]);
128
15.4G
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
15.4G
         C_SUB( Fout[2], *Fout, scratch1 );
130
15.4G
         C_ADDTO( *Fout , scratch1 );
131
15.4G
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
15.4G
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
15.4G
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
15.4G
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
15.4G
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
15.4G
         Fout+=4;
138
15.4G
      }
139
344M
   } else {
140
101M
      int j;
141
101M
      kiss_fft_cpx scratch[6];
142
101M
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
101M
      const int m2=2*m;
144
101M
      const int m3=3*m;
145
101M
      kiss_fft_cpx * Fout_beg = Fout;
146
1.92G
      for (i=0;i<N;i++)
147
1.82G
      {
148
1.82G
         Fout = Fout_beg + i*mm;
149
1.82G
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
14.0G
         for (j=0;j<m;j++)
152
12.2G
         {
153
12.2G
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
12.2G
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
12.2G
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
12.2G
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
12.2G
            C_ADDTO(*Fout, scratch[1]);
159
12.2G
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
12.2G
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
12.2G
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
12.2G
            tw1 += fstride;
163
12.2G
            tw2 += fstride*2;
164
12.2G
            tw3 += fstride*3;
165
12.2G
            C_ADDTO( *Fout , scratch[3] );
166
167
12.2G
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
12.2G
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
12.2G
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
12.2G
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
12.2G
            ++Fout;
172
12.2G
         }
173
1.82G
      }
174
101M
   }
175
446M
}
176
177
178
#ifndef RADIX_TWO_ONLY
179
180
static void kf_bfly3(
181
                     kiss_fft_cpx * Fout,
182
                     const size_t fstride,
183
                     const kiss_fft_state *st,
184
                     int m,
185
                     int N,
186
                     int mm
187
                    )
188
1.03G
{
189
1.03G
   int i;
190
1.03G
   size_t k;
191
1.03G
   const size_t m2 = 2*m;
192
1.03G
   const kiss_twiddle_cpx *tw1,*tw2;
193
1.03G
   kiss_fft_cpx scratch[5];
194
1.03G
   kiss_twiddle_cpx epi3;
195
196
1.03G
   kiss_fft_cpx * Fout_beg = Fout;
197
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
689M
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
   epi3 = st->twiddles[fstride*m];
202
#endif
203
6.20G
   for (i=0;i<N;i++)
204
5.17G
   {
205
5.17G
      Fout = Fout_beg + i*mm;
206
5.17G
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
5.17G
      k=m;
209
61.6G
      do {
210
211
61.6G
         C_MUL(scratch[1],Fout[m] , *tw1);
212
61.6G
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
61.6G
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
61.6G
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
61.6G
         tw1 += fstride;
217
61.6G
         tw2 += fstride*2;
218
219
61.6G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
61.6G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
61.6G
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
61.6G
         C_ADDTO(*Fout,scratch[3]);
225
226
61.6G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
61.6G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
61.6G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
61.6G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
61.6G
         ++Fout;
233
61.6G
      } while(--k);
234
5.17G
   }
235
1.03G
}
kiss_fft.c:kf_bfly3
Line
Count
Source
188
344M
{
189
344M
   int i;
190
344M
   size_t k;
191
344M
   const size_t m2 = 2*m;
192
344M
   const kiss_twiddle_cpx *tw1,*tw2;
193
344M
   kiss_fft_cpx scratch[5];
194
344M
   kiss_twiddle_cpx epi3;
195
196
344M
   kiss_fft_cpx * Fout_beg = Fout;
197
344M
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
344M
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
   epi3 = st->twiddles[fstride*m];
202
#endif
203
2.06G
   for (i=0;i<N;i++)
204
1.72G
   {
205
1.72G
      Fout = Fout_beg + i*mm;
206
1.72G
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
1.72G
      k=m;
209
20.5G
      do {
210
211
20.5G
         C_MUL(scratch[1],Fout[m] , *tw1);
212
20.5G
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
20.5G
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
20.5G
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
20.5G
         tw1 += fstride;
217
20.5G
         tw2 += fstride*2;
218
219
20.5G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
20.5G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
20.5G
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
20.5G
         C_ADDTO(*Fout,scratch[3]);
225
226
20.5G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
20.5G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
20.5G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
20.5G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
20.5G
         ++Fout;
233
20.5G
      } while(--k);
234
1.72G
   }
235
344M
}
kiss_fft.c:kf_bfly3
Line
Count
Source
188
344M
{
189
344M
   int i;
190
344M
   size_t k;
191
344M
   const size_t m2 = 2*m;
192
344M
   const kiss_twiddle_cpx *tw1,*tw2;
193
344M
   kiss_fft_cpx scratch[5];
194
344M
   kiss_twiddle_cpx epi3;
195
196
344M
   kiss_fft_cpx * Fout_beg = Fout;
197
344M
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
344M
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
   epi3 = st->twiddles[fstride*m];
202
#endif
203
2.06G
   for (i=0;i<N;i++)
204
1.72G
   {
205
1.72G
      Fout = Fout_beg + i*mm;
206
1.72G
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
1.72G
      k=m;
209
20.5G
      do {
210
211
20.5G
         C_MUL(scratch[1],Fout[m] , *tw1);
212
20.5G
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
20.5G
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
20.5G
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
20.5G
         tw1 += fstride;
217
20.5G
         tw2 += fstride*2;
218
219
20.5G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
20.5G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
20.5G
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
20.5G
         C_ADDTO(*Fout,scratch[3]);
225
226
20.5G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
20.5G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
20.5G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
20.5G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
20.5G
         ++Fout;
233
20.5G
      } while(--k);
234
1.72G
   }
235
344M
}
kiss_fft.c:kf_bfly3
Line
Count
Source
188
344M
{
189
344M
   int i;
190
344M
   size_t k;
191
344M
   const size_t m2 = 2*m;
192
344M
   const kiss_twiddle_cpx *tw1,*tw2;
193
344M
   kiss_fft_cpx scratch[5];
194
344M
   kiss_twiddle_cpx epi3;
195
196
344M
   kiss_fft_cpx * Fout_beg = Fout;
197
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
344M
   epi3 = st->twiddles[fstride*m];
202
344M
#endif
203
2.06G
   for (i=0;i<N;i++)
204
1.72G
   {
205
1.72G
      Fout = Fout_beg + i*mm;
206
1.72G
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
1.72G
      k=m;
209
20.5G
      do {
210
211
20.5G
         C_MUL(scratch[1],Fout[m] , *tw1);
212
20.5G
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
20.5G
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
20.5G
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
20.5G
         tw1 += fstride;
217
20.5G
         tw2 += fstride*2;
218
219
20.5G
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
20.5G
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
20.5G
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
20.5G
         C_ADDTO(*Fout,scratch[3]);
225
226
20.5G
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
20.5G
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
20.5G
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
20.5G
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
20.5G
         ++Fout;
233
20.5G
      } while(--k);
234
1.72G
   }
235
344M
}
236
237
238
#ifndef OVERRIDE_kf_bfly5
239
static void kf_bfly5(
240
                     kiss_fft_cpx * Fout,
241
                     const size_t fstride,
242
                     const kiss_fft_state *st,
243
                     int m,
244
                     int N,
245
                     int mm
246
                    )
247
689M
{
248
689M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
689M
   int i, u;
250
689M
   kiss_fft_cpx scratch[13];
251
689M
   const kiss_twiddle_cpx *tw;
252
689M
   kiss_twiddle_cpx ya,yb;
253
689M
   kiss_fft_cpx * Fout_beg = Fout;
254
255
#ifdef FIXED_POINT
256
344M
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
344M
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
344M
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
344M
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
   ya = st->twiddles[fstride*m];
262
   yb = st->twiddles[fstride*2*m];
263
#endif
264
689M
   tw=st->twiddles;
265
266
1.37G
   for (i=0;i<N;i++)
267
689M
   {
268
689M
      Fout = Fout_beg + i*mm;
269
689M
      Fout0=Fout;
270
689M
      Fout1=Fout0+m;
271
689M
      Fout2=Fout0+2*m;
272
689M
      Fout3=Fout0+3*m;
273
689M
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
25.3G
      for ( u=0; u<m; ++u ) {
277
24.6G
         scratch[0] = *Fout0;
278
279
24.6G
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
24.6G
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
24.6G
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
24.6G
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
24.6G
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
24.6G
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
24.6G
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
24.6G
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
24.6G
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
24.6G
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
24.6G
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
24.6G
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
24.6G
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
24.6G
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
24.6G
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
24.6G
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
24.6G
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
24.6G
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
24.6G
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
24.6G
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
24.6G
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
24.6G
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
24.6G
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
24.6G
      }
311
689M
   }
312
689M
}
kiss_fft.c:kf_bfly5
Line
Count
Source
247
344M
{
248
344M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
344M
   int i, u;
250
344M
   kiss_fft_cpx scratch[13];
251
344M
   const kiss_twiddle_cpx *tw;
252
344M
   kiss_twiddle_cpx ya,yb;
253
344M
   kiss_fft_cpx * Fout_beg = Fout;
254
255
344M
#ifdef FIXED_POINT
256
344M
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
344M
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
344M
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
344M
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
   ya = st->twiddles[fstride*m];
262
   yb = st->twiddles[fstride*2*m];
263
#endif
264
344M
   tw=st->twiddles;
265
266
689M
   for (i=0;i<N;i++)
267
344M
   {
268
344M
      Fout = Fout_beg + i*mm;
269
344M
      Fout0=Fout;
270
344M
      Fout1=Fout0+m;
271
344M
      Fout2=Fout0+2*m;
272
344M
      Fout3=Fout0+3*m;
273
344M
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
12.6G
      for ( u=0; u<m; ++u ) {
277
12.3G
         scratch[0] = *Fout0;
278
279
12.3G
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
12.3G
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
12.3G
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
12.3G
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
12.3G
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
12.3G
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
12.3G
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
12.3G
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
12.3G
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
12.3G
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
12.3G
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
12.3G
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
12.3G
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
12.3G
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
12.3G
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
12.3G
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
12.3G
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
12.3G
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
12.3G
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
12.3G
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
12.3G
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
12.3G
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
12.3G
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
12.3G
      }
311
344M
   }
312
344M
}
kiss_fft.c:kf_bfly5
Line
Count
Source
247
344M
{
248
344M
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
344M
   int i, u;
250
344M
   kiss_fft_cpx scratch[13];
251
344M
   const kiss_twiddle_cpx *tw;
252
344M
   kiss_twiddle_cpx ya,yb;
253
344M
   kiss_fft_cpx * Fout_beg = Fout;
254
255
#ifdef FIXED_POINT
256
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
344M
   ya = st->twiddles[fstride*m];
262
344M
   yb = st->twiddles[fstride*2*m];
263
344M
#endif
264
344M
   tw=st->twiddles;
265
266
689M
   for (i=0;i<N;i++)
267
344M
   {
268
344M
      Fout = Fout_beg + i*mm;
269
344M
      Fout0=Fout;
270
344M
      Fout1=Fout0+m;
271
344M
      Fout2=Fout0+2*m;
272
344M
      Fout3=Fout0+3*m;
273
344M
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
12.6G
      for ( u=0; u<m; ++u ) {
277
12.3G
         scratch[0] = *Fout0;
278
279
12.3G
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
12.3G
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
12.3G
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
12.3G
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
12.3G
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
12.3G
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
12.3G
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
12.3G
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
12.3G
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
12.3G
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
12.3G
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
12.3G
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
12.3G
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
12.3G
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
12.3G
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
12.3G
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
12.3G
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
12.3G
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
12.3G
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
12.3G
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
12.3G
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
12.3G
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
12.3G
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
12.3G
      }
311
344M
   }
312
344M
}
313
#endif /* OVERRIDE_kf_bfly5 */
314
315
316
#endif
317
318
319
#ifdef CUSTOM_MODES
320
321
static
322
void compute_bitrev_table(
323
         int Fout,
324
         opus_int16 *f,
325
         const size_t fstride,
326
         int in_stride,
327
         opus_int16 * factors,
328
         const kiss_fft_state *st
329
            )
330
{
331
   const int p=*factors++; /* the radix  */
332
   const int m=*factors++; /* stage's fft length/p */
333
334
    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
335
   if (m==1)
336
   {
337
      int j;
338
      for (j=0;j<p;j++)
339
      {
340
         *f = Fout+j;
341
         f += fstride*in_stride;
342
      }
343
   } else {
344
      int j;
345
      for (j=0;j<p;j++)
346
      {
347
         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
348
         f += fstride*in_stride;
349
         Fout += m;
350
      }
351
   }
352
}
353
354
/*  facbuf is populated by p1,m1,p2,m2, ...
355
    where
356
    p[i] * m[i] = m[i-1]
357
    m0 = n                  */
358
static
359
int kf_factor(int n,opus_int16 * facbuf)
360
{
361
    int p=4;
362
    int i;
363
    int stages=0;
364
    int nbak = n;
365
366
    /*factor out powers of 4, powers of 2, then any remaining primes */
367
    do {
368
        while (n % p) {
369
            switch (p) {
370
                case 4: p = 2; break;
371
                case 2: p = 3; break;
372
                default: p += 2; break;
373
            }
374
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
375
                p = n;          /* no more factors, skip to end */
376
        }
377
        n /= p;
378
#ifdef RADIX_TWO_ONLY
379
        if (p!=2 && p != 4)
380
#else
381
        if (p>5)
382
#endif
383
        {
384
           return 0;
385
        }
386
        facbuf[2*stages] = p;
387
        if (p==2 && stages > 1)
388
        {
389
           facbuf[2*stages] = 4;
390
           facbuf[2] = 2;
391
        }
392
        stages++;
393
    } while (n > 1);
394
    n = nbak;
395
    /* Reverse the order to get the radix 4 at the end, so we can use the
396
       fast degenerate case. It turns out that reversing the order also
397
       improves the noise behaviour. */
398
    for (i=0;i<stages/2;i++)
399
    {
400
       int tmp;
401
       tmp = facbuf[2*i];
402
       facbuf[2*i] = facbuf[2*(stages-i-1)];
403
       facbuf[2*(stages-i-1)] = tmp;
404
    }
405
    for (i=0;i<stages;i++)
406
    {
407
        n /= facbuf[2*i];
408
        facbuf[2*i+1] = n;
409
    }
410
    return 1;
411
}
412
413
static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
414
{
415
   int i;
416
#ifdef FIXED_POINT
417
   for (i=0;i<nfft;++i) {
418
      opus_val32 phase = -i;
419
#ifdef ENABLE_QEXT
420
      twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
421
      twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
422
#else
423
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
424
#endif
425
   }
426
#else
427
   for (i=0;i<nfft;++i) {
428
      const double pi=3.14159265358979323846264338327;
429
      double phase = ( -2*pi /nfft ) * i;
430
      kf_cexp(twiddles+i, phase );
431
   }
432
#endif
433
}
434
435
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
436
   (void)st;
437
   return 0;
438
}
439
440
/*
441
 *
442
 * Allocates all necessary storage space for the fft and ifft.
443
 * The return value is a contiguous block of memory.  As such,
444
 * It can be freed with free().
445
 * */
446
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
447
                                        const kiss_fft_state *base, int arch)
448
{
449
    kiss_fft_state *st=NULL;
450
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
451
452
    if ( lenmem==NULL ) {
453
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
454
    }else{
455
        if (mem != NULL && *lenmem >= memneeded)
456
            st = (kiss_fft_state*)mem;
457
        *lenmem = memneeded;
458
    }
459
    if (st) {
460
        opus_int16 *bitrev;
461
        kiss_twiddle_cpx *twiddles;
462
463
        st->nfft=nfft;
464
#ifdef FIXED_POINT
465
        st->scale_shift = celt_ilog2(st->nfft);
466
# ifdef ENABLE_QEXT
467
        if (st->nfft == 1<<st->scale_shift)
468
           st->scale = QCONST32(1.0f, 30);
469
        else
470
           st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
471
# else
472
        if (st->nfft == 1<<st->scale_shift)
473
           st->scale = Q15ONE;
474
        else
475
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
476
# endif
477
#else
478
        st->scale = 1.f/nfft;
479
#endif
480
        if (base != NULL)
481
        {
482
           st->twiddles = base->twiddles;
483
           st->shift = 0;
484
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
485
              st->shift++;
486
           if (st->shift>=32)
487
              goto fail;
488
        } else {
489
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
490
           compute_twiddles(twiddles, nfft);
491
           st->shift = -1;
492
        }
493
        if (!kf_factor(nfft,st->factors))
494
        {
495
           goto fail;
496
        }
497
498
        /* bitrev */
499
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
500
        if (st->bitrev==NULL)
501
            goto fail;
502
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
503
504
        /* Initialize architecture specific fft parameters */
505
        if (opus_fft_alloc_arch(st, arch))
506
            goto fail;
507
    }
508
    return st;
509
fail:
510
    opus_fft_free(st, arch);
511
    return NULL;
512
}
513
514
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
515
{
516
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
517
}
518
519
void opus_fft_free_arch_c(kiss_fft_state *st) {
520
   (void)st;
521
}
522
523
void opus_fft_free(const kiss_fft_state *cfg, int arch)
524
{
525
   if (cfg)
526
   {
527
      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
528
      opus_free((opus_int16*)cfg->bitrev);
529
      if (cfg->shift < 0)
530
         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
531
      opus_free((kiss_fft_state*)cfg);
532
   }
533
}
534
535
#endif /* CUSTOM_MODES */
536
537
#ifdef FIXED_POINT
538
#ifndef OVERRIDE_fft_downshift
539
772M
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
540
772M
   int shift;
541
772M
   shift = IMIN(step, *total);
542
772M
   *total -= shift;
543
772M
   if (shift == 1) {
544
2.41M
      int i;
545
719M
      for (i=0;i<N;i++) {
546
716M
         x[i].r = SHR32(x[i].r, 1);
547
716M
         x[i].i = SHR32(x[i].i, 1);
548
716M
      }
549
770M
   } else if (shift>0) {
550
3.88M
      int i;
551
1.27G
      for (i=0;i<N;i++) {
552
1.27G
         x[i].r = PSHR32(x[i].r, shift);
553
1.27G
         x[i].i = PSHR32(x[i].i, shift);
554
1.27G
      }
555
3.88M
   }
556
772M
}
557
#endif /* OVERRIDE_fft_downshift */
558
#else
559
#define fft_downshift(x, N, total, step)
560
#endif
561
562
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
563
344M
{
564
344M
    int m2, m;
565
344M
    int p;
566
344M
    int L;
567
344M
    int fstride[MAXFACTORS];
568
344M
    int i;
569
344M
    int shift;
570
571
    /* st->shift can be -1 */
572
344M
    shift = st->shift>0 ? st->shift : 0;
573
574
344M
    fstride[0] = 1;
575
344M
    L=0;
576
1.26G
    do {
577
1.26G
       p = st->factors[2*L];
578
1.26G
       m = st->factors[2*L+1];
579
1.26G
       fstride[L+1] = fstride[L]*p;
580
1.26G
       L++;
581
1.26G
    } while(m!=1);
582
344M
    m = st->factors[2*L-1];
583
1.61G
    for (i=L-1;i>=0;i--)
584
1.26G
    {
585
1.26G
       if (i!=0)
586
921M
          m2 = st->factors[2*i-1];
587
344M
       else
588
344M
          m2 = 1;
589
1.26G
       switch (st->factors[2*i])
590
1.26G
       {
591
130M
       case 2:
592
130M
          fft_downshift(fout, st->nfft, &downshift, 1);
593
130M
          kf_bfly2(fout, m, fstride[i]);
594
130M
          break;
595
446M
       case 4:
596
446M
          fft_downshift(fout, st->nfft, &downshift, 2);
597
446M
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
598
446M
          break;
599
0
 #ifndef RADIX_TWO_ONLY
600
344M
       case 3:
601
344M
          fft_downshift(fout, st->nfft, &downshift, 2);
602
344M
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
603
344M
          break;
604
344M
       case 5:
605
344M
          fft_downshift(fout, st->nfft, &downshift, 3);
606
344M
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
607
344M
          break;
608
1.26G
 #endif
609
1.26G
       }
610
1.26G
       m = m2;
611
1.26G
    }
612
344M
    fft_downshift(fout, st->nfft, &downshift, downshift);
613
344M
}
614
615
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
616
4.47M
{
617
4.47M
   int i;
618
4.47M
   celt_coef scale;
619
#ifdef FIXED_POINT
620
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
621
      MULT16_32_Q15() on ARM. */
622
   int scale_shift = st->scale_shift-1;
623
#endif
624
4.47M
   scale = st->scale;
625
626
4.47M
   celt_assert2 (fin != fout, "In-place FFT not supported");
627
   /* Bit-reverse the input */
628
2.15G
   for (i=0;i<st->nfft;i++)
629
2.14G
   {
630
2.14G
      kiss_fft_cpx x = fin[i];
631
2.14G
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
632
2.14G
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
633
2.14G
   }
634
4.47M
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
635
4.47M
}
opus_fft_c
Line
Count
Source
616
1.49M
{
617
1.49M
   int i;
618
1.49M
   celt_coef scale;
619
1.49M
#ifdef FIXED_POINT
620
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
621
      MULT16_32_Q15() on ARM. */
622
1.49M
   int scale_shift = st->scale_shift-1;
623
1.49M
#endif
624
1.49M
   scale = st->scale;
625
626
1.49M
   celt_assert2 (fin != fout, "In-place FFT not supported");
627
   /* Bit-reverse the input */
628
717M
   for (i=0;i<st->nfft;i++)
629
716M
   {
630
716M
      kiss_fft_cpx x = fin[i];
631
716M
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
632
716M
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
633
716M
   }
634
1.49M
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
635
1.49M
}
opus_fft_c
Line
Count
Source
616
1.49M
{
617
1.49M
   int i;
618
1.49M
   celt_coef scale;
619
1.49M
#ifdef FIXED_POINT
620
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
621
      MULT16_32_Q15() on ARM. */
622
1.49M
   int scale_shift = st->scale_shift-1;
623
1.49M
#endif
624
1.49M
   scale = st->scale;
625
626
1.49M
   celt_assert2 (fin != fout, "In-place FFT not supported");
627
   /* Bit-reverse the input */
628
717M
   for (i=0;i<st->nfft;i++)
629
716M
   {
630
716M
      kiss_fft_cpx x = fin[i];
631
716M
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
632
716M
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
633
716M
   }
634
1.49M
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
635
1.49M
}
opus_fft_c
Line
Count
Source
616
1.49M
{
617
1.49M
   int i;
618
1.49M
   celt_coef scale;
619
#ifdef FIXED_POINT
620
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
621
      MULT16_32_Q15() on ARM. */
622
   int scale_shift = st->scale_shift-1;
623
#endif
624
1.49M
   scale = st->scale;
625
626
1.49M
   celt_assert2 (fin != fout, "In-place FFT not supported");
627
   /* Bit-reverse the input */
628
717M
   for (i=0;i<st->nfft;i++)
629
716M
   {
630
716M
      kiss_fft_cpx x = fin[i];
631
716M
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
632
716M
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
633
716M
   }
634
1.49M
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
635
1.49M
}
636
637
638
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
639
0
{
640
0
   int i;
641
0
   celt_assert2 (fin != fout, "In-place FFT not supported");
642
   /* Bit-reverse the input */
643
0
   for (i=0;i<st->nfft;i++)
644
0
      fout[st->bitrev[i]] = fin[i];
645
0
   for (i=0;i<st->nfft;i++)
646
0
      fout[i].i = -fout[i].i;
647
0
   opus_fft_impl(st, fout ARG_FIXED(0));
648
0
   for (i=0;i<st->nfft;i++)
649
0
      fout[i].i = -fout[i].i;
650
0
}
Unexecuted instantiation: opus_ifft_c
Unexecuted instantiation: opus_ifft_c