Coverage Report

Created: 2025-08-28 07:12

/src/opus/celt/kiss_fft.c
Line
Count
Source (jump to first uncovered line)
1
/*Copyright (c) 2003-2004, Mark Borgerding
2
  Lots of modifications by Jean-Marc Valin
3
  Copyright (c) 2005-2007, Xiph.Org Foundation
4
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
5
6
  All rights reserved.
7
8
  Redistribution and use in source and binary forms, with or without
9
   modification, are permitted provided that the following conditions are met:
10
11
    * Redistributions of source code must retain the above copyright notice,
12
       this list of conditions and the following disclaimer.
13
    * Redistributions in binary form must reproduce the above copyright notice,
14
       this list of conditions and the following disclaimer in the
15
       documentation and/or other materials provided with the distribution.
16
17
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
  POSSIBILITY OF SUCH DAMAGE.*/
28
29
/* This code is originally from Mark Borgerding's KISS-FFT but has been
30
   heavily modified to better suit Opus */
31
32
#ifndef SKIP_CONFIG_H
33
#  ifdef HAVE_CONFIG_H
34
#    include "config.h"
35
#  endif
36
#endif
37
38
#include "_kiss_fft_guts.h"
39
#include "arch.h"
40
#include "os_support.h"
41
#include "mathops.h"
42
#include "stack_alloc.h"
43
44
#ifndef M_PI
45
#define M_PI 3.141592653
46
#endif
47
48
/* The guts header contains all the multiplication and addition macros that are defined for
49
   complex numbers.  It also declares the kf_ internal functions.
50
*/
51
52
static void kf_bfly2(
53
                     kiss_fft_cpx * Fout,
54
                     int m,
55
                     int N
56
                    )
57
128k
{
58
128k
   kiss_fft_cpx * Fout2;
59
128k
   int i;
60
128k
   (void)m;
61
#ifdef CUSTOM_MODES
62
   if (m==1)
63
   {
64
      celt_assert(m==1);
65
      for (i=0;i<N;i++)
66
      {
67
         kiss_fft_cpx t;
68
         Fout2 = Fout + 1;
69
         t = *Fout2;
70
         C_SUB( *Fout2 ,  *Fout , t );
71
         C_ADDTO( *Fout ,  t );
72
         Fout += 2;
73
      }
74
   } else
75
#endif
76
128k
   {
77
128k
      celt_coef tw;
78
128k
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
80
128k
      celt_assert(m==4);
81
4.95M
      for (i=0;i<N;i++)
82
4.82M
      {
83
4.82M
         kiss_fft_cpx t;
84
4.82M
         Fout2 = Fout + 4;
85
4.82M
         t = Fout2[0];
86
4.82M
         C_SUB( Fout2[0] ,  Fout[0] , t );
87
4.82M
         C_ADDTO( Fout[0] ,  t );
88
89
4.82M
         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90
4.82M
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91
4.82M
         C_SUB( Fout2[1] ,  Fout[1] , t );
92
4.82M
         C_ADDTO( Fout[1] ,  t );
93
94
4.82M
         t.r = Fout2[2].i;
95
4.82M
         t.i = NEG32_ovflw(Fout2[2].r);
96
4.82M
         C_SUB( Fout2[2] ,  Fout[2] , t );
97
4.82M
         C_ADDTO( Fout[2] ,  t );
98
99
4.82M
         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100
4.82M
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101
4.82M
         C_SUB( Fout2[3] ,  Fout[3] , t );
102
4.82M
         C_ADDTO( Fout[3] ,  t );
103
4.82M
         Fout += 8;
104
4.82M
      }
105
128k
   }
106
128k
}
107
108
static void kf_bfly4(
109
                     kiss_fft_cpx * Fout,
110
                     const size_t fstride,
111
                     const kiss_fft_state *st,
112
                     int m,
113
                     int N,
114
                     int mm
115
                    )
116
689k
{
117
689k
   int i;
118
119
689k
   if (m==1)
120
424k
   {
121
      /* Degenerate case where all the twiddles are 1. */
122
23.5M
      for (i=0;i<N;i++)
123
23.1M
      {
124
23.1M
         kiss_fft_cpx scratch0, scratch1;
125
126
23.1M
         C_SUB( scratch0 , *Fout, Fout[2] );
127
23.1M
         C_ADDTO(*Fout, Fout[2]);
128
23.1M
         C_ADD( scratch1 , Fout[1] , Fout[3] );
129
23.1M
         C_SUB( Fout[2], *Fout, scratch1 );
130
23.1M
         C_ADDTO( *Fout , scratch1 );
131
23.1M
         C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133
23.1M
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134
23.1M
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135
23.1M
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136
23.1M
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137
23.1M
         Fout+=4;
138
23.1M
      }
139
424k
   } else {
140
264k
      int j;
141
264k
      kiss_fft_cpx scratch[6];
142
264k
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143
264k
      const int m2=2*m;
144
264k
      const int m3=3*m;
145
264k
      kiss_fft_cpx * Fout_beg = Fout;
146
4.23M
      for (i=0;i<N;i++)
147
3.97M
      {
148
3.97M
         Fout = Fout_beg + i*mm;
149
3.97M
         tw3 = tw2 = tw1 = st->twiddles;
150
         /* m is guaranteed to be a multiple of 4. */
151
23.7M
         for (j=0;j<m;j++)
152
19.7M
         {
153
19.7M
            C_MUL(scratch[0],Fout[m] , *tw1 );
154
19.7M
            C_MUL(scratch[1],Fout[m2] , *tw2 );
155
19.7M
            C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157
19.7M
            C_SUB( scratch[5] , *Fout, scratch[1] );
158
19.7M
            C_ADDTO(*Fout, scratch[1]);
159
19.7M
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
160
19.7M
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
161
19.7M
            C_SUB( Fout[m2], *Fout, scratch[3] );
162
19.7M
            tw1 += fstride;
163
19.7M
            tw2 += fstride*2;
164
19.7M
            tw3 += fstride*3;
165
19.7M
            C_ADDTO( *Fout , scratch[3] );
166
167
19.7M
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168
19.7M
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169
19.7M
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170
19.7M
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171
19.7M
            ++Fout;
172
19.7M
         }
173
3.97M
      }
174
264k
   }
175
689k
}
176
177
178
#ifndef RADIX_TWO_ONLY
179
180
static void kf_bfly3(
181
                     kiss_fft_cpx * Fout,
182
                     const size_t fstride,
183
                     const kiss_fft_state *st,
184
                     int m,
185
                     int N,
186
                     int mm
187
                    )
188
424k
{
189
424k
   int i;
190
424k
   size_t k;
191
424k
   const size_t m2 = 2*m;
192
424k
   const kiss_twiddle_cpx *tw1,*tw2;
193
424k
   kiss_fft_cpx scratch[5];
194
424k
   kiss_twiddle_cpx epi3;
195
196
424k
   kiss_fft_cpx * Fout_beg = Fout;
197
#ifdef FIXED_POINT
198
   /*epi3.r = -16384;*/ /* Unused */
199
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200
#else
201
424k
   epi3 = st->twiddles[fstride*m];
202
424k
#endif
203
2.54M
   for (i=0;i<N;i++)
204
2.12M
   {
205
2.12M
      Fout = Fout_beg + i*mm;
206
2.12M
      tw1=tw2=st->twiddles;
207
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
208
2.12M
      k=m;
209
30.8M
      do {
210
211
30.8M
         C_MUL(scratch[1],Fout[m] , *tw1);
212
30.8M
         C_MUL(scratch[2],Fout[m2] , *tw2);
213
214
30.8M
         C_ADD(scratch[3],scratch[1],scratch[2]);
215
30.8M
         C_SUB(scratch[0],scratch[1],scratch[2]);
216
30.8M
         tw1 += fstride;
217
30.8M
         tw2 += fstride*2;
218
219
30.8M
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220
30.8M
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222
30.8M
         C_MULBYSCALAR( scratch[0] , epi3.i );
223
224
30.8M
         C_ADDTO(*Fout,scratch[3]);
225
226
30.8M
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227
30.8M
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229
30.8M
         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230
30.8M
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232
30.8M
         ++Fout;
233
30.8M
      } while(--k);
234
2.12M
   }
235
424k
}
236
237
238
#ifndef OVERRIDE_kf_bfly5
239
static void kf_bfly5(
240
                     kiss_fft_cpx * Fout,
241
                     const size_t fstride,
242
                     const kiss_fft_state *st,
243
                     int m,
244
                     int N,
245
                     int mm
246
                    )
247
424k
{
248
424k
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249
424k
   int i, u;
250
424k
   kiss_fft_cpx scratch[13];
251
424k
   const kiss_twiddle_cpx *tw;
252
424k
   kiss_twiddle_cpx ya,yb;
253
424k
   kiss_fft_cpx * Fout_beg = Fout;
254
255
#ifdef FIXED_POINT
256
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260
#else
261
424k
   ya = st->twiddles[fstride*m];
262
424k
   yb = st->twiddles[fstride*2*m];
263
424k
#endif
264
424k
   tw=st->twiddles;
265
266
849k
   for (i=0;i<N;i++)
267
424k
   {
268
424k
      Fout = Fout_beg + i*mm;
269
424k
      Fout0=Fout;
270
424k
      Fout1=Fout0+m;
271
424k
      Fout2=Fout0+2*m;
272
424k
      Fout3=Fout0+3*m;
273
424k
      Fout4=Fout0+4*m;
274
275
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
276
18.9M
      for ( u=0; u<m; ++u ) {
277
18.4M
         scratch[0] = *Fout0;
278
279
18.4M
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280
18.4M
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281
18.4M
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282
18.4M
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284
18.4M
         C_ADD( scratch[7],scratch[1],scratch[4]);
285
18.4M
         C_SUB( scratch[10],scratch[1],scratch[4]);
286
18.4M
         C_ADD( scratch[8],scratch[2],scratch[3]);
287
18.4M
         C_SUB( scratch[9],scratch[2],scratch[3]);
288
289
18.4M
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290
18.4M
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292
18.4M
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293
18.4M
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295
18.4M
         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296
18.4M
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298
18.4M
         C_SUB(*Fout1,scratch[5],scratch[6]);
299
18.4M
         C_ADD(*Fout4,scratch[5],scratch[6]);
300
301
18.4M
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302
18.4M
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303
18.4M
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304
18.4M
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306
18.4M
         C_ADD(*Fout2,scratch[11],scratch[12]);
307
18.4M
         C_SUB(*Fout3,scratch[11],scratch[12]);
308
309
18.4M
         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310
18.4M
      }
311
424k
   }
312
424k
}
313
#endif /* OVERRIDE_kf_bfly5 */
314
315
316
#endif
317
318
319
#ifdef CUSTOM_MODES
320
321
static
322
void compute_bitrev_table(
323
         int Fout,
324
         opus_int16 *f,
325
         const size_t fstride,
326
         int in_stride,
327
         opus_int16 * factors,
328
         const kiss_fft_state *st
329
            )
330
{
331
   const int p=*factors++; /* the radix  */
332
   const int m=*factors++; /* stage's fft length/p */
333
334
    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
335
   if (m==1)
336
   {
337
      int j;
338
      for (j=0;j<p;j++)
339
      {
340
         *f = Fout+j;
341
         f += fstride*in_stride;
342
      }
343
   } else {
344
      int j;
345
      for (j=0;j<p;j++)
346
      {
347
         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
348
         f += fstride*in_stride;
349
         Fout += m;
350
      }
351
   }
352
}
353
354
/*  facbuf is populated by p1,m1,p2,m2, ...
355
    where
356
    p[i] * m[i] = m[i-1]
357
    m0 = n                  */
358
static
359
int kf_factor(int n,opus_int16 * facbuf)
360
{
361
    int p=4;
362
    int i;
363
    int stages=0;
364
    int nbak = n;
365
366
    /*factor out powers of 4, powers of 2, then any remaining primes */
367
    do {
368
        while (n % p) {
369
            switch (p) {
370
                case 4: p = 2; break;
371
                case 2: p = 3; break;
372
                default: p += 2; break;
373
            }
374
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
375
                p = n;          /* no more factors, skip to end */
376
        }
377
        n /= p;
378
#ifdef RADIX_TWO_ONLY
379
        if (p!=2 && p != 4)
380
#else
381
        if (p>5)
382
#endif
383
        {
384
           return 0;
385
        }
386
        facbuf[2*stages] = p;
387
        if (p==2 && stages > 1)
388
        {
389
           facbuf[2*stages] = 4;
390
           facbuf[2] = 2;
391
        }
392
        stages++;
393
    } while (n > 1);
394
    n = nbak;
395
    /* Reverse the order to get the radix 4 at the end, so we can use the
396
       fast degenerate case. It turns out that reversing the order also
397
       improves the noise behaviour. */
398
    for (i=0;i<stages/2;i++)
399
    {
400
       int tmp;
401
       tmp = facbuf[2*i];
402
       facbuf[2*i] = facbuf[2*(stages-i-1)];
403
       facbuf[2*(stages-i-1)] = tmp;
404
    }
405
    for (i=0;i<stages;i++)
406
    {
407
        n /= facbuf[2*i];
408
        facbuf[2*i+1] = n;
409
    }
410
    return 1;
411
}
412
413
static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
414
{
415
   int i;
416
#ifdef FIXED_POINT
417
   for (i=0;i<nfft;++i) {
418
      opus_val32 phase = -i;
419
#ifdef ENABLE_QEXT
420
      twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
421
      twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
422
#else
423
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
424
#endif
425
   }
426
#else
427
   for (i=0;i<nfft;++i) {
428
      const double pi=3.14159265358979323846264338327;
429
      double phase = ( -2*pi /nfft ) * i;
430
      kf_cexp(twiddles+i, phase );
431
   }
432
#endif
433
}
434
435
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
436
   (void)st;
437
   return 0;
438
}
439
440
/*
441
 *
442
 * Allocates all necessary storage space for the fft and ifft.
443
 * The return value is a contiguous block of memory.  As such,
444
 * It can be freed with free().
445
 * */
446
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
447
                                        const kiss_fft_state *base, int arch)
448
{
449
    kiss_fft_state *st=NULL;
450
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
451
452
    if ( lenmem==NULL ) {
453
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
454
    }else{
455
        if (mem != NULL && *lenmem >= memneeded)
456
            st = (kiss_fft_state*)mem;
457
        *lenmem = memneeded;
458
    }
459
    if (st) {
460
        opus_int16 *bitrev;
461
        kiss_twiddle_cpx *twiddles;
462
463
        st->nfft=nfft;
464
#ifdef FIXED_POINT
465
        st->scale_shift = celt_ilog2(st->nfft);
466
# ifdef ENABLE_QEXT
467
        if (st->nfft == 1<<st->scale_shift)
468
           st->scale = QCONST32(1.0f, 30);
469
        else
470
           st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
471
# else
472
        if (st->nfft == 1<<st->scale_shift)
473
           st->scale = Q15ONE;
474
        else
475
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
476
# endif
477
#else
478
        st->scale = 1.f/nfft;
479
#endif
480
        if (base != NULL)
481
        {
482
           st->twiddles = base->twiddles;
483
           st->shift = 0;
484
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
485
              st->shift++;
486
           if (st->shift>=32)
487
              goto fail;
488
        } else {
489
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
490
           compute_twiddles(twiddles, nfft);
491
           st->shift = -1;
492
        }
493
        if (!kf_factor(nfft,st->factors))
494
        {
495
           goto fail;
496
        }
497
498
        /* bitrev */
499
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
500
        if (st->bitrev==NULL)
501
            goto fail;
502
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
503
504
        /* Initialize architecture specific fft parameters */
505
        if (opus_fft_alloc_arch(st, arch))
506
            goto fail;
507
    }
508
    return st;
509
fail:
510
    opus_fft_free(st, arch);
511
    return NULL;
512
}
513
514
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
515
{
516
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
517
}
518
519
void opus_fft_free_arch_c(kiss_fft_state *st) {
520
   (void)st;
521
}
522
523
void opus_fft_free(const kiss_fft_state *cfg, int arch)
524
{
525
   if (cfg)
526
   {
527
      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
528
      opus_free((opus_int16*)cfg->bitrev);
529
      if (cfg->shift < 0)
530
         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
531
      opus_free((kiss_fft_state*)cfg);
532
   }
533
}
534
535
#endif /* CUSTOM_MODES */
536
537
#ifdef FIXED_POINT
538
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
539
   int shift;
540
   shift = IMIN(step, *total);
541
   *total -= shift;
542
   if (shift == 1) {
543
      int i;
544
      for (i=0;i<N;i++) {
545
         x[i].r = SHR32(x[i].r, 1);
546
         x[i].i = SHR32(x[i].i, 1);
547
      }
548
   } else if (shift>0) {
549
      int i;
550
      for (i=0;i<N;i++) {
551
         x[i].r = PSHR32(x[i].r, shift);
552
         x[i].i = PSHR32(x[i].i, shift);
553
      }
554
   }
555
}
556
#else
557
#define fft_downshift(x, N, total, step)
558
#endif
559
560
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
561
424k
{
562
424k
    int m2, m;
563
424k
    int p;
564
424k
    int L;
565
424k
    int fstride[MAXFACTORS];
566
424k
    int i;
567
424k
    int shift;
568
569
    /* st->shift can be -1 */
570
424k
    shift = st->shift>0 ? st->shift : 0;
571
572
424k
    fstride[0] = 1;
573
424k
    L=0;
574
1.66M
    do {
575
1.66M
       p = st->factors[2*L];
576
1.66M
       m = st->factors[2*L+1];
577
1.66M
       fstride[L+1] = fstride[L]*p;
578
1.66M
       L++;
579
1.66M
    } while(m!=1);
580
424k
    m = st->factors[2*L-1];
581
2.09M
    for (i=L-1;i>=0;i--)
582
1.66M
    {
583
1.66M
       if (i!=0)
584
1.24M
          m2 = st->factors[2*i-1];
585
424k
       else
586
424k
          m2 = 1;
587
1.66M
       switch (st->factors[2*i])
588
1.66M
       {
589
128k
       case 2:
590
128k
          fft_downshift(fout, st->nfft, &downshift, 1);
591
128k
          kf_bfly2(fout, m, fstride[i]);
592
128k
          break;
593
689k
       case 4:
594
689k
          fft_downshift(fout, st->nfft, &downshift, 2);
595
689k
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
596
689k
          break;
597
0
 #ifndef RADIX_TWO_ONLY
598
424k
       case 3:
599
424k
          fft_downshift(fout, st->nfft, &downshift, 2);
600
424k
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
601
424k
          break;
602
424k
       case 5:
603
424k
          fft_downshift(fout, st->nfft, &downshift, 3);
604
424k
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
605
424k
          break;
606
1.66M
 #endif
607
1.66M
       }
608
1.66M
       m = m2;
609
1.66M
    }
610
424k
    fft_downshift(fout, st->nfft, &downshift, downshift);
611
424k
}
612
613
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
614
0
{
615
0
   int i;
616
0
   celt_coef scale;
617
#ifdef FIXED_POINT
618
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
619
      MULT16_32_Q15() on ARM. */
620
   int scale_shift = st->scale_shift-1;
621
#endif
622
0
   scale = st->scale;
623
624
0
   celt_assert2 (fin != fout, "In-place FFT not supported");
625
   /* Bit-reverse the input */
626
0
   for (i=0;i<st->nfft;i++)
627
0
   {
628
0
      kiss_fft_cpx x = fin[i];
629
0
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
630
0
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
631
0
   }
632
0
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
633
0
}
634
635
636
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
637
0
{
638
0
   int i;
639
0
   celt_assert2 (fin != fout, "In-place FFT not supported");
640
   /* Bit-reverse the input */
641
0
   for (i=0;i<st->nfft;i++)
642
0
      fout[st->bitrev[i]] = fin[i];
643
0
   for (i=0;i<st->nfft;i++)
644
0
      fout[i].i = -fout[i].i;
645
0
   opus_fft_impl(st, fout ARG_FIXED(0));
646
0
   for (i=0;i<st->nfft;i++)
647
0
      fout[i].i = -fout[i].i;
648
0
}