/src/opus/celt/kiss_fft.c

Source
/*Copyright (c) 2003-2004, Mark Borgerding
  Lots of modifications by Jean-Marc Valin
  Copyright (c) 2005-2007, Xiph.Org Foundation
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO

  All rights reserved.

  Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
       this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.*/

/* This code is originally from Mark Borgerding's KISS-FFT but has been
   heavily modified to better suit Opus */

#ifndef SKIP_CONFIG_H
#  ifdef HAVE_CONFIG_H
#    include "config.h"
#  endif
#endif

#include "_kiss_fft_guts.h"
#include "arch.h"
#include "os_support.h"
#include "mathops.h"
#include "stack_alloc.h"

#ifndef M_PI
#define M_PI 3.141592653
#endif

/* The guts header contains all the multiplication and addition macros that are defined for
   complex numbers.  It also declares the kf_ internal functions.
*/

/* Radix-2 butterfly stage, applied in place to N consecutive groups of Fout.
   Fout: data to transform in place.
   m:    sub-transform length; only m==4 is handled in regular builds, and
         additionally m==1 when CUSTOM_MODES is defined.
   N:    number of butterfly groups to process. */
static void kf_bfly2(
                     kiss_fft_cpx * Fout,
                     int m,
                     int N
                    )
{
   int blk;
   /* m is otherwise only read by the CUSTOM_MODES path and the assertions. */
   (void)m;
#ifdef CUSTOM_MODES
   if (m==1)
   {
      /* Twiddle-free case: each group is the pair (Fout[0], Fout[1]). */
      celt_assert(m==1);
      for (blk=0;blk<N;blk++)
      {
         kiss_fft_cpx *odd = Fout + 1;
         kiss_fft_cpx tmp = *odd;
         C_SUB( *odd ,  *Fout , tmp );
         C_ADDTO( *Fout ,  tmp );
         Fout += 2;
      }
      return;
   }
#endif
   {
      /* 1/sqrt(2): magnitude of the real and imaginary parts of the two
         diagonal twiddles used below. */
      celt_coef tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
      celt_assert(m==4);
      for (blk=0;blk<N;blk++)
      {
         /* Each group is 8 values: lower half Fout[0..3], upper half hi[0..3]. */
         kiss_fft_cpx *hi = Fout + 4;
         kiss_fft_cpx t = hi[0];

         /* Element 0: twiddle is 1. */
         C_SUB( hi[0] ,  Fout[0] , t );
         C_ADDTO( Fout[0] ,  t );

         /* Element 1: multiply by (1 - j)/sqrt(2). */
         t.r = S_MUL(ADD32_ovflw(hi[1].r, hi[1].i), tw);
         t.i = S_MUL(SUB32_ovflw(hi[1].i, hi[1].r), tw);
         C_SUB( hi[1] ,  Fout[1] , t );
         C_ADDTO( Fout[1] ,  t );

         /* Element 2: multiply by -j (swap parts, negate the new imaginary). */
         t.r = hi[2].i;
         t.i = NEG32_ovflw(hi[2].r);
         C_SUB( hi[2] ,  Fout[2] , t );
         C_ADDTO( Fout[2] ,  t );

         /* Element 3: multiply by (-1 - j)/sqrt(2). */
         t.r = S_MUL(SUB32_ovflw(hi[3].i, hi[3].r), tw);
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(hi[3].i, hi[3].r)), tw);
         C_SUB( hi[3] ,  Fout[3] , t );
         C_ADDTO( Fout[3] ,  t );

         Fout += 8;
      }
   }
}

/* Radix-4 butterfly stage, applied in place.
   Fout:    data to transform in place.
   fstride: step through st->twiddles between consecutive butterflies
            (the 2nd and 3rd inputs step by 2*fstride and 3*fstride).
   st:      FFT state; only st->twiddles is read here.
   m:       distance between the four inputs of one butterfly; m==1 selects
            the twiddle-free fast path.
   N:       number of groups to process.
   mm:      distance between the starts of consecutive groups (only used
            when m != 1; the m==1 path advances by 4). */
static void kf_bfly4(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   int i;

   if (m==1)
   {
      /* Degenerate case where all the twiddles are 1. */
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx scratch0, scratch1;

         /* scratch0 = F0 - F2, then F0 = F0 + F2 */
         C_SUB( scratch0 , *Fout, Fout[2] );
         C_ADDTO(*Fout, Fout[2]);
         /* scratch1 = F1 + F3; F2 and F0 get the difference/sum of the two
            partial sums */
         C_ADD( scratch1 , Fout[1] , Fout[3] );
         C_SUB( Fout[2], *Fout, scratch1 );
         C_ADDTO( *Fout , scratch1 );
         /* scratch1 is reused for F1 - F3 (read before F1/F3 are overwritten) */
         C_SUB( scratch1 , Fout[1] , Fout[3] );

         /* F1 = scratch0 - j*scratch1, F3 = scratch0 + j*scratch1 */
         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
         Fout+=4;
      }
   } else {
      int j;
      kiss_fft_cpx scratch[6];
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
      const int m2=2*m;
      const int m3=3*m;
      kiss_fft_cpx * Fout_beg = Fout;
      for (i=0;i<N;i++)
      {
         /* Each group starts mm elements after the previous one and restarts
            from the beginning of the twiddle table. */
         Fout = Fout_beg + i*mm;
         tw3 = tw2 = tw1 = st->twiddles;
         /* m is guaranteed to be a multiple of 4. */
         for (j=0;j<m;j++)
         {
            /* Apply the twiddles to inputs 1..3 (input 0 needs none). */
            C_MUL(scratch[0],Fout[m] , *tw1 );
            C_MUL(scratch[1],Fout[m2] , *tw2 );
            C_MUL(scratch[2],Fout[m3] , *tw3 );

            /* scratch[5] = F0 - T2, F0 = F0 + T2,
               scratch[3] = T1 + T3, scratch[4] = T1 - T3 */
            C_SUB( scratch[5] , *Fout, scratch[1] );
            C_ADDTO(*Fout, scratch[1]);
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
            C_SUB( Fout[m2], *Fout, scratch[3] );
            tw1 += fstride;
            tw2 += fstride*2;
            tw3 += fstride*3;
            C_ADDTO( *Fout , scratch[3] );

            /* Outputs 1 and 3: scratch[5] -/+ j*scratch[4] */
            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
            ++Fout;
         }
      }
   }
}


#ifndef RADIX_TWO_ONLY

/* Radix-3 butterfly stage, applied in place.
   Fout:    data to transform in place.
   fstride: step through st->twiddles between consecutive butterflies
            (the 2nd input steps by 2*fstride).
   st:      FFT state; st->twiddles is read here.
   m:       distance between the three inputs of one butterfly, and the
            number of butterflies per group. The do/while below runs at
            least once, so m must be nonzero.
   N:       number of groups to process.
   mm:      distance between the starts of consecutive groups. */
static void kf_bfly3(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   int i;
   size_t k;
   const size_t m2 = 2*m;
   const kiss_twiddle_cpx *tw1,*tw2;
   /* Only scratch[0..3] are used below. */
   kiss_fft_cpx scratch[5];
   kiss_twiddle_cpx epi3;

   kiss_fft_cpx * Fout_beg = Fout;
   /* Only epi3.i is read below. Fixed-point builds use the constant
      -0.86602540 (-sqrt(3)/2); other builds take it from the twiddle table. */
#ifdef FIXED_POINT
   /*epi3.r = -16384;*/ /* Unused */
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
#else
   epi3 = st->twiddles[fstride*m];
#endif
   for (i=0;i<N;i++)
   {
      /* Each group restarts from the beginning of the twiddle table. */
      Fout = Fout_beg + i*mm;
      tw1=tw2=st->twiddles;
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      k=m;
      do {

         /* Apply the twiddles to inputs 1 and 2 (input 0 needs none). */
         C_MUL(scratch[1],Fout[m] , *tw1);
         C_MUL(scratch[2],Fout[m2] , *tw2);

         /* scratch[3] = T1 + T2, scratch[0] = T1 - T2 */
         C_ADD(scratch[3],scratch[1],scratch[2]);
         C_SUB(scratch[0],scratch[1],scratch[2]);
         tw1 += fstride;
         tw2 += fstride*2;

         /* Fout[m] temporarily holds F0 - (T1+T2)/2, the part shared by
            outputs 1 and 2; it must be computed before F0 is updated. */
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));

         C_MULBYSCALAR( scratch[0] , epi3.i );

         /* Output 0 = F0 + T1 + T2 */
         C_ADDTO(*Fout,scratch[3]);

         /* Outputs 2 and 1: shared part -/+ j*scratch[0] */
         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);

         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);

         ++Fout;
      } while(--k);
   }
}


#ifndef OVERRIDE_kf_bfly5
/* Radix-5 butterfly stage, applied in place.
   Fout:    data to transform in place.
   fstride: step through st->twiddles; input k of butterfly u uses
            twiddle index k*u*fstride.
   st:      FFT state; st->twiddles is read here.
   m:       distance between the five inputs of one butterfly, and the
            number of butterflies per group.
   N:       number of groups to process.
   mm:      distance between the starts of consecutive groups. */
static void kf_bfly5(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
   int i, u;
   kiss_fft_cpx scratch[13];
   const kiss_twiddle_cpx *tw;
   kiss_twiddle_cpx ya,yb;
   kiss_fft_cpx * Fout_beg = Fout;

   /* ya and yb are the two radix-5 rotation constants. Fixed-point builds
      hard-code them as (cos, -sin) of 72 and 144 degrees; other builds read
      them from the twiddle table at fstride*m and fstride*2*m. */
#ifdef FIXED_POINT
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
#else
   ya = st->twiddles[fstride*m];
   yb = st->twiddles[fstride*2*m];
#endif
   tw=st->twiddles;

   for (i=0;i<N;i++)
   {
      /* Five cursors, m elements apart, walk the group in lock step. */
      Fout = Fout_beg + i*mm;
      Fout0=Fout;
      Fout1=Fout0+m;
      Fout2=Fout0+2*m;
      Fout3=Fout0+3*m;
      Fout4=Fout0+4*m;

      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      for ( u=0; u<m; ++u ) {
         /* Keep the original input 0: Fout0 is overwritten below but the
            other four outputs still need it. */
         scratch[0] = *Fout0;

         /* Apply the twiddles to inputs 1..4. */
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);

         /* Sums and differences of the symmetric pairs (1,4) and (2,3). */
         C_ADD( scratch[7],scratch[1],scratch[4]);
         C_SUB( scratch[10],scratch[1],scratch[4]);
         C_ADD( scratch[8],scratch[2],scratch[3]);
         C_SUB( scratch[9],scratch[2],scratch[3]);

         /* Output 0 = sum of all five inputs. */
         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));

         /* Outputs 1 and 4 = scratch[5] -/+ scratch[6]. */
         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));

         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));

         C_SUB(*Fout1,scratch[5],scratch[6]);
         C_ADD(*Fout4,scratch[5],scratch[6]);

         /* Outputs 2 and 3 = scratch[11] +/- scratch[12]. */
         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));

         C_ADD(*Fout2,scratch[11],scratch[12]);
         C_SUB(*Fout3,scratch[11],scratch[12]);

         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
      }
   }
}
#endif /* OVERRIDE_kf_bfly5 */


#endif


#ifdef CUSTOM_MODES

/* Recursively fills the input-reordering table used before the butterflies.
   Fout:      output index assigned to the first entry handled by this call.
   f:         position in the table where this call starts writing.
   fstride:   table stride of the current stage (multiplied by the radix at
              each recursion level).
   in_stride: extra multiplier applied to the table stride.
   factors:   (radix, sub-length) pairs for this stage and the following ones.
   st:        not read here; only forwarded to the recursive calls. */
static
void compute_bitrev_table(
         int Fout,
         opus_int16 *f,
         const size_t fstride,
         int in_stride,
         opus_int16 * factors,
         const kiss_fft_state *st
            )
{
   const int radix = factors[0];   /* the radix  */
   const int sublen = factors[1];  /* stage's fft length/radix */
   const size_t step = fstride*in_stride;
   int branch;

   for (branch=0;branch<radix;branch++)
   {
      if (sublen==1)
      {
         /* Last stage: the entries are consecutive output indices. */
         *f = Fout+branch;
      } else {
         /* Each branch owns a run of sublen output indices and is laid out
            by the remaining stages. */
         compute_bitrev_table( Fout + branch*sublen , f, fstride*radix, in_stride, factors+2,st);
      }
      f += step;
   }
}

/*  Factors n into the radices supported by the butterflies and writes the
    resulting stage plan into facbuf.

    facbuf is populated by p1,m1,p2,m2, ...
    where
    p[i] * m[i] = m[i-1]
    m0 = n

    Returns 1 on success. Returns 0 when n is not positive, when n has a
    factor that no butterfly handles (anything above 5, or anything other
    than 2 and 4 with RADIX_TWO_ONLY), or when the plan would need more than
    MAXFACTORS stages.
    NOTE(review): the stage limit assumes facbuf has room for 2*MAXFACTORS
    entries -- confirm against the declaration of kiss_fft_state.factors. */
static
int kf_factor(int n,opus_int16 * facbuf)
{
    int p=4;
    int i;
    int stages=0;
    int nbak = n;

    /* n==0 would produce a radix-4 stage of length 0 and a negative n a
       negative radix; neither describes a usable transform. */
    if (n <= 0)
        return 0;

    /*factor out powers of 4, powers of 2, then any remaining primes */
    do {
        while (n % p) {
            switch (p) {
                case 4: p = 2; break;
                case 2: p = 3; break;
                default: p += 2; break;
            }
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
                p = n;          /* no more factors, skip to end */
        }
        n /= p;
#ifdef RADIX_TWO_ONLY
        if (p!=2 && p != 4)
#else
        if (p>5)
#endif
        {
           return 0;
        }
        /* Refuse plans with more stages than the buffer is sized for rather
           than writing past its end (e.g. a long run of radix-3 factors). */
        if (stages >= MAXFACTORS)
        {
           return 0;
        }
        facbuf[2*stages] = p;
        /* Keep a lone radix-2 in the second slot so that, once the order is
           reversed below, it sits right next to the final radix-4 stage. */
        if (p==2 && stages > 1)
        {
           facbuf[2*stages] = 4;
           facbuf[2] = 2;
        }
        stages++;
    } while (n > 1);
    n = nbak;
    /* Reverse the order to get the radix 4 at the end, so we can use the
       fast degenerate case. It turns out that reversing the order also
       improves the noise behaviour. */
    for (i=0;i<stages/2;i++)
    {
       int tmp;
       tmp = facbuf[2*i];
       facbuf[2*i] = facbuf[2*(stages-i-1)];
       facbuf[2*(stages-i-1)] = tmp;
    }
    /* Fill in the sub-transform length that remains after each stage. */
    for (i=0;i<stages;i++)
    {
        n /= facbuf[2*i];
        facbuf[2*i+1] = n;
    }
    return 1;
}

/* Fills twiddles[0..nfft-1]; entry k is built from the phase -2*pi*k/nfft.
   Fixed-point builds use either kf_cexp2() on a scaled integer phase or,
   with ENABLE_QEXT, rounded and clamped cos()/sin() values; other builds use
   kf_cexp() on a double-precision phase. */
static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
{
   int k;
   for (k=0;k<nfft;++k) {
#ifdef FIXED_POINT
      opus_val32 phase = -k;
# ifdef ENABLE_QEXT
      /* MIN32 clamps the result so that +1.0 still fits in 32 bits. */
      twiddles[k].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
      twiddles[k].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
# else
      kf_cexp2(twiddles+k, DIV32(SHL32(phase,17),nfft));
# endif
#else
      const double pi=3.14159265358979323846264338327;
      double phase = ( -2*pi /nfft ) * k;
      kf_cexp(twiddles+k, phase );
#endif
   }
}

/* Architecture hook called while an FFT state is being set up. This version
   does nothing with st and always reports success (0). */
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
   const int status = 0;
   (void)st;
   return status;
}

/*
 *
 * Allocates all necessary storage space for the fft and ifft.
 * The return value is a contiguous block of memory.  As such,
 * It can be freed with free().
 * */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
                                        const kiss_fft_state *base, int arch)
{
    kiss_fft_state *st=NULL;
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/

    if ( lenmem==NULL ) {
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
    }else{
        if (mem != NULL && *lenmem >= memneeded)
            st = (kiss_fft_state*)mem;
        *lenmem = memneeded;
    }
    if (st) {
        opus_int16 *bitrev;
        kiss_twiddle_cpx *twiddles;

        st->nfft=nfft;
#ifdef FIXED_POINT
        st->scale_shift = celt_ilog2(st->nfft);
# ifdef ENABLE_QEXT
        if (st->nfft == 1<<st->scale_shift)
           st->scale = QCONST32(1.0f, 30);
        else
           st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
# else
        if (st->nfft == 1<<st->scale_shift)
           st->scale = Q15ONE;
        else
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
# endif
#else
        st->scale = 1.f/nfft;
#endif
        if (base != NULL)
        {
           st->twiddles = base->twiddles;
           st->shift = 0;
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
              st->shift++;
           if (st->shift>=32)
              goto fail;
        } else {
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
           compute_twiddles(twiddles, nfft);
           st->shift = -1;
        }
        if (!kf_factor(nfft,st->factors))
        {
           goto fail;
        }

        /* bitrev */
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
        if (st->bitrev==NULL)
            goto fail;
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);

        /* Initialize architecture specific fft parameters */
        if (opus_fft_alloc_arch(st, arch))
            goto fail;
    }
    return st;
fail:
    opus_fft_free(st, arch);
    return NULL;
}

/* Convenience wrapper around opus_fft_alloc_twiddles() that passes no base
   state. nfft, mem, lenmem and arch are forwarded unchanged, and the result
   of that call is returned as is. */
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
   const kiss_fft_state *no_base = NULL;
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, no_base, arch);
}

/* Architecture hook called while an FFT state is being released. This
   version has nothing to release and ignores st. */
void opus_fft_free_arch_c(kiss_fft_state *st) {
   (void)st;
   return;
}

/* Releases an FFT state: runs the architecture hook, then frees the
   bit-reversal table, the twiddle table if this state owns it, and finally
   the state structure itself. A NULL cfg is ignored.
   cfg:  state to release (the const is cast away internally).
   arch: passed through to the architecture-specific hook. */
void opus_fft_free(const kiss_fft_state *cfg, int arch)
{
   kiss_fft_state *state;

   if (cfg == NULL)
      return;

   state = (kiss_fft_state *)cfg;
   opus_fft_free_arch(state, arch);
   opus_free((opus_int16*)state->bitrev);
   /* A negative shift marks a twiddle table this state allocated itself;
      otherwise the table belongs to another state and must not be freed. */
   if (state->shift < 0)
      opus_free((kiss_twiddle_cpx*)state->twiddles);
   opus_free(state);
}

#endif /* CUSTOM_MODES */

#ifdef FIXED_POINT
#ifndef OVERRIDE_fft_downshift
/* Shifts every component of x[0..N-1] right by min(step, *total) bits and
   subtracts that amount from *total.
   x:     complex buffer, modified in place.
   N:     number of complex values in x.
   total: remaining shift budget; updated on return.
   step:  largest shift wanted at this point. */
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
   int k;
   const int shift = IMIN(step, *total);

   *total -= shift;
   if (shift <= 0)
      return;

   if (shift == 1) {
      /* A one-bit shift uses SHR32 instead of PSHR32. */
      for (k=0;k<N;k++) {
         x[k].r = SHR32(x[k].r, 1);
         x[k].i = SHR32(x[k].i, 1);
      }
   } else {
      for (k=0;k<N;k++) {
         x[k].r = PSHR32(x[k].r, shift);
         x[k].i = PSHR32(x[k].i, shift);
      }
   }
}
#endif /* OVERRIDE_fft_downshift */
#else
#define fft_downshift(x, N, total, step)
#endif

/* Runs the butterfly stages of the FFT in place on fout, which must already
   hold the input in the order given by st->bitrev.
   st:        FFT state (factors, twiddles, shift, nfft).
   fout:      buffer of st->nfft complex values, transformed in place.
   downshift: fixed-point builds only; total number of bits to shift the
              data right, spread over the stages, with any remainder applied
              at the end. */
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
{
    int m2, m;
    int p;
    int L;
    /* One entry per stage plus one: the loop below writes fstride[L+1] for
       every stage, so a plan using all MAXFACTORS stages would otherwise
       write one element past the end of the array. */
    int fstride[MAXFACTORS+1];
    int i;
    int shift;

    /* st->shift can be -1 */
    shift = st->shift>0 ? st->shift : 0;

    /* Walk the stage list until the stage whose sub-length is 1, recording
       the running product of the radices seen before each stage. */
    fstride[0] = 1;
    L=0;
    do {
       p = st->factors[2*L];
       m = st->factors[2*L+1];
       fstride[L+1] = fstride[L]*p;
       L++;
    } while(m!=1);
    m = st->factors[2*L-1];
    /* Apply the stages from last to first. */
    for (i=L-1;i>=0;i--)
    {
       if (i!=0)
          m2 = st->factors[2*i-1];
       else
          m2 = 1;
       switch (st->factors[2*i])
       {
       case 2:
          fft_downshift(fout, st->nfft, &downshift, 1);
          kf_bfly2(fout, m, fstride[i]);
          break;
       case 4:
          fft_downshift(fout, st->nfft, &downshift, 2);
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
 #ifndef RADIX_TWO_ONLY
       case 3:
          fft_downshift(fout, st->nfft, &downshift, 2);
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
       case 5:
          fft_downshift(fout, st->nfft, &downshift, 3);
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
 #endif
       }
       m = m2;
    }
    /* Apply whatever part of the requested shift the stages did not use. */
    fft_downshift(fout, st->nfft, &downshift, downshift);
}

/* Forward FFT: copies fin into fout in the order given by st->bitrev while
   multiplying every component by st->scale, then runs the butterfly stages
   in place on fout.
   st:   FFT state.
   fin:  st->nfft input values; must not be the same buffer as fout.
   fout: st->nfft output values. */
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
   int n;
   celt_coef scale = st->scale;
#ifdef FIXED_POINT
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
      MULT16_32_Q15() on ARM. */
   int scale_shift = st->scale_shift-1;
#endif

   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (n=0;n<st->nfft;n++)
   {
      kiss_fft_cpx src = fin[n];
      kiss_fft_cpx *dst = fout + st->bitrev[n];
      dst->r = S_MUL2(src.r, scale);
      dst->i = S_MUL2(src.i, scale);
   }
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
}


/* Negates the imaginary part of each of the len values in x. */
static void opus_ifft_conjugate(kiss_fft_cpx *x, int len)
{
   int k;
   for (k=0;k<len;k++)
      x[k].i = -x[k].i;
}

/* Inverse FFT built on the forward butterflies: the input is copied to fout
   in the order given by st->bitrev, conjugated, transformed, and conjugated
   again. No scaling is applied here.
   st:   FFT state.
   fin:  st->nfft input values; must not be the same buffer as fout.
   fout: st->nfft output values. */
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
   int n;
   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (n=0;n<st->nfft;n++)
   {
      kiss_fft_cpx *dst = fout + st->bitrev[n];
      *dst = fin[n];
   }
   opus_ifft_conjugate(fout, st->nfft);
   opus_fft_impl(st, fout ARG_FIXED(0));
   opus_ifft_conjugate(fout, st->nfft);
}

Coverage Report

Created: 2025-12-31 07:57

Line	Count	Source
1		/*Copyright (c) 2003-2004, Mark Borgerding
2		Lots of modifications by Jean-Marc Valin
3		Copyright (c) 2005-2007, Xiph.Org Foundation
4		Copyright (c) 2008, Xiph.Org Foundation, CSIRO
5
6		All rights reserved.
7
8		Redistribution and use in source and binary forms, with or without
9		modification, are permitted provided that the following conditions are met:
10
11		* Redistributions of source code must retain the above copyright notice,
12		this list of conditions and the following disclaimer.
13		* Redistributions in binary form must reproduce the above copyright notice,
14		this list of conditions and the following disclaimer in the
15		documentation and/or other materials provided with the distribution.
16
17		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18		AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19		IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20		ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21		LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22		CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23		SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24		INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25		CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26		ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27		POSSIBILITY OF SUCH DAMAGE.*/
28
29		/* This code is originally from Mark Borgerding's KISS-FFT but has been
30		heavily modified to better suit Opus */
31
32		#ifndef SKIP_CONFIG_H
33		# ifdef HAVE_CONFIG_H
34		# include "config.h"
35		# endif
36		#endif
37
38		#include "_kiss_fft_guts.h"
39		#include "arch.h"
40		#include "os_support.h"
41		#include "mathops.h"
42		#include "stack_alloc.h"
43
44		#ifndef M_PI
45		#define M_PI 3.141592653
46		#endif
47
48		/* The guts header contains all the multiplication and addition macros that are defined for
49		complex numbers. It also declares the kf_ internal functions.
50		*/
51
52		static void kf_bfly2(
53		kiss_fft_cpx * Fout,
54		int m,
55		int N
56		)
57	115k	{
58	115k	kiss_fft_cpx * Fout2;
59	115k	int i;
60	115k	(void)m;
61		#ifdef CUSTOM_MODES
62		if (m==1)
63		{
64		celt_assert(m==1);
65		for (i=0;i<N;i++)
66		{
67		kiss_fft_cpx t;
68		Fout2 = Fout + 1;
69		t = *Fout2;
70		C_SUB( *Fout2 , *Fout , t );
71		C_ADDTO( *Fout , t );
72		Fout += 2;
73		}
74		} else
75		#endif
76	115k	{
77	115k	celt_coef tw;
78	115k	tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79		/* We know that m==4 here because the radix-2 is just after a radix-4 */
80	115k	celt_assert(m==4);
81	4.47M	for (i=0;i<N;i++)
82	4.35M	{
83	4.35M	kiss_fft_cpx t;
84	4.35M	Fout2 = Fout + 4;
85	4.35M	t = Fout2[0];
86	4.35M	C_SUB( Fout2[0] , Fout[0] , t );
87	4.35M	C_ADDTO( Fout[0] , t );
88
89	4.35M	t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90	4.35M	t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91	4.35M	C_SUB( Fout2[1] , Fout[1] , t );
92	4.35M	C_ADDTO( Fout[1] , t );
93
94	4.35M	t.r = Fout2[2].i;
95	4.35M	t.i = NEG32_ovflw(Fout2[2].r);
96	4.35M	C_SUB( Fout2[2] , Fout[2] , t );
97	4.35M	C_ADDTO( Fout[2] , t );
98
99	4.35M	t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100	4.35M	t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101	4.35M	C_SUB( Fout2[3] , Fout[3] , t );
102	4.35M	C_ADDTO( Fout[3] , t );
103	4.35M	Fout += 8;
104	4.35M	}
105	115k	}
106	115k	}
107
108		static void kf_bfly4(
109		kiss_fft_cpx * Fout,
110		const size_t fstride,
111		const kiss_fft_state *st,
112		int m,
113		int N,
114		int mm
115		)
116	637k	{
117	637k	int i;
118
119	637k	if (m==1)
120	402k	{
121		/* Degenerate case where all the twiddles are 1. */
122	21.3M	for (i=0;i<N;i++)
123	20.9M	{
124	20.9M	kiss_fft_cpx scratch0, scratch1;
125
126	20.9M	C_SUB( scratch0 , *Fout, Fout[2] );
127	20.9M	C_ADDTO(*Fout, Fout[2]);
128	20.9M	C_ADD( scratch1 , Fout[1] , Fout[3] );
129	20.9M	C_SUB( Fout[2], *Fout, scratch1 );
130	20.9M	C_ADDTO( *Fout , scratch1 );
131	20.9M	C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133	20.9M	Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134	20.9M	Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135	20.9M	Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136	20.9M	Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137	20.9M	Fout+=4;
138	20.9M	}
139	402k	} else {
140	234k	int j;
141	234k	kiss_fft_cpx scratch[6];
142	234k	const kiss_twiddle_cpx *tw1,*tw2,*tw3;
143	234k	const int m2=2*m;
144	234k	const int m3=3*m;
145	234k	kiss_fft_cpx * Fout_beg = Fout;
146	3.75M	for (i=0;i<N;i++)
147	3.52M	{
148	3.52M	Fout = Fout_beg + i*mm;
149	3.52M	tw3 = tw2 = tw1 = st->twiddles;
150		/* m is guaranteed to be a multiple of 4. */
151	21.0M	for (j=0;j<m;j++)
152	17.5M	{
153	17.5M	C_MUL(scratch[0],Fout[m] , *tw1 );
154	17.5M	C_MUL(scratch[1],Fout[m2] , *tw2 );
155	17.5M	C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157	17.5M	C_SUB( scratch[5] , *Fout, scratch[1] );
158	17.5M	C_ADDTO(*Fout, scratch[1]);
159	17.5M	C_ADD( scratch[3] , scratch[0] , scratch[2] );
160	17.5M	C_SUB( scratch[4] , scratch[0] , scratch[2] );
161	17.5M	C_SUB( Fout[m2], *Fout, scratch[3] );
162	17.5M	tw1 += fstride;
163	17.5M	tw2 += fstride*2;
164	17.5M	tw3 += fstride*3;
165	17.5M	C_ADDTO( *Fout , scratch[3] );
166
167	17.5M	Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168	17.5M	Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169	17.5M	Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170	17.5M	Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171	17.5M	++Fout;
172	17.5M	}
173	3.52M	}
174	234k	}
175	637k	}
176
177
178		#ifndef RADIX_TWO_ONLY
179
180		static void kf_bfly3(
181		kiss_fft_cpx * Fout,
182		const size_t fstride,
183		const kiss_fft_state *st,
184		int m,
185		int N,
186		int mm
187		)
188	402k	{
189	402k	int i;
190	402k	size_t k;
191	402k	const size_t m2 = 2*m;
192	402k	const kiss_twiddle_cpx *tw1,*tw2;
193	402k	kiss_fft_cpx scratch[5];
194	402k	kiss_twiddle_cpx epi3;
195
196	402k	kiss_fft_cpx * Fout_beg = Fout;
197		#ifdef FIXED_POINT
198		/*epi3.r = -16384;*/ /* Unused */
199		epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200		#else
201	402k	epi3 = st->twiddles[fstride*m];
202	402k	#endif
203	2.41M	for (i=0;i<N;i++)
204	2.01M	{
205	2.01M	Fout = Fout_beg + i*mm;
206	2.01M	tw1=tw2=st->twiddles;
207		/* For non-custom modes, m is guaranteed to be a multiple of 4. */
208	2.01M	k=m;
209	27.9M	do {
210
211	27.9M	C_MUL(scratch[1],Fout[m] , *tw1);
212	27.9M	C_MUL(scratch[2],Fout[m2] , *tw2);
213
214	27.9M	C_ADD(scratch[3],scratch[1],scratch[2]);
215	27.9M	C_SUB(scratch[0],scratch[1],scratch[2]);
216	27.9M	tw1 += fstride;
217	27.9M	tw2 += fstride*2;
218
219	27.9M	Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220	27.9M	Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222	27.9M	C_MULBYSCALAR( scratch[0] , epi3.i );
223
224	27.9M	C_ADDTO(*Fout,scratch[3]);
225
226	27.9M	Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227	27.9M	Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229	27.9M	Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230	27.9M	Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232	27.9M	++Fout;
233	27.9M	} while(--k);
234	2.01M	}
235	402k	}
236
237
238		#ifndef OVERRIDE_kf_bfly5
239		static void kf_bfly5(
240		kiss_fft_cpx * Fout,
241		const size_t fstride,
242		const kiss_fft_state *st,
243		int m,
244		int N,
245		int mm
246		)
247	402k	{
248	402k	kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
249	402k	int i, u;
250	402k	kiss_fft_cpx scratch[13];
251	402k	const kiss_twiddle_cpx *tw;
252	402k	kiss_twiddle_cpx ya,yb;
253	402k	kiss_fft_cpx * Fout_beg = Fout;
254
255		#ifdef FIXED_POINT
256		ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257		ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258		yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259		yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260		#else
261	402k	ya = st->twiddles[fstride*m];
262	402k	yb = st->twiddles[fstride*2*m];
263	402k	#endif
264	402k	tw=st->twiddles;
265
266	805k	for (i=0;i<N;i++)
267	402k	{
268	402k	Fout = Fout_beg + i*mm;
269	402k	Fout0=Fout;
270	402k	Fout1=Fout0+m;
271	402k	Fout2=Fout0+2*m;
272	402k	Fout3=Fout0+3*m;
273	402k	Fout4=Fout0+4*m;
274
275		/* For non-custom modes, m is guaranteed to be a multiple of 4. */
276	17.1M	for ( u=0; u<m; ++u ) {
277	16.7M	scratch[0] = *Fout0;
278
279	16.7M	C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
280	16.7M	C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
281	16.7M	C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
282	16.7M	C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
283
284	16.7M	C_ADD( scratch[7],scratch[1],scratch[4]);
285	16.7M	C_SUB( scratch[10],scratch[1],scratch[4]);
286	16.7M	C_ADD( scratch[8],scratch[2],scratch[3]);
287	16.7M	C_SUB( scratch[9],scratch[2],scratch[3]);
288
289	16.7M	Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290	16.7M	Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292	16.7M	scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293	16.7M	scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295	16.7M	scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296	16.7M	scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298	16.7M	C_SUB(*Fout1,scratch[5],scratch[6]);
299	16.7M	C_ADD(*Fout4,scratch[5],scratch[6]);
300
301	16.7M	scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302	16.7M	scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303	16.7M	scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304	16.7M	scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306	16.7M	C_ADD(*Fout2,scratch[11],scratch[12]);
307	16.7M	C_SUB(*Fout3,scratch[11],scratch[12]);
308
309	16.7M	++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310	16.7M	}
311	402k	}
312	402k	}
313		#endif /* OVERRIDE_kf_bfly5 */
314
315
316		#endif
317
318
319		#ifdef CUSTOM_MODES
320
321		static
322		void compute_bitrev_table(
323		int Fout,
324		opus_int16 *f,
325		const size_t fstride,
326		int in_stride,
327		opus_int16 * factors,
328		const kiss_fft_state *st
329		)
330		{
331		const int p=*factors++; /* the radix */
332		const int m=*factors++; /* stage's fft length/p */
333
334		/*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
335		if (m==1)
336		{
337		int j;
338		for (j=0;j<p;j++)
339		{
340		*f = Fout+j;
341		f += fstride*in_stride;
342		}
343		} else {
344		int j;
345		for (j=0;j<p;j++)
346		{
347		compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
348		f += fstride*in_stride;
349		Fout += m;
350		}
351		}
352		}
353
354		/* facbuf is populated by p1,m1,p2,m2, ...
355		where
356		p[i] * m[i] = m[i-1]
357		m0 = n */
358		static
359		int kf_factor(int n,opus_int16 * facbuf)
360		{
361		int p=4;
362		int i;
363		int stages=0;
364		int nbak = n;
365
366		/*factor out powers of 4, powers of 2, then any remaining primes */
367		do {
368		while (n % p) {
369		switch (p) {
370		case 4: p = 2; break;
371		case 2: p = 3; break;
372		default: p += 2; break;
373		}
374		if (p>32000 || (opus_int32)p*(opus_int32)p > n)
375		p = n; /* no more factors, skip to end */
376		}
377		n /= p;
378		#ifdef RADIX_TWO_ONLY
379		if (p!=2 && p != 4)
380		#else
381		if (p>5)
382		#endif
383		{
384		return 0;
385		}
386		facbuf[2*stages] = p;
387		if (p==2 && stages > 1)
388		{
389		facbuf[2*stages] = 4;
390		facbuf[2] = 2;
391		}
392		stages++;
393		} while (n > 1);
394		n = nbak;
395		/* Reverse the order to get the radix 4 at the end, so we can use the
396		fast degenerate case. It turns out that reversing the order also
397		improves the noise behaviour. */
398		for (i=0;i<stages/2;i++)
399		{
400		int tmp;
401		tmp = facbuf[2*i];
402		facbuf[2*i] = facbuf[2*(stages-i-1)];
403		facbuf[2*(stages-i-1)] = tmp;
404		}
405		for (i=0;i<stages;i++)
406		{
407		n /= facbuf[2*i];
408		facbuf[2*i+1] = n;
409		}
410		return 1;
411		}
412
413		static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
414		{
415		int i;
416		#ifdef FIXED_POINT
417		for (i=0;i<nfft;++i) {
418		opus_val32 phase = -i;
419		#ifdef ENABLE_QEXT
420		twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
421		twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
422		#else
423		kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
424		#endif
425		}
426		#else
427		for (i=0;i<nfft;++i) {
428		const double pi=3.14159265358979323846264338327;
429		double phase = ( -2*pi /nfft ) * i;
430		kf_cexp(twiddles+i, phase );
431		}
432		#endif
433		}
434
435		int opus_fft_alloc_arch_c(kiss_fft_state *st) {
436		(void)st;
437		return 0;
438		}
439
440		/*
441		*
442		* Allocates all necessary storage space for the fft and ifft.
443		* The return value is a contiguous block of memory. As such,
444		* It can be freed with free().
445		* */
446		kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
447		const kiss_fft_state *base, int arch)
448		{
449		kiss_fft_state *st=NULL;
450		size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
451
452		if ( lenmem==NULL ) {
453		st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
454		}else{
455		if (mem != NULL && *lenmem >= memneeded)
456		st = (kiss_fft_state*)mem;
457		*lenmem = memneeded;
458		}
459		if (st) {
460		opus_int16 *bitrev;
461		kiss_twiddle_cpx *twiddles;
462
463		st->nfft=nfft;
464		#ifdef FIXED_POINT
465		st->scale_shift = celt_ilog2(st->nfft);
466		# ifdef ENABLE_QEXT
467		if (st->nfft == 1<<st->scale_shift)
468		st->scale = QCONST32(1.0f, 30);
469		else
470		st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
471		# else
472		if (st->nfft == 1<<st->scale_shift)
473		st->scale = Q15ONE;
474		else
475		st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
476		# endif
477		#else
478		st->scale = 1.f/nfft;
479		#endif
480		if (base != NULL)
481		{
482		st->twiddles = base->twiddles;
483		st->shift = 0;
484		while (st->shift < 32 && nfft<<st->shift != base->nfft)
485		st->shift++;
486		if (st->shift>=32)
487		goto fail;
488		} else {
489		st->twiddles = twiddles = (kiss_twiddle_cpx)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)nfft);
490		compute_twiddles(twiddles, nfft);
491		st->shift = -1;
492		}
493		if (!kf_factor(nfft,st->factors))
494		{
495		goto fail;
496		}
497
498		/* bitrev */
499		st->bitrev = bitrev = (opus_int16)KISS_FFT_MALLOC(sizeof(opus_int16)nfft);
500		if (st->bitrev==NULL)
501		goto fail;
502		compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
503
504		/* Initialize architecture specific fft parameters */
505		if (opus_fft_alloc_arch(st, arch))
506		goto fail;
507		}
508		return st;
509		fail:
510		opus_fft_free(st, arch);
511		return NULL;
512		}
513
514		kiss_fft_state opus_fft_alloc(int nfft,void mem,size_t * lenmem, int arch)
515		{
516		return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
517		}
518
519		void opus_fft_free_arch_c(kiss_fft_state *st) {
520		(void)st;
521		}
522
523		void opus_fft_free(const kiss_fft_state *cfg, int arch)
524		{
525		if (cfg)
526		{
527		opus_fft_free_arch((kiss_fft_state *)cfg, arch);
528		opus_free((opus_int16*)cfg->bitrev);
529		if (cfg->shift < 0)
530		opus_free((kiss_twiddle_cpx*)cfg->twiddles);
531		opus_free((kiss_fft_state*)cfg);
532		}
533		}
534
535		#endif /* CUSTOM_MODES */
536
#ifdef FIXED_POINT
#ifndef OVERRIDE_fft_downshift
/* Scales down all N complex values in x by right-shifting them, consuming
   part of a shared shift budget.

   x      Buffer of N complex samples, modified in place.
   N      Number of samples in x.
   total  In/out: number of shift bits still to be applied. The amount
          actually applied, min(step, *total), is subtracted from it.
   step   Maximum number of bits to shift in this call.

   A shift of exactly one bit uses SHR32(); larger shifts use PSHR32()
   (presumably the rounding variant -- confirm in the fixed-point macro
   headers). Nothing is done when the effective shift is zero or negative
   apart from updating *total. */
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
   int shift;
   shift = IMIN(step, *total);
   *total -= shift;
   if (shift == 1) {
      int i;
      for (i=0;i<N;i++) {
         x[i].r = SHR32(x[i].r, 1);
         x[i].i = SHR32(x[i].i, 1);
      }
   } else if (shift>0) {
      int i;
      for (i=0;i<N;i++) {
         x[i].r = PSHR32(x[i].r, shift);
         x[i].i = PSHR32(x[i].i, shift);
      }
   }
}
#endif /* OVERRIDE_fft_downshift */
#else
/* Floating-point build: no downshift is needed, so the call expands to
   nothing (its arguments are never evaluated). */
#define fft_downshift(x, N, total, step)
#endif
561
562		void opus_fft_impl(const kiss_fft_state st,kiss_fft_cpx fout ARG_FIXED(int downshift))
563	402k	{
564	402k	int m2, m;
565	402k	int p;
566	402k	int L;
567	402k	int fstride[MAXFACTORS];
568	402k	int i;
569	402k	int shift;
570
571		/* st->shift can be -1 */
572	402k	shift = st->shift>0 ? st->shift : 0;
573
574	402k	fstride[0] = 1;
575	402k	L=0;
576	1.55M	do {
577	1.55M	p = st->factors[2*L];
578	1.55M	m = st->factors[2*L+1];
579	1.55M	fstride[L+1] = fstride[L]*p;
580	1.55M	L++;
581	1.55M	} while(m!=1);
582	402k	m = st->factors[2*L-1];
583	1.96M	for (i=L-1;i>=0;i--)
584	1.55M	{
585	1.55M	if (i!=0)
586	1.15M	m2 = st->factors[2*i-1];
587	402k	else
588	402k	m2 = 1;
589	1.55M	switch (st->factors[2*i])
590	1.55M	{
591	115k	case 2:
592	115k	fft_downshift(fout, st->nfft, &downshift, 1);
593	115k	kf_bfly2(fout, m, fstride[i]);
594	115k	break;
595	637k	case 4:
596	637k	fft_downshift(fout, st->nfft, &downshift, 2);
597	637k	kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
598	637k	break;
599	0	#ifndef RADIX_TWO_ONLY
600	402k	case 3:
601	402k	fft_downshift(fout, st->nfft, &downshift, 2);
602	402k	kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
603	402k	break;
604	402k	case 5:
605	402k	fft_downshift(fout, st->nfft, &downshift, 3);
606	402k	kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
607	402k	break;
608	1.55M	#endif
609	1.55M	}
610	1.55M	m = m2;
611	1.55M	}
612	402k	fft_downshift(fout, st->nfft, &downshift, downshift);
613	402k	}
614
615		void opus_fft_c(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
616	0	{
617	0	int i;
618	0	celt_coef scale;
619		#ifdef FIXED_POINT
620		/* Allows us to scale with MULT16_32_Q16(), which is faster than
621		MULT16_32_Q15() on ARM. */
622		int scale_shift = st->scale_shift-1;
623		#endif
624	0	scale = st->scale;
625
626	0	celt_assert2 (fin != fout, "In-place FFT not supported");
627		/* Bit-reverse the input */
628	0	for (i=0;i<st->nfft;i++)
629	0	{
630	0	kiss_fft_cpx x = fin[i];
631	0	fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
632	0	fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
633	0	}
634	0	opus_fft_impl(st, fout ARG_FIXED(scale_shift));
635	0	}
636
637
638		void opus_ifft_c(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
639	0	{
640	0	int i;
641	0	celt_assert2 (fin != fout, "In-place FFT not supported");
642		/* Bit-reverse the input */
643	0	for (i=0;i<st->nfft;i++)
644	0	fout[st->bitrev[i]] = fin[i];
645	0	for (i=0;i<st->nfft;i++)
646	0	fout[i].i = -fout[i].i;
647	0	opus_fft_impl(st, fout ARG_FIXED(0));
648	0	for (i=0;i<st->nfft;i++)
649	0	fout[i].i = -fout[i].i;
650	0	}