/src/opus/celt/kiss_fft.c

Source
/*Copyright (c) 2003-2004, Mark Borgerding
  Lots of modifications by Jean-Marc Valin
  Copyright (c) 2005-2007, Xiph.Org Foundation
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO

  All rights reserved.

  Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
       this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.*/

/* This code is originally from Mark Borgerding's KISS-FFT but has been
   heavily modified to better suit Opus */

#ifndef SKIP_CONFIG_H
#  ifdef HAVE_CONFIG_H
#    include "config.h"
#  endif
#endif

#include "_kiss_fft_guts.h"
#include "arch.h"
#include "os_support.h"
#include "mathops.h"
#include "stack_alloc.h"

#ifndef M_PI
#define M_PI 3.141592653
#endif

/* The guts header contains all the multiplication and addition macros that are defined for
   complex numbers.  It also declares the kf_ internal functions.
*/

static void kf_bfly2(
                     kiss_fft_cpx * Fout,
                     int m,
                     int N
                    )
{
   kiss_fft_cpx * Fout2;
   int i;
   (void)m;
#ifdef CUSTOM_MODES
   if (m==1)
   {
      celt_assert(m==1);
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx t;
         Fout2 = Fout + 1;
         t = *Fout2;
         C_SUB( *Fout2 ,  *Fout , t );
         C_ADDTO( *Fout ,  t );
         Fout += 2;
      }
   } else
#endif
   {
      celt_coef tw;
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
      celt_assert(m==4);
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx t;
         Fout2 = Fout + 4;
         t = Fout2[0];
         C_SUB( Fout2[0] ,  Fout[0] , t );
         C_ADDTO( Fout[0] ,  t );

         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
         C_SUB( Fout2[1] ,  Fout[1] , t );
         C_ADDTO( Fout[1] ,  t );

         t.r = Fout2[2].i;
         t.i = NEG32_ovflw(Fout2[2].r);
         C_SUB( Fout2[2] ,  Fout[2] , t );
         C_ADDTO( Fout[2] ,  t );

         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
         C_SUB( Fout2[3] ,  Fout[3] , t );
         C_ADDTO( Fout[3] ,  t );
         Fout += 8;
      }
   }
}

static void kf_bfly4(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   int i;

   if (m==1)
   {
      /* Degenerate case where all the twiddles are 1. */
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx scratch0, scratch1;

         C_SUB( scratch0 , *Fout, Fout[2] );
         C_ADDTO(*Fout, Fout[2]);
         C_ADD( scratch1 , Fout[1] , Fout[3] );
         C_SUB( Fout[2], *Fout, scratch1 );
         C_ADDTO( *Fout , scratch1 );
         C_SUB( scratch1 , Fout[1] , Fout[3] );

         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
         Fout+=4;
      }
   } else {
      int j;
      kiss_fft_cpx scratch[6];
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
      const int m2=2*m;
      const int m3=3*m;
      kiss_fft_cpx * Fout_beg = Fout;
      for (i=0;i<N;i++)
      {
         Fout = Fout_beg + i*mm;
         tw3 = tw2 = tw1 = st->twiddles;
         /* m is guaranteed to be a multiple of 4. */
         for (j=0;j<m;j++)
         {
            C_MUL(scratch[0],Fout[m] , *tw1 );
            C_MUL(scratch[1],Fout[m2] , *tw2 );
            C_MUL(scratch[2],Fout[m3] , *tw3 );

            C_SUB( scratch[5] , *Fout, scratch[1] );
            C_ADDTO(*Fout, scratch[1]);
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
            C_SUB( Fout[m2], *Fout, scratch[3] );
            tw1 += fstride;
            tw2 += fstride*2;
            tw3 += fstride*3;
            C_ADDTO( *Fout , scratch[3] );

            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
            ++Fout;
         }
      }
   }
}


#ifndef RADIX_TWO_ONLY

static void kf_bfly3(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   int i;
   size_t k;
   const size_t m2 = 2*m;
   const kiss_twiddle_cpx *tw1,*tw2;
   kiss_fft_cpx scratch[5];
   kiss_twiddle_cpx epi3;

   kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT
   /*epi3.r = -16384;*/ /* Unused */
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
#else
   epi3 = st->twiddles[fstride*m];
#endif
   for (i=0;i<N;i++)
   {
      Fout = Fout_beg + i*mm;
      tw1=tw2=st->twiddles;
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      k=m;
      do {

         C_MUL(scratch[1],Fout[m] , *tw1);
         C_MUL(scratch[2],Fout[m2] , *tw2);

         C_ADD(scratch[3],scratch[1],scratch[2]);
         C_SUB(scratch[0],scratch[1],scratch[2]);
         tw1 += fstride;
         tw2 += fstride*2;

         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));

         C_MULBYSCALAR( scratch[0] , epi3.i );

         C_ADDTO(*Fout,scratch[3]);

         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);

         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);

         ++Fout;
      } while(--k);
   }
}


#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
   int i, u;
   kiss_fft_cpx scratch[13];
   const kiss_twiddle_cpx *tw;
   kiss_twiddle_cpx ya,yb;
   kiss_fft_cpx * Fout_beg = Fout;

#ifdef FIXED_POINT
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
#else
   ya = st->twiddles[fstride*m];
   yb = st->twiddles[fstride*2*m];
#endif
   tw=st->twiddles;

   for (i=0;i<N;i++)
   {
      Fout = Fout_beg + i*mm;
      Fout0=Fout;
      Fout1=Fout0+m;
      Fout2=Fout0+2*m;
      Fout3=Fout0+3*m;
      Fout4=Fout0+4*m;

      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      for ( u=0; u<m; ++u ) {
         scratch[0] = *Fout0;

         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);

         C_ADD( scratch[7],scratch[1],scratch[4]);
         C_SUB( scratch[10],scratch[1],scratch[4]);
         C_ADD( scratch[8],scratch[2],scratch[3]);
         C_SUB( scratch[9],scratch[2],scratch[3]);

         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));

         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));

         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));

         C_SUB(*Fout1,scratch[5],scratch[6]);
         C_ADD(*Fout4,scratch[5],scratch[6]);

         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));

         C_ADD(*Fout2,scratch[11],scratch[12]);
         C_SUB(*Fout3,scratch[11],scratch[12]);

         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
      }
   }
}
#endif /* OVERRIDE_kf_bfly5 */


#endif


#ifdef CUSTOM_MODES

static
void compute_bitrev_table(
         int Fout,
         opus_int16 *f,
         const size_t fstride,
         int in_stride,
         opus_int16 * factors,
         const kiss_fft_state *st
            )
{
   const int p=*factors++; /* the radix  */
   const int m=*factors++; /* stage's fft length/p */

    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
   if (m==1)
   {
      int j;
      for (j=0;j<p;j++)
      {
         *f = Fout+j;
         f += fstride*in_stride;
      }
   } else {
      int j;
      for (j=0;j<p;j++)
      {
         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
         f += fstride*in_stride;
         Fout += m;
      }
   }
}

/*  facbuf is populated by p1,m1,p2,m2, ...
    where
    p[i] * m[i] = m[i-1]
    m0 = n                  */
static
int kf_factor(int n,opus_int16 * facbuf)
{
    int p=4;
    int i;
    int stages=0;
    int nbak = n;

    /*factor out powers of 4, powers of 2, then any remaining primes */
    do {
        while (n % p) {
            switch (p) {
                case 4: p = 2; break;
                case 2: p = 3; break;
                default: p += 2; break;
            }
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
                p = n;          /* no more factors, skip to end */
        }
        n /= p;
#ifdef RADIX_TWO_ONLY
        if (p!=2 && p != 4)
#else
        if (p>5)
#endif
        {
           return 0;
        }
        facbuf[2*stages] = p;
        if (p==2 && stages > 1)
        {
           facbuf[2*stages] = 4;
           facbuf[2] = 2;
        }
        stages++;
    } while (n > 1);
    n = nbak;
    /* Reverse the order to get the radix 4 at the end, so we can use the
       fast degenerate case. It turns out that reversing the order also
       improves the noise behaviour. */
    for (i=0;i<stages/2;i++)
    {
       int tmp;
       tmp = facbuf[2*i];
       facbuf[2*i] = facbuf[2*(stages-i-1)];
       facbuf[2*(stages-i-1)] = tmp;
    }
    for (i=0;i<stages;i++)
    {
        n /= facbuf[2*i];
        facbuf[2*i+1] = n;
    }
    return 1;
}

static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
{
   int i;
#ifdef FIXED_POINT
   for (i=0;i<nfft;++i) {
      opus_val32 phase = -i;
#ifdef ENABLE_QEXT
      twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
      twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
#else
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
#endif
   }
#else
   for (i=0;i<nfft;++i) {
      const double pi=3.14159265358979323846264338327;
      double phase = ( -2*pi /nfft ) * i;
      kf_cexp(twiddles+i, phase );
   }
#endif
}

int opus_fft_alloc_arch_c(kiss_fft_state *st) {
   (void)st;
   return 0;
}

/*
 *
 * Allocates all necessary storage space for the fft and ifft.
 * The return value is a contiguous block of memory.  As such,
 * It can be freed with free().
 * */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
                                        const kiss_fft_state *base, int arch)
{
    kiss_fft_state *st=NULL;
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/

    if ( lenmem==NULL ) {
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
    }else{
        if (mem != NULL && *lenmem >= memneeded)
            st = (kiss_fft_state*)mem;
        *lenmem = memneeded;
    }
    if (st) {
        opus_int16 *bitrev;
        kiss_twiddle_cpx *twiddles;

        st->nfft=nfft;
#ifdef FIXED_POINT
        st->scale_shift = celt_ilog2(st->nfft);
# ifdef ENABLE_QEXT
        if (st->nfft == 1<<st->scale_shift)
           st->scale = QCONST32(1.0f, 30);
        else
           st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
# else
        if (st->nfft == 1<<st->scale_shift)
           st->scale = Q15ONE;
        else
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
# endif
#else
        st->scale = 1.f/nfft;
#endif
        if (base != NULL)
        {
           st->twiddles = base->twiddles;
           st->shift = 0;
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
              st->shift++;
           if (st->shift>=32)
              goto fail;
        } else {
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
           compute_twiddles(twiddles, nfft);
           st->shift = -1;
        }
        if (!kf_factor(nfft,st->factors))
        {
           goto fail;
        }

        /* bitrev */
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
        if (st->bitrev==NULL)
            goto fail;
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);

        /* Initialize architecture specific fft parameters */
        if (opus_fft_alloc_arch(st, arch))
            goto fail;
    }
    return st;
fail:
    opus_fft_free(st, arch);
    return NULL;
}

kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
}

void opus_fft_free_arch_c(kiss_fft_state *st) {
   (void)st;
}

void opus_fft_free(const kiss_fft_state *cfg, int arch)
{
   if (cfg)
   {
      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
      opus_free((opus_int16*)cfg->bitrev);
      if (cfg->shift < 0)
         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
      opus_free((kiss_fft_state*)cfg);
   }
}

#endif /* CUSTOM_MODES */

#ifdef FIXED_POINT
#ifndef OVERRIDE_fft_downshift
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
   int shift;
   shift = IMIN(step, *total);
   *total -= shift;
   if (shift == 1) {
      int i;
      for (i=0;i<N;i++) {
         x[i].r = SHR32(x[i].r, 1);
         x[i].i = SHR32(x[i].i, 1);
      }
   } else if (shift>0) {
      int i;
      for (i=0;i<N;i++) {
         x[i].r = PSHR32(x[i].r, shift);
         x[i].i = PSHR32(x[i].i, shift);
      }
   }
}
#endif /* OVERRIDE_fft_downshift */
#else
#define fft_downshift(x, N, total, step)
#endif

void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
{
    int m2, m;
    int p;
    int L;
    int fstride[MAXFACTORS];
    int i;
    int shift;

    /* st->shift can be -1 */
    shift = st->shift>0 ? st->shift : 0;

    fstride[0] = 1;
    L=0;
    do {
       p = st->factors[2*L];
       m = st->factors[2*L+1];
       fstride[L+1] = fstride[L]*p;
       L++;
    } while(m!=1);
    m = st->factors[2*L-1];
    for (i=L-1;i>=0;i--)
    {
       if (i!=0)
          m2 = st->factors[2*i-1];
       else
          m2 = 1;
       switch (st->factors[2*i])
       {
       case 2:
          fft_downshift(fout, st->nfft, &downshift, 1);
          kf_bfly2(fout, m, fstride[i]);
          break;
       case 4:
          fft_downshift(fout, st->nfft, &downshift, 2);
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
 #ifndef RADIX_TWO_ONLY
       case 3:
          fft_downshift(fout, st->nfft, &downshift, 2);
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
       case 5:
          fft_downshift(fout, st->nfft, &downshift, 3);
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
 #endif
       }
       m = m2;
    }
    fft_downshift(fout, st->nfft, &downshift, downshift);
}

void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
   int i;
   celt_coef scale;
#ifdef FIXED_POINT
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
      MULT16_32_Q15() on ARM. */
   int scale_shift = st->scale_shift-1;
#endif
   scale = st->scale;

   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (i=0;i<st->nfft;i++)
   {
      kiss_fft_cpx x = fin[i];
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
   }
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
}


void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
   int i;
   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (i=0;i<st->nfft;i++)
      fout[st->bitrev[i]] = fin[i];
   for (i=0;i<st->nfft;i++)
      fout[i].i = -fout[i].i;
   opus_fft_impl(st, fout ARG_FIXED(0));
   for (i=0;i<st->nfft;i++)
      fout[i].i = -fout[i].i;
}

Coverage Report

Created: 2025-11-16 07:20

Line	Count	Source
1		/*Copyright (c) 2003-2004, Mark Borgerding
2		Lots of modifications by Jean-Marc Valin
3		Copyright (c) 2005-2007, Xiph.Org Foundation
4		Copyright (c) 2008, Xiph.Org Foundation, CSIRO
5
6		All rights reserved.
7
8		Redistribution and use in source and binary forms, with or without
9		modification, are permitted provided that the following conditions are met:
10
11		* Redistributions of source code must retain the above copyright notice,
12		this list of conditions and the following disclaimer.
13		* Redistributions in binary form must reproduce the above copyright notice,
14		this list of conditions and the following disclaimer in the
15		documentation and/or other materials provided with the distribution.
16
17		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18		AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19		IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20		ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21		LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22		CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23		SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24		INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25		CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26		ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27		POSSIBILITY OF SUCH DAMAGE.*/
28
29		/* This code is originally from Mark Borgerding's KISS-FFT but has been
30		heavily modified to better suit Opus */
31
32		#ifndef SKIP_CONFIG_H
33		# ifdef HAVE_CONFIG_H
34		# include "config.h"
35		# endif
36		#endif
37
38		#include "_kiss_fft_guts.h"
39		#include "arch.h"
40		#include "os_support.h"
41		#include "mathops.h"
42		#include "stack_alloc.h"
43
44		#ifndef M_PI
45		#define M_PI 3.141592653
46		#endif
47
48		/* The guts header contains all the multiplication and addition macros that are defined for
49		complex numbers. It also declares the kf_ internal functions.
50		*/
51
52		static void kf_bfly2(
53		kiss_fft_cpx * Fout,
54		int m,
55		int N
56		)
57	97.8k	{
58	97.8k	kiss_fft_cpx * Fout2;
59	97.8k	int i;
60	97.8k	(void)m;
61		#ifdef CUSTOM_MODES
62		if (m==1)
63		{
64		celt_assert(m==1);
65		for (i=0;i<N;i++)
66		{
67		kiss_fft_cpx t;
68		Fout2 = Fout + 1;
69		t = *Fout2;
70		C_SUB( Fout2 , Fout , t );
71		C_ADDTO( *Fout , t );
72		Fout += 2;
73		}
74		} else
75		#endif
76	97.8k	{
77	97.8k	celt_coef tw;
78	97.8k	tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79		/* We know that m==4 here because the radix-2 is just after a radix-4 */
80	97.8k	celt_assert(m==4);
81	3.73M	for (i=0;i<N;i++)
82	3.63M	{
83	3.63M	kiss_fft_cpx t;
84	3.63M	Fout2 = Fout + 4;
85	3.63M	t = Fout2[0];
86	3.63M	C_SUB( Fout2[0] , Fout[0] , t );
87	3.63M	C_ADDTO( Fout[0] , t );
88
89	3.63M	t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90	3.63M	t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91	3.63M	C_SUB( Fout2[1] , Fout[1] , t );
92	3.63M	C_ADDTO( Fout[1] , t );
93
94	3.63M	t.r = Fout2[2].i;
95	3.63M	t.i = NEG32_ovflw(Fout2[2].r);
96	3.63M	C_SUB( Fout2[2] , Fout[2] , t );
97	3.63M	C_ADDTO( Fout[2] , t );
98
99	3.63M	t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100	3.63M	t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101	3.63M	C_SUB( Fout2[3] , Fout[3] , t );
102	3.63M	C_ADDTO( Fout[3] , t );
103	3.63M	Fout += 8;
104	3.63M	}
105	97.8k	}
106	97.8k	}
107
108		static void kf_bfly4(
109		kiss_fft_cpx * Fout,
110		const size_t fstride,
111		const kiss_fft_state *st,
112		int m,
113		int N,
114		int mm
115		)
116	633k	{
117	633k	int i;
118
119	633k	if (m==1)
120	401k	{
121		/* Degenerate case where all the twiddles are 1. */
122	20.5M	for (i=0;i<N;i++)
123	20.1M	{
124	20.1M	kiss_fft_cpx scratch0, scratch1;
125
126	20.1M	C_SUB( scratch0 , *Fout, Fout[2] );
127	20.1M	C_ADDTO(*Fout, Fout[2]);
128	20.1M	C_ADD( scratch1 , Fout[1] , Fout[3] );
129	20.1M	C_SUB( Fout[2], *Fout, scratch1 );
130	20.1M	C_ADDTO( *Fout , scratch1 );
131	20.1M	C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133	20.1M	Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134	20.1M	Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135	20.1M	Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136	20.1M	Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137	20.1M	Fout+=4;
138	20.1M	}
139	401k	} else {
140	232k	int j;
141	232k	kiss_fft_cpx scratch[6];
142	232k	const kiss_twiddle_cpx tw1,tw2,*tw3;
143	232k	const int m2=2*m;
144	232k	const int m3=3*m;
145	232k	kiss_fft_cpx * Fout_beg = Fout;
146	3.72M	for (i=0;i<N;i++)
147	3.49M	{
148	3.49M	Fout = Fout_beg + i*mm;
149	3.49M	tw3 = tw2 = tw1 = st->twiddles;
150		/* m is guaranteed to be a multiple of 4. */
151	20.3M	for (j=0;j<m;j++)
152	16.8M	{
153	16.8M	C_MUL(scratch[0],Fout[m] , *tw1 );
154	16.8M	C_MUL(scratch[1],Fout[m2] , *tw2 );
155	16.8M	C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157	16.8M	C_SUB( scratch[5] , *Fout, scratch[1] );
158	16.8M	C_ADDTO(*Fout, scratch[1]);
159	16.8M	C_ADD( scratch[3] , scratch[0] , scratch[2] );
160	16.8M	C_SUB( scratch[4] , scratch[0] , scratch[2] );
161	16.8M	C_SUB( Fout[m2], *Fout, scratch[3] );
162	16.8M	tw1 += fstride;
163	16.8M	tw2 += fstride*2;
164	16.8M	tw3 += fstride*3;
165	16.8M	C_ADDTO( *Fout , scratch[3] );
166
167	16.8M	Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168	16.8M	Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169	16.8M	Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170	16.8M	Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171	16.8M	++Fout;
172	16.8M	}
173	3.49M	}
174	232k	}
175	633k	}
176
177
178		#ifndef RADIX_TWO_ONLY
179
180		static void kf_bfly3(
181		kiss_fft_cpx * Fout,
182		const size_t fstride,
183		const kiss_fft_state *st,
184		int m,
185		int N,
186		int mm
187		)
188	401k	{
189	401k	int i;
190	401k	size_t k;
191	401k	const size_t m2 = 2*m;
192	401k	const kiss_twiddle_cpx tw1,tw2;
193	401k	kiss_fft_cpx scratch[5];
194	401k	kiss_twiddle_cpx epi3;
195
196	401k	kiss_fft_cpx * Fout_beg = Fout;
197		#ifdef FIXED_POINT
198		/epi3.r = -16384;/ /* Unused */
199		epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200		#else
201	401k	epi3 = st->twiddles[fstride*m];
202	401k	#endif
203	2.40M	for (i=0;i<N;i++)
204	2.00M	{
205	2.00M	Fout = Fout_beg + i*mm;
206	2.00M	tw1=tw2=st->twiddles;
207		/* For non-custom modes, m is guaranteed to be a multiple of 4. */
208	2.00M	k=m;
209	26.8M	do {
210
211	26.8M	C_MUL(scratch[1],Fout[m] , *tw1);
212	26.8M	C_MUL(scratch[2],Fout[m2] , *tw2);
213
214	26.8M	C_ADD(scratch[3],scratch[1],scratch[2]);
215	26.8M	C_SUB(scratch[0],scratch[1],scratch[2]);
216	26.8M	tw1 += fstride;
217	26.8M	tw2 += fstride*2;
218
219	26.8M	Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220	26.8M	Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222	26.8M	C_MULBYSCALAR( scratch[0] , epi3.i );
223
224	26.8M	C_ADDTO(*Fout,scratch[3]);
225
226	26.8M	Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227	26.8M	Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229	26.8M	Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230	26.8M	Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232	26.8M	++Fout;
233	26.8M	} while(--k);
234	2.00M	}
235	401k	}
236
237
238		#ifndef OVERRIDE_kf_bfly5
239		static void kf_bfly5(
240		kiss_fft_cpx * Fout,
241		const size_t fstride,
242		const kiss_fft_state *st,
243		int m,
244		int N,
245		int mm
246		)
247	401k	{
248	401k	kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;
249	401k	int i, u;
250	401k	kiss_fft_cpx scratch[13];
251	401k	const kiss_twiddle_cpx *tw;
252	401k	kiss_twiddle_cpx ya,yb;
253	401k	kiss_fft_cpx * Fout_beg = Fout;
254
255		#ifdef FIXED_POINT
256		ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257		ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258		yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259		yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260		#else
261	401k	ya = st->twiddles[fstride*m];
262	401k	yb = st->twiddles[fstride2m];
263	401k	#endif
264	401k	tw=st->twiddles;
265
266	802k	for (i=0;i<N;i++)
267	401k	{
268	401k	Fout = Fout_beg + i*mm;
269	401k	Fout0=Fout;
270	401k	Fout1=Fout0+m;
271	401k	Fout2=Fout0+2*m;
272	401k	Fout3=Fout0+3*m;
273	401k	Fout4=Fout0+4*m;
274
275		/* For non-custom modes, m is guaranteed to be a multiple of 4. */
276	16.5M	for ( u=0; u<m; ++u ) {
277	16.1M	scratch[0] = *Fout0;
278
279	16.1M	C_MUL(scratch[1] ,Fout1, tw[ufstride]);
280	16.1M	C_MUL(scratch[2] ,Fout2, tw[2u*fstride]);
281	16.1M	C_MUL(scratch[3] ,Fout3, tw[3u*fstride]);
282	16.1M	C_MUL(scratch[4] ,Fout4, tw[4u*fstride]);
283
284	16.1M	C_ADD( scratch[7],scratch[1],scratch[4]);
285	16.1M	C_SUB( scratch[10],scratch[1],scratch[4]);
286	16.1M	C_ADD( scratch[8],scratch[2],scratch[3]);
287	16.1M	C_SUB( scratch[9],scratch[2],scratch[3]);
288
289	16.1M	Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290	16.1M	Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292	16.1M	scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293	16.1M	scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295	16.1M	scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296	16.1M	scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298	16.1M	C_SUB(*Fout1,scratch[5],scratch[6]);
299	16.1M	C_ADD(*Fout4,scratch[5],scratch[6]);
300
301	16.1M	scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302	16.1M	scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303	16.1M	scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304	16.1M	scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306	16.1M	C_ADD(*Fout2,scratch[11],scratch[12]);
307	16.1M	C_SUB(*Fout3,scratch[11],scratch[12]);
308
309	16.1M	++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310	16.1M	}
311	401k	}
312	401k	}
313		#endif /* OVERRIDE_kf_bfly5 */
314
315
316		#endif
317
318
319		#ifdef CUSTOM_MODES
320
321		static
322		void compute_bitrev_table(
323		int Fout,
324		opus_int16 *f,
325		const size_t fstride,
326		int in_stride,
327		opus_int16 * factors,
328		const kiss_fft_state *st
329		)
330		{
331		const int p=factors++; / the radix */
332		const int m=factors++; / stage's fft length/p */
333
334		/printf ("fft %d %d %d %d %d %d\n", pm, m, p, s2, fstridein_stride, N);/
335		if (m==1)
336		{
337		int j;
338		for (j=0;j<p;j++)
339		{
340		*f = Fout+j;
341		f += fstride*in_stride;
342		}
343		} else {
344		int j;
345		for (j=0;j<p;j++)
346		{
347		compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
348		f += fstride*in_stride;
349		Fout += m;
350		}
351		}
352		}
353
354		/* facbuf is populated by p1,m1,p2,m2, ...
355		where
356		p[i] * m[i] = m[i-1]
357		m0 = n */
358		static
359		int kf_factor(int n,opus_int16 * facbuf)
360		{
361		int p=4;
362		int i;
363		int stages=0;
364		int nbak = n;
365
366		/factor out powers of 4, powers of 2, then any remaining primes /
367		do {
368		while (n % p) {
369		switch (p) {
370		case 4: p = 2; break;
371		case 2: p = 3; break;
372		default: p += 2; break;
373		}
374		if (p>32000 \|\| (opus_int32)p*(opus_int32)p > n)
375		p = n; /* no more factors, skip to end */
376		}
377		n /= p;
378		#ifdef RADIX_TWO_ONLY
379		if (p!=2 && p != 4)
380		#else
381		if (p>5)
382		#endif
383		{
384		return 0;
385		}
386		facbuf[2*stages] = p;
387		if (p==2 && stages > 1)
388		{
389		facbuf[2*stages] = 4;
390		facbuf[2] = 2;
391		}
392		stages++;
393		} while (n > 1);
394		n = nbak;
395		/* Reverse the order to get the radix 4 at the end, so we can use the
396		fast degenerate case. It turns out that reversing the order also
397		improves the noise behaviour. */
398		for (i=0;i<stages/2;i++)
399		{
400		int tmp;
401		tmp = facbuf[2*i];
402		facbuf[2i] = facbuf[2(stages-i-1)];
403		facbuf[2*(stages-i-1)] = tmp;
404		}
405		for (i=0;i<stages;i++)
406		{
407		n /= facbuf[2*i];
408		facbuf[2*i+1] = n;
409		}
410		return 1;
411		}
412
413		static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
414		{
415		int i;
416		#ifdef FIXED_POINT
417		for (i=0;i<nfft;++i) {
418		opus_val32 phase = -i;
419		#ifdef ENABLE_QEXT
420		twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648cos((2M_PI/nfft)*phase)));
421		twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648sin((2M_PI/nfft)*phase)));
422		#else
423		kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
424		#endif
425		}
426		#else
427		for (i=0;i<nfft;++i) {
428		const double pi=3.14159265358979323846264338327;
429		double phase = ( -2pi /nfft ) i;
430		kf_cexp(twiddles+i, phase );
431		}
432		#endif
433		}
434
435		int opus_fft_alloc_arch_c(kiss_fft_state *st) {
436		(void)st;
437		return 0;
438		}
439
440		/*
441		*
442		* Allocates all necessary storage space for the fft and ifft.
443		* The return value is a contiguous block of memory. As such,
444		* It can be freed with free().
445		* */
446		kiss_fft_state opus_fft_alloc_twiddles(int nfft,void mem,size_t * lenmem,
447		const kiss_fft_state *base, int arch)
448		{
449		kiss_fft_state *st=NULL;
450		size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
451
452		if ( lenmem==NULL ) {
453		st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
454		}else{
455		if (mem != NULL && *lenmem >= memneeded)
456		st = (kiss_fft_state*)mem;
457		*lenmem = memneeded;
458		}
459		if (st) {
460		opus_int16 *bitrev;
461		kiss_twiddle_cpx *twiddles;
462
463		st->nfft=nfft;
464		#ifdef FIXED_POINT
465		st->scale_shift = celt_ilog2(st->nfft);
466		# ifdef ENABLE_QEXT
467		if (st->nfft == 1<<st->scale_shift)
468		st->scale = QCONST32(1.0f, 30);
469		else
470		st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
471		# else
472		if (st->nfft == 1<<st->scale_shift)
473		st->scale = Q15ONE;
474		else
475		st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
476		# endif
477		#else
478		st->scale = 1.f/nfft;
479		#endif
480		if (base != NULL)
481		{
482		st->twiddles = base->twiddles;
483		st->shift = 0;
484		while (st->shift < 32 && nfft<<st->shift != base->nfft)
485		st->shift++;
486		if (st->shift>=32)
487		goto fail;
488		} else {
489		st->twiddles = twiddles = (kiss_twiddle_cpx)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)nfft);
490		compute_twiddles(twiddles, nfft);
491		st->shift = -1;
492		}
493		if (!kf_factor(nfft,st->factors))
494		{
495		goto fail;
496		}
497
498		/* bitrev */
499		st->bitrev = bitrev = (opus_int16)KISS_FFT_MALLOC(sizeof(opus_int16)nfft);
500		if (st->bitrev==NULL)
501		goto fail;
502		compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
503
504		/* Initialize architecture specific fft parameters */
505		if (opus_fft_alloc_arch(st, arch))
506		goto fail;
507		}
508		return st;
509		fail:
510		opus_fft_free(st, arch);
511		return NULL;
512		}
513
514		kiss_fft_state opus_fft_alloc(int nfft,void mem,size_t * lenmem, int arch)
515		{
516		return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
517		}
518
519		void opus_fft_free_arch_c(kiss_fft_state *st) {
520		(void)st;
521		}
522
523		void opus_fft_free(const kiss_fft_state *cfg, int arch)
524		{
525		if (cfg)
526		{
527		opus_fft_free_arch((kiss_fft_state *)cfg, arch);
528		opus_free((opus_int16*)cfg->bitrev);
529		if (cfg->shift < 0)
530		opus_free((kiss_twiddle_cpx*)cfg->twiddles);
531		opus_free((kiss_fft_state*)cfg);
532		}
533		}
534
535		#endif /* CUSTOM_MODES */
536
537		#ifdef FIXED_POINT
538		#ifndef OVERRIDE_fft_downshift
539		static void fft_downshift(kiss_fft_cpx x, int N, int total, int step) {
540		int shift;
541		shift = IMIN(step, *total);
542		*total -= shift;
543		if (shift == 1) {
544		int i;
545		for (i=0;i<N;i++) {
546		x[i].r = SHR32(x[i].r, 1);
547		x[i].i = SHR32(x[i].i, 1);
548		}
549		} else if (shift>0) {
550		int i;
551		for (i=0;i<N;i++) {
552		x[i].r = PSHR32(x[i].r, shift);
553		x[i].i = PSHR32(x[i].i, shift);
554		}
555		}
556		}
557		#endif /* OVERRIDE_fft_downshift */
558		#else
559		#define fft_downshift(x, N, total, step)
560		#endif
561
562		void opus_fft_impl(const kiss_fft_state st,kiss_fft_cpx fout ARG_FIXED(int downshift))
563	401k	{
564	401k	int m2, m;
565	401k	int p;
566	401k	int L;
567	401k	int fstride[MAXFACTORS];
568	401k	int i;
569	401k	int shift;
570
571		/* st->shift can be -1 */
572	401k	shift = st->shift>0 ? st->shift : 0;
573
574	401k	fstride[0] = 1;
575	401k	L=0;
576	1.53M	do {
577	1.53M	p = st->factors[2*L];
578	1.53M	m = st->factors[2*L+1];
579	1.53M	fstride[L+1] = fstride[L]*p;
580	1.53M	L++;
581	1.53M	} while(m!=1);
582	401k	m = st->factors[2*L-1];
583	1.93M	for (i=L-1;i>=0;i--)
584	1.53M	{
585	1.53M	if (i!=0)
586	1.13M	m2 = st->factors[2*i-1];
587	401k	else
588	401k	m2 = 1;
589	1.53M	switch (st->factors[2*i])
590	1.53M	{
591	97.8k	case 2:
592	97.8k	fft_downshift(fout, st->nfft, &downshift, 1);
593	97.8k	kf_bfly2(fout, m, fstride[i]);
594	97.8k	break;
595	633k	case 4:
596	633k	fft_downshift(fout, st->nfft, &downshift, 2);
597	633k	kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
598	633k	break;
599	0	#ifndef RADIX_TWO_ONLY
600	401k	case 3:
601	401k	fft_downshift(fout, st->nfft, &downshift, 2);
602	401k	kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
603	401k	break;
604	401k	case 5:
605	401k	fft_downshift(fout, st->nfft, &downshift, 3);
606	401k	kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
607	401k	break;
608	1.53M	#endif
609	1.53M	}
610	1.53M	m = m2;
611	1.53M	}
612	401k	fft_downshift(fout, st->nfft, &downshift, downshift);
613	401k	}
614
615		void opus_fft_c(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
616	0	{
617	0	int i;
618	0	celt_coef scale;
619		#ifdef FIXED_POINT
620		/* Allows us to scale with MULT16_32_Q16(), which is faster than
621		MULT16_32_Q15() on ARM. */
622		int scale_shift = st->scale_shift-1;
623		#endif
624	0	scale = st->scale;
625
626	0	celt_assert2 (fin != fout, "In-place FFT not supported");
627		/* Bit-reverse the input */
628	0	for (i=0;i<st->nfft;i++)
629	0	{
630	0	kiss_fft_cpx x = fin[i];
631	0	fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
632	0	fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
633	0	}
634	0	opus_fft_impl(st, fout ARG_FIXED(scale_shift));
635	0	}
636
637
638		void opus_ifft_c(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
639	0	{
640	0	int i;
641	0	celt_assert2 (fin != fout, "In-place FFT not supported");
642		/* Bit-reverse the input */
643	0	for (i=0;i<st->nfft;i++)
644	0	fout[st->bitrev[i]] = fin[i];
645	0	for (i=0;i<st->nfft;i++)
646	0	fout[i].i = -fout[i].i;
647	0	opus_fft_impl(st, fout ARG_FIXED(0));
648	0	for (i=0;i<st->nfft;i++)
649	0	fout[i].i = -fout[i].i;
650	0	}