/src/opus/celt/kiss_fft.c

Source (jump to first uncovered line)
/*Copyright (c) 2003-2004, Mark Borgerding
  Lots of modifications by Jean-Marc Valin
  Copyright (c) 2005-2007, Xiph.Org Foundation
  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO

  All rights reserved.

  Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
       this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.*/

/* This code is originally from Mark Borgerding's KISS-FFT but has been
   heavily modified to better suit Opus */

#ifndef SKIP_CONFIG_H
#  ifdef HAVE_CONFIG_H
#    include "config.h"
#  endif
#endif

#include "_kiss_fft_guts.h"
#include "arch.h"
#include "os_support.h"
#include "mathops.h"
#include "stack_alloc.h"

#ifndef M_PI
#define M_PI 3.141592653
#endif

/* The guts header contains all the multiplication and addition macros that are defined for
   complex numbers.  It also declares the kf_ internal functions.
*/

static void kf_bfly2(
                     kiss_fft_cpx * Fout,
                     int m,
                     int N
                    )
{
   kiss_fft_cpx * Fout2;
   int i;
   (void)m;
#ifdef CUSTOM_MODES
   if (m==1)
   {
      celt_assert(m==1);
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx t;
         Fout2 = Fout + 1;
         t = *Fout2;
         C_SUB( *Fout2 ,  *Fout , t );
         C_ADDTO( *Fout ,  t );
         Fout += 2;
      }
   } else
#endif
   {
      celt_coef tw;
      tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
      /* We know that m==4 here because the radix-2 is just after a radix-4 */
      celt_assert(m==4);
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx t;
         Fout2 = Fout + 4;
         t = Fout2[0];
         C_SUB( Fout2[0] ,  Fout[0] , t );
         C_ADDTO( Fout[0] ,  t );

         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
         C_SUB( Fout2[1] ,  Fout[1] , t );
         C_ADDTO( Fout[1] ,  t );

         t.r = Fout2[2].i;
         t.i = NEG32_ovflw(Fout2[2].r);
         C_SUB( Fout2[2] ,  Fout[2] , t );
         C_ADDTO( Fout[2] ,  t );

         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
         C_SUB( Fout2[3] ,  Fout[3] , t );
         C_ADDTO( Fout[3] ,  t );
         Fout += 8;
      }
   }
}

static void kf_bfly4(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   int i;

   if (m==1)
   {
      /* Degenerate case where all the twiddles are 1. */
      for (i=0;i<N;i++)
      {
         kiss_fft_cpx scratch0, scratch1;

         C_SUB( scratch0 , *Fout, Fout[2] );
         C_ADDTO(*Fout, Fout[2]);
         C_ADD( scratch1 , Fout[1] , Fout[3] );
         C_SUB( Fout[2], *Fout, scratch1 );
         C_ADDTO( *Fout , scratch1 );
         C_SUB( scratch1 , Fout[1] , Fout[3] );

         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
         Fout+=4;
      }
   } else {
      int j;
      kiss_fft_cpx scratch[6];
      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
      const int m2=2*m;
      const int m3=3*m;
      kiss_fft_cpx * Fout_beg = Fout;
      for (i=0;i<N;i++)
      {
         Fout = Fout_beg + i*mm;
         tw3 = tw2 = tw1 = st->twiddles;
         /* m is guaranteed to be a multiple of 4. */
         for (j=0;j<m;j++)
         {
            C_MUL(scratch[0],Fout[m] , *tw1 );
            C_MUL(scratch[1],Fout[m2] , *tw2 );
            C_MUL(scratch[2],Fout[m3] , *tw3 );

            C_SUB( scratch[5] , *Fout, scratch[1] );
            C_ADDTO(*Fout, scratch[1]);
            C_ADD( scratch[3] , scratch[0] , scratch[2] );
            C_SUB( scratch[4] , scratch[0] , scratch[2] );
            C_SUB( Fout[m2], *Fout, scratch[3] );
            tw1 += fstride;
            tw2 += fstride*2;
            tw3 += fstride*3;
            C_ADDTO( *Fout , scratch[3] );

            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
            ++Fout;
         }
      }
   }
}


#ifndef RADIX_TWO_ONLY

static void kf_bfly3(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   int i;
   size_t k;
   const size_t m2 = 2*m;
   const kiss_twiddle_cpx *tw1,*tw2;
   kiss_fft_cpx scratch[5];
   kiss_twiddle_cpx epi3;

   kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT
   /*epi3.r = -16384;*/ /* Unused */
   epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
#else
   epi3 = st->twiddles[fstride*m];
#endif
   for (i=0;i<N;i++)
   {
      Fout = Fout_beg + i*mm;
      tw1=tw2=st->twiddles;
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      k=m;
      do {

         C_MUL(scratch[1],Fout[m] , *tw1);
         C_MUL(scratch[2],Fout[m2] , *tw2);

         C_ADD(scratch[3],scratch[1],scratch[2]);
         C_SUB(scratch[0],scratch[1],scratch[2]);
         tw1 += fstride;
         tw2 += fstride*2;

         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));

         C_MULBYSCALAR( scratch[0] , epi3.i );

         C_ADDTO(*Fout,scratch[3]);

         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);

         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);

         ++Fout;
      } while(--k);
   }
}


#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
   int i, u;
   kiss_fft_cpx scratch[13];
   const kiss_twiddle_cpx *tw;
   kiss_twiddle_cpx ya,yb;
   kiss_fft_cpx * Fout_beg = Fout;

#ifdef FIXED_POINT
   ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
   ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
   yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
   yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
#else
   ya = st->twiddles[fstride*m];
   yb = st->twiddles[fstride*2*m];
#endif
   tw=st->twiddles;

   for (i=0;i<N;i++)
   {
      Fout = Fout_beg + i*mm;
      Fout0=Fout;
      Fout1=Fout0+m;
      Fout2=Fout0+2*m;
      Fout3=Fout0+3*m;
      Fout4=Fout0+4*m;

      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      for ( u=0; u<m; ++u ) {
         scratch[0] = *Fout0;

         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);

         C_ADD( scratch[7],scratch[1],scratch[4]);
         C_SUB( scratch[10],scratch[1],scratch[4]);
         C_ADD( scratch[8],scratch[2],scratch[3]);
         C_SUB( scratch[9],scratch[2],scratch[3]);

         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));

         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));

         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));

         C_SUB(*Fout1,scratch[5],scratch[6]);
         C_ADD(*Fout4,scratch[5],scratch[6]);

         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));

         C_ADD(*Fout2,scratch[11],scratch[12]);
         C_SUB(*Fout3,scratch[11],scratch[12]);

         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
      }
   }
}
#endif /* OVERRIDE_kf_bfly5 */


#endif


#ifdef CUSTOM_MODES

static
void compute_bitrev_table(
         int Fout,
         opus_int16 *f,
         const size_t fstride,
         int in_stride,
         opus_int16 * factors,
         const kiss_fft_state *st
            )
{
   const int p=*factors++; /* the radix  */
   const int m=*factors++; /* stage's fft length/p */

    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
   if (m==1)
   {
      int j;
      for (j=0;j<p;j++)
      {
         *f = Fout+j;
         f += fstride*in_stride;
      }
   } else {
      int j;
      for (j=0;j<p;j++)
      {
         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
         f += fstride*in_stride;
         Fout += m;
      }
   }
}

/*  facbuf is populated by p1,m1,p2,m2, ...
    where
    p[i] * m[i] = m[i-1]
    m0 = n                  */
static
int kf_factor(int n,opus_int16 * facbuf)
{
    int p=4;
    int i;
    int stages=0;
    int nbak = n;

    /*factor out powers of 4, powers of 2, then any remaining primes */
    do {
        while (n % p) {
            switch (p) {
                case 4: p = 2; break;
                case 2: p = 3; break;
                default: p += 2; break;
            }
            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
                p = n;          /* no more factors, skip to end */
        }
        n /= p;
#ifdef RADIX_TWO_ONLY
        if (p!=2 && p != 4)
#else
        if (p>5)
#endif
        {
           return 0;
        }
        facbuf[2*stages] = p;
        if (p==2 && stages > 1)
        {
           facbuf[2*stages] = 4;
           facbuf[2] = 2;
        }
        stages++;
    } while (n > 1);
    n = nbak;
    /* Reverse the order to get the radix 4 at the end, so we can use the
       fast degenerate case. It turns out that reversing the order also
       improves the noise behaviour. */
    for (i=0;i<stages/2;i++)
    {
       int tmp;
       tmp = facbuf[2*i];
       facbuf[2*i] = facbuf[2*(stages-i-1)];
       facbuf[2*(stages-i-1)] = tmp;
    }
    for (i=0;i<stages;i++)
    {
        n /= facbuf[2*i];
        facbuf[2*i+1] = n;
    }
    return 1;
}

static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
{
   int i;
#ifdef FIXED_POINT
   for (i=0;i<nfft;++i) {
      opus_val32 phase = -i;
#ifdef ENABLE_QEXT
      twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
      twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
#else
      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
#endif
   }
#else
   for (i=0;i<nfft;++i) {
      const double pi=3.14159265358979323846264338327;
      double phase = ( -2*pi /nfft ) * i;
      kf_cexp(twiddles+i, phase );
   }
#endif
}

int opus_fft_alloc_arch_c(kiss_fft_state *st) {
   (void)st;
   return 0;
}

/*
 *
 * Allocates all necessary storage space for the fft and ifft.
 * The return value is a contiguous block of memory.  As such,
 * It can be freed with free().
 * */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
                                        const kiss_fft_state *base, int arch)
{
    kiss_fft_state *st=NULL;
    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/

    if ( lenmem==NULL ) {
        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
    }else{
        if (mem != NULL && *lenmem >= memneeded)
            st = (kiss_fft_state*)mem;
        *lenmem = memneeded;
    }
    if (st) {
        opus_int16 *bitrev;
        kiss_twiddle_cpx *twiddles;

        st->nfft=nfft;
#ifdef FIXED_POINT
        st->scale_shift = celt_ilog2(st->nfft);
# ifdef ENABLE_QEXT
        if (st->nfft == 1<<st->scale_shift)
           st->scale = QCONST32(1.0f, 30);
        else
           st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
# else
        if (st->nfft == 1<<st->scale_shift)
           st->scale = Q15ONE;
        else
           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
# endif
#else
        st->scale = 1.f/nfft;
#endif
        if (base != NULL)
        {
           st->twiddles = base->twiddles;
           st->shift = 0;
           while (st->shift < 32 && nfft<<st->shift != base->nfft)
              st->shift++;
           if (st->shift>=32)
              goto fail;
        } else {
           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
           compute_twiddles(twiddles, nfft);
           st->shift = -1;
        }
        if (!kf_factor(nfft,st->factors))
        {
           goto fail;
        }

        /* bitrev */
        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
        if (st->bitrev==NULL)
            goto fail;
        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);

        /* Initialize architecture specific fft parameters */
        if (opus_fft_alloc_arch(st, arch))
            goto fail;
    }
    return st;
fail:
    opus_fft_free(st, arch);
    return NULL;
}

kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
}

void opus_fft_free_arch_c(kiss_fft_state *st) {
   (void)st;
}

void opus_fft_free(const kiss_fft_state *cfg, int arch)
{
   if (cfg)
   {
      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
      opus_free((opus_int16*)cfg->bitrev);
      if (cfg->shift < 0)
         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
      opus_free((kiss_fft_state*)cfg);
   }
}

#endif /* CUSTOM_MODES */

#ifdef FIXED_POINT
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
   int shift;
   shift = IMIN(step, *total);
   *total -= shift;
   if (shift == 1) {
      int i;
      for (i=0;i<N;i++) {
         x[i].r = SHR32(x[i].r, 1);
         x[i].i = SHR32(x[i].i, 1);
      }
   } else if (shift>0) {
      int i;
      for (i=0;i<N;i++) {
         x[i].r = PSHR32(x[i].r, shift);
         x[i].i = PSHR32(x[i].i, shift);
      }
   }
}
#else
#define fft_downshift(x, N, total, step)
#endif

void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
{
    int m2, m;
    int p;
    int L;
    int fstride[MAXFACTORS];
    int i;
    int shift;

    /* st->shift can be -1 */
    shift = st->shift>0 ? st->shift : 0;

    fstride[0] = 1;
    L=0;
    do {
       p = st->factors[2*L];
       m = st->factors[2*L+1];
       fstride[L+1] = fstride[L]*p;
       L++;
    } while(m!=1);
    m = st->factors[2*L-1];
    for (i=L-1;i>=0;i--)
    {
       if (i!=0)
          m2 = st->factors[2*i-1];
       else
          m2 = 1;
       switch (st->factors[2*i])
       {
       case 2:
          fft_downshift(fout, st->nfft, &downshift, 1);
          kf_bfly2(fout, m, fstride[i]);
          break;
       case 4:
          fft_downshift(fout, st->nfft, &downshift, 2);
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
 #ifndef RADIX_TWO_ONLY
       case 3:
          fft_downshift(fout, st->nfft, &downshift, 2);
          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
       case 5:
          fft_downshift(fout, st->nfft, &downshift, 3);
          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
          break;
 #endif
       }
       m = m2;
    }
    fft_downshift(fout, st->nfft, &downshift, downshift);
}

void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
   int i;
   celt_coef scale;
#ifdef FIXED_POINT
   /* Allows us to scale with MULT16_32_Q16(), which is faster than
      MULT16_32_Q15() on ARM. */
   int scale_shift = st->scale_shift-1;
#endif
   scale = st->scale;

   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (i=0;i<st->nfft;i++)
   {
      kiss_fft_cpx x = fin[i];
      fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
      fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
   }
   opus_fft_impl(st, fout ARG_FIXED(scale_shift));
}


void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
   int i;
   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (i=0;i<st->nfft;i++)
      fout[st->bitrev[i]] = fin[i];
   for (i=0;i<st->nfft;i++)
      fout[i].i = -fout[i].i;
   opus_fft_impl(st, fout ARG_FIXED(0));
   for (i=0;i<st->nfft;i++)
      fout[i].i = -fout[i].i;
}

Coverage Report

Created: 2025-08-28 07:12

Line	Count	Source (jump to first uncovered line)
1		/*Copyright (c) 2003-2004, Mark Borgerding
2		Lots of modifications by Jean-Marc Valin
3		Copyright (c) 2005-2007, Xiph.Org Foundation
4		Copyright (c) 2008, Xiph.Org Foundation, CSIRO
5
6		All rights reserved.
7
8		Redistribution and use in source and binary forms, with or without
9		modification, are permitted provided that the following conditions are met:
10
11		* Redistributions of source code must retain the above copyright notice,
12		this list of conditions and the following disclaimer.
13		* Redistributions in binary form must reproduce the above copyright notice,
14		this list of conditions and the following disclaimer in the
15		documentation and/or other materials provided with the distribution.
16
17		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18		AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19		IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20		ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21		LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22		CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23		SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24		INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25		CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26		ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27		POSSIBILITY OF SUCH DAMAGE.*/
28
29		/* This code is originally from Mark Borgerding's KISS-FFT but has been
30		heavily modified to better suit Opus */
31
32		#ifndef SKIP_CONFIG_H
33		# ifdef HAVE_CONFIG_H
34		# include "config.h"
35		# endif
36		#endif
37
38		#include "_kiss_fft_guts.h"
39		#include "arch.h"
40		#include "os_support.h"
41		#include "mathops.h"
42		#include "stack_alloc.h"
43
44		#ifndef M_PI
45		#define M_PI 3.141592653
46		#endif
47
48		/* The guts header contains all the multiplication and addition macros that are defined for
49		complex numbers. It also declares the kf_ internal functions.
50		*/
51
52		static void kf_bfly2(
53		kiss_fft_cpx * Fout,
54		int m,
55		int N
56		)
57	128k	{
58	128k	kiss_fft_cpx * Fout2;
59	128k	int i;
60	128k	(void)m;
61		#ifdef CUSTOM_MODES
62		if (m==1)
63		{
64		celt_assert(m==1);
65		for (i=0;i<N;i++)
66		{
67		kiss_fft_cpx t;
68		Fout2 = Fout + 1;
69		t = *Fout2;
70		C_SUB( Fout2 , Fout , t );
71		C_ADDTO( *Fout , t );
72		Fout += 2;
73		}
74		} else
75		#endif
76	128k	{
77	128k	celt_coef tw;
78	128k	tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
79		/* We know that m==4 here because the radix-2 is just after a radix-4 */
80	128k	celt_assert(m==4);
81	4.95M	for (i=0;i<N;i++)
82	4.82M	{
83	4.82M	kiss_fft_cpx t;
84	4.82M	Fout2 = Fout + 4;
85	4.82M	t = Fout2[0];
86	4.82M	C_SUB( Fout2[0] , Fout[0] , t );
87	4.82M	C_ADDTO( Fout[0] , t );
88
89	4.82M	t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
90	4.82M	t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
91	4.82M	C_SUB( Fout2[1] , Fout[1] , t );
92	4.82M	C_ADDTO( Fout[1] , t );
93
94	4.82M	t.r = Fout2[2].i;
95	4.82M	t.i = NEG32_ovflw(Fout2[2].r);
96	4.82M	C_SUB( Fout2[2] , Fout[2] , t );
97	4.82M	C_ADDTO( Fout[2] , t );
98
99	4.82M	t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
100	4.82M	t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
101	4.82M	C_SUB( Fout2[3] , Fout[3] , t );
102	4.82M	C_ADDTO( Fout[3] , t );
103	4.82M	Fout += 8;
104	4.82M	}
105	128k	}
106	128k	}
107
108		static void kf_bfly4(
109		kiss_fft_cpx * Fout,
110		const size_t fstride,
111		const kiss_fft_state *st,
112		int m,
113		int N,
114		int mm
115		)
116	689k	{
117	689k	int i;
118
119	689k	if (m==1)
120	424k	{
121		/* Degenerate case where all the twiddles are 1. */
122	23.5M	for (i=0;i<N;i++)
123	23.1M	{
124	23.1M	kiss_fft_cpx scratch0, scratch1;
125
126	23.1M	C_SUB( scratch0 , *Fout, Fout[2] );
127	23.1M	C_ADDTO(*Fout, Fout[2]);
128	23.1M	C_ADD( scratch1 , Fout[1] , Fout[3] );
129	23.1M	C_SUB( Fout[2], *Fout, scratch1 );
130	23.1M	C_ADDTO( *Fout , scratch1 );
131	23.1M	C_SUB( scratch1 , Fout[1] , Fout[3] );
132
133	23.1M	Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
134	23.1M	Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
135	23.1M	Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
136	23.1M	Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
137	23.1M	Fout+=4;
138	23.1M	}
139	424k	} else {
140	264k	int j;
141	264k	kiss_fft_cpx scratch[6];
142	264k	const kiss_twiddle_cpx tw1,tw2,*tw3;
143	264k	const int m2=2*m;
144	264k	const int m3=3*m;
145	264k	kiss_fft_cpx * Fout_beg = Fout;
146	4.23M	for (i=0;i<N;i++)
147	3.97M	{
148	3.97M	Fout = Fout_beg + i*mm;
149	3.97M	tw3 = tw2 = tw1 = st->twiddles;
150		/* m is guaranteed to be a multiple of 4. */
151	23.7M	for (j=0;j<m;j++)
152	19.7M	{
153	19.7M	C_MUL(scratch[0],Fout[m] , *tw1 );
154	19.7M	C_MUL(scratch[1],Fout[m2] , *tw2 );
155	19.7M	C_MUL(scratch[2],Fout[m3] , *tw3 );
156
157	19.7M	C_SUB( scratch[5] , *Fout, scratch[1] );
158	19.7M	C_ADDTO(*Fout, scratch[1]);
159	19.7M	C_ADD( scratch[3] , scratch[0] , scratch[2] );
160	19.7M	C_SUB( scratch[4] , scratch[0] , scratch[2] );
161	19.7M	C_SUB( Fout[m2], *Fout, scratch[3] );
162	19.7M	tw1 += fstride;
163	19.7M	tw2 += fstride*2;
164	19.7M	tw3 += fstride*3;
165	19.7M	C_ADDTO( *Fout , scratch[3] );
166
167	19.7M	Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
168	19.7M	Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
169	19.7M	Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
170	19.7M	Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
171	19.7M	++Fout;
172	19.7M	}
173	3.97M	}
174	264k	}
175	689k	}
176
177
178		#ifndef RADIX_TWO_ONLY
179
180		static void kf_bfly3(
181		kiss_fft_cpx * Fout,
182		const size_t fstride,
183		const kiss_fft_state *st,
184		int m,
185		int N,
186		int mm
187		)
188	424k	{
189	424k	int i;
190	424k	size_t k;
191	424k	const size_t m2 = 2*m;
192	424k	const kiss_twiddle_cpx tw1,tw2;
193	424k	kiss_fft_cpx scratch[5];
194	424k	kiss_twiddle_cpx epi3;
195
196	424k	kiss_fft_cpx * Fout_beg = Fout;
197		#ifdef FIXED_POINT
198		/epi3.r = -16384;/ /* Unused */
199		epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
200		#else
201	424k	epi3 = st->twiddles[fstride*m];
202	424k	#endif
203	2.54M	for (i=0;i<N;i++)
204	2.12M	{
205	2.12M	Fout = Fout_beg + i*mm;
206	2.12M	tw1=tw2=st->twiddles;
207		/* For non-custom modes, m is guaranteed to be a multiple of 4. */
208	2.12M	k=m;
209	30.8M	do {
210
211	30.8M	C_MUL(scratch[1],Fout[m] , *tw1);
212	30.8M	C_MUL(scratch[2],Fout[m2] , *tw2);
213
214	30.8M	C_ADD(scratch[3],scratch[1],scratch[2]);
215	30.8M	C_SUB(scratch[0],scratch[1],scratch[2]);
216	30.8M	tw1 += fstride;
217	30.8M	tw2 += fstride*2;
218
219	30.8M	Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
220	30.8M	Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
221
222	30.8M	C_MULBYSCALAR( scratch[0] , epi3.i );
223
224	30.8M	C_ADDTO(*Fout,scratch[3]);
225
226	30.8M	Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
227	30.8M	Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
228
229	30.8M	Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
230	30.8M	Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
231
232	30.8M	++Fout;
233	30.8M	} while(--k);
234	2.12M	}
235	424k	}
236
237
238		#ifndef OVERRIDE_kf_bfly5
239		static void kf_bfly5(
240		kiss_fft_cpx * Fout,
241		const size_t fstride,
242		const kiss_fft_state *st,
243		int m,
244		int N,
245		int mm
246		)
247	424k	{
248	424k	kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;
249	424k	int i, u;
250	424k	kiss_fft_cpx scratch[13];
251	424k	const kiss_twiddle_cpx *tw;
252	424k	kiss_twiddle_cpx ya,yb;
253	424k	kiss_fft_cpx * Fout_beg = Fout;
254
255		#ifdef FIXED_POINT
256		ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
257		ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
258		yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
259		yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
260		#else
261	424k	ya = st->twiddles[fstride*m];
262	424k	yb = st->twiddles[fstride2m];
263	424k	#endif
264	424k	tw=st->twiddles;
265
266	849k	for (i=0;i<N;i++)
267	424k	{
268	424k	Fout = Fout_beg + i*mm;
269	424k	Fout0=Fout;
270	424k	Fout1=Fout0+m;
271	424k	Fout2=Fout0+2*m;
272	424k	Fout3=Fout0+3*m;
273	424k	Fout4=Fout0+4*m;
274
275		/* For non-custom modes, m is guaranteed to be a multiple of 4. */
276	18.9M	for ( u=0; u<m; ++u ) {
277	18.4M	scratch[0] = *Fout0;
278
279	18.4M	C_MUL(scratch[1] ,Fout1, tw[ufstride]);
280	18.4M	C_MUL(scratch[2] ,Fout2, tw[2u*fstride]);
281	18.4M	C_MUL(scratch[3] ,Fout3, tw[3u*fstride]);
282	18.4M	C_MUL(scratch[4] ,Fout4, tw[4u*fstride]);
283
284	18.4M	C_ADD( scratch[7],scratch[1],scratch[4]);
285	18.4M	C_SUB( scratch[10],scratch[1],scratch[4]);
286	18.4M	C_ADD( scratch[8],scratch[2],scratch[3]);
287	18.4M	C_SUB( scratch[9],scratch[2],scratch[3]);
288
289	18.4M	Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
290	18.4M	Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
291
292	18.4M	scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
293	18.4M	scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
294
295	18.4M	scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
296	18.4M	scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
297
298	18.4M	C_SUB(*Fout1,scratch[5],scratch[6]);
299	18.4M	C_ADD(*Fout4,scratch[5],scratch[6]);
300
301	18.4M	scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
302	18.4M	scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
303	18.4M	scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
304	18.4M	scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
305
306	18.4M	C_ADD(*Fout2,scratch[11],scratch[12]);
307	18.4M	C_SUB(*Fout3,scratch[11],scratch[12]);
308
309	18.4M	++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
310	18.4M	}
311	424k	}
312	424k	}
313		#endif /* OVERRIDE_kf_bfly5 */
314
315
316		#endif
317
318
319		#ifdef CUSTOM_MODES
320
321		static
322		void compute_bitrev_table(
323		int Fout,
324		opus_int16 *f,
325		const size_t fstride,
326		int in_stride,
327		opus_int16 * factors,
328		const kiss_fft_state *st
329		)
330		{
331		const int p=factors++; / the radix */
332		const int m=factors++; / stage's fft length/p */
333
334		/printf ("fft %d %d %d %d %d %d\n", pm, m, p, s2, fstridein_stride, N);/
335		if (m==1)
336		{
337		int j;
338		for (j=0;j<p;j++)
339		{
340		*f = Fout+j;
341		f += fstride*in_stride;
342		}
343		} else {
344		int j;
345		for (j=0;j<p;j++)
346		{
347		compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
348		f += fstride*in_stride;
349		Fout += m;
350		}
351		}
352		}
353
354		/* facbuf is populated by p1,m1,p2,m2, ...
355		where
356		p[i] * m[i] = m[i-1]
357		m0 = n */
358		static
359		int kf_factor(int n,opus_int16 * facbuf)
360		{
361		int p=4;
362		int i;
363		int stages=0;
364		int nbak = n;
365
366		/factor out powers of 4, powers of 2, then any remaining primes /
367		do {
368		while (n % p) {
369		switch (p) {
370		case 4: p = 2; break;
371		case 2: p = 3; break;
372		default: p += 2; break;
373		}
374		if (p>32000 \|\| (opus_int32)p*(opus_int32)p > n)
375		p = n; /* no more factors, skip to end */
376		}
377		n /= p;
378		#ifdef RADIX_TWO_ONLY
379		if (p!=2 && p != 4)
380		#else
381		if (p>5)
382		#endif
383		{
384		return 0;
385		}
386		facbuf[2*stages] = p;
387		if (p==2 && stages > 1)
388		{
389		facbuf[2*stages] = 4;
390		facbuf[2] = 2;
391		}
392		stages++;
393		} while (n > 1);
394		n = nbak;
395		/* Reverse the order to get the radix 4 at the end, so we can use the
396		fast degenerate case. It turns out that reversing the order also
397		improves the noise behaviour. */
398		for (i=0;i<stages/2;i++)
399		{
400		int tmp;
401		tmp = facbuf[2*i];
402		facbuf[2i] = facbuf[2(stages-i-1)];
403		facbuf[2*(stages-i-1)] = tmp;
404		}
405		for (i=0;i<stages;i++)
406		{
407		n /= facbuf[2*i];
408		facbuf[2*i+1] = n;
409		}
410		return 1;
411		}
412
413		static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
414		{
415		int i;
416		#ifdef FIXED_POINT
417		for (i=0;i<nfft;++i) {
418		opus_val32 phase = -i;
419		#ifdef ENABLE_QEXT
420		twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648cos((2M_PI/nfft)*phase)));
421		twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648sin((2M_PI/nfft)*phase)));
422		#else
423		kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
424		#endif
425		}
426		#else
427		for (i=0;i<nfft;++i) {
428		const double pi=3.14159265358979323846264338327;
429		double phase = ( -2pi /nfft ) i;
430		kf_cexp(twiddles+i, phase );
431		}
432		#endif
433		}
434
435		int opus_fft_alloc_arch_c(kiss_fft_state *st) {
436		(void)st;
437		return 0;
438		}
439
440		/*
441		*
442		* Allocates all necessary storage space for the fft and ifft.
443		* The return value is a contiguous block of memory. As such,
444		* It can be freed with free().
445		* */
446		kiss_fft_state opus_fft_alloc_twiddles(int nfft,void mem,size_t * lenmem,
447		const kiss_fft_state *base, int arch)
448		{
449		kiss_fft_state *st=NULL;
450		size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
451
452		if ( lenmem==NULL ) {
453		st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
454		}else{
455		if (mem != NULL && *lenmem >= memneeded)
456		st = (kiss_fft_state*)mem;
457		*lenmem = memneeded;
458		}
459		if (st) {
460		opus_int16 *bitrev;
461		kiss_twiddle_cpx *twiddles;
462
463		st->nfft=nfft;
464		#ifdef FIXED_POINT
465		st->scale_shift = celt_ilog2(st->nfft);
466		# ifdef ENABLE_QEXT
467		if (st->nfft == 1<<st->scale_shift)
468		st->scale = QCONST32(1.0f, 30);
469		else
470		st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
471		# else
472		if (st->nfft == 1<<st->scale_shift)
473		st->scale = Q15ONE;
474		else
475		st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
476		# endif
477		#else
478		st->scale = 1.f/nfft;
479		#endif
480		if (base != NULL)
481		{
482		st->twiddles = base->twiddles;
483		st->shift = 0;
484		while (st->shift < 32 && nfft<<st->shift != base->nfft)
485		st->shift++;
486		if (st->shift>=32)
487		goto fail;
488		} else {
489		st->twiddles = twiddles = (kiss_twiddle_cpx)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)nfft);
490		compute_twiddles(twiddles, nfft);
491		st->shift = -1;
492		}
493		if (!kf_factor(nfft,st->factors))
494		{
495		goto fail;
496		}
497
498		/* bitrev */
499		st->bitrev = bitrev = (opus_int16)KISS_FFT_MALLOC(sizeof(opus_int16)nfft);
500		if (st->bitrev==NULL)
501		goto fail;
502		compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
503
504		/* Initialize architecture specific fft parameters */
505		if (opus_fft_alloc_arch(st, arch))
506		goto fail;
507		}
508		return st;
509		fail:
510		opus_fft_free(st, arch);
511		return NULL;
512		}
513
514		kiss_fft_state opus_fft_alloc(int nfft,void mem,size_t * lenmem, int arch)
515		{
516		return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
517		}
518
519		void opus_fft_free_arch_c(kiss_fft_state *st) {
520		(void)st;
521		}
522
523		void opus_fft_free(const kiss_fft_state *cfg, int arch)
524		{
525		if (cfg)
526		{
527		opus_fft_free_arch((kiss_fft_state *)cfg, arch);
528		opus_free((opus_int16*)cfg->bitrev);
529		if (cfg->shift < 0)
530		opus_free((kiss_twiddle_cpx*)cfg->twiddles);
531		opus_free((kiss_fft_state*)cfg);
532		}
533		}
534
535		#endif /* CUSTOM_MODES */
536
537		#ifdef FIXED_POINT
538		static void fft_downshift(kiss_fft_cpx x, int N, int total, int step) {
539		int shift;
540		shift = IMIN(step, *total);
541		*total -= shift;
542		if (shift == 1) {
543		int i;
544		for (i=0;i<N;i++) {
545		x[i].r = SHR32(x[i].r, 1);
546		x[i].i = SHR32(x[i].i, 1);
547		}
548		} else if (shift>0) {
549		int i;
550		for (i=0;i<N;i++) {
551		x[i].r = PSHR32(x[i].r, shift);
552		x[i].i = PSHR32(x[i].i, shift);
553		}
554		}
555		}
556		#else
557		#define fft_downshift(x, N, total, step)
558		#endif
559
560		void opus_fft_impl(const kiss_fft_state st,kiss_fft_cpx fout ARG_FIXED(int downshift))
561	424k	{
562	424k	int m2, m;
563	424k	int p;
564	424k	int L;
565	424k	int fstride[MAXFACTORS];
566	424k	int i;
567	424k	int shift;
568
569		/* st->shift can be -1 */
570	424k	shift = st->shift>0 ? st->shift : 0;
571
572	424k	fstride[0] = 1;
573	424k	L=0;
574	1.66M	do {
575	1.66M	p = st->factors[2*L];
576	1.66M	m = st->factors[2*L+1];
577	1.66M	fstride[L+1] = fstride[L]*p;
578	1.66M	L++;
579	1.66M	} while(m!=1);
580	424k	m = st->factors[2*L-1];
581	2.09M	for (i=L-1;i>=0;i--)
582	1.66M	{
583	1.66M	if (i!=0)
584	1.24M	m2 = st->factors[2*i-1];
585	424k	else
586	424k	m2 = 1;
587	1.66M	switch (st->factors[2*i])
588	1.66M	{
589	128k	case 2:
590	128k	fft_downshift(fout, st->nfft, &downshift, 1);
591	128k	kf_bfly2(fout, m, fstride[i]);
592	128k	break;
593	689k	case 4:
594	689k	fft_downshift(fout, st->nfft, &downshift, 2);
595	689k	kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
596	689k	break;
597	0	#ifndef RADIX_TWO_ONLY
598	424k	case 3:
599	424k	fft_downshift(fout, st->nfft, &downshift, 2);
600	424k	kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
601	424k	break;
602	424k	case 5:
603	424k	fft_downshift(fout, st->nfft, &downshift, 3);
604	424k	kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
605	424k	break;
606	1.66M	#endif
607	1.66M	}
608	1.66M	m = m2;
609	1.66M	}
610	424k	fft_downshift(fout, st->nfft, &downshift, downshift);
611	424k	}
612
613		void opus_fft_c(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
614	0	{
615	0	int i;
616	0	celt_coef scale;
617		#ifdef FIXED_POINT
618		/* Allows us to scale with MULT16_32_Q16(), which is faster than
619		MULT16_32_Q15() on ARM. */
620		int scale_shift = st->scale_shift-1;
621		#endif
622	0	scale = st->scale;
623
624	0	celt_assert2 (fin != fout, "In-place FFT not supported");
625		/* Bit-reverse the input */
626	0	for (i=0;i<st->nfft;i++)
627	0	{
628	0	kiss_fft_cpx x = fin[i];
629	0	fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
630	0	fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
631	0	}
632	0	opus_fft_impl(st, fout ARG_FIXED(scale_shift));
633	0	}
634
635
636		void opus_ifft_c(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
637	0	{
638	0	int i;
639	0	celt_assert2 (fin != fout, "In-place FFT not supported");
640		/* Bit-reverse the input */
641	0	for (i=0;i<st->nfft;i++)
642	0	fout[st->bitrev[i]] = fin[i];
643	0	for (i=0;i<st->nfft;i++)
644	0	fout[i].i = -fout[i].i;
645	0	opus_fft_impl(st, fout ARG_FIXED(0));
646	0	for (i=0;i<st->nfft;i++)
647	0	fout[i].i = -fout[i].i;
648	0	}