Coverage Report

Created: 2026-05-30 06:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aac/libFDK/src/fft.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4
© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5
Forschung e.V. All rights reserved.
6
7
 1.    INTRODUCTION
8
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10
scheme for digital audio. This FDK AAC Codec software is intended to be used on
11
a wide variety of Android devices.
12
13
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14
general perceptual audio codecs. AAC-ELD is considered the best-performing
15
full-bandwidth communications codec by independent studies and is widely
16
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17
specifications.
18
19
Patent licenses for necessary patent claims for the FDK AAC Codec (including
20
those of Fraunhofer) may be obtained through Via Licensing
21
(www.vialicensing.com) or through the respective patent owners individually for
22
the purpose of encoding or decoding bit streams in products that are compliant
23
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24
Android devices already license these patent claims through Via Licensing or
25
directly from the patent owners, and therefore FDK AAC Codec software may
26
already be covered under those patent licenses when it is used for those
27
licensed purposes only.
28
29
Commercially-licensed AAC software libraries, including floating-point versions
30
with enhanced sound quality, are also available from Fraunhofer. Users are
31
encouraged to check the Fraunhofer website for additional applications
32
information and documentation.
33
34
2.    COPYRIGHT LICENSE
35
36
Redistribution and use in source and binary forms, with or without modification,
37
are permitted without payment of copyright license fees provided that you
38
satisfy the following conditions:
39
40
You must retain the complete text of this software license in redistributions of
41
the FDK AAC Codec or your modifications thereto in source code form.
42
43
You must retain the complete text of this software license in the documentation
44
and/or other materials provided with redistributions of the FDK AAC Codec or
45
your modifications thereto in binary form. You must make available free of
46
charge copies of the complete source code of the FDK AAC Codec and your
47
modifications thereto to recipients of copies in binary form.
48
49
The name of Fraunhofer may not be used to endorse or promote products derived
50
from this library without prior written permission.
51
52
You may not charge copyright license fees for anyone to use, copy or distribute
53
the FDK AAC Codec software or your modifications thereto.
54
55
Your modified versions of the FDK AAC Codec must carry prominent notices stating
56
that you changed the software and the date of any change. For modified versions
57
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59
AAC Codec Library for Android."
60
61
3.    NO PATENT LICENSE
62
63
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65
Fraunhofer provides no warranty of patent non-infringement with respect to this
66
software.
67
68
You may use this FDK AAC Codec software or modifications thereto only for
69
purposes that are authorized by appropriate patent licenses.
70
71
4.    DISCLAIMER
72
73
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75
including but not limited to the implied warranties of merchantability and
76
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78
or consequential damages, including but not limited to procurement of substitute
79
goods or services; loss of use, data, or profits, or business interruption,
80
however caused and on any theory of liability, whether in contract, strict
81
liability, or tort (including negligence), arising in any way out of the use of
82
this software, even if advised of the possibility of such damage.
83
84
5.    CONTACT INFORMATION
85
86
Fraunhofer Institute for Integrated Circuits IIS
87
Attention: Audio and Multimedia Departments - FDK AAC LL
88
Am Wolfsmantel 33
89
91058 Erlangen, Germany
90
91
www.iis.fraunhofer.de/amm
92
amm-info@iis.fraunhofer.de
93
----------------------------------------------------------------------------- */
94
95
/******************* Library for basic calculation routines ********************
96
97
   Author(s):   Josef Hoepfl, DSP Solutions
98
99
   Description: Fix point FFT
100
101
*******************************************************************************/
102
103
#include "fft_rad2.h"
104
#include "FDK_tools_rom.h"
105
106
231M
#define W_PiFOURTH STC(0x5a82799a)
107
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
108
#ifndef SUMDIFF_PIFOURTH
109
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
110
560M
  {                                       \
111
560M
    FIXP_DBL wa, wb;                      \
112
560M
    wa = fMultDiv2(a, W_PiFOURTH);        \
113
560M
    wb = fMultDiv2(b, W_PiFOURTH);        \
114
560M
    diff = wb - wa;                       \
115
560M
    sum = wb + wa;                        \
116
560M
  }
117
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
118
  {                                               \
119
    FIXP_SGL wa, wb;                              \
120
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
121
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
122
    diff = wb - wa;                               \
123
    sum = wb + wa;                                \
124
  }
125
#endif
126
127
#define SCALEFACTOR2048 10
128
#define SCALEFACTOR1024 9
129
139k
#define SCALEFACTOR512 8
130
72.4k
#define SCALEFACTOR256 7
131
72.7k
#define SCALEFACTOR128 6
132
393k
#define SCALEFACTOR64 5
133
30.4M
#define SCALEFACTOR32 4
134
16.7M
#define SCALEFACTOR16 3
135
6.52M
#define SCALEFACTOR8 2
136
1.42M
#define SCALEFACTOR4 1
137
3.00M
#define SCALEFACTOR2 1
138
139
3.06M
#define SCALEFACTOR3 1
140
144k
#define SCALEFACTOR5 1
141
2.99M
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
142
#define SCALEFACTOR7 2
143
#define SCALEFACTOR9 2
144
3.03M
#define SCALEFACTOR10 5
145
11.2M
#define SCALEFACTOR12 3
146
418k
#define SCALEFACTOR15 3
147
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
148
144k
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
149
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
150
8.37k
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
151
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
152
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
153
340k
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
154
230k
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
155
132
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
156
72.9k
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
157
3.30k
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
158
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
159
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
160
9.10k
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
161
171k
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
162
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
163
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
164
89.0k
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
165
13.7k
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167
#include "fft.h"
168
169
#ifndef FUNCTION_fft2
170
171
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172
 * with factor 0.5 */
173
9.08M
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174
9.08M
  FIXP_DBL r1, i1;
175
9.08M
  FIXP_DBL r2, i2;
176
177
  /* real part */
178
9.08M
  r1 = pDat[2];
179
9.08M
  r2 = pDat[0];
180
181
  /* imaginary part */
182
9.08M
  i1 = pDat[3];
183
9.08M
  i2 = pDat[1];
184
185
  /* real part */
186
9.08M
  pDat[0] = (r2 + r1) >> 1;
187
9.08M
  pDat[2] = (r2 - r1) >> 1;
188
189
  /* imaginary part */
190
9.08M
  pDat[1] = (i2 + i1) >> 1;
191
9.08M
  pDat[3] = (i2 - i1) >> 1;
192
9.08M
}
193
#endif /* FUNCTION_fft2 */
194
195
16.6M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
196
197
#ifndef FUNCTION_fft3
198
/* Performs the FFT of length 3 according to the algorithm after winograd. */
199
8.32M
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200
8.32M
  FIXP_DBL r1, r2;
201
8.32M
  FIXP_DBL s1, s2;
202
8.32M
  FIXP_DBL pD;
203
204
  /* real part */
205
8.32M
  r1 = pDat[2] + pDat[4];
206
8.32M
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207
8.32M
  pD = pDat[0] >> 1;
208
8.32M
  pDat[0] = pD + (r1 >> 1);
209
8.32M
  r1 = pD - (r1 >> 2);
210
211
  /* imaginary part */
212
8.32M
  s1 = pDat[3] + pDat[5];
213
8.32M
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214
8.32M
  pD = pDat[1] >> 1;
215
8.32M
  pDat[1] = pD + (s1 >> 1);
216
8.32M
  s1 = pD - (s1 >> 2);
217
218
  /* combination */
219
8.32M
  pDat[2] = r1 - s2;
220
8.32M
  pDat[4] = r1 + s2;
221
8.32M
  pDat[3] = s1 + r2;
222
8.32M
  pDat[5] = s1 - r2;
223
8.32M
}
224
#endif /* #ifndef FUNCTION_fft3 */
225
226
129M
#define F5C(x) STC(x)
227
228
25.9M
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
229
25.9M
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
230
25.9M
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
231
25.9M
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
232
25.9M
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
233
234
/* performs the FFT of length 5 according to the algorithm after winograd */
235
/* This version works with a prescale of 2 instead of 3 */
236
12.9M
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237
12.9M
  FIXP_DBL r1, r2, r3, r4;
238
12.9M
  FIXP_DBL s1, s2, s3, s4;
239
12.9M
  FIXP_DBL t;
240
241
  /* real part */
242
12.9M
  r1 = (pDat[2] + pDat[8]) >> 1;
243
12.9M
  r4 = (pDat[2] - pDat[8]) >> 1;
244
12.9M
  r3 = (pDat[4] + pDat[6]) >> 1;
245
12.9M
  r2 = (pDat[4] - pDat[6]) >> 1;
246
12.9M
  t = fMult((r1 - r3), C54);
247
12.9M
  r1 = r1 + r3;
248
12.9M
  pDat[0] = (pDat[0] >> 1) + r1;
249
  /* Bit shift left because of the constant C55 which was scaled with the factor
250
     0.5 because of the representation of the values as fracts */
251
12.9M
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252
12.9M
  r3 = r1 - t;
253
12.9M
  r1 = r1 + t;
254
12.9M
  t = fMult((r4 + r2), C51);
255
  /* Bit shift left because of the constant C55 which was scaled with the factor
256
     0.5 because of the representation of the values as fracts */
257
12.9M
  r4 = t + (fMultDiv2(r4, C52) << (2));
258
12.9M
  r2 = t + fMult(r2, C53);
259
260
  /* imaginary part */
261
12.9M
  s1 = (pDat[3] + pDat[9]) >> 1;
262
12.9M
  s4 = (pDat[3] - pDat[9]) >> 1;
263
12.9M
  s3 = (pDat[5] + pDat[7]) >> 1;
264
12.9M
  s2 = (pDat[5] - pDat[7]) >> 1;
265
12.9M
  t = fMult((s1 - s3), C54);
266
12.9M
  s1 = s1 + s3;
267
12.9M
  pDat[1] = (pDat[1] >> 1) + s1;
268
  /* Bit shift left because of the constant C55 which was scaled with the factor
269
     0.5 because of the representation of the values as fracts */
270
12.9M
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271
12.9M
  s3 = s1 - t;
272
12.9M
  s1 = s1 + t;
273
12.9M
  t = fMult((s4 + s2), C51);
274
  /* Bit shift left because of the constant C55 which was scaled with the factor
275
     0.5 because of the representation of the values as fracts */
276
12.9M
  s4 = t + (fMultDiv2(s4, C52) << (2));
277
12.9M
  s2 = t + fMult(s2, C53);
278
279
  /* combination */
280
12.9M
  pDat[2] = r1 + s2;
281
12.9M
  pDat[8] = r1 - s2;
282
12.9M
  pDat[4] = r3 - s4;
283
12.9M
  pDat[6] = r3 + s4;
284
285
12.9M
  pDat[3] = s1 - r2;
286
12.9M
  pDat[9] = s1 + r2;
287
12.9M
  pDat[5] = s3 + r4;
288
12.9M
  pDat[7] = s3 - r4;
289
12.9M
}
290
291
2.88M
#define F5C(x) STC(x)
292
293
577k
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
294
577k
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
295
577k
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
296
577k
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
297
577k
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
298
/**
299
 * \brief    Function performs a complex 10-point FFT
300
 *           The FFT is performed inplace. The result of the FFT
301
 *           is scaled by SCALEFACTOR10 bits.
302
 *
303
 *           WOPS FLC version:                    1093 cycles
304
 *           WOPS with 32x16 bit multiplications:  196 cycles
305
 *
306
 * \param    [i/o] re    real input / output
307
 * \param    [i/o] im    imag input / output
308
 * \param    [i  ] s     stride real and imag input / output
309
 *
310
 * \return   void
311
 */
312
static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313
144k
{
314
144k
  FIXP_DBL t;
315
144k
  FIXP_DBL x0, x1, x2, x3, x4;
316
144k
  FIXP_DBL r1, r2, r3, r4;
317
144k
  FIXP_DBL s1, s2, s3, s4;
318
144k
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319
144k
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321
144k
  const int s = 1;  // stride factor
322
323
  /* 2 fft5 stages */
324
325
  /* real part */
326
144k
  x0 = (x[s * 0] >> SCALEFACTOR10);
327
144k
  x1 = (x[s * 4] >> SCALEFACTOR10);
328
144k
  x2 = (x[s * 8] >> SCALEFACTOR10);
329
144k
  x3 = (x[s * 12] >> SCALEFACTOR10);
330
144k
  x4 = (x[s * 16] >> SCALEFACTOR10);
331
332
144k
  r1 = (x3 + x2);
333
144k
  r4 = (x3 - x2);
334
144k
  r3 = (x1 + x4);
335
144k
  r2 = (x1 - x4);
336
144k
  t = fMult((r1 - r3), C54);
337
144k
  r1 = (r1 + r3);
338
144k
  y00 = (x0 + r1);
339
144k
  r1 = (y00 + ((fMult(r1, C55) << 1)));
340
144k
  r3 = (r1 - t);
341
144k
  r1 = (r1 + t);
342
144k
  t = fMult((r4 + r2), C51);
343
144k
  r4 = (t + (fMult(r4, C52) << 1));
344
144k
  r2 = (t + fMult(r2, C53));
345
346
  /* imaginary part */
347
144k
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348
144k
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349
144k
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350
144k
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351
144k
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353
144k
  s1 = (x3 + x2);
354
144k
  s4 = (x3 - x2);
355
144k
  s3 = (x1 + x4);
356
144k
  s2 = (x1 - x4);
357
144k
  t = fMult((s1 - s3), C54);
358
144k
  s1 = (s1 + s3);
359
144k
  y01 = (x0 + s1);
360
144k
  s1 = (y01 + (fMult(s1, C55) << 1));
361
144k
  s3 = (s1 - t);
362
144k
  s1 = (s1 + t);
363
144k
  t = fMult((s4 + s2), C51);
364
144k
  s4 = (t + (fMult(s4, C52) << 1));
365
144k
  s2 = (t + fMult(s2, C53));
366
367
  /* combination */
368
144k
  y04 = (r1 + s2);
369
144k
  y16 = (r1 - s2);
370
144k
  y08 = (r3 - s4);
371
144k
  y12 = (r3 + s4);
372
373
144k
  y05 = (s1 - r2);
374
144k
  y17 = (s1 + r2);
375
144k
  y09 = (s3 + r4);
376
144k
  y13 = (s3 - r4);
377
378
  /* real part */
379
144k
  x0 = (x[s * 10] >> SCALEFACTOR10);
380
144k
  x1 = (x[s * 2] >> SCALEFACTOR10);
381
144k
  x2 = (x[s * 6] >> SCALEFACTOR10);
382
144k
  x3 = (x[s * 14] >> SCALEFACTOR10);
383
144k
  x4 = (x[s * 18] >> SCALEFACTOR10);
384
385
144k
  r1 = (x1 + x4);
386
144k
  r4 = (x1 - x4);
387
144k
  r3 = (x3 + x2);
388
144k
  r2 = (x3 - x2);
389
144k
  t = fMult((r1 - r3), C54);
390
144k
  r1 = (r1 + r3);
391
144k
  y02 = (x0 + r1);
392
144k
  r1 = (y02 + ((fMult(r1, C55) << 1)));
393
144k
  r3 = (r1 - t);
394
144k
  r1 = (r1 + t);
395
144k
  t = fMult(((r4 + r2)), C51);
396
144k
  r4 = (t + (fMult(r4, C52) << 1));
397
144k
  r2 = (t + fMult(r2, C53));
398
399
  /* imaginary part */
400
144k
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401
144k
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402
144k
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403
144k
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404
144k
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406
144k
  s1 = (x1 + x4);
407
144k
  s4 = (x1 - x4);
408
144k
  s3 = (x3 + x2);
409
144k
  s2 = (x3 - x2);
410
144k
  t = fMult((s1 - s3), C54);
411
144k
  s1 = (s1 + s3);
412
144k
  y03 = (x0 + s1);
413
144k
  s1 = (y03 + (fMult(s1, C55) << 1));
414
144k
  s3 = (s1 - t);
415
144k
  s1 = (s1 + t);
416
144k
  t = fMult((s4 + s2), C51);
417
144k
  s4 = (t + (fMult(s4, C52) << 1));
418
144k
  s2 = (t + fMult(s2, C53));
419
420
  /* combination */
421
144k
  y06 = (r1 + s2);
422
144k
  y18 = (r1 - s2);
423
144k
  y10 = (r3 - s4);
424
144k
  y14 = (r3 + s4);
425
426
144k
  y07 = (s1 - r2);
427
144k
  y19 = (s1 + r2);
428
144k
  y11 = (s3 + r4);
429
144k
  y15 = (s3 - r4);
430
431
  /* 5 fft2 stages */
432
144k
  x[s * 0] = (y00 + y02);
433
144k
  x[s * 0 + 1] = (y01 + y03);
434
144k
  x[s * 10] = (y00 - y02);
435
144k
  x[s * 10 + 1] = (y01 - y03);
436
437
144k
  x[s * 4] = (y04 + y06);
438
144k
  x[s * 4 + 1] = (y05 + y07);
439
144k
  x[s * 14] = (y04 - y06);
440
144k
  x[s * 14 + 1] = (y05 - y07);
441
442
144k
  x[s * 8] = (y08 + y10);
443
144k
  x[s * 8 + 1] = (y09 + y11);
444
144k
  x[s * 18] = (y08 - y10);
445
144k
  x[s * 18 + 1] = (y09 - y11);
446
447
144k
  x[s * 12] = (y12 + y14);
448
144k
  x[s * 12 + 1] = (y13 + y15);
449
144k
  x[s * 2] = (y12 - y14);
450
144k
  x[s * 2 + 1] = (y13 - y15);
451
452
144k
  x[s * 16] = (y16 + y18);
453
144k
  x[s * 16 + 1] = (y17 + y19);
454
144k
  x[s * 6] = (y16 - y18);
455
144k
  x[s * 6 + 1] = (y17 - y19);
456
144k
}
457
458
#ifndef FUNCTION_fft12
459
#define FUNCTION_fft12
460
461
#undef C31
462
162M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
463
464
15.1M
static inline void fft12(FIXP_DBL *pInput) {
465
15.1M
  FIXP_DBL aDst[24];
466
15.1M
  FIXP_DBL *pSrc, *pDst;
467
15.1M
  int i;
468
469
15.1M
  pSrc = pInput;
470
15.1M
  pDst = aDst;
471
15.1M
  FIXP_DBL r1, r2, s1, s2, pD;
472
473
  /* First 3*2 samples are shifted right by 2 before output */
474
15.1M
  r1 = pSrc[8] + pSrc[16];
475
15.1M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
476
15.1M
  pD = pSrc[0] >> 1;
477
15.1M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
478
15.1M
  r1 = pD - (r1 >> 2);
479
480
  /* imaginary part */
481
15.1M
  s1 = pSrc[9] + pSrc[17];
482
15.1M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
483
15.1M
  pD = pSrc[1] >> 1;
484
15.1M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
485
15.1M
  s1 = pD - (s1 >> 2);
486
487
  /* combination */
488
15.1M
  pDst[2] = (r1 - s2) >> 1;
489
15.1M
  pDst[3] = (s1 + r2) >> 1;
490
15.1M
  pDst[4] = (r1 + s2) >> 1;
491
15.1M
  pDst[5] = (s1 - r2) >> 1;
492
15.1M
  pSrc += 2;
493
15.1M
  pDst += 6;
494
495
15.1M
  const FIXP_STB *pVecRe = RotVectorReal12;
496
15.1M
  const FIXP_STB *pVecIm = RotVectorImag12;
497
15.1M
  FIXP_DBL re, im;
498
15.1M
  FIXP_STB vre, vim;
499
45.4M
  for (i = 0; i < 2; i++) {
500
    /* sample 0,1 are shifted right by 2 before output */
501
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
502
     * output */
503
504
30.2M
    r1 = pSrc[8] + pSrc[16];
505
30.2M
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
506
30.2M
    pD = pSrc[0] >> 1;
507
30.2M
    pDst[0] = (pD + (r1 >> 1)) >> 1;
508
30.2M
    r1 = pD - (r1 >> 2);
509
510
    /* imaginary part */
511
30.2M
    s1 = pSrc[9] + pSrc[17];
512
30.2M
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
513
30.2M
    pD = pSrc[1] >> 1;
514
30.2M
    pDst[1] = (pD + (s1 >> 1)) >> 1;
515
30.2M
    s1 = pD - (s1 >> 2);
516
517
    /* combination */
518
30.2M
    re = (r1 - s2) >> 0;
519
30.2M
    im = (s1 + r2) >> 0;
520
30.2M
    vre = *pVecRe++;
521
30.2M
    vim = *pVecIm++;
522
30.2M
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
523
524
30.2M
    re = (r1 + s2) >> 0;
525
30.2M
    im = (s1 - r2) >> 0;
526
30.2M
    vre = *pVecRe++;
527
30.2M
    vim = *pVecIm++;
528
30.2M
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
529
530
30.2M
    pDst += 6;
531
30.2M
    pSrc += 2;
532
30.2M
  }
533
  /* sample 0,1 are shifted right by 2 before output */
534
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
535
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
536
15.1M
  r1 = pSrc[8] + pSrc[16];
537
15.1M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
538
15.1M
  pD = pSrc[0] >> 1;
539
15.1M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
540
15.1M
  r1 = pD - (r1 >> 2);
541
542
  /* imaginary part */
543
15.1M
  s1 = pSrc[9] + pSrc[17];
544
15.1M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
545
15.1M
  pD = pSrc[1] >> 1;
546
15.1M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
547
15.1M
  s1 = pD - (s1 >> 2);
548
549
  /* combination */
550
15.1M
  pDst[2] = (s1 + r2) >> 1;
551
15.1M
  pDst[3] = (s2 - r1) >> 1;
552
15.1M
  pDst[4] = -((r1 + s2) >> 1);
553
15.1M
  pDst[5] = (r2 - s1) >> 1;
554
555
  /* Perform 3 times the fft of length 4. The input samples are at the address
556
  of aDst and the output samples are at the address of pInput. The input vector
557
  for the fft of length 4 is built of the interleaved samples in aDst, the
558
  output samples are stored consecutively at the address of pInput.
559
  */
560
15.1M
  pSrc = aDst;
561
15.1M
  pDst = pInput;
562
60.5M
  for (i = 0; i < 3; i++) {
563
    /* inline FFT4 merged with incoming resorting loop */
564
45.4M
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
565
566
45.4M
    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
567
45.4M
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
568
45.4M
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
569
45.4M
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
570
571
45.4M
    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
572
45.4M
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
573
574
45.4M
    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
575
45.4M
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */
576
577
45.4M
    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
578
45.4M
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
579
580
45.4M
    a10 = a10 - pSrc[18]; /* Re C - Re D */
581
45.4M
    a30 = a30 - pSrc[19]; /* Im C - Im D */
582
583
45.4M
    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
584
45.4M
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
585
45.4M
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
586
45.4M
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
587
588
45.4M
    pSrc += 2;
589
45.4M
    pDst += 2;
590
45.4M
  }
591
15.1M
}
592
#endif /* FUNCTION_fft12 */
593
594
#ifndef FUNCTION_fft15
595
596
152M
#define N3 3
597
86.6M
#define N5 5
598
49.5M
#define N6 6
599
152M
#define N15 15
600
601
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
602
 * length 5. */
603
4.12M
static inline void fft15(FIXP_DBL *pInput) {
604
4.12M
  FIXP_DBL aDst[2 * N15];
605
4.12M
  FIXP_DBL aDst1[2 * N15];
606
4.12M
  int i, k, l;
607
608
  /* Sort input vector for fft's of length 3
609
  input3(0:2)   = [input(0) input(5) input(10)];
610
  input3(3:5)   = [input(3) input(8) input(13)];
611
  input3(6:8)   = [input(6) input(11) input(1)];
612
  input3(9:11)  = [input(9) input(14) input(4)];
613
  input3(12:14) = [input(12) input(2) input(7)]; */
614
4.12M
  {
615
4.12M
    const FIXP_DBL *pSrc = pInput;
616
4.12M
    FIXP_DBL *RESTRICT pDst = aDst;
617
    /* Merge 3 loops into one, skip call of fft3 */
618
24.7M
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
619
20.6M
      pDst[k + 0] = pSrc[l];
620
20.6M
      pDst[k + 1] = pSrc[l + 1];
621
20.6M
      l += 2 * N5;
622
20.6M
      if (l >= (2 * N15)) l -= (2 * N15);
623
624
20.6M
      pDst[k + 2] = pSrc[l];
625
20.6M
      pDst[k + 3] = pSrc[l + 1];
626
20.6M
      l += 2 * N5;
627
20.6M
      if (l >= (2 * N15)) l -= (2 * N15);
628
20.6M
      pDst[k + 4] = pSrc[l];
629
20.6M
      pDst[k + 5] = pSrc[l + 1];
630
20.6M
      l += (2 * N5) + (2 * N3);
631
20.6M
      if (l >= (2 * N15)) l -= (2 * N15);
632
633
      /* fft3 merged with shift right by 2 loop */
634
20.6M
      FIXP_DBL r1, r2, r3;
635
20.6M
      FIXP_DBL s1, s2;
636
      /* real part */
637
20.6M
      r1 = pDst[k + 2] + pDst[k + 4];
638
20.6M
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
639
20.6M
      s1 = pDst[k + 0];
640
20.6M
      pDst[k + 0] = (s1 + r1) >> 2;
641
20.6M
      r1 = s1 - (r1 >> 1);
642
643
      /* imaginary part */
644
20.6M
      s1 = pDst[k + 3] + pDst[k + 5];
645
20.6M
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
646
20.6M
      r3 = pDst[k + 1];
647
20.6M
      pDst[k + 1] = (r3 + s1) >> 2;
648
20.6M
      s1 = r3 - (s1 >> 1);
649
650
      /* combination */
651
20.6M
      pDst[k + 2] = (r1 - s2) >> 2;
652
20.6M
      pDst[k + 4] = (r1 + s2) >> 2;
653
20.6M
      pDst[k + 3] = (s1 + r2) >> 2;
654
20.6M
      pDst[k + 5] = (s1 - r2) >> 2;
655
20.6M
    }
656
4.12M
  }
657
  /* Sort input vector for fft's of length 5
658
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
659
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
660
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
661
  /* Merge 2 loops into one, brings about 10% */
662
4.12M
  {
663
4.12M
    const FIXP_DBL *pSrc = aDst;
664
4.12M
    FIXP_DBL *RESTRICT pDst = aDst1;
665
16.5M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
666
12.3M
      l = 2 * i;
667
12.3M
      pDst[k + 0] = pSrc[l + 0];
668
12.3M
      pDst[k + 1] = pSrc[l + 1];
669
12.3M
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
670
12.3M
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
671
12.3M
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
672
12.3M
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
673
12.3M
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
674
12.3M
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
675
12.3M
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
676
12.3M
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
677
12.3M
      fft5(&pDst[k]);
678
12.3M
    }
679
4.12M
  }
680
  /* Sort output vector of length 15
681
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
682
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
683
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
684
  /* optimize clumsy loop, brings about 5% */
685
4.12M
  {
686
4.12M
    const FIXP_DBL *pSrc = aDst1;
687
4.12M
    FIXP_DBL *RESTRICT pDst = pInput;
688
16.5M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
689
12.3M
      pDst[k + 0] = pSrc[l];
690
12.3M
      pDst[k + 1] = pSrc[l + 1];
691
12.3M
      l += (2 * N6);
692
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
693
12.3M
      pDst[k + 2] = pSrc[l];
694
12.3M
      pDst[k + 3] = pSrc[l + 1];
695
12.3M
      l += (2 * N6);
696
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
697
12.3M
      pDst[k + 4] = pSrc[l];
698
12.3M
      pDst[k + 5] = pSrc[l + 1];
699
12.3M
      l += (2 * N6);
700
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
701
12.3M
      pDst[k + 6] = pSrc[l];
702
12.3M
      pDst[k + 7] = pSrc[l + 1];
703
12.3M
      l += (2 * N6);
704
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
705
12.3M
      pDst[k + 8] = pSrc[l];
706
12.3M
      pDst[k + 9] = pSrc[l + 1];
707
12.3M
      l += 2; /* no modulo check needed, it cannot occur */
708
12.3M
    }
709
4.12M
  }
710
4.12M
}
711
#endif /* FUNCTION_fft15 */
712
713
/*
714
 Select shift placement.
715
 Some processors like ARM may shift "for free" in combination with an addition
716
 or substraction, but others don't so either combining shift with +/- or reduce
717
 the total amount or shift operations is optimal
718
 */
719
#if !defined(__arm__)
720
816M
#define SHIFT_A >> 1
721
#define SHIFT_B
722
#else
723
#define SHIFT_A
724
#define SHIFT_B >> 1
725
#endif
726
727
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728
                         */
729
730
/* This defines prevents this array to be declared twice, if 16-bit fft is
731
 * enabled too */
732
#define FUNCTION_DATA_fft_16_w16
733
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
734
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
736
LNK_SECTION_CODE_L1
737
19.2M
inline void fft_16(FIXP_DBL *RESTRICT x) {
738
19.2M
  FIXP_DBL vr, ur;
739
19.2M
  FIXP_DBL vr2, ur2;
740
19.2M
  FIXP_DBL vr3, ur3;
741
19.2M
  FIXP_DBL vr4, ur4;
742
19.2M
  FIXP_DBL vi, ui;
743
19.2M
  FIXP_DBL vi2, ui2;
744
19.2M
  FIXP_DBL vi3, ui3;
745
746
19.2M
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
747
19.2M
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
748
19.2M
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
749
19.2M
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
750
19.2M
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
751
19.2M
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
752
753
19.2M
  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
754
19.2M
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
755
756
19.2M
  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
757
19.2M
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
758
19.2M
  vr -= x[16];              /* Re A - Re B */
759
19.2M
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
760
19.2M
  ur -= x[17];              /* Im A - Im B */
761
19.2M
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
762
763
19.2M
  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
764
19.2M
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
765
766
19.2M
  x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
767
19.2M
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
768
769
19.2M
  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
770
19.2M
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
771
772
19.2M
  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
773
19.2M
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
774
775
19.2M
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
776
19.2M
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
777
19.2M
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
778
19.2M
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
779
19.2M
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
780
19.2M
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
781
19.2M
  vr2 -= x[20];                            /* Re A - Re B */
782
19.2M
  ur2 -= x[21];                            /* Im A - Im B */
783
19.2M
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
784
19.2M
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
785
786
19.2M
  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
787
19.2M
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
788
789
19.2M
  x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
790
19.2M
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
791
792
19.2M
  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
793
19.2M
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
794
795
19.2M
  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
796
19.2M
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
797
798
19.2M
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
799
19.2M
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
800
19.2M
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
801
19.2M
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
802
19.2M
  vr3 -= x[18];               /* Re A - Re B */
803
19.2M
  ur3 -= x[19];               /* Im A - Im B */
804
19.2M
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
805
19.2M
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
806
19.2M
  x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
807
19.2M
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
808
809
19.2M
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
810
19.2M
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
811
19.2M
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
812
19.2M
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
813
19.2M
  vr4 -= x[22];                /* Re A - Re B */
814
19.2M
  ur4 -= x[23];                /* Im A - Im B */
815
816
19.2M
  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
817
19.2M
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
818
819
19.2M
  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
820
19.2M
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
821
19.2M
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
822
19.2M
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
823
19.2M
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
824
19.2M
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
825
826
  // xt1 =  0
827
  // xt2 =  8
828
19.2M
  vr = x[8];
829
19.2M
  vi = x[9];
830
19.2M
  ur = x[0] >> 1;
831
19.2M
  ui = x[1] >> 1;
832
19.2M
  x[0] = ur + (vr >> 1);
833
19.2M
  x[1] = ui + (vi >> 1);
834
19.2M
  x[8] = ur - (vr >> 1);
835
19.2M
  x[9] = ui - (vi >> 1);
836
837
  // xt1 =  4
838
  // xt2 = 12
839
19.2M
  vr = x[13];
840
19.2M
  vi = x[12];
841
19.2M
  ur = x[4] >> 1;
842
19.2M
  ui = x[5] >> 1;
843
19.2M
  x[4] = ur + (vr >> 1);
844
19.2M
  x[5] = ui - (vi >> 1);
845
19.2M
  x[12] = ur - (vr >> 1);
846
19.2M
  x[13] = ui + (vi >> 1);
847
848
  // xt1 = 16
849
  // xt2 = 24
850
19.2M
  vr = x[24];
851
19.2M
  vi = x[25];
852
19.2M
  ur = x[16] >> 1;
853
19.2M
  ui = x[17] >> 1;
854
19.2M
  x[16] = ur + (vr >> 1);
855
19.2M
  x[17] = ui + (vi >> 1);
856
19.2M
  x[24] = ur - (vr >> 1);
857
19.2M
  x[25] = ui - (vi >> 1);
858
859
  // xt1 = 20
860
  // xt2 = 28
861
19.2M
  vr = x[29];
862
19.2M
  vi = x[28];
863
19.2M
  ur = x[20] >> 1;
864
19.2M
  ui = x[21] >> 1;
865
19.2M
  x[20] = ur + (vr >> 1);
866
19.2M
  x[21] = ui - (vi >> 1);
867
19.2M
  x[28] = ur - (vr >> 1);
868
19.2M
  x[29] = ui + (vi >> 1);
869
870
  // xt1 =  2
871
  // xt2 = 10
872
19.2M
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
873
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
874
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
875
19.2M
  ur = x[2];
876
19.2M
  ui = x[3];
877
19.2M
  x[2] = (ur >> 1) + vr;
878
19.2M
  x[3] = (ui >> 1) + vi;
879
19.2M
  x[10] = (ur >> 1) - vr;
880
19.2M
  x[11] = (ui >> 1) - vi;
881
882
  // xt1 =  6
883
  // xt2 = 14
884
19.2M
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
885
19.2M
  ur = x[6];
886
19.2M
  ui = x[7];
887
19.2M
  x[6] = (ur >> 1) + vr;
888
19.2M
  x[7] = (ui >> 1) - vi;
889
19.2M
  x[14] = (ur >> 1) - vr;
890
19.2M
  x[15] = (ui >> 1) + vi;
891
892
  // xt1 = 18
893
  // xt2 = 26
894
19.2M
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
895
19.2M
  ur = x[18];
896
19.2M
  ui = x[19];
897
19.2M
  x[18] = (ur >> 1) + vr;
898
19.2M
  x[19] = (ui >> 1) + vi;
899
19.2M
  x[26] = (ur >> 1) - vr;
900
19.2M
  x[27] = (ui >> 1) - vi;
901
902
  // xt1 = 22
903
  // xt2 = 30
904
19.2M
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
905
19.2M
  ur = x[22];
906
19.2M
  ui = x[23];
907
19.2M
  x[22] = (ur >> 1) + vr;
908
19.2M
  x[23] = (ui >> 1) - vi;
909
19.2M
  x[30] = (ur >> 1) - vr;
910
19.2M
  x[31] = (ui >> 1) + vi;
911
912
  // xt1 =  0
913
  // xt2 = 16
914
19.2M
  vr = x[16];
915
19.2M
  vi = x[17];
916
19.2M
  ur = x[0] >> 1;
917
19.2M
  ui = x[1] >> 1;
918
19.2M
  x[0] = ur + (vr >> 1);
919
19.2M
  x[1] = ui + (vi >> 1);
920
19.2M
  x[16] = ur - (vr >> 1);
921
19.2M
  x[17] = ui - (vi >> 1);
922
923
  // xt1 =  8
924
  // xt2 = 24
925
19.2M
  vi = x[24];
926
19.2M
  vr = x[25];
927
19.2M
  ur = x[8] >> 1;
928
19.2M
  ui = x[9] >> 1;
929
19.2M
  x[8] = ur + (vr >> 1);
930
19.2M
  x[9] = ui - (vi >> 1);
931
19.2M
  x[24] = ur - (vr >> 1);
932
19.2M
  x[25] = ui + (vi >> 1);
933
934
  // xt1 =  2
935
  // xt2 = 18
936
19.2M
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
937
19.2M
  ur = x[2];
938
19.2M
  ui = x[3];
939
19.2M
  x[2] = (ur >> 1) + vr;
940
19.2M
  x[3] = (ui >> 1) + vi;
941
19.2M
  x[18] = (ur >> 1) - vr;
942
19.2M
  x[19] = (ui >> 1) - vi;
943
944
  // xt1 = 10
945
  // xt2 = 26
946
19.2M
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
947
19.2M
  ur = x[10];
948
19.2M
  ui = x[11];
949
19.2M
  x[10] = (ur >> 1) + vr;
950
19.2M
  x[11] = (ui >> 1) - vi;
951
19.2M
  x[26] = (ur >> 1) - vr;
952
19.2M
  x[27] = (ui >> 1) + vi;
953
954
  // xt1 =  4
955
  // xt2 = 20
956
19.2M
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
957
19.2M
  ur = x[4];
958
19.2M
  ui = x[5];
959
19.2M
  x[4] = (ur >> 1) + vr;
960
19.2M
  x[5] = (ui >> 1) + vi;
961
19.2M
  x[20] = (ur >> 1) - vr;
962
19.2M
  x[21] = (ui >> 1) - vi;
963
964
  // xt1 = 12
965
  // xt2 = 28
966
19.2M
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
967
19.2M
  ur = x[12];
968
19.2M
  ui = x[13];
969
19.2M
  x[12] = (ur >> 1) + vr;
970
19.2M
  x[13] = (ui >> 1) - vi;
971
19.2M
  x[28] = (ur >> 1) - vr;
972
19.2M
  x[29] = (ui >> 1) + vi;
973
974
  // xt1 =  6
975
  // xt2 = 22
976
19.2M
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
977
19.2M
  ur = x[6];
978
19.2M
  ui = x[7];
979
19.2M
  x[6] = (ur >> 1) + vr;
980
19.2M
  x[7] = (ui >> 1) + vi;
981
19.2M
  x[22] = (ur >> 1) - vr;
982
19.2M
  x[23] = (ui >> 1) - vi;
983
984
  // xt1 = 14
985
  // xt2 = 30
986
19.2M
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
987
19.2M
  ur = x[14];
988
19.2M
  ui = x[15];
989
19.2M
  x[14] = (ur >> 1) + vr;
990
19.2M
  x[15] = (ui >> 1) - vi;
991
19.2M
  x[30] = (ur >> 1) - vr;
992
19.2M
  x[31] = (ui >> 1) + vi;
993
19.2M
}
994
#endif /* FUNCTION_fft_16 */
995
996
#ifndef FUNCTION_fft_32
997
static const FIXP_STP fft32_w32[6] = {
998
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
999
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
1000
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
1001
889M
#define W_PiFOURTH STC(0x5a82799a)
1002
1003
LNK_SECTION_CODE_L1
1004
31.7M
inline void fft_32(FIXP_DBL *const _x) {
1005
  /*
1006
   * 1+2 stage radix 4
1007
   */
1008
1009
  /////////////////////////////////////////////////////////////////////////////////////////
1010
31.7M
  {
1011
31.7M
    FIXP_DBL *const x = _x;
1012
31.7M
    FIXP_DBL vi, ui;
1013
31.7M
    FIXP_DBL vi2, ui2;
1014
31.7M
    FIXP_DBL vi3, ui3;
1015
31.7M
    FIXP_DBL vr, ur;
1016
31.7M
    FIXP_DBL vr2, ur2;
1017
31.7M
    FIXP_DBL vr3, ur3;
1018
31.7M
    FIXP_DBL vr4, ur4;
1019
1020
    // i = 0
1021
31.7M
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
1022
31.7M
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
1023
31.7M
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
1024
31.7M
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
1025
1026
31.7M
    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1027
31.7M
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1028
1029
31.7M
    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
1030
31.7M
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
1031
1032
31.7M
    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1033
31.7M
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1034
1035
31.7M
    vr -= x[32];             /* Re A - Re B */
1036
31.7M
    ur -= x[33];             /* Im A - Im B */
1037
31.7M
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
1038
31.7M
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
1039
1040
31.7M
    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
1041
31.7M
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
1042
1043
31.7M
    x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1044
31.7M
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1045
1046
31.7M
    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
1047
31.7M
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
1048
1049
31.7M
    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1050
31.7M
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1051
1052
    // i=16
1053
31.7M
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
1054
31.7M
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
1055
1056
31.7M
    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1057
31.7M
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1058
31.7M
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1059
31.7M
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1060
1061
31.7M
    vr2 -= x[36];            /* Re A - Re B */
1062
31.7M
    ur2 -= x[37];            /* Im A - Im B */
1063
31.7M
    vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
1064
31.7M
    ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
1065
1066
31.7M
    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
1067
31.7M
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
1068
1069
31.7M
    x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1070
31.7M
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1071
1072
31.7M
    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
1073
31.7M
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
1074
1075
31.7M
    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1076
31.7M
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1077
1078
    // i = 32
1079
1080
31.7M
    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1081
31.7M
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1082
31.7M
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1083
31.7M
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1084
1085
31.7M
    vr3 -= x[34];              /* Re A - Re B */
1086
31.7M
    ur3 -= x[35];              /* Im A - Im B */
1087
31.7M
    vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
1088
31.7M
    ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
1089
1090
31.7M
    x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1091
31.7M
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1092
1093
    // i=48
1094
1095
31.7M
    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1096
31.7M
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1097
31.7M
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1098
31.7M
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1099
1100
31.7M
    vr4 -= x[38]; /* Re A - Re B */
1101
31.7M
    ur4 -= x[39]; /* Im A - Im B */
1102
1103
31.7M
    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1104
31.7M
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1105
1106
31.7M
    vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
1107
31.7M
    ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
1108
1109
31.7M
    x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1110
31.7M
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1111
31.7M
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1112
31.7M
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1113
1114
    // i=8
1115
31.7M
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
1116
31.7M
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
1117
31.7M
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
1118
31.7M
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
1119
1120
31.7M
    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1121
31.7M
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1122
1123
31.7M
    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
1124
31.7M
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
1125
1126
31.7M
    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1127
31.7M
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1128
1129
31.7M
    vr -= x[40];             /* Re A - Re B */
1130
31.7M
    ur -= x[41];             /* Im A - Im B */
1131
31.7M
    vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
1132
31.7M
    ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
1133
1134
31.7M
    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
1135
31.7M
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
1136
1137
31.7M
    x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1138
31.7M
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1139
1140
31.7M
    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
1141
31.7M
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
1142
1143
31.7M
    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1144
31.7M
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1145
1146
    // i=24
1147
31.7M
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
1148
31.7M
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
1149
1150
31.7M
    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1151
31.7M
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1152
31.7M
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1153
31.7M
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1154
1155
31.7M
    vr2 -= x[44];            /* Re A - Re B */
1156
31.7M
    ur2 -= x[45];            /* Im A - Im B */
1157
31.7M
    vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
1158
31.7M
    ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
1159
1160
31.7M
    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
1161
31.7M
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
1162
1163
31.7M
    x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1164
31.7M
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1165
1166
31.7M
    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
1167
31.7M
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
1168
1169
31.7M
    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1170
31.7M
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1171
1172
    // i=40
1173
1174
31.7M
    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1175
31.7M
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1176
31.7M
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1177
31.7M
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1178
1179
31.7M
    vr3 -= x[42];              /* Re A - Re B */
1180
31.7M
    ur3 -= x[43];              /* Im A - Im B */
1181
31.7M
    vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
1182
31.7M
    ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
1183
1184
31.7M
    x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1185
31.7M
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1186
1187
    // i=56
1188
1189
31.7M
    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1190
31.7M
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1191
31.7M
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1192
31.7M
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1193
1194
31.7M
    vr4 -= x[46]; /* Re A - Re B */
1195
31.7M
    ur4 -= x[47]; /* Im A - Im B */
1196
1197
31.7M
    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1198
31.7M
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1199
1200
31.7M
    vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
1201
31.7M
    ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
1202
1203
31.7M
    x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1204
31.7M
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1205
31.7M
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1206
31.7M
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1207
31.7M
  }
1208
1209
31.7M
  {
1210
31.7M
    FIXP_DBL *xt = _x;
1211
1212
31.7M
    int j = 4;
1213
127M
    do {
1214
127M
      FIXP_DBL vi, ui, vr, ur;
1215
1216
127M
      vr = xt[8];
1217
127M
      vi = xt[9];
1218
127M
      ur = xt[0] >> 1;
1219
127M
      ui = xt[1] >> 1;
1220
127M
      xt[0] = ur + (vr >> 1);
1221
127M
      xt[1] = ui + (vi >> 1);
1222
127M
      xt[8] = ur - (vr >> 1);
1223
127M
      xt[9] = ui - (vi >> 1);
1224
1225
127M
      vr = xt[13];
1226
127M
      vi = xt[12];
1227
127M
      ur = xt[4] >> 1;
1228
127M
      ui = xt[5] >> 1;
1229
127M
      xt[4] = ur + (vr >> 1);
1230
127M
      xt[5] = ui - (vi >> 1);
1231
127M
      xt[12] = ur - (vr >> 1);
1232
127M
      xt[13] = ui + (vi >> 1);
1233
1234
127M
      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
1235
127M
      ur = xt[2];
1236
127M
      ui = xt[3];
1237
127M
      xt[2] = (ur >> 1) + vr;
1238
127M
      xt[3] = (ui >> 1) + vi;
1239
127M
      xt[10] = (ur >> 1) - vr;
1240
127M
      xt[11] = (ui >> 1) - vi;
1241
1242
127M
      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
1243
127M
      ur = xt[6];
1244
127M
      ui = xt[7];
1245
1246
127M
      xt[6] = (ur >> 1) + vr;
1247
127M
      xt[7] = (ui >> 1) - vi;
1248
127M
      xt[14] = (ur >> 1) - vr;
1249
127M
      xt[15] = (ui >> 1) + vi;
1250
127M
      xt += 16;
1251
127M
    } while (--j != 0);
1252
31.7M
  }
1253
1254
31.7M
  {
1255
31.7M
    FIXP_DBL *const x = _x;
1256
31.7M
    FIXP_DBL vi, ui, vr, ur;
1257
1258
31.7M
    vr = x[16];
1259
31.7M
    vi = x[17];
1260
31.7M
    ur = x[0] >> 1;
1261
31.7M
    ui = x[1] >> 1;
1262
31.7M
    x[0] = ur + (vr >> 1);
1263
31.7M
    x[1] = ui + (vi >> 1);
1264
31.7M
    x[16] = ur - (vr >> 1);
1265
31.7M
    x[17] = ui - (vi >> 1);
1266
1267
31.7M
    vi = x[24];
1268
31.7M
    vr = x[25];
1269
31.7M
    ur = x[8] >> 1;
1270
31.7M
    ui = x[9] >> 1;
1271
31.7M
    x[8] = ur + (vr >> 1);
1272
31.7M
    x[9] = ui - (vi >> 1);
1273
31.7M
    x[24] = ur - (vr >> 1);
1274
31.7M
    x[25] = ui + (vi >> 1);
1275
1276
31.7M
    vr = x[48];
1277
31.7M
    vi = x[49];
1278
31.7M
    ur = x[32] >> 1;
1279
31.7M
    ui = x[33] >> 1;
1280
31.7M
    x[32] = ur + (vr >> 1);
1281
31.7M
    x[33] = ui + (vi >> 1);
1282
31.7M
    x[48] = ur - (vr >> 1);
1283
31.7M
    x[49] = ui - (vi >> 1);
1284
1285
31.7M
    vi = x[56];
1286
31.7M
    vr = x[57];
1287
31.7M
    ur = x[40] >> 1;
1288
31.7M
    ui = x[41] >> 1;
1289
31.7M
    x[40] = ur + (vr >> 1);
1290
31.7M
    x[41] = ui - (vi >> 1);
1291
31.7M
    x[56] = ur - (vr >> 1);
1292
31.7M
    x[57] = ui + (vi >> 1);
1293
1294
31.7M
    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
1295
31.7M
    ur = x[2];
1296
31.7M
    ui = x[3];
1297
31.7M
    x[2] = (ur >> 1) + vr;
1298
31.7M
    x[3] = (ui >> 1) + vi;
1299
31.7M
    x[18] = (ur >> 1) - vr;
1300
31.7M
    x[19] = (ui >> 1) - vi;
1301
1302
31.7M
    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
1303
31.7M
    ur = x[10];
1304
31.7M
    ui = x[11];
1305
31.7M
    x[10] = (ur >> 1) + vr;
1306
31.7M
    x[11] = (ui >> 1) - vi;
1307
31.7M
    x[26] = (ur >> 1) - vr;
1308
31.7M
    x[27] = (ui >> 1) + vi;
1309
1310
31.7M
    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
1311
31.7M
    ur = x[34];
1312
31.7M
    ui = x[35];
1313
31.7M
    x[34] = (ur >> 1) + vr;
1314
31.7M
    x[35] = (ui >> 1) + vi;
1315
31.7M
    x[50] = (ur >> 1) - vr;
1316
31.7M
    x[51] = (ui >> 1) - vi;
1317
1318
31.7M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
1319
31.7M
    ur = x[42];
1320
31.7M
    ui = x[43];
1321
31.7M
    x[42] = (ur >> 1) + vr;
1322
31.7M
    x[43] = (ui >> 1) - vi;
1323
31.7M
    x[58] = (ur >> 1) - vr;
1324
31.7M
    x[59] = (ui >> 1) + vi;
1325
1326
31.7M
    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
1327
31.7M
    ur = x[4];
1328
31.7M
    ui = x[5];
1329
31.7M
    x[4] = (ur >> 1) + vr;
1330
31.7M
    x[5] = (ui >> 1) + vi;
1331
31.7M
    x[20] = (ur >> 1) - vr;
1332
31.7M
    x[21] = (ui >> 1) - vi;
1333
1334
31.7M
    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
1335
31.7M
    ur = x[12];
1336
31.7M
    ui = x[13];
1337
31.7M
    x[12] = (ur >> 1) + vr;
1338
31.7M
    x[13] = (ui >> 1) - vi;
1339
31.7M
    x[28] = (ur >> 1) - vr;
1340
31.7M
    x[29] = (ui >> 1) + vi;
1341
1342
31.7M
    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
1343
31.7M
    ur = x[36];
1344
31.7M
    ui = x[37];
1345
31.7M
    x[36] = (ur >> 1) + vr;
1346
31.7M
    x[37] = (ui >> 1) + vi;
1347
31.7M
    x[52] = (ur >> 1) - vr;
1348
31.7M
    x[53] = (ui >> 1) - vi;
1349
1350
31.7M
    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
1351
31.7M
    ur = x[44];
1352
31.7M
    ui = x[45];
1353
31.7M
    x[44] = (ur >> 1) + vr;
1354
31.7M
    x[45] = (ui >> 1) - vi;
1355
31.7M
    x[60] = (ur >> 1) - vr;
1356
31.7M
    x[61] = (ui >> 1) + vi;
1357
1358
31.7M
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
1359
31.7M
    ur = x[6];
1360
31.7M
    ui = x[7];
1361
31.7M
    x[6] = (ur >> 1) + vr;
1362
31.7M
    x[7] = (ui >> 1) + vi;
1363
31.7M
    x[22] = (ur >> 1) - vr;
1364
31.7M
    x[23] = (ui >> 1) - vi;
1365
1366
31.7M
    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
1367
31.7M
    ur = x[14];
1368
31.7M
    ui = x[15];
1369
31.7M
    x[14] = (ur >> 1) + vr;
1370
31.7M
    x[15] = (ui >> 1) - vi;
1371
31.7M
    x[30] = (ur >> 1) - vr;
1372
31.7M
    x[31] = (ui >> 1) + vi;
1373
1374
31.7M
    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
1375
31.7M
    ur = x[38];
1376
31.7M
    ui = x[39];
1377
31.7M
    x[38] = (ur >> 1) + vr;
1378
31.7M
    x[39] = (ui >> 1) + vi;
1379
31.7M
    x[54] = (ur >> 1) - vr;
1380
31.7M
    x[55] = (ui >> 1) - vi;
1381
1382
31.7M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
1383
31.7M
    ur = x[46];
1384
31.7M
    ui = x[47];
1385
1386
31.7M
    x[46] = (ur >> 1) + vr;
1387
31.7M
    x[47] = (ui >> 1) - vi;
1388
31.7M
    x[62] = (ur >> 1) - vr;
1389
31.7M
    x[63] = (ui >> 1) + vi;
1390
1391
31.7M
    vr = x[32];
1392
31.7M
    vi = x[33];
1393
31.7M
    ur = x[0] >> 1;
1394
31.7M
    ui = x[1] >> 1;
1395
31.7M
    x[0] = ur + (vr >> 1);
1396
31.7M
    x[1] = ui + (vi >> 1);
1397
31.7M
    x[32] = ur - (vr >> 1);
1398
31.7M
    x[33] = ui - (vi >> 1);
1399
1400
31.7M
    vi = x[48];
1401
31.7M
    vr = x[49];
1402
31.7M
    ur = x[16] >> 1;
1403
31.7M
    ui = x[17] >> 1;
1404
31.7M
    x[16] = ur + (vr >> 1);
1405
31.7M
    x[17] = ui - (vi >> 1);
1406
31.7M
    x[48] = ur - (vr >> 1);
1407
31.7M
    x[49] = ui + (vi >> 1);
1408
1409
31.7M
    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
1410
31.7M
    ur = x[2];
1411
31.7M
    ui = x[3];
1412
31.7M
    x[2] = (ur >> 1) + vr;
1413
31.7M
    x[3] = (ui >> 1) + vi;
1414
31.7M
    x[34] = (ur >> 1) - vr;
1415
31.7M
    x[35] = (ui >> 1) - vi;
1416
1417
31.7M
    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
1418
31.7M
    ur = x[18];
1419
31.7M
    ui = x[19];
1420
31.7M
    x[18] = (ur >> 1) + vr;
1421
31.7M
    x[19] = (ui >> 1) - vi;
1422
31.7M
    x[50] = (ur >> 1) - vr;
1423
31.7M
    x[51] = (ui >> 1) + vi;
1424
1425
31.7M
    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
1426
31.7M
    ur = x[4];
1427
31.7M
    ui = x[5];
1428
31.7M
    x[4] = (ur >> 1) + vr;
1429
31.7M
    x[5] = (ui >> 1) + vi;
1430
31.7M
    x[36] = (ur >> 1) - vr;
1431
31.7M
    x[37] = (ui >> 1) - vi;
1432
1433
31.7M
    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
1434
31.7M
    ur = x[20];
1435
31.7M
    ui = x[21];
1436
31.7M
    x[20] = (ur >> 1) + vr;
1437
31.7M
    x[21] = (ui >> 1) - vi;
1438
31.7M
    x[52] = (ur >> 1) - vr;
1439
31.7M
    x[53] = (ui >> 1) + vi;
1440
1441
31.7M
    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
1442
31.7M
    ur = x[6];
1443
31.7M
    ui = x[7];
1444
31.7M
    x[6] = (ur >> 1) + vr;
1445
31.7M
    x[7] = (ui >> 1) + vi;
1446
31.7M
    x[38] = (ur >> 1) - vr;
1447
31.7M
    x[39] = (ui >> 1) - vi;
1448
1449
31.7M
    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
1450
31.7M
    ur = x[22];
1451
31.7M
    ui = x[23];
1452
31.7M
    x[22] = (ur >> 1) + vr;
1453
31.7M
    x[23] = (ui >> 1) - vi;
1454
31.7M
    x[54] = (ur >> 1) - vr;
1455
31.7M
    x[55] = (ui >> 1) + vi;
1456
1457
31.7M
    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
1458
31.7M
    ur = x[8];
1459
31.7M
    ui = x[9];
1460
31.7M
    x[8] = (ur >> 1) + vr;
1461
31.7M
    x[9] = (ui >> 1) + vi;
1462
31.7M
    x[40] = (ur >> 1) - vr;
1463
31.7M
    x[41] = (ui >> 1) - vi;
1464
1465
31.7M
    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
1466
31.7M
    ur = x[24];
1467
31.7M
    ui = x[25];
1468
31.7M
    x[24] = (ur >> 1) + vr;
1469
31.7M
    x[25] = (ui >> 1) - vi;
1470
31.7M
    x[56] = (ur >> 1) - vr;
1471
31.7M
    x[57] = (ui >> 1) + vi;
1472
1473
31.7M
    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
1474
31.7M
    ur = x[10];
1475
31.7M
    ui = x[11];
1476
1477
31.7M
    x[10] = (ur >> 1) + vr;
1478
31.7M
    x[11] = (ui >> 1) + vi;
1479
31.7M
    x[42] = (ur >> 1) - vr;
1480
31.7M
    x[43] = (ui >> 1) - vi;
1481
1482
31.7M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
1483
31.7M
    ur = x[26];
1484
31.7M
    ui = x[27];
1485
31.7M
    x[26] = (ur >> 1) + vr;
1486
31.7M
    x[27] = (ui >> 1) - vi;
1487
31.7M
    x[58] = (ur >> 1) - vr;
1488
31.7M
    x[59] = (ui >> 1) + vi;
1489
1490
31.7M
    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1491
31.7M
    ur = x[12];
1492
31.7M
    ui = x[13];
1493
31.7M
    x[12] = (ur >> 1) + vr;
1494
31.7M
    x[13] = (ui >> 1) + vi;
1495
31.7M
    x[44] = (ur >> 1) - vr;
1496
31.7M
    x[45] = (ui >> 1) - vi;
1497
1498
31.7M
    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1499
31.7M
    ur = x[28];
1500
31.7M
    ui = x[29];
1501
31.7M
    x[28] = (ur >> 1) + vr;
1502
31.7M
    x[29] = (ui >> 1) - vi;
1503
31.7M
    x[60] = (ur >> 1) - vr;
1504
31.7M
    x[61] = (ui >> 1) + vi;
1505
1506
31.7M
    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1507
31.7M
    ur = x[14];
1508
31.7M
    ui = x[15];
1509
31.7M
    x[14] = (ur >> 1) + vr;
1510
31.7M
    x[15] = (ui >> 1) + vi;
1511
31.7M
    x[46] = (ur >> 1) - vr;
1512
31.7M
    x[47] = (ui >> 1) - vi;
1513
1514
31.7M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1515
31.7M
    ur = x[30];
1516
31.7M
    ui = x[31];
1517
31.7M
    x[30] = (ur >> 1) + vr;
1518
31.7M
    x[31] = (ui >> 1) - vi;
1519
31.7M
    x[62] = (ur >> 1) - vr;
1520
31.7M
    x[63] = (ui >> 1) + vi;
1521
31.7M
  }
1522
31.7M
}
1523
#endif /* #ifndef FUNCTION_fft_32 */
1524
1525
/**
1526
 * \brief Apply rotation vectors to a data buffer.
1527
 * \param cl length of each row of input data.
1528
 * \param l total length of input data.
1529
 * \param pVecRe real part of rotation coefficient vector.
1530
 * \param pVecIm imaginary part of rotation coefficient vector.
1531
 */
1532
1533
/*
1534
   This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535
   (-1.0) instead. At the end, the sign of the result is inverted
1536
*/
1537
#define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
1540
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541
                                        const int l, const FIXP_STB *pVecRe,
1542
4.07M
                                        const FIXP_STB *pVecIm) {
1543
4.07M
  FIXP_DBL re, im;
1544
4.07M
  FIXP_STB vre, vim;
1545
1546
4.07M
  int i, c;
1547
1548
17.5M
  for (i = 0; i < cl; i++) {
1549
13.5M
    re = pData[2 * i];
1550
13.5M
    im = pData[2 * i + 1];
1551
1552
13.5M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1553
13.5M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554
13.5M
  }
1555
25.4M
  for (; i < l; i += cl) {
1556
21.3M
    re = pData[2 * i];
1557
21.3M
    im = pData[2 * i + 1];
1558
1559
21.3M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1560
21.3M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562
128M
    for (c = i + 1; c < i + cl; c++) {
1563
107M
      re = pData[2 * c] >> 1;
1564
107M
      im = pData[2 * c + 1] >> 1;
1565
107M
      vre = *pVecRe++;
1566
107M
      vim = *pVecIm++;
1567
1568
107M
      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569
107M
    }
1570
21.3M
  }
1571
4.07M
}
1572
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574
/* select either switch case of function pointer. */
1575
//#define FFT_TWO_STAGE_SWITCH_CASE
1576
#ifndef FUNCTION_fftN2_func
1577
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578
                              const int dim1, const int dim2,
1579
                              void (*const fft1)(FIXP_DBL *),
1580
                              void (*const fft2)(FIXP_DBL *),
1581
                              const FIXP_STB *RotVectorReal,
1582
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583
4.07M
                              FIXP_DBL *aDst2) {
1584
  /* The real part of the input samples are at the addresses with even indices
1585
  and the imaginary part of the input samples are at the addresses with odd
1586
  indices. The output samples are stored at the address of pInput
1587
  */
1588
4.07M
  FIXP_DBL *pSrc, *pDst, *pDstOut;
1589
4.07M
  int i;
1590
1591
4.07M
  FDK_ASSERT(length == dim1 * dim2);
1592
1593
  /* Perform dim2 times the fft of length dim1. The input samples are at the
1594
  address of pSrc and the output samples are at the address of pDst. The input
1595
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596
  the output samples are stored consecutively.
1597
  */
1598
4.07M
  pSrc = pInput;
1599
4.07M
  pDst = aDst;
1600
29.5M
  for (i = 0; i < dim2; i++) {
1601
167M
    for (int j = 0; j < dim1; j++) {
1602
142M
      pDst[2 * j] = pSrc[2 * j * dim2];
1603
142M
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604
142M
    }
1605
1606
      /* fft of size dim1 */
1607
25.4M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1608
25.4M
    fft1(pDst);
1609
#else
1610
    switch (dim1) {
1611
      case 2:
1612
        fft2(pDst);
1613
        break;
1614
      case 3:
1615
        fft3(pDst);
1616
        break;
1617
      case 4:
1618
        fft_4(pDst);
1619
        break;
1620
      /* case 5: fft5(pDst); break; */
1621
      /* case 8: fft_8(pDst); break; */
1622
      case 12:
1623
        fft12(pDst);
1624
        break;
1625
      /* case 15: fft15(pDst); break; */
1626
      case 16:
1627
        fft_16(pDst);
1628
        break;
1629
      case 32:
1630
        fft_32(pDst);
1631
        break;
1632
        /*case 64: fft_64(pDst); break;*/
1633
        /* case 128: fft_128(pDst); break; */
1634
    }
1635
#endif
1636
25.4M
    pSrc += 2;
1637
25.4M
    pDst = pDst + 2 * dim1;
1638
25.4M
  }
1639
1640
  /* Perform the modulation of the output of the fft of length dim1 */
1641
4.07M
  pSrc = aDst;
1642
4.07M
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644
  /* Perform dim1 times the fft of length dim2. The input samples are at the
1645
  address of aDst and the output samples are at the address of pInput. The input
1646
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647
  the output samples are stored consecutively at the address of pInput.
1648
  */
1649
4.07M
  pSrc = aDst;
1650
4.07M
  pDst = aDst2;
1651
4.07M
  pDstOut = pInput;
1652
17.5M
  for (i = 0; i < dim1; i++) {
1653
155M
    for (int j = 0; j < dim2; j++) {
1654
142M
      pDst[2 * j] = pSrc[2 * j * dim1];
1655
142M
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656
142M
    }
1657
1658
13.5M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1659
13.5M
    fft2(pDst);
1660
#else
1661
    switch (dim2) {
1662
      case 4:
1663
        fft_4(pDst);
1664
        break;
1665
      case 9:
1666
        fft9(pDst);
1667
        break;
1668
      case 12:
1669
        fft12(pDst);
1670
        break;
1671
      case 15:
1672
        fft15(pDst);
1673
        break;
1674
      case 16:
1675
        fft_16(pDst);
1676
        break;
1677
      case 32:
1678
        fft_32(pDst);
1679
        break;
1680
    }
1681
#endif
1682
1683
155M
    for (int j = 0; j < dim2; j++) {
1684
142M
      pDstOut[2 * j * dim1] = pDst[2 * j];
1685
142M
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686
142M
    }
1687
13.5M
    pSrc += 2;
1688
13.5M
    pDstOut += 2;
1689
13.5M
  }
1690
4.07M
}
1691
#endif /* FUNCTION_fftN2_function */
1692
1693
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
1694
              RotVectorReal, RotVectorImag)                                \
1695
4.07M
  {                                                                        \
1696
4.07M
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
1697
4.07M
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
1698
4.07M
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
1699
4.07M
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
1700
4.07M
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
1701
4.07M
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
1702
4.07M
  }
1703
1704
  /*!
1705
   *
1706
   *  \brief  complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707
   *  \param  pInput contains the input signal prescaled right by 2
1708
   *          pInput contains the output signal scaled by SCALEFACTOR<#length>
1709
   *          The output signal does not have any fixed headroom
1710
   *  \return void
1711
   *
1712
   */
1713
1714
#ifndef FUNCTION_fft6
1715
2.99M
static inline void fft6(FIXP_DBL *pInput) {
1716
2.99M
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717
2.99M
}
1718
#endif /* #ifndef FUNCTION_fft6 */
1719
1720
#ifndef FUNCTION_fft12
1721
static inline void fft12(FIXP_DBL *pInput) {
1722
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723
        RotVectorImag12); /* 16,58 */
1724
}
1725
#endif /* #ifndef FUNCTION_fft12 */
1726
1727
#ifndef FUNCTION_fft20
1728
144k
static inline void fft20(FIXP_DBL *pInput) {
1729
144k
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730
144k
        RotVectorImag20);
1731
144k
}
1732
#endif /* FUNCTION_fft20 */
1733
1734
8.37k
static inline void fft24(FIXP_DBL *pInput) {
1735
8.37k
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736
8.37k
        RotVectorImag24); /* 16,73 */
1737
8.37k
}
1738
1739
340k
static inline void fft48(FIXP_DBL *pInput) {
1740
340k
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741
340k
        RotVectorImag48); /* 16,32 */
1742
340k
}
1743
1744
#ifndef FUNCTION_fft60
1745
230k
static inline void fft60(FIXP_DBL *pInput) {
1746
230k
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747
230k
        RotVectorImag60); /* 15,51 */
1748
230k
}
1749
#endif /* FUNCTION_fft60 */
1750
1751
#ifndef FUNCTION_fft80
1752
132
static inline void fft80(FIXP_DBL *pInput) {
1753
132
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754
132
        RotVectorImag80); /*  */
1755
132
}
1756
#endif
1757
1758
#ifndef FUNCTION_fft96
1759
72.9k
static inline void fft96(FIXP_DBL *pInput) {
1760
72.9k
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761
72.9k
        RotVectorImag96); /* 15,47 */
1762
72.9k
}
1763
#endif /* FUNCTION_fft96*/
1764
1765
#ifndef FUNCTION_fft120
1766
3.30k
static inline void fft120(FIXP_DBL *pInput) {
1767
3.30k
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768
3.30k
        RotVectorImag120);
1769
3.30k
}
1770
#endif /* FUNCTION_fft120 */
1771
1772
#ifndef FUNCTION_fft192
1773
9.10k
static inline void fft192(FIXP_DBL *pInput) {
1774
9.10k
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775
9.10k
        RotVectorImag192); /* 15,50 */
1776
9.10k
}
1777
#endif
1778
1779
#ifndef FUNCTION_fft240
1780
171k
static inline void fft240(FIXP_DBL *pInput) {
1781
171k
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782
171k
        RotVectorImag240); /* 15.44 */
1783
171k
}
1784
#endif
1785
1786
#ifndef FUNCTION_fft384
1787
89.0k
static inline void fft384(FIXP_DBL *pInput) {
1788
89.0k
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789
89.0k
        RotVectorImag384); /* 16.02 */
1790
89.0k
}
1791
#endif /* FUNCTION_fft384 */
1792
1793
#ifndef FUNCTION_fft480
1794
13.7k
static inline void fft480(FIXP_DBL *pInput) {
1795
13.7k
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796
13.7k
        RotVectorImag480); /* 15.84 */
1797
13.7k
}
1798
#endif /* FUNCTION_fft480 */
1799
1800
69.7M
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802
69.7M
  C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804
69.7M
  if (length == 32) {
1805
30.2M
    fft_32(pInput);
1806
30.2M
    *pScalefactor += SCALEFACTOR32;
1807
39.5M
  } else {
1808
39.5M
    switch (length) {
1809
16.6M
      case 16:
1810
16.6M
        fft_16(pInput);
1811
16.6M
        *pScalefactor += SCALEFACTOR16;
1812
16.6M
        break;
1813
6.52M
      case 8:
1814
6.52M
        fft_8(pInput);
1815
6.52M
        *pScalefactor += SCALEFACTOR8;
1816
6.52M
        break;
1817
0
      case 2:
1818
0
        fft2(pInput);
1819
0
        *pScalefactor += SCALEFACTOR2;
1820
0
        break;
1821
0
      case 3:
1822
0
        fft3(pInput);
1823
0
        *pScalefactor += SCALEFACTOR3;
1824
0
        break;
1825
706k
      case 4:
1826
706k
        fft_4(pInput);
1827
706k
        *pScalefactor += SCALEFACTOR4;
1828
706k
        break;
1829
0
      case 5:
1830
0
        fft5(pInput);
1831
0
        *pScalefactor += SCALEFACTOR5;
1832
0
        break;
1833
2.99M
      case 6:
1834
2.99M
        fft6(pInput);
1835
2.99M
        *pScalefactor += SCALEFACTOR6;
1836
2.99M
        break;
1837
144k
      case 10:
1838
144k
        fft10(pInput);
1839
144k
        *pScalefactor += SCALEFACTOR10;
1840
144k
        break;
1841
10.7M
      case 12:
1842
10.7M
        fft12(pInput);
1843
10.7M
        *pScalefactor += SCALEFACTOR12;
1844
10.7M
        break;
1845
0
      case 15:
1846
0
        fft15(pInput);
1847
0
        *pScalefactor += SCALEFACTOR15;
1848
0
        break;
1849
144k
      case 20:
1850
144k
        fft20(pInput);
1851
144k
        *pScalefactor += SCALEFACTOR20;
1852
144k
        break;
1853
8.37k
      case 24:
1854
8.37k
        fft24(pInput);
1855
8.37k
        *pScalefactor += SCALEFACTOR24;
1856
8.37k
        break;
1857
340k
      case 48:
1858
340k
        fft48(pInput);
1859
340k
        *pScalefactor += SCALEFACTOR48;
1860
340k
        break;
1861
230k
      case 60:
1862
230k
        fft60(pInput);
1863
230k
        *pScalefactor += SCALEFACTOR60;
1864
230k
        break;
1865
393k
      case 64:
1866
393k
        dit_fft(pInput, 6, SineTable512, 512);
1867
393k
        *pScalefactor += SCALEFACTOR64;
1868
393k
        break;
1869
132
      case 80:
1870
132
        fft80(pInput);
1871
132
        *pScalefactor += SCALEFACTOR80;
1872
132
        break;
1873
72.9k
      case 96:
1874
72.9k
        fft96(pInput);
1875
72.9k
        *pScalefactor += SCALEFACTOR96;
1876
72.9k
        break;
1877
3.30k
      case 120:
1878
3.30k
        fft120(pInput);
1879
3.30k
        *pScalefactor += SCALEFACTOR120;
1880
3.30k
        break;
1881
72.7k
      case 128:
1882
72.7k
        dit_fft(pInput, 7, SineTable512, 512);
1883
72.7k
        *pScalefactor += SCALEFACTOR128;
1884
72.7k
        break;
1885
9.10k
      case 192:
1886
9.10k
        fft192(pInput);
1887
9.10k
        *pScalefactor += SCALEFACTOR192;
1888
9.10k
        break;
1889
171k
      case 240:
1890
171k
        fft240(pInput);
1891
171k
        *pScalefactor += SCALEFACTOR240;
1892
171k
        break;
1893
72.4k
      case 256:
1894
72.4k
        dit_fft(pInput, 8, SineTable512, 512);
1895
72.4k
        *pScalefactor += SCALEFACTOR256;
1896
72.4k
        break;
1897
89.0k
      case 384:
1898
89.0k
        fft384(pInput);
1899
89.0k
        *pScalefactor += SCALEFACTOR384;
1900
89.0k
        break;
1901
13.7k
      case 480:
1902
13.7k
        fft480(pInput);
1903
13.7k
        *pScalefactor += SCALEFACTOR480;
1904
13.7k
        break;
1905
139k
      case 512:
1906
139k
        dit_fft(pInput, 9, SineTable512, 512);
1907
139k
        *pScalefactor += SCALEFACTOR512;
1908
139k
        break;
1909
0
      default:
1910
0
        FDK_ASSERT(0); /* FFT length not supported! */
1911
0
        break;
1912
39.5M
    }
1913
39.5M
  }
1914
69.7M
}
1915
1916
0
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917
0
  switch (length) {
1918
0
    default:
1919
0
      FDK_ASSERT(0); /* IFFT length not supported! */
1920
0
      break;
1921
0
  }
1922
0
}