Coverage Report

Created: 2025-10-13 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aac/libFDK/src/fft.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4
© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5
Forschung e.V. All rights reserved.
6
7
 1.    INTRODUCTION
8
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10
scheme for digital audio. This FDK AAC Codec software is intended to be used on
11
a wide variety of Android devices.
12
13
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14
general perceptual audio codecs. AAC-ELD is considered the best-performing
15
full-bandwidth communications codec by independent studies and is widely
16
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17
specifications.
18
19
Patent licenses for necessary patent claims for the FDK AAC Codec (including
20
those of Fraunhofer) may be obtained through Via Licensing
21
(www.vialicensing.com) or through the respective patent owners individually for
22
the purpose of encoding or decoding bit streams in products that are compliant
23
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24
Android devices already license these patent claims through Via Licensing or
25
directly from the patent owners, and therefore FDK AAC Codec software may
26
already be covered under those patent licenses when it is used for those
27
licensed purposes only.
28
29
Commercially-licensed AAC software libraries, including floating-point versions
30
with enhanced sound quality, are also available from Fraunhofer. Users are
31
encouraged to check the Fraunhofer website for additional applications
32
information and documentation.
33
34
2.    COPYRIGHT LICENSE
35
36
Redistribution and use in source and binary forms, with or without modification,
37
are permitted without payment of copyright license fees provided that you
38
satisfy the following conditions:
39
40
You must retain the complete text of this software license in redistributions of
41
the FDK AAC Codec or your modifications thereto in source code form.
42
43
You must retain the complete text of this software license in the documentation
44
and/or other materials provided with redistributions of the FDK AAC Codec or
45
your modifications thereto in binary form. You must make available free of
46
charge copies of the complete source code of the FDK AAC Codec and your
47
modifications thereto to recipients of copies in binary form.
48
49
The name of Fraunhofer may not be used to endorse or promote products derived
50
from this library without prior written permission.
51
52
You may not charge copyright license fees for anyone to use, copy or distribute
53
the FDK AAC Codec software or your modifications thereto.
54
55
Your modified versions of the FDK AAC Codec must carry prominent notices stating
56
that you changed the software and the date of any change. For modified versions
57
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59
AAC Codec Library for Android."
60
61
3.    NO PATENT LICENSE
62
63
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65
Fraunhofer provides no warranty of patent non-infringement with respect to this
66
software.
67
68
You may use this FDK AAC Codec software or modifications thereto only for
69
purposes that are authorized by appropriate patent licenses.
70
71
4.    DISCLAIMER
72
73
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75
including but not limited to the implied warranties of merchantability and
76
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78
or consequential damages, including but not limited to procurement of substitute
79
goods or services; loss of use, data, or profits, or business interruption,
80
however caused and on any theory of liability, whether in contract, strict
81
liability, or tort (including negligence), arising in any way out of the use of
82
this software, even if advised of the possibility of such damage.
83
84
5.    CONTACT INFORMATION
85
86
Fraunhofer Institute for Integrated Circuits IIS
87
Attention: Audio and Multimedia Departments - FDK AAC LL
88
Am Wolfsmantel 33
89
91058 Erlangen, Germany
90
91
www.iis.fraunhofer.de/amm
92
amm-info@iis.fraunhofer.de
93
----------------------------------------------------------------------------- */
94
95
/******************* Library for basic calculation routines ********************
96
97
   Author(s):   Josef Hoepfl, DSP Solutions
98
99
   Description: Fix point FFT
100
101
*******************************************************************************/
102
103
#include "fft_rad2.h"
104
#include "FDK_tools_rom.h"
105
106
244M
#define W_PiFOURTH STC(0x5a82799a)
107
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
108
#ifndef SUMDIFF_PIFOURTH
109
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
110
607M
  {                                       \
111
607M
    FIXP_DBL wa, wb;                      \
112
607M
    wa = fMultDiv2(a, W_PiFOURTH);        \
113
607M
    wb = fMultDiv2(b, W_PiFOURTH);        \
114
607M
    diff = wb - wa;                       \
115
607M
    sum = wb + wa;                        \
116
607M
  }
117
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
118
  {                                               \
119
    FIXP_SGL wa, wb;                              \
120
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
121
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
122
    diff = wb - wa;                               \
123
    sum = wb + wa;                                \
124
  }
125
#endif
126
127
#define SCALEFACTOR2048 10
128
#define SCALEFACTOR1024 9
129
157k
#define SCALEFACTOR512 8
130
76.3k
#define SCALEFACTOR256 7
131
67.9k
#define SCALEFACTOR128 6
132
421k
#define SCALEFACTOR64 5
133
33.3M
#define SCALEFACTOR32 4
134
18.0M
#define SCALEFACTOR16 3
135
6.61M
#define SCALEFACTOR8 2
136
1.51M
#define SCALEFACTOR4 1
137
2.81M
#define SCALEFACTOR2 1
138
139
2.88M
#define SCALEFACTOR3 1
140
132k
#define SCALEFACTOR5 1
141
2.80M
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
142
#define SCALEFACTOR7 2
143
#define SCALEFACTOR9 2
144
2.78M
#define SCALEFACTOR10 5
145
11.7M
#define SCALEFACTOR12 3
146
436k
#define SCALEFACTOR15 3
147
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
148
132k
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
149
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
150
7.77k
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
151
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
152
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
153
379k
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
154
258k
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
155
250
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
156
75.4k
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
157
3.57k
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
158
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
159
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
160
10.1k
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
161
159k
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
162
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
163
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
164
86.1k
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
165
15.4k
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167
#include "fft.h"
168
169
#ifndef FUNCTION_fft2
170
171
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172
 * with factor 0.5 */
173
8.51M
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174
8.51M
  FIXP_DBL r1, i1;
175
8.51M
  FIXP_DBL r2, i2;
176
177
  /* real part */
178
8.51M
  r1 = pDat[2];
179
8.51M
  r2 = pDat[0];
180
181
  /* imaginary part */
182
8.51M
  i1 = pDat[3];
183
8.51M
  i2 = pDat[1];
184
185
  /* real part */
186
8.51M
  pDat[0] = (r2 + r1) >> 1;
187
8.51M
  pDat[2] = (r2 - r1) >> 1;
188
189
  /* imaginary part */
190
8.51M
  pDat[1] = (i2 + i1) >> 1;
191
8.51M
  pDat[3] = (i2 - i1) >> 1;
192
8.51M
}
193
#endif /* FUNCTION_fft2 */
194
195
16.0M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
196
197
#ifndef FUNCTION_fft3
198
/* Performs the FFT of length 3 according to the algorithm after winograd. */
199
8.03M
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200
8.03M
  FIXP_DBL r1, r2;
201
8.03M
  FIXP_DBL s1, s2;
202
8.03M
  FIXP_DBL pD;
203
204
  /* real part */
205
8.03M
  r1 = pDat[2] + pDat[4];
206
8.03M
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207
8.03M
  pD = pDat[0] >> 1;
208
8.03M
  pDat[0] = pD + (r1 >> 1);
209
8.03M
  r1 = pD - (r1 >> 2);
210
211
  /* imaginary part */
212
8.03M
  s1 = pDat[3] + pDat[5];
213
8.03M
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214
8.03M
  pD = pDat[1] >> 1;
215
8.03M
  pDat[1] = pD + (s1 >> 1);
216
8.03M
  s1 = pD - (s1 >> 2);
217
218
  /* combination */
219
8.03M
  pDat[2] = r1 - s2;
220
8.03M
  pDat[4] = r1 + s2;
221
8.03M
  pDat[3] = s1 + r2;
222
8.03M
  pDat[5] = s1 - r2;
223
8.03M
}
224
#endif /* #ifndef FUNCTION_fft3 */
225
226
128M
#define F5C(x) STC(x)
227
228
25.6M
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
229
25.6M
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
230
25.6M
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
231
25.6M
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
232
25.6M
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
233
234
/* performs the FFT of length 5 according to the algorithm after winograd */
235
/* This version works with a prescale of 2 instead of 3 */
236
12.8M
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237
12.8M
  FIXP_DBL r1, r2, r3, r4;
238
12.8M
  FIXP_DBL s1, s2, s3, s4;
239
12.8M
  FIXP_DBL t;
240
241
  /* real part */
242
12.8M
  r1 = (pDat[2] + pDat[8]) >> 1;
243
12.8M
  r4 = (pDat[2] - pDat[8]) >> 1;
244
12.8M
  r3 = (pDat[4] + pDat[6]) >> 1;
245
12.8M
  r2 = (pDat[4] - pDat[6]) >> 1;
246
12.8M
  t = fMult((r1 - r3), C54);
247
12.8M
  r1 = r1 + r3;
248
12.8M
  pDat[0] = (pDat[0] >> 1) + r1;
249
  /* Bit shift left because of the constant C55 which was scaled with the factor
250
     0.5 because of the representation of the values as fracts */
251
12.8M
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252
12.8M
  r3 = r1 - t;
253
12.8M
  r1 = r1 + t;
254
12.8M
  t = fMult((r4 + r2), C51);
255
  /* Bit shift left because of the constant C55 which was scaled with the factor
256
     0.5 because of the representation of the values as fracts */
257
12.8M
  r4 = t + (fMultDiv2(r4, C52) << (2));
258
12.8M
  r2 = t + fMult(r2, C53);
259
260
  /* imaginary part */
261
12.8M
  s1 = (pDat[3] + pDat[9]) >> 1;
262
12.8M
  s4 = (pDat[3] - pDat[9]) >> 1;
263
12.8M
  s3 = (pDat[5] + pDat[7]) >> 1;
264
12.8M
  s2 = (pDat[5] - pDat[7]) >> 1;
265
12.8M
  t = fMult((s1 - s3), C54);
266
12.8M
  s1 = s1 + s3;
267
12.8M
  pDat[1] = (pDat[1] >> 1) + s1;
268
  /* Bit shift left because of the constant C55 which was scaled with the factor
269
     0.5 because of the representation of the values as fracts */
270
12.8M
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271
12.8M
  s3 = s1 - t;
272
12.8M
  s1 = s1 + t;
273
12.8M
  t = fMult((s4 + s2), C51);
274
  /* Bit shift left because of the constant C55 which was scaled with the factor
275
     0.5 because of the representation of the values as fracts */
276
12.8M
  s4 = t + (fMultDiv2(s4, C52) << (2));
277
12.8M
  s2 = t + fMult(s2, C53);
278
279
  /* combination */
280
12.8M
  pDat[2] = r1 + s2;
281
12.8M
  pDat[8] = r1 - s2;
282
12.8M
  pDat[4] = r3 - s4;
283
12.8M
  pDat[6] = r3 + s4;
284
285
12.8M
  pDat[3] = s1 - r2;
286
12.8M
  pDat[9] = s1 + r2;
287
12.8M
  pDat[5] = s3 + r4;
288
12.8M
  pDat[7] = s3 - r4;
289
12.8M
}
290
291
2.64M
#define F5C(x) STC(x)
292
293
529k
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
294
529k
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
295
529k
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
296
529k
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
297
529k
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
298
/**
299
 * \brief    Function performs a complex 10-point FFT
300
 *           The FFT is performed inplace. The result of the FFT
301
 *           is scaled by SCALEFACTOR10 bits.
302
 *
303
 *           WOPS FLC version:                    1093 cycles
304
 *           WOPS with 32x16 bit multiplications:  196 cycles
305
 *
306
 * \param    [i/o] re    real input / output
307
 * \param    [i/o] im    imag input / output
308
 * \param    [i  ] s     stride real and imag input / output
309
 *
310
 * \return   void
311
 */
312
static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313
132k
{
314
132k
  FIXP_DBL t;
315
132k
  FIXP_DBL x0, x1, x2, x3, x4;
316
132k
  FIXP_DBL r1, r2, r3, r4;
317
132k
  FIXP_DBL s1, s2, s3, s4;
318
132k
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319
132k
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321
132k
  const int s = 1;  // stride factor
322
323
  /* 2 fft5 stages */
324
325
  /* real part */
326
132k
  x0 = (x[s * 0] >> SCALEFACTOR10);
327
132k
  x1 = (x[s * 4] >> SCALEFACTOR10);
328
132k
  x2 = (x[s * 8] >> SCALEFACTOR10);
329
132k
  x3 = (x[s * 12] >> SCALEFACTOR10);
330
132k
  x4 = (x[s * 16] >> SCALEFACTOR10);
331
332
132k
  r1 = (x3 + x2);
333
132k
  r4 = (x3 - x2);
334
132k
  r3 = (x1 + x4);
335
132k
  r2 = (x1 - x4);
336
132k
  t = fMult((r1 - r3), C54);
337
132k
  r1 = (r1 + r3);
338
132k
  y00 = (x0 + r1);
339
132k
  r1 = (y00 + ((fMult(r1, C55) << 1)));
340
132k
  r3 = (r1 - t);
341
132k
  r1 = (r1 + t);
342
132k
  t = fMult((r4 + r2), C51);
343
132k
  r4 = (t + (fMult(r4, C52) << 1));
344
132k
  r2 = (t + fMult(r2, C53));
345
346
  /* imaginary part */
347
132k
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348
132k
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349
132k
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350
132k
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351
132k
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353
132k
  s1 = (x3 + x2);
354
132k
  s4 = (x3 - x2);
355
132k
  s3 = (x1 + x4);
356
132k
  s2 = (x1 - x4);
357
132k
  t = fMult((s1 - s3), C54);
358
132k
  s1 = (s1 + s3);
359
132k
  y01 = (x0 + s1);
360
132k
  s1 = (y01 + (fMult(s1, C55) << 1));
361
132k
  s3 = (s1 - t);
362
132k
  s1 = (s1 + t);
363
132k
  t = fMult((s4 + s2), C51);
364
132k
  s4 = (t + (fMult(s4, C52) << 1));
365
132k
  s2 = (t + fMult(s2, C53));
366
367
  /* combination */
368
132k
  y04 = (r1 + s2);
369
132k
  y16 = (r1 - s2);
370
132k
  y08 = (r3 - s4);
371
132k
  y12 = (r3 + s4);
372
373
132k
  y05 = (s1 - r2);
374
132k
  y17 = (s1 + r2);
375
132k
  y09 = (s3 + r4);
376
132k
  y13 = (s3 - r4);
377
378
  /* real part */
379
132k
  x0 = (x[s * 10] >> SCALEFACTOR10);
380
132k
  x1 = (x[s * 2] >> SCALEFACTOR10);
381
132k
  x2 = (x[s * 6] >> SCALEFACTOR10);
382
132k
  x3 = (x[s * 14] >> SCALEFACTOR10);
383
132k
  x4 = (x[s * 18] >> SCALEFACTOR10);
384
385
132k
  r1 = (x1 + x4);
386
132k
  r4 = (x1 - x4);
387
132k
  r3 = (x3 + x2);
388
132k
  r2 = (x3 - x2);
389
132k
  t = fMult((r1 - r3), C54);
390
132k
  r1 = (r1 + r3);
391
132k
  y02 = (x0 + r1);
392
132k
  r1 = (y02 + ((fMult(r1, C55) << 1)));
393
132k
  r3 = (r1 - t);
394
132k
  r1 = (r1 + t);
395
132k
  t = fMult(((r4 + r2)), C51);
396
132k
  r4 = (t + (fMult(r4, C52) << 1));
397
132k
  r2 = (t + fMult(r2, C53));
398
399
  /* imaginary part */
400
132k
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401
132k
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402
132k
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403
132k
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404
132k
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406
132k
  s1 = (x1 + x4);
407
132k
  s4 = (x1 - x4);
408
132k
  s3 = (x3 + x2);
409
132k
  s2 = (x3 - x2);
410
132k
  t = fMult((s1 - s3), C54);
411
132k
  s1 = (s1 + s3);
412
132k
  y03 = (x0 + s1);
413
132k
  s1 = (y03 + (fMult(s1, C55) << 1));
414
132k
  s3 = (s1 - t);
415
132k
  s1 = (s1 + t);
416
132k
  t = fMult((s4 + s2), C51);
417
132k
  s4 = (t + (fMult(s4, C52) << 1));
418
132k
  s2 = (t + fMult(s2, C53));
419
420
  /* combination */
421
132k
  y06 = (r1 + s2);
422
132k
  y18 = (r1 - s2);
423
132k
  y10 = (r3 - s4);
424
132k
  y14 = (r3 + s4);
425
426
132k
  y07 = (s1 - r2);
427
132k
  y19 = (s1 + r2);
428
132k
  y11 = (s3 + r4);
429
132k
  y15 = (s3 - r4);
430
431
  /* 5 fft2 stages */
432
132k
  x[s * 0] = (y00 + y02);
433
132k
  x[s * 0 + 1] = (y01 + y03);
434
132k
  x[s * 10] = (y00 - y02);
435
132k
  x[s * 10 + 1] = (y01 - y03);
436
437
132k
  x[s * 4] = (y04 + y06);
438
132k
  x[s * 4 + 1] = (y05 + y07);
439
132k
  x[s * 14] = (y04 - y06);
440
132k
  x[s * 14 + 1] = (y05 - y07);
441
442
132k
  x[s * 8] = (y08 + y10);
443
132k
  x[s * 8 + 1] = (y09 + y11);
444
132k
  x[s * 18] = (y08 - y10);
445
132k
  x[s * 18 + 1] = (y09 - y11);
446
447
132k
  x[s * 12] = (y12 + y14);
448
132k
  x[s * 12 + 1] = (y13 + y15);
449
132k
  x[s * 2] = (y12 - y14);
450
132k
  x[s * 2 + 1] = (y13 - y15);
451
452
132k
  x[s * 16] = (y16 + y18);
453
132k
  x[s * 16 + 1] = (y17 + y19);
454
132k
  x[s * 6] = (y16 - y18);
455
132k
  x[s * 6 + 1] = (y17 - y19);
456
132k
}
457
458
#ifndef FUNCTION_fft12
459
#define FUNCTION_fft12
460
461
#undef C31
462
166M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
463
464
15.7M
static inline void fft12(FIXP_DBL *pInput) {
465
15.7M
  FIXP_DBL aDst[24];
466
15.7M
  FIXP_DBL *pSrc, *pDst;
467
15.7M
  int i;
468
469
15.7M
  pSrc = pInput;
470
15.7M
  pDst = aDst;
471
15.7M
  FIXP_DBL r1, r2, s1, s2, pD;
472
473
  /* First 3*2 samples are shifted right by 2 before output */
474
15.7M
  r1 = pSrc[8] + pSrc[16];
475
15.7M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
476
15.7M
  pD = pSrc[0] >> 1;
477
15.7M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
478
15.7M
  r1 = pD - (r1 >> 2);
479
480
  /* imaginary part */
481
15.7M
  s1 = pSrc[9] + pSrc[17];
482
15.7M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
483
15.7M
  pD = pSrc[1] >> 1;
484
15.7M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
485
15.7M
  s1 = pD - (s1 >> 2);
486
487
  /* combination */
488
15.7M
  pDst[2] = (r1 - s2) >> 1;
489
15.7M
  pDst[3] = (s1 + r2) >> 1;
490
15.7M
  pDst[4] = (r1 + s2) >> 1;
491
15.7M
  pDst[5] = (s1 - r2) >> 1;
492
15.7M
  pSrc += 2;
493
15.7M
  pDst += 6;
494
495
15.7M
  const FIXP_STB *pVecRe = RotVectorReal12;
496
15.7M
  const FIXP_STB *pVecIm = RotVectorImag12;
497
15.7M
  FIXP_DBL re, im;
498
15.7M
  FIXP_STB vre, vim;
499
47.2M
  for (i = 0; i < 2; i++) {
500
    /* sample 0,1 are shifted right by 2 before output */
501
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
502
     * output */
503
504
31.4M
    r1 = pSrc[8] + pSrc[16];
505
31.4M
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
506
31.4M
    pD = pSrc[0] >> 1;
507
31.4M
    pDst[0] = (pD + (r1 >> 1)) >> 1;
508
31.4M
    r1 = pD - (r1 >> 2);
509
510
    /* imaginary part */
511
31.4M
    s1 = pSrc[9] + pSrc[17];
512
31.4M
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
513
31.4M
    pD = pSrc[1] >> 1;
514
31.4M
    pDst[1] = (pD + (s1 >> 1)) >> 1;
515
31.4M
    s1 = pD - (s1 >> 2);
516
517
    /* combination */
518
31.4M
    re = (r1 - s2) >> 0;
519
31.4M
    im = (s1 + r2) >> 0;
520
31.4M
    vre = *pVecRe++;
521
31.4M
    vim = *pVecIm++;
522
31.4M
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
523
524
31.4M
    re = (r1 + s2) >> 0;
525
31.4M
    im = (s1 - r2) >> 0;
526
31.4M
    vre = *pVecRe++;
527
31.4M
    vim = *pVecIm++;
528
31.4M
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
529
530
31.4M
    pDst += 6;
531
31.4M
    pSrc += 2;
532
31.4M
  }
533
  /* sample 0,1 are shifted right by 2 before output */
534
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
535
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
536
15.7M
  r1 = pSrc[8] + pSrc[16];
537
15.7M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
538
15.7M
  pD = pSrc[0] >> 1;
539
15.7M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
540
15.7M
  r1 = pD - (r1 >> 2);
541
542
  /* imaginary part */
543
15.7M
  s1 = pSrc[9] + pSrc[17];
544
15.7M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
545
15.7M
  pD = pSrc[1] >> 1;
546
15.7M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
547
15.7M
  s1 = pD - (s1 >> 2);
548
549
  /* combination */
550
15.7M
  pDst[2] = (s1 + r2) >> 1;
551
15.7M
  pDst[3] = (s2 - r1) >> 1;
552
15.7M
  pDst[4] = -((r1 + s2) >> 1);
553
15.7M
  pDst[5] = (r2 - s1) >> 1;
554
555
  /* Perform 3 times the fft of length 4. The input samples are at the address
556
  of aDst and the output samples are at the address of pInput. The input vector
557
  for the fft of length 4 is built of the interleaved samples in aDst, the
558
  output samples are stored consecutively at the address of pInput.
559
  */
560
15.7M
  pSrc = aDst;
561
15.7M
  pDst = pInput;
562
62.9M
  for (i = 0; i < 3; i++) {
563
    /* inline FFT4 merged with incoming resorting loop */
564
47.2M
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
565
566
47.2M
    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
567
47.2M
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
568
47.2M
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
569
47.2M
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
570
571
47.2M
    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
572
47.2M
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
573
574
47.2M
    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
575
47.2M
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */
576
577
47.2M
    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
578
47.2M
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
579
580
47.2M
    a10 = a10 - pSrc[18]; /* Re C - Re D */
581
47.2M
    a30 = a30 - pSrc[19]; /* Im C - Im D */
582
583
47.2M
    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
584
47.2M
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
585
47.2M
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
586
47.2M
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
587
588
47.2M
    pSrc += 2;
589
47.2M
    pDst += 2;
590
47.2M
  }
591
15.7M
}
592
#endif /* FUNCTION_fft12 */
593
594
#ifndef FUNCTION_fft15
595
596
151M
#define N3 3
597
86.1M
#define N5 5
598
49.2M
#define N6 6
599
151M
#define N15 15
600
601
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
602
 * length 5. */
603
4.10M
static inline void fft15(FIXP_DBL *pInput) {
604
4.10M
  FIXP_DBL aDst[2 * N15];
605
4.10M
  FIXP_DBL aDst1[2 * N15];
606
4.10M
  int i, k, l;
607
608
  /* Sort input vector for fft's of length 3
609
  input3(0:2)   = [input(0) input(5) input(10)];
610
  input3(3:5)   = [input(3) input(8) input(13)];
611
  input3(6:8)   = [input(6) input(11) input(1)];
612
  input3(9:11)  = [input(9) input(14) input(4)];
613
  input3(12:14) = [input(12) input(2) input(7)]; */
614
4.10M
  {
615
4.10M
    const FIXP_DBL *pSrc = pInput;
616
4.10M
    FIXP_DBL *RESTRICT pDst = aDst;
617
    /* Merge 3 loops into one, skip call of fft3 */
618
24.6M
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
619
20.5M
      pDst[k + 0] = pSrc[l];
620
20.5M
      pDst[k + 1] = pSrc[l + 1];
621
20.5M
      l += 2 * N5;
622
20.5M
      if (l >= (2 * N15)) l -= (2 * N15);
623
624
20.5M
      pDst[k + 2] = pSrc[l];
625
20.5M
      pDst[k + 3] = pSrc[l + 1];
626
20.5M
      l += 2 * N5;
627
20.5M
      if (l >= (2 * N15)) l -= (2 * N15);
628
20.5M
      pDst[k + 4] = pSrc[l];
629
20.5M
      pDst[k + 5] = pSrc[l + 1];
630
20.5M
      l += (2 * N5) + (2 * N3);
631
20.5M
      if (l >= (2 * N15)) l -= (2 * N15);
632
633
      /* fft3 merged with shift right by 2 loop */
634
20.5M
      FIXP_DBL r1, r2, r3;
635
20.5M
      FIXP_DBL s1, s2;
636
      /* real part */
637
20.5M
      r1 = pDst[k + 2] + pDst[k + 4];
638
20.5M
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
639
20.5M
      s1 = pDst[k + 0];
640
20.5M
      pDst[k + 0] = (s1 + r1) >> 2;
641
20.5M
      r1 = s1 - (r1 >> 1);
642
643
      /* imaginary part */
644
20.5M
      s1 = pDst[k + 3] + pDst[k + 5];
645
20.5M
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
646
20.5M
      r3 = pDst[k + 1];
647
20.5M
      pDst[k + 1] = (r3 + s1) >> 2;
648
20.5M
      s1 = r3 - (s1 >> 1);
649
650
      /* combination */
651
20.5M
      pDst[k + 2] = (r1 - s2) >> 2;
652
20.5M
      pDst[k + 4] = (r1 + s2) >> 2;
653
20.5M
      pDst[k + 3] = (s1 + r2) >> 2;
654
20.5M
      pDst[k + 5] = (s1 - r2) >> 2;
655
20.5M
    }
656
4.10M
  }
657
  /* Sort input vector for fft's of length 5
658
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
659
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
660
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
661
  /* Merge 2 loops into one, brings about 10% */
662
4.10M
  {
663
4.10M
    const FIXP_DBL *pSrc = aDst;
664
4.10M
    FIXP_DBL *RESTRICT pDst = aDst1;
665
16.4M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
666
12.3M
      l = 2 * i;
667
12.3M
      pDst[k + 0] = pSrc[l + 0];
668
12.3M
      pDst[k + 1] = pSrc[l + 1];
669
12.3M
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
670
12.3M
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
671
12.3M
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
672
12.3M
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
673
12.3M
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
674
12.3M
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
675
12.3M
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
676
12.3M
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
677
12.3M
      fft5(&pDst[k]);
678
12.3M
    }
679
4.10M
  }
680
  /* Sort output vector of length 15
681
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
682
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
683
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
684
  /* optimize clumsy loop, brings about 5% */
685
4.10M
  {
686
4.10M
    const FIXP_DBL *pSrc = aDst1;
687
4.10M
    FIXP_DBL *RESTRICT pDst = pInput;
688
16.4M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
689
12.3M
      pDst[k + 0] = pSrc[l];
690
12.3M
      pDst[k + 1] = pSrc[l + 1];
691
12.3M
      l += (2 * N6);
692
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
693
12.3M
      pDst[k + 2] = pSrc[l];
694
12.3M
      pDst[k + 3] = pSrc[l + 1];
695
12.3M
      l += (2 * N6);
696
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
697
12.3M
      pDst[k + 4] = pSrc[l];
698
12.3M
      pDst[k + 5] = pSrc[l + 1];
699
12.3M
      l += (2 * N6);
700
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
701
12.3M
      pDst[k + 6] = pSrc[l];
702
12.3M
      pDst[k + 7] = pSrc[l + 1];
703
12.3M
      l += (2 * N6);
704
12.3M
      if (l >= (2 * N15)) l -= (2 * N15);
705
12.3M
      pDst[k + 8] = pSrc[l];
706
12.3M
      pDst[k + 9] = pSrc[l + 1];
707
12.3M
      l += 2; /* no modulo check needed, it cannot occur */
708
12.3M
    }
709
4.10M
  }
710
4.10M
}
711
#endif /* FUNCTION_fft15 */
712
713
/*
714
 Select shift placement.
715
 Some processors like ARM may shift "for free" in combination with an addition
716
 or substraction, but others don't so either combining shift with +/- or reduce
717
 the total amount or shift operations is optimal
718
 */
719
#if !defined(__arm__)
720
880M
#define SHIFT_A >> 1
721
#define SHIFT_B
722
#else
723
#define SHIFT_A
724
#define SHIFT_B >> 1
725
#endif
726
727
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728
                         */
729
730
/* This defines prevents this array to be declared twice, if 16-bit fft is
731
 * enabled too */
732
#define FUNCTION_DATA_fft_16_w16
733
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
734
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
736
LNK_SECTION_CODE_L1
737
20.4M
inline void fft_16(FIXP_DBL *RESTRICT x) {
738
20.4M
  FIXP_DBL vr, ur;
739
20.4M
  FIXP_DBL vr2, ur2;
740
20.4M
  FIXP_DBL vr3, ur3;
741
20.4M
  FIXP_DBL vr4, ur4;
742
20.4M
  FIXP_DBL vi, ui;
743
20.4M
  FIXP_DBL vi2, ui2;
744
20.4M
  FIXP_DBL vi3, ui3;
745
746
20.4M
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
747
20.4M
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
748
20.4M
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
749
20.4M
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
750
20.4M
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
751
20.4M
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
752
753
20.4M
  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
754
20.4M
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
755
756
20.4M
  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
757
20.4M
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
758
20.4M
  vr -= x[16];              /* Re A - Re B */
759
20.4M
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
760
20.4M
  ur -= x[17];              /* Im A - Im B */
761
20.4M
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
762
763
20.4M
  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
764
20.4M
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
765
766
20.4M
  x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
767
20.4M
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
768
769
20.4M
  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
770
20.4M
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
771
772
20.4M
  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
773
20.4M
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
774
775
20.4M
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
776
20.4M
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
777
20.4M
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
778
20.4M
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
779
20.4M
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
780
20.4M
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
781
20.4M
  vr2 -= x[20];                            /* Re A - Re B */
782
20.4M
  ur2 -= x[21];                            /* Im A - Im B */
783
20.4M
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
784
20.4M
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
785
786
20.4M
  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
787
20.4M
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
788
789
20.4M
  x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
790
20.4M
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
791
792
20.4M
  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
793
20.4M
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
794
795
20.4M
  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
796
20.4M
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
797
798
20.4M
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
799
20.4M
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
800
20.4M
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
801
20.4M
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
802
20.4M
  vr3 -= x[18];               /* Re A - Re B */
803
20.4M
  ur3 -= x[19];               /* Im A - Im B */
804
20.4M
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
805
20.4M
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
806
20.4M
  x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
807
20.4M
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
808
809
20.4M
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
810
20.4M
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
811
20.4M
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
812
20.4M
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
813
20.4M
  vr4 -= x[22];                /* Re A - Re B */
814
20.4M
  ur4 -= x[23];                /* Im A - Im B */
815
816
20.4M
  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
817
20.4M
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
818
819
20.4M
  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
820
20.4M
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
821
20.4M
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
822
20.4M
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
823
20.4M
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
824
20.4M
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
825
826
  // xt1 =  0
827
  // xt2 =  8
828
20.4M
  vr = x[8];
829
20.4M
  vi = x[9];
830
20.4M
  ur = x[0] >> 1;
831
20.4M
  ui = x[1] >> 1;
832
20.4M
  x[0] = ur + (vr >> 1);
833
20.4M
  x[1] = ui + (vi >> 1);
834
20.4M
  x[8] = ur - (vr >> 1);
835
20.4M
  x[9] = ui - (vi >> 1);
836
837
  // xt1 =  4
838
  // xt2 = 12
839
20.4M
  vr = x[13];
840
20.4M
  vi = x[12];
841
20.4M
  ur = x[4] >> 1;
842
20.4M
  ui = x[5] >> 1;
843
20.4M
  x[4] = ur + (vr >> 1);
844
20.4M
  x[5] = ui - (vi >> 1);
845
20.4M
  x[12] = ur - (vr >> 1);
846
20.4M
  x[13] = ui + (vi >> 1);
847
848
  // xt1 = 16
849
  // xt2 = 24
850
20.4M
  vr = x[24];
851
20.4M
  vi = x[25];
852
20.4M
  ur = x[16] >> 1;
853
20.4M
  ui = x[17] >> 1;
854
20.4M
  x[16] = ur + (vr >> 1);
855
20.4M
  x[17] = ui + (vi >> 1);
856
20.4M
  x[24] = ur - (vr >> 1);
857
20.4M
  x[25] = ui - (vi >> 1);
858
859
  // xt1 = 20
860
  // xt2 = 28
861
20.4M
  vr = x[29];
862
20.4M
  vi = x[28];
863
20.4M
  ur = x[20] >> 1;
864
20.4M
  ui = x[21] >> 1;
865
20.4M
  x[20] = ur + (vr >> 1);
866
20.4M
  x[21] = ui - (vi >> 1);
867
20.4M
  x[28] = ur - (vr >> 1);
868
20.4M
  x[29] = ui + (vi >> 1);
869
870
  // xt1 =  2
871
  // xt2 = 10
872
20.4M
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
873
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
874
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
875
20.4M
  ur = x[2];
876
20.4M
  ui = x[3];
877
20.4M
  x[2] = (ur >> 1) + vr;
878
20.4M
  x[3] = (ui >> 1) + vi;
879
20.4M
  x[10] = (ur >> 1) - vr;
880
20.4M
  x[11] = (ui >> 1) - vi;
881
882
  // xt1 =  6
883
  // xt2 = 14
884
20.4M
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
885
20.4M
  ur = x[6];
886
20.4M
  ui = x[7];
887
20.4M
  x[6] = (ur >> 1) + vr;
888
20.4M
  x[7] = (ui >> 1) - vi;
889
20.4M
  x[14] = (ur >> 1) - vr;
890
20.4M
  x[15] = (ui >> 1) + vi;
891
892
  // xt1 = 18
893
  // xt2 = 26
894
20.4M
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
895
20.4M
  ur = x[18];
896
20.4M
  ui = x[19];
897
20.4M
  x[18] = (ur >> 1) + vr;
898
20.4M
  x[19] = (ui >> 1) + vi;
899
20.4M
  x[26] = (ur >> 1) - vr;
900
20.4M
  x[27] = (ui >> 1) - vi;
901
902
  // xt1 = 22
903
  // xt2 = 30
904
20.4M
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
905
20.4M
  ur = x[22];
906
20.4M
  ui = x[23];
907
20.4M
  x[22] = (ur >> 1) + vr;
908
20.4M
  x[23] = (ui >> 1) - vi;
909
20.4M
  x[30] = (ur >> 1) - vr;
910
20.4M
  x[31] = (ui >> 1) + vi;
911
912
  // xt1 =  0
913
  // xt2 = 16
914
20.4M
  vr = x[16];
915
20.4M
  vi = x[17];
916
20.4M
  ur = x[0] >> 1;
917
20.4M
  ui = x[1] >> 1;
918
20.4M
  x[0] = ur + (vr >> 1);
919
20.4M
  x[1] = ui + (vi >> 1);
920
20.4M
  x[16] = ur - (vr >> 1);
921
20.4M
  x[17] = ui - (vi >> 1);
922
923
  // xt1 =  8
924
  // xt2 = 24
925
20.4M
  vi = x[24];
926
20.4M
  vr = x[25];
927
20.4M
  ur = x[8] >> 1;
928
20.4M
  ui = x[9] >> 1;
929
20.4M
  x[8] = ur + (vr >> 1);
930
20.4M
  x[9] = ui - (vi >> 1);
931
20.4M
  x[24] = ur - (vr >> 1);
932
20.4M
  x[25] = ui + (vi >> 1);
933
934
  // xt1 =  2
935
  // xt2 = 18
936
20.4M
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
937
20.4M
  ur = x[2];
938
20.4M
  ui = x[3];
939
20.4M
  x[2] = (ur >> 1) + vr;
940
20.4M
  x[3] = (ui >> 1) + vi;
941
20.4M
  x[18] = (ur >> 1) - vr;
942
20.4M
  x[19] = (ui >> 1) - vi;
943
944
  // xt1 = 10
945
  // xt2 = 26
946
20.4M
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
947
20.4M
  ur = x[10];
948
20.4M
  ui = x[11];
949
20.4M
  x[10] = (ur >> 1) + vr;
950
20.4M
  x[11] = (ui >> 1) - vi;
951
20.4M
  x[26] = (ur >> 1) - vr;
952
20.4M
  x[27] = (ui >> 1) + vi;
953
954
  // xt1 =  4
955
  // xt2 = 20
956
20.4M
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
957
20.4M
  ur = x[4];
958
20.4M
  ui = x[5];
959
20.4M
  x[4] = (ur >> 1) + vr;
960
20.4M
  x[5] = (ui >> 1) + vi;
961
20.4M
  x[20] = (ur >> 1) - vr;
962
20.4M
  x[21] = (ui >> 1) - vi;
963
964
  // xt1 = 12
965
  // xt2 = 28
966
20.4M
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
967
20.4M
  ur = x[12];
968
20.4M
  ui = x[13];
969
20.4M
  x[12] = (ur >> 1) + vr;
970
20.4M
  x[13] = (ui >> 1) - vi;
971
20.4M
  x[28] = (ur >> 1) - vr;
972
20.4M
  x[29] = (ui >> 1) + vi;
973
974
  // xt1 =  6
975
  // xt2 = 22
976
20.4M
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
977
20.4M
  ur = x[6];
978
20.4M
  ui = x[7];
979
20.4M
  x[6] = (ur >> 1) + vr;
980
20.4M
  x[7] = (ui >> 1) + vi;
981
20.4M
  x[22] = (ur >> 1) - vr;
982
20.4M
  x[23] = (ui >> 1) - vi;
983
984
  // xt1 = 14
985
  // xt2 = 30
986
20.4M
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
987
20.4M
  ur = x[14];
988
20.4M
  ui = x[15];
989
20.4M
  x[14] = (ur >> 1) + vr;
990
20.4M
  x[15] = (ui >> 1) - vi;
991
20.4M
  x[30] = (ur >> 1) - vr;
992
20.4M
  x[31] = (ui >> 1) + vi;
993
20.4M
}
994
#endif /* FUNCTION_fft_16 */
995
996
#ifndef FUNCTION_fft_32
997
static const FIXP_STP fft32_w32[6] = {
998
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
999
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
1000
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
1001
969M
#define W_PiFOURTH STC(0x5a82799a)
1002
1003
LNK_SECTION_CODE_L1
1004
34.6M
inline void fft_32(FIXP_DBL *const _x) {
1005
  /*
1006
   * 1+2 stage radix 4
1007
   */
1008
1009
  /////////////////////////////////////////////////////////////////////////////////////////
1010
34.6M
  {
1011
34.6M
    FIXP_DBL *const x = _x;
1012
34.6M
    FIXP_DBL vi, ui;
1013
34.6M
    FIXP_DBL vi2, ui2;
1014
34.6M
    FIXP_DBL vi3, ui3;
1015
34.6M
    FIXP_DBL vr, ur;
1016
34.6M
    FIXP_DBL vr2, ur2;
1017
34.6M
    FIXP_DBL vr3, ur3;
1018
34.6M
    FIXP_DBL vr4, ur4;
1019
1020
    // i = 0
1021
34.6M
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
1022
34.6M
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
1023
34.6M
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
1024
34.6M
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
1025
1026
34.6M
    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1027
34.6M
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1028
1029
34.6M
    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
1030
34.6M
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
1031
1032
34.6M
    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1033
34.6M
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1034
1035
34.6M
    vr -= x[32];             /* Re A - Re B */
1036
34.6M
    ur -= x[33];             /* Im A - Im B */
1037
34.6M
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
1038
34.6M
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
1039
1040
34.6M
    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
1041
34.6M
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
1042
1043
34.6M
    x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1044
34.6M
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1045
1046
34.6M
    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
1047
34.6M
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
1048
1049
34.6M
    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1050
34.6M
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1051
1052
    // i=16
1053
34.6M
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
1054
34.6M
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
1055
1056
34.6M
    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1057
34.6M
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1058
34.6M
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1059
34.6M
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1060
1061
34.6M
    vr2 -= x[36];            /* Re A - Re B */
1062
34.6M
    ur2 -= x[37];            /* Im A - Im B */
1063
34.6M
    vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
1064
34.6M
    ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
1065
1066
34.6M
    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
1067
34.6M
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
1068
1069
34.6M
    x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1070
34.6M
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1071
1072
34.6M
    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
1073
34.6M
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
1074
1075
34.6M
    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1076
34.6M
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1077
1078
    // i = 32
1079
1080
34.6M
    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1081
34.6M
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1082
34.6M
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1083
34.6M
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1084
1085
34.6M
    vr3 -= x[34];              /* Re A - Re B */
1086
34.6M
    ur3 -= x[35];              /* Im A - Im B */
1087
34.6M
    vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
1088
34.6M
    ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
1089
1090
34.6M
    x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1091
34.6M
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1092
1093
    // i=48
1094
1095
34.6M
    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1096
34.6M
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1097
34.6M
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1098
34.6M
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1099
1100
34.6M
    vr4 -= x[38]; /* Re A - Re B */
1101
34.6M
    ur4 -= x[39]; /* Im A - Im B */
1102
1103
34.6M
    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1104
34.6M
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1105
1106
34.6M
    vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
1107
34.6M
    ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
1108
1109
34.6M
    x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1110
34.6M
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1111
34.6M
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1112
34.6M
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1113
1114
    // i=8
1115
34.6M
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
1116
34.6M
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
1117
34.6M
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
1118
34.6M
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
1119
1120
34.6M
    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1121
34.6M
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1122
1123
34.6M
    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
1124
34.6M
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
1125
1126
34.6M
    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1127
34.6M
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1128
1129
34.6M
    vr -= x[40];             /* Re A - Re B */
1130
34.6M
    ur -= x[41];             /* Im A - Im B */
1131
34.6M
    vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
1132
34.6M
    ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
1133
1134
34.6M
    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
1135
34.6M
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
1136
1137
34.6M
    x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1138
34.6M
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1139
1140
34.6M
    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
1141
34.6M
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
1142
1143
34.6M
    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1144
34.6M
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1145
1146
    // i=24
1147
34.6M
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
1148
34.6M
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
1149
1150
34.6M
    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1151
34.6M
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1152
34.6M
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1153
34.6M
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1154
1155
34.6M
    vr2 -= x[44];            /* Re A - Re B */
1156
34.6M
    ur2 -= x[45];            /* Im A - Im B */
1157
34.6M
    vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
1158
34.6M
    ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
1159
1160
34.6M
    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
1161
34.6M
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
1162
1163
34.6M
    x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1164
34.6M
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1165
1166
34.6M
    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
1167
34.6M
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
1168
1169
34.6M
    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1170
34.6M
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1171
1172
    // i=40
1173
1174
34.6M
    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1175
34.6M
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1176
34.6M
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1177
34.6M
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1178
1179
34.6M
    vr3 -= x[42];              /* Re A - Re B */
1180
34.6M
    ur3 -= x[43];              /* Im A - Im B */
1181
34.6M
    vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
1182
34.6M
    ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
1183
1184
34.6M
    x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1185
34.6M
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1186
1187
    // i=56
1188
1189
34.6M
    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1190
34.6M
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1191
34.6M
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1192
34.6M
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1193
1194
34.6M
    vr4 -= x[46]; /* Re A - Re B */
1195
34.6M
    ur4 -= x[47]; /* Im A - Im B */
1196
1197
34.6M
    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1198
34.6M
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1199
1200
34.6M
    vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
1201
34.6M
    ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
1202
1203
34.6M
    x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1204
34.6M
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1205
34.6M
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1206
34.6M
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1207
34.6M
  }
1208
1209
34.6M
  {
1210
34.6M
    FIXP_DBL *xt = _x;
1211
1212
34.6M
    int j = 4;
1213
138M
    do {
1214
138M
      FIXP_DBL vi, ui, vr, ur;
1215
1216
138M
      vr = xt[8];
1217
138M
      vi = xt[9];
1218
138M
      ur = xt[0] >> 1;
1219
138M
      ui = xt[1] >> 1;
1220
138M
      xt[0] = ur + (vr >> 1);
1221
138M
      xt[1] = ui + (vi >> 1);
1222
138M
      xt[8] = ur - (vr >> 1);
1223
138M
      xt[9] = ui - (vi >> 1);
1224
1225
138M
      vr = xt[13];
1226
138M
      vi = xt[12];
1227
138M
      ur = xt[4] >> 1;
1228
138M
      ui = xt[5] >> 1;
1229
138M
      xt[4] = ur + (vr >> 1);
1230
138M
      xt[5] = ui - (vi >> 1);
1231
138M
      xt[12] = ur - (vr >> 1);
1232
138M
      xt[13] = ui + (vi >> 1);
1233
1234
138M
      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
1235
138M
      ur = xt[2];
1236
138M
      ui = xt[3];
1237
138M
      xt[2] = (ur >> 1) + vr;
1238
138M
      xt[3] = (ui >> 1) + vi;
1239
138M
      xt[10] = (ur >> 1) - vr;
1240
138M
      xt[11] = (ui >> 1) - vi;
1241
1242
138M
      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
1243
138M
      ur = xt[6];
1244
138M
      ui = xt[7];
1245
1246
138M
      xt[6] = (ur >> 1) + vr;
1247
138M
      xt[7] = (ui >> 1) - vi;
1248
138M
      xt[14] = (ur >> 1) - vr;
1249
138M
      xt[15] = (ui >> 1) + vi;
1250
138M
      xt += 16;
1251
138M
    } while (--j != 0);
1252
34.6M
  }
1253
1254
34.6M
  {
1255
34.6M
    FIXP_DBL *const x = _x;
1256
34.6M
    FIXP_DBL vi, ui, vr, ur;
1257
1258
34.6M
    vr = x[16];
1259
34.6M
    vi = x[17];
1260
34.6M
    ur = x[0] >> 1;
1261
34.6M
    ui = x[1] >> 1;
1262
34.6M
    x[0] = ur + (vr >> 1);
1263
34.6M
    x[1] = ui + (vi >> 1);
1264
34.6M
    x[16] = ur - (vr >> 1);
1265
34.6M
    x[17] = ui - (vi >> 1);
1266
1267
34.6M
    vi = x[24];
1268
34.6M
    vr = x[25];
1269
34.6M
    ur = x[8] >> 1;
1270
34.6M
    ui = x[9] >> 1;
1271
34.6M
    x[8] = ur + (vr >> 1);
1272
34.6M
    x[9] = ui - (vi >> 1);
1273
34.6M
    x[24] = ur - (vr >> 1);
1274
34.6M
    x[25] = ui + (vi >> 1);
1275
1276
34.6M
    vr = x[48];
1277
34.6M
    vi = x[49];
1278
34.6M
    ur = x[32] >> 1;
1279
34.6M
    ui = x[33] >> 1;
1280
34.6M
    x[32] = ur + (vr >> 1);
1281
34.6M
    x[33] = ui + (vi >> 1);
1282
34.6M
    x[48] = ur - (vr >> 1);
1283
34.6M
    x[49] = ui - (vi >> 1);
1284
1285
34.6M
    vi = x[56];
1286
34.6M
    vr = x[57];
1287
34.6M
    ur = x[40] >> 1;
1288
34.6M
    ui = x[41] >> 1;
1289
34.6M
    x[40] = ur + (vr >> 1);
1290
34.6M
    x[41] = ui - (vi >> 1);
1291
34.6M
    x[56] = ur - (vr >> 1);
1292
34.6M
    x[57] = ui + (vi >> 1);
1293
1294
34.6M
    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
1295
34.6M
    ur = x[2];
1296
34.6M
    ui = x[3];
1297
34.6M
    x[2] = (ur >> 1) + vr;
1298
34.6M
    x[3] = (ui >> 1) + vi;
1299
34.6M
    x[18] = (ur >> 1) - vr;
1300
34.6M
    x[19] = (ui >> 1) - vi;
1301
1302
34.6M
    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
1303
34.6M
    ur = x[10];
1304
34.6M
    ui = x[11];
1305
34.6M
    x[10] = (ur >> 1) + vr;
1306
34.6M
    x[11] = (ui >> 1) - vi;
1307
34.6M
    x[26] = (ur >> 1) - vr;
1308
34.6M
    x[27] = (ui >> 1) + vi;
1309
1310
34.6M
    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
1311
34.6M
    ur = x[34];
1312
34.6M
    ui = x[35];
1313
34.6M
    x[34] = (ur >> 1) + vr;
1314
34.6M
    x[35] = (ui >> 1) + vi;
1315
34.6M
    x[50] = (ur >> 1) - vr;
1316
34.6M
    x[51] = (ui >> 1) - vi;
1317
1318
34.6M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
1319
34.6M
    ur = x[42];
1320
34.6M
    ui = x[43];
1321
34.6M
    x[42] = (ur >> 1) + vr;
1322
34.6M
    x[43] = (ui >> 1) - vi;
1323
34.6M
    x[58] = (ur >> 1) - vr;
1324
34.6M
    x[59] = (ui >> 1) + vi;
1325
1326
34.6M
    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
1327
34.6M
    ur = x[4];
1328
34.6M
    ui = x[5];
1329
34.6M
    x[4] = (ur >> 1) + vr;
1330
34.6M
    x[5] = (ui >> 1) + vi;
1331
34.6M
    x[20] = (ur >> 1) - vr;
1332
34.6M
    x[21] = (ui >> 1) - vi;
1333
1334
34.6M
    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
1335
34.6M
    ur = x[12];
1336
34.6M
    ui = x[13];
1337
34.6M
    x[12] = (ur >> 1) + vr;
1338
34.6M
    x[13] = (ui >> 1) - vi;
1339
34.6M
    x[28] = (ur >> 1) - vr;
1340
34.6M
    x[29] = (ui >> 1) + vi;
1341
1342
34.6M
    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
1343
34.6M
    ur = x[36];
1344
34.6M
    ui = x[37];
1345
34.6M
    x[36] = (ur >> 1) + vr;
1346
34.6M
    x[37] = (ui >> 1) + vi;
1347
34.6M
    x[52] = (ur >> 1) - vr;
1348
34.6M
    x[53] = (ui >> 1) - vi;
1349
1350
34.6M
    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
1351
34.6M
    ur = x[44];
1352
34.6M
    ui = x[45];
1353
34.6M
    x[44] = (ur >> 1) + vr;
1354
34.6M
    x[45] = (ui >> 1) - vi;
1355
34.6M
    x[60] = (ur >> 1) - vr;
1356
34.6M
    x[61] = (ui >> 1) + vi;
1357
1358
34.6M
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
1359
34.6M
    ur = x[6];
1360
34.6M
    ui = x[7];
1361
34.6M
    x[6] = (ur >> 1) + vr;
1362
34.6M
    x[7] = (ui >> 1) + vi;
1363
34.6M
    x[22] = (ur >> 1) - vr;
1364
34.6M
    x[23] = (ui >> 1) - vi;
1365
1366
34.6M
    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
1367
34.6M
    ur = x[14];
1368
34.6M
    ui = x[15];
1369
34.6M
    x[14] = (ur >> 1) + vr;
1370
34.6M
    x[15] = (ui >> 1) - vi;
1371
34.6M
    x[30] = (ur >> 1) - vr;
1372
34.6M
    x[31] = (ui >> 1) + vi;
1373
1374
34.6M
    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
1375
34.6M
    ur = x[38];
1376
34.6M
    ui = x[39];
1377
34.6M
    x[38] = (ur >> 1) + vr;
1378
34.6M
    x[39] = (ui >> 1) + vi;
1379
34.6M
    x[54] = (ur >> 1) - vr;
1380
34.6M
    x[55] = (ui >> 1) - vi;
1381
1382
34.6M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
1383
34.6M
    ur = x[46];
1384
34.6M
    ui = x[47];
1385
1386
34.6M
    x[46] = (ur >> 1) + vr;
1387
34.6M
    x[47] = (ui >> 1) - vi;
1388
34.6M
    x[62] = (ur >> 1) - vr;
1389
34.6M
    x[63] = (ui >> 1) + vi;
1390
1391
34.6M
    vr = x[32];
1392
34.6M
    vi = x[33];
1393
34.6M
    ur = x[0] >> 1;
1394
34.6M
    ui = x[1] >> 1;
1395
34.6M
    x[0] = ur + (vr >> 1);
1396
34.6M
    x[1] = ui + (vi >> 1);
1397
34.6M
    x[32] = ur - (vr >> 1);
1398
34.6M
    x[33] = ui - (vi >> 1);
1399
1400
34.6M
    vi = x[48];
1401
34.6M
    vr = x[49];
1402
34.6M
    ur = x[16] >> 1;
1403
34.6M
    ui = x[17] >> 1;
1404
34.6M
    x[16] = ur + (vr >> 1);
1405
34.6M
    x[17] = ui - (vi >> 1);
1406
34.6M
    x[48] = ur - (vr >> 1);
1407
34.6M
    x[49] = ui + (vi >> 1);
1408
1409
34.6M
    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
1410
34.6M
    ur = x[2];
1411
34.6M
    ui = x[3];
1412
34.6M
    x[2] = (ur >> 1) + vr;
1413
34.6M
    x[3] = (ui >> 1) + vi;
1414
34.6M
    x[34] = (ur >> 1) - vr;
1415
34.6M
    x[35] = (ui >> 1) - vi;
1416
1417
34.6M
    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
1418
34.6M
    ur = x[18];
1419
34.6M
    ui = x[19];
1420
34.6M
    x[18] = (ur >> 1) + vr;
1421
34.6M
    x[19] = (ui >> 1) - vi;
1422
34.6M
    x[50] = (ur >> 1) - vr;
1423
34.6M
    x[51] = (ui >> 1) + vi;
1424
1425
34.6M
    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
1426
34.6M
    ur = x[4];
1427
34.6M
    ui = x[5];
1428
34.6M
    x[4] = (ur >> 1) + vr;
1429
34.6M
    x[5] = (ui >> 1) + vi;
1430
34.6M
    x[36] = (ur >> 1) - vr;
1431
34.6M
    x[37] = (ui >> 1) - vi;
1432
1433
34.6M
    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
1434
34.6M
    ur = x[20];
1435
34.6M
    ui = x[21];
1436
34.6M
    x[20] = (ur >> 1) + vr;
1437
34.6M
    x[21] = (ui >> 1) - vi;
1438
34.6M
    x[52] = (ur >> 1) - vr;
1439
34.6M
    x[53] = (ui >> 1) + vi;
1440
1441
34.6M
    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
1442
34.6M
    ur = x[6];
1443
34.6M
    ui = x[7];
1444
34.6M
    x[6] = (ur >> 1) + vr;
1445
34.6M
    x[7] = (ui >> 1) + vi;
1446
34.6M
    x[38] = (ur >> 1) - vr;
1447
34.6M
    x[39] = (ui >> 1) - vi;
1448
1449
34.6M
    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
1450
34.6M
    ur = x[22];
1451
34.6M
    ui = x[23];
1452
34.6M
    x[22] = (ur >> 1) + vr;
1453
34.6M
    x[23] = (ui >> 1) - vi;
1454
34.6M
    x[54] = (ur >> 1) - vr;
1455
34.6M
    x[55] = (ui >> 1) + vi;
1456
1457
34.6M
    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
1458
34.6M
    ur = x[8];
1459
34.6M
    ui = x[9];
1460
34.6M
    x[8] = (ur >> 1) + vr;
1461
34.6M
    x[9] = (ui >> 1) + vi;
1462
34.6M
    x[40] = (ur >> 1) - vr;
1463
34.6M
    x[41] = (ui >> 1) - vi;
1464
1465
34.6M
    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
1466
34.6M
    ur = x[24];
1467
34.6M
    ui = x[25];
1468
34.6M
    x[24] = (ur >> 1) + vr;
1469
34.6M
    x[25] = (ui >> 1) - vi;
1470
34.6M
    x[56] = (ur >> 1) - vr;
1471
34.6M
    x[57] = (ui >> 1) + vi;
1472
1473
34.6M
    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
1474
34.6M
    ur = x[10];
1475
34.6M
    ui = x[11];
1476
1477
34.6M
    x[10] = (ur >> 1) + vr;
1478
34.6M
    x[11] = (ui >> 1) + vi;
1479
34.6M
    x[42] = (ur >> 1) - vr;
1480
34.6M
    x[43] = (ui >> 1) - vi;
1481
1482
34.6M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
1483
34.6M
    ur = x[26];
1484
34.6M
    ui = x[27];
1485
34.6M
    x[26] = (ur >> 1) + vr;
1486
34.6M
    x[27] = (ui >> 1) - vi;
1487
34.6M
    x[58] = (ur >> 1) - vr;
1488
34.6M
    x[59] = (ui >> 1) + vi;
1489
1490
34.6M
    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1491
34.6M
    ur = x[12];
1492
34.6M
    ui = x[13];
1493
34.6M
    x[12] = (ur >> 1) + vr;
1494
34.6M
    x[13] = (ui >> 1) + vi;
1495
34.6M
    x[44] = (ur >> 1) - vr;
1496
34.6M
    x[45] = (ui >> 1) - vi;
1497
1498
34.6M
    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1499
34.6M
    ur = x[28];
1500
34.6M
    ui = x[29];
1501
34.6M
    x[28] = (ur >> 1) + vr;
1502
34.6M
    x[29] = (ui >> 1) - vi;
1503
34.6M
    x[60] = (ur >> 1) - vr;
1504
34.6M
    x[61] = (ui >> 1) + vi;
1505
1506
34.6M
    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1507
34.6M
    ur = x[14];
1508
34.6M
    ui = x[15];
1509
34.6M
    x[14] = (ur >> 1) + vr;
1510
34.6M
    x[15] = (ui >> 1) + vi;
1511
34.6M
    x[46] = (ur >> 1) - vr;
1512
34.6M
    x[47] = (ui >> 1) - vi;
1513
1514
34.6M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1515
34.6M
    ur = x[30];
1516
34.6M
    ui = x[31];
1517
34.6M
    x[30] = (ur >> 1) + vr;
1518
34.6M
    x[31] = (ui >> 1) - vi;
1519
34.6M
    x[62] = (ur >> 1) - vr;
1520
34.6M
    x[63] = (ui >> 1) + vi;
1521
34.6M
  }
1522
34.6M
}
1523
#endif /* #ifndef FUNCTION_fft_32 */
1524
1525
/**
1526
 * \brief Apply rotation vectors to a data buffer.
1527
 * \param cl length of each row of input data.
1528
 * \param l total length of input data.
1529
 * \param pVecRe real part of rotation coefficient vector.
1530
 * \param pVecIm imaginary part of rotation coefficient vector.
1531
 */
1532
1533
/*
1534
   This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535
   (-1.0) instead. At the end, the sign of the result is inverted
1536
*/
1537
#define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
1540
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541
                                        const int l, const FIXP_STB *pVecRe,
1542
3.93M
                                        const FIXP_STB *pVecIm) {
1543
3.93M
  FIXP_DBL re, im;
1544
3.93M
  FIXP_STB vre, vim;
1545
1546
3.93M
  int i, c;
1547
1548
17.1M
  for (i = 0; i < cl; i++) {
1549
13.2M
    re = pData[2 * i];
1550
13.2M
    im = pData[2 * i + 1];
1551
1552
13.2M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1553
13.2M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554
13.2M
  }
1555
25.5M
  for (; i < l; i += cl) {
1556
21.6M
    re = pData[2 * i];
1557
21.6M
    im = pData[2 * i + 1];
1558
1559
21.6M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1560
21.6M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562
128M
    for (c = i + 1; c < i + cl; c++) {
1563
106M
      re = pData[2 * c] >> 1;
1564
106M
      im = pData[2 * c + 1] >> 1;
1565
106M
      vre = *pVecRe++;
1566
106M
      vim = *pVecIm++;
1567
1568
106M
      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569
106M
    }
1570
21.6M
  }
1571
3.93M
}
1572
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574
/* select either switch case of function pointer. */
1575
//#define FFT_TWO_STAGE_SWITCH_CASE
1576
#ifndef FUNCTION_fftN2_func
1577
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578
                              const int dim1, const int dim2,
1579
                              void (*const fft1)(FIXP_DBL *),
1580
                              void (*const fft2)(FIXP_DBL *),
1581
                              const FIXP_STB *RotVectorReal,
1582
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583
3.93M
                              FIXP_DBL *aDst2) {
1584
  /* The real part of the input samples are at the addresses with even indices
1585
  and the imaginary part of the input samples are at the addresses with odd
1586
  indices. The output samples are stored at the address of pInput
1587
  */
1588
3.93M
  FIXP_DBL *pSrc, *pDst, *pDstOut;
1589
3.93M
  int i;
1590
1591
3.93M
  FDK_ASSERT(length == dim1 * dim2);
1592
1593
  /* Perform dim2 times the fft of length dim1. The input samples are at the
1594
  address of pSrc and the output samples are at the address of pDst. The input
1595
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596
  the output samples are stored consecutively.
1597
  */
1598
3.93M
  pSrc = pInput;
1599
3.93M
  pDst = aDst;
1600
29.5M
  for (i = 0; i < dim2; i++) {
1601
167M
    for (int j = 0; j < dim1; j++) {
1602
141M
      pDst[2 * j] = pSrc[2 * j * dim2];
1603
141M
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604
141M
    }
1605
1606
      /* fft of size dim1 */
1607
25.5M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1608
25.5M
    fft1(pDst);
1609
#else
1610
    switch (dim1) {
1611
      case 2:
1612
        fft2(pDst);
1613
        break;
1614
      case 3:
1615
        fft3(pDst);
1616
        break;
1617
      case 4:
1618
        fft_4(pDst);
1619
        break;
1620
      /* case 5: fft5(pDst); break; */
1621
      /* case 8: fft_8(pDst); break; */
1622
      case 12:
1623
        fft12(pDst);
1624
        break;
1625
      /* case 15: fft15(pDst); break; */
1626
      case 16:
1627
        fft_16(pDst);
1628
        break;
1629
      case 32:
1630
        fft_32(pDst);
1631
        break;
1632
        /*case 64: fft_64(pDst); break;*/
1633
        /* case 128: fft_128(pDst); break; */
1634
    }
1635
#endif
1636
25.5M
    pSrc += 2;
1637
25.5M
    pDst = pDst + 2 * dim1;
1638
25.5M
  }
1639
1640
  /* Perform the modulation of the output of the fft of length dim1 */
1641
3.93M
  pSrc = aDst;
1642
3.93M
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644
  /* Perform dim1 times the fft of length dim2. The input samples are at the
1645
  address of aDst and the output samples are at the address of pInput. The input
1646
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647
  the output samples are stored consecutively at the address of pInput.
1648
  */
1649
3.93M
  pSrc = aDst;
1650
3.93M
  pDst = aDst2;
1651
3.93M
  pDstOut = pInput;
1652
17.1M
  for (i = 0; i < dim1; i++) {
1653
154M
    for (int j = 0; j < dim2; j++) {
1654
141M
      pDst[2 * j] = pSrc[2 * j * dim1];
1655
141M
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656
141M
    }
1657
1658
13.2M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1659
13.2M
    fft2(pDst);
1660
#else
1661
    switch (dim2) {
1662
      case 4:
1663
        fft_4(pDst);
1664
        break;
1665
      case 9:
1666
        fft9(pDst);
1667
        break;
1668
      case 12:
1669
        fft12(pDst);
1670
        break;
1671
      case 15:
1672
        fft15(pDst);
1673
        break;
1674
      case 16:
1675
        fft_16(pDst);
1676
        break;
1677
      case 32:
1678
        fft_32(pDst);
1679
        break;
1680
    }
1681
#endif
1682
1683
154M
    for (int j = 0; j < dim2; j++) {
1684
141M
      pDstOut[2 * j * dim1] = pDst[2 * j];
1685
141M
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686
141M
    }
1687
13.2M
    pSrc += 2;
1688
13.2M
    pDstOut += 2;
1689
13.2M
  }
1690
3.93M
}
1691
#endif /* FUNCTION_fftN2_function */
1692
1693
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
1694
              RotVectorReal, RotVectorImag)                                \
1695
3.93M
  {                                                                        \
1696
3.93M
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
1697
3.93M
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
1698
3.93M
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
1699
3.93M
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
1700
3.93M
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
1701
3.93M
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
1702
3.93M
  }
1703
1704
  /*!
1705
   *
1706
   *  \brief  complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707
   *  \param  pInput contains the input signal prescaled right by 2
1708
   *          pInput contains the output signal scaled by SCALEFACTOR<#length>
1709
   *          The output signal does not have any fixed headroom
1710
   *  \return void
1711
   *
1712
   */
1713
1714
#ifndef FUNCTION_fft6
1715
2.80M
static inline void fft6(FIXP_DBL *pInput) {
1716
2.80M
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717
2.80M
}
1718
#endif /* #ifndef FUNCTION_fft6 */
1719
1720
#ifndef FUNCTION_fft12
1721
static inline void fft12(FIXP_DBL *pInput) {
1722
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723
        RotVectorImag12); /* 16,58 */
1724
}
1725
#endif /* #ifndef FUNCTION_fft12 */
1726
1727
#ifndef FUNCTION_fft20
1728
132k
static inline void fft20(FIXP_DBL *pInput) {
1729
132k
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730
132k
        RotVectorImag20);
1731
132k
}
1732
#endif /* FUNCTION_fft20 */
1733
1734
7.77k
static inline void fft24(FIXP_DBL *pInput) {
1735
7.77k
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736
7.77k
        RotVectorImag24); /* 16,73 */
1737
7.77k
}
1738
1739
379k
static inline void fft48(FIXP_DBL *pInput) {
1740
379k
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741
379k
        RotVectorImag48); /* 16,32 */
1742
379k
}
1743
1744
#ifndef FUNCTION_fft60
1745
258k
static inline void fft60(FIXP_DBL *pInput) {
1746
258k
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747
258k
        RotVectorImag60); /* 15,51 */
1748
258k
}
1749
#endif /* FUNCTION_fft60 */
1750
1751
#ifndef FUNCTION_fft80
1752
250
static inline void fft80(FIXP_DBL *pInput) {
1753
250
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754
250
        RotVectorImag80); /*  */
1755
250
}
1756
#endif
1757
1758
#ifndef FUNCTION_fft96
1759
75.4k
static inline void fft96(FIXP_DBL *pInput) {
1760
75.4k
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761
75.4k
        RotVectorImag96); /* 15,47 */
1762
75.4k
}
1763
#endif /* FUNCTION_fft96*/
1764
1765
#ifndef FUNCTION_fft120
1766
3.57k
static inline void fft120(FIXP_DBL *pInput) {
1767
3.57k
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768
3.57k
        RotVectorImag120);
1769
3.57k
}
1770
#endif /* FUNCTION_fft120 */
1771
1772
#ifndef FUNCTION_fft192
1773
10.1k
static inline void fft192(FIXP_DBL *pInput) {
1774
10.1k
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775
10.1k
        RotVectorImag192); /* 15,50 */
1776
10.1k
}
1777
#endif
1778
1779
#ifndef FUNCTION_fft240
1780
159k
static inline void fft240(FIXP_DBL *pInput) {
1781
159k
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782
159k
        RotVectorImag240); /* 15.44 */
1783
159k
}
1784
#endif
1785
1786
#ifndef FUNCTION_fft384
1787
86.1k
static inline void fft384(FIXP_DBL *pInput) {
1788
86.1k
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789
86.1k
        RotVectorImag384); /* 16.02 */
1790
86.1k
}
1791
#endif /* FUNCTION_fft384 */
1792
1793
#ifndef FUNCTION_fft480
1794
15.4k
static inline void fft480(FIXP_DBL *pInput) {
1795
15.4k
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796
15.4k
        RotVectorImag480); /* 15.84 */
1797
15.4k
}
1798
#endif /* FUNCTION_fft480 */
1799
1800
74.4M
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802
74.4M
  C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804
74.4M
  if (length == 32) {
1805
33.1M
    fft_32(pInput);
1806
33.1M
    *pScalefactor += SCALEFACTOR32;
1807
41.3M
  } else {
1808
41.3M
    switch (length) {
1809
17.9M
      case 16:
1810
17.9M
        fft_16(pInput);
1811
17.9M
        *pScalefactor += SCALEFACTOR16;
1812
17.9M
        break;
1813
6.60M
      case 8:
1814
6.60M
        fft_8(pInput);
1815
6.60M
        *pScalefactor += SCALEFACTOR8;
1816
6.60M
        break;
1817
0
      case 2:
1818
0
        fft2(pInput);
1819
0
        *pScalefactor += SCALEFACTOR2;
1820
0
        break;
1821
0
      case 3:
1822
0
        fft3(pInput);
1823
0
        *pScalefactor += SCALEFACTOR3;
1824
0
        break;
1825
741k
      case 4:
1826
741k
        fft_4(pInput);
1827
741k
        *pScalefactor += SCALEFACTOR4;
1828
741k
        break;
1829
0
      case 5:
1830
0
        fft5(pInput);
1831
0
        *pScalefactor += SCALEFACTOR5;
1832
0
        break;
1833
2.80M
      case 6:
1834
2.80M
        fft6(pInput);
1835
2.80M
        *pScalefactor += SCALEFACTOR6;
1836
2.80M
        break;
1837
132k
      case 10:
1838
132k
        fft10(pInput);
1839
132k
        *pScalefactor += SCALEFACTOR10;
1840
132k
        break;
1841
11.2M
      case 12:
1842
11.2M
        fft12(pInput);
1843
11.2M
        *pScalefactor += SCALEFACTOR12;
1844
11.2M
        break;
1845
0
      case 15:
1846
0
        fft15(pInput);
1847
0
        *pScalefactor += SCALEFACTOR15;
1848
0
        break;
1849
132k
      case 20:
1850
132k
        fft20(pInput);
1851
132k
        *pScalefactor += SCALEFACTOR20;
1852
132k
        break;
1853
7.77k
      case 24:
1854
7.77k
        fft24(pInput);
1855
7.77k
        *pScalefactor += SCALEFACTOR24;
1856
7.77k
        break;
1857
379k
      case 48:
1858
379k
        fft48(pInput);
1859
379k
        *pScalefactor += SCALEFACTOR48;
1860
379k
        break;
1861
258k
      case 60:
1862
258k
        fft60(pInput);
1863
258k
        *pScalefactor += SCALEFACTOR60;
1864
258k
        break;
1865
421k
      case 64:
1866
421k
        dit_fft(pInput, 6, SineTable512, 512);
1867
421k
        *pScalefactor += SCALEFACTOR64;
1868
421k
        break;
1869
250
      case 80:
1870
250
        fft80(pInput);
1871
250
        *pScalefactor += SCALEFACTOR80;
1872
250
        break;
1873
75.4k
      case 96:
1874
75.4k
        fft96(pInput);
1875
75.4k
        *pScalefactor += SCALEFACTOR96;
1876
75.4k
        break;
1877
3.57k
      case 120:
1878
3.57k
        fft120(pInput);
1879
3.57k
        *pScalefactor += SCALEFACTOR120;
1880
3.57k
        break;
1881
67.9k
      case 128:
1882
67.9k
        dit_fft(pInput, 7, SineTable512, 512);
1883
67.9k
        *pScalefactor += SCALEFACTOR128;
1884
67.9k
        break;
1885
10.1k
      case 192:
1886
10.1k
        fft192(pInput);
1887
10.1k
        *pScalefactor += SCALEFACTOR192;
1888
10.1k
        break;
1889
159k
      case 240:
1890
159k
        fft240(pInput);
1891
159k
        *pScalefactor += SCALEFACTOR240;
1892
159k
        break;
1893
76.3k
      case 256:
1894
76.3k
        dit_fft(pInput, 8, SineTable512, 512);
1895
76.3k
        *pScalefactor += SCALEFACTOR256;
1896
76.3k
        break;
1897
86.1k
      case 384:
1898
86.1k
        fft384(pInput);
1899
86.1k
        *pScalefactor += SCALEFACTOR384;
1900
86.1k
        break;
1901
15.4k
      case 480:
1902
15.4k
        fft480(pInput);
1903
15.4k
        *pScalefactor += SCALEFACTOR480;
1904
15.4k
        break;
1905
157k
      case 512:
1906
157k
        dit_fft(pInput, 9, SineTable512, 512);
1907
157k
        *pScalefactor += SCALEFACTOR512;
1908
157k
        break;
1909
0
      default:
1910
0
        FDK_ASSERT(0); /* FFT length not supported! */
1911
0
        break;
1912
41.3M
    }
1913
41.3M
  }
1914
74.4M
}
1915
1916
0
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917
0
  switch (length) {
1918
0
    default:
1919
0
      FDK_ASSERT(0); /* IFFT length not supported! */
1920
0
      break;
1921
0
  }
1922
0
}