Coverage Report

Created: 2025-11-24 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aac/libFDK/src/fft.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4
© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5
Forschung e.V. All rights reserved.
6
7
 1.    INTRODUCTION
8
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10
scheme for digital audio. This FDK AAC Codec software is intended to be used on
11
a wide variety of Android devices.
12
13
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14
general perceptual audio codecs. AAC-ELD is considered the best-performing
15
full-bandwidth communications codec by independent studies and is widely
16
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17
specifications.
18
19
Patent licenses for necessary patent claims for the FDK AAC Codec (including
20
those of Fraunhofer) may be obtained through Via Licensing
21
(www.vialicensing.com) or through the respective patent owners individually for
22
the purpose of encoding or decoding bit streams in products that are compliant
23
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24
Android devices already license these patent claims through Via Licensing or
25
directly from the patent owners, and therefore FDK AAC Codec software may
26
already be covered under those patent licenses when it is used for those
27
licensed purposes only.
28
29
Commercially-licensed AAC software libraries, including floating-point versions
30
with enhanced sound quality, are also available from Fraunhofer. Users are
31
encouraged to check the Fraunhofer website for additional applications
32
information and documentation.
33
34
2.    COPYRIGHT LICENSE
35
36
Redistribution and use in source and binary forms, with or without modification,
37
are permitted without payment of copyright license fees provided that you
38
satisfy the following conditions:
39
40
You must retain the complete text of this software license in redistributions of
41
the FDK AAC Codec or your modifications thereto in source code form.
42
43
You must retain the complete text of this software license in the documentation
44
and/or other materials provided with redistributions of the FDK AAC Codec or
45
your modifications thereto in binary form. You must make available free of
46
charge copies of the complete source code of the FDK AAC Codec and your
47
modifications thereto to recipients of copies in binary form.
48
49
The name of Fraunhofer may not be used to endorse or promote products derived
50
from this library without prior written permission.
51
52
You may not charge copyright license fees for anyone to use, copy or distribute
53
the FDK AAC Codec software or your modifications thereto.
54
55
Your modified versions of the FDK AAC Codec must carry prominent notices stating
56
that you changed the software and the date of any change. For modified versions
57
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59
AAC Codec Library for Android."
60
61
3.    NO PATENT LICENSE
62
63
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65
Fraunhofer provides no warranty of patent non-infringement with respect to this
66
software.
67
68
You may use this FDK AAC Codec software or modifications thereto only for
69
purposes that are authorized by appropriate patent licenses.
70
71
4.    DISCLAIMER
72
73
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75
including but not limited to the implied warranties of merchantability and
76
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78
or consequential damages, including but not limited to procurement of substitute
79
goods or services; loss of use, data, or profits, or business interruption,
80
however caused and on any theory of liability, whether in contract, strict
81
liability, or tort (including negligence), arising in any way out of the use of
82
this software, even if advised of the possibility of such damage.
83
84
5.    CONTACT INFORMATION
85
86
Fraunhofer Institute for Integrated Circuits IIS
87
Attention: Audio and Multimedia Departments - FDK AAC LL
88
Am Wolfsmantel 33
89
91058 Erlangen, Germany
90
91
www.iis.fraunhofer.de/amm
92
amm-info@iis.fraunhofer.de
93
----------------------------------------------------------------------------- */
94
95
/******************* Library for basic calculation routines ********************
96
97
   Author(s):   Josef Hoepfl, DSP Solutions
98
99
   Description: Fix point FFT
100
101
*******************************************************************************/
102
103
#include "fft_rad2.h"
104
#include "FDK_tools_rom.h"
105
106
262M
#define W_PiFOURTH STC(0x5a82799a)
107
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
108
#ifndef SUMDIFF_PIFOURTH
109
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
110
652M
  {                                       \
111
652M
    FIXP_DBL wa, wb;                      \
112
652M
    wa = fMultDiv2(a, W_PiFOURTH);        \
113
652M
    wb = fMultDiv2(b, W_PiFOURTH);        \
114
652M
    diff = wb - wa;                       \
115
652M
    sum = wb + wa;                        \
116
652M
  }
117
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
118
  {                                               \
119
    FIXP_SGL wa, wb;                              \
120
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
121
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
122
    diff = wb - wa;                               \
123
    sum = wb + wa;                                \
124
  }
125
#endif
126
127
#define SCALEFACTOR2048 10
128
#define SCALEFACTOR1024 9
129
167k
#define SCALEFACTOR512 8
130
78.5k
#define SCALEFACTOR256 7
131
75.4k
#define SCALEFACTOR128 6
132
405k
#define SCALEFACTOR64 5
133
35.7M
#define SCALEFACTOR32 4
134
19.0M
#define SCALEFACTOR16 3
135
7.88M
#define SCALEFACTOR8 2
136
1.80M
#define SCALEFACTOR4 1
137
3.11M
#define SCALEFACTOR2 1
138
139
3.17M
#define SCALEFACTOR3 1
140
225k
#define SCALEFACTOR5 1
141
3.10M
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
142
#define SCALEFACTOR7 2
143
#define SCALEFACTOR9 2
144
4.72M
#define SCALEFACTOR10 5
145
13.3M
#define SCALEFACTOR12 3
146
430k
#define SCALEFACTOR15 3
147
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
148
225k
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
149
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
150
9.74k
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
151
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
152
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
153
448k
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
154
217k
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
155
251
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
156
76.5k
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
157
3.94k
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
158
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
159
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
160
10.5k
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
161
193k
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
162
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
163
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
164
98.2k
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
165
15.8k
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167
#include "fft.h"
168
169
#ifndef FUNCTION_fft2
170
171
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172
 * with factor 0.5 */
173
9.42M
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174
9.42M
  FIXP_DBL r1, i1;
175
9.42M
  FIXP_DBL r2, i2;
176
177
  /* real part */
178
9.42M
  r1 = pDat[2];
179
9.42M
  r2 = pDat[0];
180
181
  /* imaginary part */
182
9.42M
  i1 = pDat[3];
183
9.42M
  i2 = pDat[1];
184
185
  /* real part */
186
9.42M
  pDat[0] = (r2 + r1) >> 1;
187
9.42M
  pDat[2] = (r2 - r1) >> 1;
188
189
  /* imaginary part */
190
9.42M
  pDat[1] = (i2 + i1) >> 1;
191
9.42M
  pDat[3] = (i2 - i1) >> 1;
192
9.42M
}
193
#endif /* FUNCTION_fft2 */
194
195
17.3M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
196
197
#ifndef FUNCTION_fft3
198
/* Performs the FFT of length 3 according to the algorithm after winograd. */
199
8.65M
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200
8.65M
  FIXP_DBL r1, r2;
201
8.65M
  FIXP_DBL s1, s2;
202
8.65M
  FIXP_DBL pD;
203
204
  /* real part */
205
8.65M
  r1 = pDat[2] + pDat[4];
206
8.65M
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207
8.65M
  pD = pDat[0] >> 1;
208
8.65M
  pDat[0] = pD + (r1 >> 1);
209
8.65M
  r1 = pD - (r1 >> 2);
210
211
  /* imaginary part */
212
8.65M
  s1 = pDat[3] + pDat[5];
213
8.65M
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214
8.65M
  pD = pDat[1] >> 1;
215
8.65M
  pDat[1] = pD + (s1 >> 1);
216
8.65M
  s1 = pD - (s1 >> 2);
217
218
  /* combination */
219
8.65M
  pDat[2] = r1 - s2;
220
8.65M
  pDat[4] = r1 + s2;
221
8.65M
  pDat[3] = s1 + r2;
222
8.65M
  pDat[5] = s1 - r2;
223
8.65M
}
224
#endif /* #ifndef FUNCTION_fft3 */
225
226
144M
#define F5C(x) STC(x)
227
228
28.8M
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
229
28.8M
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
230
28.8M
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
231
28.8M
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
232
28.8M
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
233
234
/* performs the FFT of length 5 according to the algorithm after winograd */
235
/* This version works with a prescale of 2 instead of 3 */
236
14.4M
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237
14.4M
  FIXP_DBL r1, r2, r3, r4;
238
14.4M
  FIXP_DBL s1, s2, s3, s4;
239
14.4M
  FIXP_DBL t;
240
241
  /* real part */
242
14.4M
  r1 = (pDat[2] + pDat[8]) >> 1;
243
14.4M
  r4 = (pDat[2] - pDat[8]) >> 1;
244
14.4M
  r3 = (pDat[4] + pDat[6]) >> 1;
245
14.4M
  r2 = (pDat[4] - pDat[6]) >> 1;
246
14.4M
  t = fMult((r1 - r3), C54);
247
14.4M
  r1 = r1 + r3;
248
14.4M
  pDat[0] = (pDat[0] >> 1) + r1;
249
  /* Bit shift left because of the constant C55 which was scaled with the factor
250
     0.5 because of the representation of the values as fracts */
251
14.4M
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252
14.4M
  r3 = r1 - t;
253
14.4M
  r1 = r1 + t;
254
14.4M
  t = fMult((r4 + r2), C51);
255
  /* Bit shift left because of the constant C55 which was scaled with the factor
256
     0.5 because of the representation of the values as fracts */
257
14.4M
  r4 = t + (fMultDiv2(r4, C52) << (2));
258
14.4M
  r2 = t + fMult(r2, C53);
259
260
  /* imaginary part */
261
14.4M
  s1 = (pDat[3] + pDat[9]) >> 1;
262
14.4M
  s4 = (pDat[3] - pDat[9]) >> 1;
263
14.4M
  s3 = (pDat[5] + pDat[7]) >> 1;
264
14.4M
  s2 = (pDat[5] - pDat[7]) >> 1;
265
14.4M
  t = fMult((s1 - s3), C54);
266
14.4M
  s1 = s1 + s3;
267
14.4M
  pDat[1] = (pDat[1] >> 1) + s1;
268
  /* Bit shift left because of the constant C55 which was scaled with the factor
269
     0.5 because of the representation of the values as fracts */
270
14.4M
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271
14.4M
  s3 = s1 - t;
272
14.4M
  s1 = s1 + t;
273
14.4M
  t = fMult((s4 + s2), C51);
274
  /* Bit shift left because of the constant C55 which was scaled with the factor
275
     0.5 because of the representation of the values as fracts */
276
14.4M
  s4 = t + (fMultDiv2(s4, C52) << (2));
277
14.4M
  s2 = t + fMult(s2, C53);
278
279
  /* combination */
280
14.4M
  pDat[2] = r1 + s2;
281
14.4M
  pDat[8] = r1 - s2;
282
14.4M
  pDat[4] = r3 - s4;
283
14.4M
  pDat[6] = r3 + s4;
284
285
14.4M
  pDat[3] = s1 - r2;
286
14.4M
  pDat[9] = s1 + r2;
287
14.4M
  pDat[5] = s3 + r4;
288
14.4M
  pDat[7] = s3 - r4;
289
14.4M
}
290
291
4.50M
#define F5C(x) STC(x)
292
293
900k
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
294
900k
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
295
900k
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
296
900k
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
297
900k
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
298
/**
299
 * \brief    Function performs a complex 10-point FFT
300
 *           The FFT is performed inplace. The result of the FFT
301
 *           is scaled by SCALEFACTOR10 bits.
302
 *
303
 *           WOPS FLC version:                    1093 cycles
304
 *           WOPS with 32x16 bit multiplications:  196 cycles
305
 *
306
 * \param    [i/o] re    real input / output
307
 * \param    [i/o] im    imag input / output
308
 * \param    [i  ] s     stride real and imag input / output
309
 *
310
 * \return   void
311
 */
312
static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313
225k
{
314
225k
  FIXP_DBL t;
315
225k
  FIXP_DBL x0, x1, x2, x3, x4;
316
225k
  FIXP_DBL r1, r2, r3, r4;
317
225k
  FIXP_DBL s1, s2, s3, s4;
318
225k
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319
225k
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321
225k
  const int s = 1;  // stride factor
322
323
  /* 2 fft5 stages */
324
325
  /* real part */
326
225k
  x0 = (x[s * 0] >> SCALEFACTOR10);
327
225k
  x1 = (x[s * 4] >> SCALEFACTOR10);
328
225k
  x2 = (x[s * 8] >> SCALEFACTOR10);
329
225k
  x3 = (x[s * 12] >> SCALEFACTOR10);
330
225k
  x4 = (x[s * 16] >> SCALEFACTOR10);
331
332
225k
  r1 = (x3 + x2);
333
225k
  r4 = (x3 - x2);
334
225k
  r3 = (x1 + x4);
335
225k
  r2 = (x1 - x4);
336
225k
  t = fMult((r1 - r3), C54);
337
225k
  r1 = (r1 + r3);
338
225k
  y00 = (x0 + r1);
339
225k
  r1 = (y00 + ((fMult(r1, C55) << 1)));
340
225k
  r3 = (r1 - t);
341
225k
  r1 = (r1 + t);
342
225k
  t = fMult((r4 + r2), C51);
343
225k
  r4 = (t + (fMult(r4, C52) << 1));
344
225k
  r2 = (t + fMult(r2, C53));
345
346
  /* imaginary part */
347
225k
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348
225k
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349
225k
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350
225k
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351
225k
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353
225k
  s1 = (x3 + x2);
354
225k
  s4 = (x3 - x2);
355
225k
  s3 = (x1 + x4);
356
225k
  s2 = (x1 - x4);
357
225k
  t = fMult((s1 - s3), C54);
358
225k
  s1 = (s1 + s3);
359
225k
  y01 = (x0 + s1);
360
225k
  s1 = (y01 + (fMult(s1, C55) << 1));
361
225k
  s3 = (s1 - t);
362
225k
  s1 = (s1 + t);
363
225k
  t = fMult((s4 + s2), C51);
364
225k
  s4 = (t + (fMult(s4, C52) << 1));
365
225k
  s2 = (t + fMult(s2, C53));
366
367
  /* combination */
368
225k
  y04 = (r1 + s2);
369
225k
  y16 = (r1 - s2);
370
225k
  y08 = (r3 - s4);
371
225k
  y12 = (r3 + s4);
372
373
225k
  y05 = (s1 - r2);
374
225k
  y17 = (s1 + r2);
375
225k
  y09 = (s3 + r4);
376
225k
  y13 = (s3 - r4);
377
378
  /* real part */
379
225k
  x0 = (x[s * 10] >> SCALEFACTOR10);
380
225k
  x1 = (x[s * 2] >> SCALEFACTOR10);
381
225k
  x2 = (x[s * 6] >> SCALEFACTOR10);
382
225k
  x3 = (x[s * 14] >> SCALEFACTOR10);
383
225k
  x4 = (x[s * 18] >> SCALEFACTOR10);
384
385
225k
  r1 = (x1 + x4);
386
225k
  r4 = (x1 - x4);
387
225k
  r3 = (x3 + x2);
388
225k
  r2 = (x3 - x2);
389
225k
  t = fMult((r1 - r3), C54);
390
225k
  r1 = (r1 + r3);
391
225k
  y02 = (x0 + r1);
392
225k
  r1 = (y02 + ((fMult(r1, C55) << 1)));
393
225k
  r3 = (r1 - t);
394
225k
  r1 = (r1 + t);
395
225k
  t = fMult(((r4 + r2)), C51);
396
225k
  r4 = (t + (fMult(r4, C52) << 1));
397
225k
  r2 = (t + fMult(r2, C53));
398
399
  /* imaginary part */
400
225k
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401
225k
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402
225k
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403
225k
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404
225k
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406
225k
  s1 = (x1 + x4);
407
225k
  s4 = (x1 - x4);
408
225k
  s3 = (x3 + x2);
409
225k
  s2 = (x3 - x2);
410
225k
  t = fMult((s1 - s3), C54);
411
225k
  s1 = (s1 + s3);
412
225k
  y03 = (x0 + s1);
413
225k
  s1 = (y03 + (fMult(s1, C55) << 1));
414
225k
  s3 = (s1 - t);
415
225k
  s1 = (s1 + t);
416
225k
  t = fMult((s4 + s2), C51);
417
225k
  s4 = (t + (fMult(s4, C52) << 1));
418
225k
  s2 = (t + fMult(s2, C53));
419
420
  /* combination */
421
225k
  y06 = (r1 + s2);
422
225k
  y18 = (r1 - s2);
423
225k
  y10 = (r3 - s4);
424
225k
  y14 = (r3 + s4);
425
426
225k
  y07 = (s1 - r2);
427
225k
  y19 = (s1 + r2);
428
225k
  y11 = (s3 + r4);
429
225k
  y15 = (s3 - r4);
430
431
  /* 5 fft2 stages */
432
225k
  x[s * 0] = (y00 + y02);
433
225k
  x[s * 0 + 1] = (y01 + y03);
434
225k
  x[s * 10] = (y00 - y02);
435
225k
  x[s * 10 + 1] = (y01 - y03);
436
437
225k
  x[s * 4] = (y04 + y06);
438
225k
  x[s * 4 + 1] = (y05 + y07);
439
225k
  x[s * 14] = (y04 - y06);
440
225k
  x[s * 14 + 1] = (y05 - y07);
441
442
225k
  x[s * 8] = (y08 + y10);
443
225k
  x[s * 8 + 1] = (y09 + y11);
444
225k
  x[s * 18] = (y08 - y10);
445
225k
  x[s * 18 + 1] = (y09 - y11);
446
447
225k
  x[s * 12] = (y12 + y14);
448
225k
  x[s * 12 + 1] = (y13 + y15);
449
225k
  x[s * 2] = (y12 - y14);
450
225k
  x[s * 2 + 1] = (y13 - y15);
451
452
225k
  x[s * 16] = (y16 + y18);
453
225k
  x[s * 16 + 1] = (y17 + y19);
454
225k
  x[s * 6] = (y16 - y18);
455
225k
  x[s * 6 + 1] = (y17 - y19);
456
225k
}
457
458
#ifndef FUNCTION_fft12
459
#define FUNCTION_fft12
460
461
#undef C31
462
188M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
463
464
17.8M
static inline void fft12(FIXP_DBL *pInput) {
465
17.8M
  FIXP_DBL aDst[24];
466
17.8M
  FIXP_DBL *pSrc, *pDst;
467
17.8M
  int i;
468
469
17.8M
  pSrc = pInput;
470
17.8M
  pDst = aDst;
471
17.8M
  FIXP_DBL r1, r2, s1, s2, pD;
472
473
  /* First 3*2 samples are shifted right by 2 before output */
474
17.8M
  r1 = pSrc[8] + pSrc[16];
475
17.8M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
476
17.8M
  pD = pSrc[0] >> 1;
477
17.8M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
478
17.8M
  r1 = pD - (r1 >> 2);
479
480
  /* imaginary part */
481
17.8M
  s1 = pSrc[9] + pSrc[17];
482
17.8M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
483
17.8M
  pD = pSrc[1] >> 1;
484
17.8M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
485
17.8M
  s1 = pD - (s1 >> 2);
486
487
  /* combination */
488
17.8M
  pDst[2] = (r1 - s2) >> 1;
489
17.8M
  pDst[3] = (s1 + r2) >> 1;
490
17.8M
  pDst[4] = (r1 + s2) >> 1;
491
17.8M
  pDst[5] = (s1 - r2) >> 1;
492
17.8M
  pSrc += 2;
493
17.8M
  pDst += 6;
494
495
17.8M
  const FIXP_STB *pVecRe = RotVectorReal12;
496
17.8M
  const FIXP_STB *pVecIm = RotVectorImag12;
497
17.8M
  FIXP_DBL re, im;
498
17.8M
  FIXP_STB vre, vim;
499
53.6M
  for (i = 0; i < 2; i++) {
500
    /* sample 0,1 are shifted right by 2 before output */
501
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
502
     * output */
503
504
35.7M
    r1 = pSrc[8] + pSrc[16];
505
35.7M
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
506
35.7M
    pD = pSrc[0] >> 1;
507
35.7M
    pDst[0] = (pD + (r1 >> 1)) >> 1;
508
35.7M
    r1 = pD - (r1 >> 2);
509
510
    /* imaginary part */
511
35.7M
    s1 = pSrc[9] + pSrc[17];
512
35.7M
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
513
35.7M
    pD = pSrc[1] >> 1;
514
35.7M
    pDst[1] = (pD + (s1 >> 1)) >> 1;
515
35.7M
    s1 = pD - (s1 >> 2);
516
517
    /* combination */
518
35.7M
    re = (r1 - s2) >> 0;
519
35.7M
    im = (s1 + r2) >> 0;
520
35.7M
    vre = *pVecRe++;
521
35.7M
    vim = *pVecIm++;
522
35.7M
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
523
524
35.7M
    re = (r1 + s2) >> 0;
525
35.7M
    im = (s1 - r2) >> 0;
526
35.7M
    vre = *pVecRe++;
527
35.7M
    vim = *pVecIm++;
528
35.7M
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
529
530
35.7M
    pDst += 6;
531
35.7M
    pSrc += 2;
532
35.7M
  }
533
  /* sample 0,1 are shifted right by 2 before output */
534
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
535
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
536
17.8M
  r1 = pSrc[8] + pSrc[16];
537
17.8M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
538
17.8M
  pD = pSrc[0] >> 1;
539
17.8M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
540
17.8M
  r1 = pD - (r1 >> 2);
541
542
  /* imaginary part */
543
17.8M
  s1 = pSrc[9] + pSrc[17];
544
17.8M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
545
17.8M
  pD = pSrc[1] >> 1;
546
17.8M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
547
17.8M
  s1 = pD - (s1 >> 2);
548
549
  /* combination */
550
17.8M
  pDst[2] = (s1 + r2) >> 1;
551
17.8M
  pDst[3] = (s2 - r1) >> 1;
552
17.8M
  pDst[4] = -((r1 + s2) >> 1);
553
17.8M
  pDst[5] = (r2 - s1) >> 1;
554
555
  /* Perform 3 times the fft of length 4. The input samples are at the address
556
  of aDst and the output samples are at the address of pInput. The input vector
557
  for the fft of length 4 is built of the interleaved samples in aDst, the
558
  output samples are stored consecutively at the address of pInput.
559
  */
560
17.8M
  pSrc = aDst;
561
17.8M
  pDst = pInput;
562
71.5M
  for (i = 0; i < 3; i++) {
563
    /* inline FFT4 merged with incoming resorting loop */
564
53.6M
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
565
566
53.6M
    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
567
53.6M
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
568
53.6M
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
569
53.6M
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
570
571
53.6M
    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
572
53.6M
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
573
574
53.6M
    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
575
53.6M
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */
576
577
53.6M
    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
578
53.6M
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
579
580
53.6M
    a10 = a10 - pSrc[18]; /* Re C - Re D */
581
53.6M
    a30 = a30 - pSrc[19]; /* Im C - Im D */
582
583
53.6M
    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
584
53.6M
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
585
53.6M
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
586
53.6M
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
587
588
53.6M
    pSrc += 2;
589
53.6M
    pDst += 2;
590
53.6M
  }
591
17.8M
}
592
#endif /* FUNCTION_fft12 */
593
594
#ifndef FUNCTION_fft15
595
596
166M
#define N3 3
597
94.5M
#define N5 5
598
54.0M
#define N6 6
599
166M
#define N15 15
600
601
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
602
 * length 5. */
603
4.50M
static inline void fft15(FIXP_DBL *pInput) {
604
4.50M
  FIXP_DBL aDst[2 * N15];
605
4.50M
  FIXP_DBL aDst1[2 * N15];
606
4.50M
  int i, k, l;
607
608
  /* Sort input vector for fft's of length 3
609
  input3(0:2)   = [input(0) input(5) input(10)];
610
  input3(3:5)   = [input(3) input(8) input(13)];
611
  input3(6:8)   = [input(6) input(11) input(1)];
612
  input3(9:11)  = [input(9) input(14) input(4)];
613
  input3(12:14) = [input(12) input(2) input(7)]; */
614
4.50M
  {
615
4.50M
    const FIXP_DBL *pSrc = pInput;
616
4.50M
    FIXP_DBL *RESTRICT pDst = aDst;
617
    /* Merge 3 loops into one, skip call of fft3 */
618
27.0M
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
619
22.5M
      pDst[k + 0] = pSrc[l];
620
22.5M
      pDst[k + 1] = pSrc[l + 1];
621
22.5M
      l += 2 * N5;
622
22.5M
      if (l >= (2 * N15)) l -= (2 * N15);
623
624
22.5M
      pDst[k + 2] = pSrc[l];
625
22.5M
      pDst[k + 3] = pSrc[l + 1];
626
22.5M
      l += 2 * N5;
627
22.5M
      if (l >= (2 * N15)) l -= (2 * N15);
628
22.5M
      pDst[k + 4] = pSrc[l];
629
22.5M
      pDst[k + 5] = pSrc[l + 1];
630
22.5M
      l += (2 * N5) + (2 * N3);
631
22.5M
      if (l >= (2 * N15)) l -= (2 * N15);
632
633
      /* fft3 merged with shift right by 2 loop */
634
22.5M
      FIXP_DBL r1, r2, r3;
635
22.5M
      FIXP_DBL s1, s2;
636
      /* real part */
637
22.5M
      r1 = pDst[k + 2] + pDst[k + 4];
638
22.5M
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
639
22.5M
      s1 = pDst[k + 0];
640
22.5M
      pDst[k + 0] = (s1 + r1) >> 2;
641
22.5M
      r1 = s1 - (r1 >> 1);
642
643
      /* imaginary part */
644
22.5M
      s1 = pDst[k + 3] + pDst[k + 5];
645
22.5M
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
646
22.5M
      r3 = pDst[k + 1];
647
22.5M
      pDst[k + 1] = (r3 + s1) >> 2;
648
22.5M
      s1 = r3 - (s1 >> 1);
649
650
      /* combination */
651
22.5M
      pDst[k + 2] = (r1 - s2) >> 2;
652
22.5M
      pDst[k + 4] = (r1 + s2) >> 2;
653
22.5M
      pDst[k + 3] = (s1 + r2) >> 2;
654
22.5M
      pDst[k + 5] = (s1 - r2) >> 2;
655
22.5M
    }
656
4.50M
  }
657
  /* Sort input vector for fft's of length 5
658
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
659
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
660
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
661
  /* Merge 2 loops into one, brings about 10% */
662
4.50M
  {
663
4.50M
    const FIXP_DBL *pSrc = aDst;
664
4.50M
    FIXP_DBL *RESTRICT pDst = aDst1;
665
18.0M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
666
13.5M
      l = 2 * i;
667
13.5M
      pDst[k + 0] = pSrc[l + 0];
668
13.5M
      pDst[k + 1] = pSrc[l + 1];
669
13.5M
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
670
13.5M
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
671
13.5M
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
672
13.5M
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
673
13.5M
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
674
13.5M
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
675
13.5M
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
676
13.5M
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
677
13.5M
      fft5(&pDst[k]);
678
13.5M
    }
679
4.50M
  }
680
  /* Sort output vector of length 15
681
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
682
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
683
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
684
  /* optimize clumsy loop, brings about 5% */
685
4.50M
  {
686
4.50M
    const FIXP_DBL *pSrc = aDst1;
687
4.50M
    FIXP_DBL *RESTRICT pDst = pInput;
688
18.0M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
689
13.5M
      pDst[k + 0] = pSrc[l];
690
13.5M
      pDst[k + 1] = pSrc[l + 1];
691
13.5M
      l += (2 * N6);
692
13.5M
      if (l >= (2 * N15)) l -= (2 * N15);
693
13.5M
      pDst[k + 2] = pSrc[l];
694
13.5M
      pDst[k + 3] = pSrc[l + 1];
695
13.5M
      l += (2 * N6);
696
13.5M
      if (l >= (2 * N15)) l -= (2 * N15);
697
13.5M
      pDst[k + 4] = pSrc[l];
698
13.5M
      pDst[k + 5] = pSrc[l + 1];
699
13.5M
      l += (2 * N6);
700
13.5M
      if (l >= (2 * N15)) l -= (2 * N15);
701
13.5M
      pDst[k + 6] = pSrc[l];
702
13.5M
      pDst[k + 7] = pSrc[l + 1];
703
13.5M
      l += (2 * N6);
704
13.5M
      if (l >= (2 * N15)) l -= (2 * N15);
705
13.5M
      pDst[k + 8] = pSrc[l];
706
13.5M
      pDst[k + 9] = pSrc[l + 1];
707
13.5M
      l += 2; /* no modulo check needed, it cannot occur */
708
13.5M
    }
709
4.50M
  }
710
4.50M
}
711
#endif /* FUNCTION_fft15 */
712
713
/*
714
 Select shift placement.
715
 Some processors like ARM may shift "for free" in combination with an addition
716
 or substraction, but others don't so either combining shift with +/- or reduce
717
 the total amount or shift operations is optimal
718
 */
719
#if !defined(__arm__)
720
945M
#define SHIFT_A >> 1
721
#define SHIFT_B
722
#else
723
#define SHIFT_A
724
#define SHIFT_B >> 1
725
#endif
726
727
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728
                         */
729
730
/* This defines prevents this array to be declared twice, if 16-bit fft is
731
 * enabled too */
732
#define FUNCTION_DATA_fft_16_w16
733
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
734
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
736
LNK_SECTION_CODE_L1
737
21.8M
inline void fft_16(FIXP_DBL *RESTRICT x) {
738
21.8M
  FIXP_DBL vr, ur;
739
21.8M
  FIXP_DBL vr2, ur2;
740
21.8M
  FIXP_DBL vr3, ur3;
741
21.8M
  FIXP_DBL vr4, ur4;
742
21.8M
  FIXP_DBL vi, ui;
743
21.8M
  FIXP_DBL vi2, ui2;
744
21.8M
  FIXP_DBL vi3, ui3;
745
746
21.8M
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
747
21.8M
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
748
21.8M
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
749
21.8M
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
750
21.8M
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
751
21.8M
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
752
753
21.8M
  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
754
21.8M
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
755
756
21.8M
  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
757
21.8M
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
758
21.8M
  vr -= x[16];              /* Re A - Re B */
759
21.8M
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
760
21.8M
  ur -= x[17];              /* Im A - Im B */
761
21.8M
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
762
763
21.8M
  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
764
21.8M
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
765
766
21.8M
  x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
767
21.8M
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
768
769
21.8M
  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
770
21.8M
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
771
772
21.8M
  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
773
21.8M
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
774
775
21.8M
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
776
21.8M
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
777
21.8M
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
778
21.8M
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
779
21.8M
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
780
21.8M
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
781
21.8M
  vr2 -= x[20];                            /* Re A - Re B */
782
21.8M
  ur2 -= x[21];                            /* Im A - Im B */
783
21.8M
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
784
21.8M
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
785
786
21.8M
  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
787
21.8M
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
788
789
21.8M
  x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
790
21.8M
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
791
792
21.8M
  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
793
21.8M
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
794
795
21.8M
  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
796
21.8M
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
797
798
21.8M
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
799
21.8M
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
800
21.8M
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
801
21.8M
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
802
21.8M
  vr3 -= x[18];               /* Re A - Re B */
803
21.8M
  ur3 -= x[19];               /* Im A - Im B */
804
21.8M
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
805
21.8M
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
806
21.8M
  x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
807
21.8M
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
808
809
21.8M
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
810
21.8M
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
811
21.8M
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
812
21.8M
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
813
21.8M
  vr4 -= x[22];                /* Re A - Re B */
814
21.8M
  ur4 -= x[23];                /* Im A - Im B */
815
816
21.8M
  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
817
21.8M
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
818
819
21.8M
  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
820
21.8M
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
821
21.8M
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
822
21.8M
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
823
21.8M
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
824
21.8M
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
825
826
  // xt1 =  0
827
  // xt2 =  8
828
21.8M
  vr = x[8];
829
21.8M
  vi = x[9];
830
21.8M
  ur = x[0] >> 1;
831
21.8M
  ui = x[1] >> 1;
832
21.8M
  x[0] = ur + (vr >> 1);
833
21.8M
  x[1] = ui + (vi >> 1);
834
21.8M
  x[8] = ur - (vr >> 1);
835
21.8M
  x[9] = ui - (vi >> 1);
836
837
  // xt1 =  4
838
  // xt2 = 12
839
21.8M
  vr = x[13];
840
21.8M
  vi = x[12];
841
21.8M
  ur = x[4] >> 1;
842
21.8M
  ui = x[5] >> 1;
843
21.8M
  x[4] = ur + (vr >> 1);
844
21.8M
  x[5] = ui - (vi >> 1);
845
21.8M
  x[12] = ur - (vr >> 1);
846
21.8M
  x[13] = ui + (vi >> 1);
847
848
  // xt1 = 16
849
  // xt2 = 24
850
21.8M
  vr = x[24];
851
21.8M
  vi = x[25];
852
21.8M
  ur = x[16] >> 1;
853
21.8M
  ui = x[17] >> 1;
854
21.8M
  x[16] = ur + (vr >> 1);
855
21.8M
  x[17] = ui + (vi >> 1);
856
21.8M
  x[24] = ur - (vr >> 1);
857
21.8M
  x[25] = ui - (vi >> 1);
858
859
  // xt1 = 20
860
  // xt2 = 28
861
21.8M
  vr = x[29];
862
21.8M
  vi = x[28];
863
21.8M
  ur = x[20] >> 1;
864
21.8M
  ui = x[21] >> 1;
865
21.8M
  x[20] = ur + (vr >> 1);
866
21.8M
  x[21] = ui - (vi >> 1);
867
21.8M
  x[28] = ur - (vr >> 1);
868
21.8M
  x[29] = ui + (vi >> 1);
869
870
  // xt1 =  2
871
  // xt2 = 10
872
21.8M
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
873
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
874
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
875
21.8M
  ur = x[2];
876
21.8M
  ui = x[3];
877
21.8M
  x[2] = (ur >> 1) + vr;
878
21.8M
  x[3] = (ui >> 1) + vi;
879
21.8M
  x[10] = (ur >> 1) - vr;
880
21.8M
  x[11] = (ui >> 1) - vi;
881
882
  // xt1 =  6
883
  // xt2 = 14
884
21.8M
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
885
21.8M
  ur = x[6];
886
21.8M
  ui = x[7];
887
21.8M
  x[6] = (ur >> 1) + vr;
888
21.8M
  x[7] = (ui >> 1) - vi;
889
21.8M
  x[14] = (ur >> 1) - vr;
890
21.8M
  x[15] = (ui >> 1) + vi;
891
892
  // xt1 = 18
893
  // xt2 = 26
894
21.8M
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
895
21.8M
  ur = x[18];
896
21.8M
  ui = x[19];
897
21.8M
  x[18] = (ur >> 1) + vr;
898
21.8M
  x[19] = (ui >> 1) + vi;
899
21.8M
  x[26] = (ur >> 1) - vr;
900
21.8M
  x[27] = (ui >> 1) - vi;
901
902
  // xt1 = 22
903
  // xt2 = 30
904
21.8M
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
905
21.8M
  ur = x[22];
906
21.8M
  ui = x[23];
907
21.8M
  x[22] = (ur >> 1) + vr;
908
21.8M
  x[23] = (ui >> 1) - vi;
909
21.8M
  x[30] = (ur >> 1) - vr;
910
21.8M
  x[31] = (ui >> 1) + vi;
911
912
  // xt1 =  0
913
  // xt2 = 16
914
21.8M
  vr = x[16];
915
21.8M
  vi = x[17];
916
21.8M
  ur = x[0] >> 1;
917
21.8M
  ui = x[1] >> 1;
918
21.8M
  x[0] = ur + (vr >> 1);
919
21.8M
  x[1] = ui + (vi >> 1);
920
21.8M
  x[16] = ur - (vr >> 1);
921
21.8M
  x[17] = ui - (vi >> 1);
922
923
  // xt1 =  8
924
  // xt2 = 24
925
21.8M
  vi = x[24];
926
21.8M
  vr = x[25];
927
21.8M
  ur = x[8] >> 1;
928
21.8M
  ui = x[9] >> 1;
929
21.8M
  x[8] = ur + (vr >> 1);
930
21.8M
  x[9] = ui - (vi >> 1);
931
21.8M
  x[24] = ur - (vr >> 1);
932
21.8M
  x[25] = ui + (vi >> 1);
933
934
  // xt1 =  2
935
  // xt2 = 18
936
21.8M
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
937
21.8M
  ur = x[2];
938
21.8M
  ui = x[3];
939
21.8M
  x[2] = (ur >> 1) + vr;
940
21.8M
  x[3] = (ui >> 1) + vi;
941
21.8M
  x[18] = (ur >> 1) - vr;
942
21.8M
  x[19] = (ui >> 1) - vi;
943
944
  // xt1 = 10
945
  // xt2 = 26
946
21.8M
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
947
21.8M
  ur = x[10];
948
21.8M
  ui = x[11];
949
21.8M
  x[10] = (ur >> 1) + vr;
950
21.8M
  x[11] = (ui >> 1) - vi;
951
21.8M
  x[26] = (ur >> 1) - vr;
952
21.8M
  x[27] = (ui >> 1) + vi;
953
954
  // xt1 =  4
955
  // xt2 = 20
956
21.8M
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
957
21.8M
  ur = x[4];
958
21.8M
  ui = x[5];
959
21.8M
  x[4] = (ur >> 1) + vr;
960
21.8M
  x[5] = (ui >> 1) + vi;
961
21.8M
  x[20] = (ur >> 1) - vr;
962
21.8M
  x[21] = (ui >> 1) - vi;
963
964
  // xt1 = 12
965
  // xt2 = 28
966
21.8M
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
967
21.8M
  ur = x[12];
968
21.8M
  ui = x[13];
969
21.8M
  x[12] = (ur >> 1) + vr;
970
21.8M
  x[13] = (ui >> 1) - vi;
971
21.8M
  x[28] = (ur >> 1) - vr;
972
21.8M
  x[29] = (ui >> 1) + vi;
973
974
  // xt1 =  6
975
  // xt2 = 22
976
21.8M
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
977
21.8M
  ur = x[6];
978
21.8M
  ui = x[7];
979
21.8M
  x[6] = (ur >> 1) + vr;
980
21.8M
  x[7] = (ui >> 1) + vi;
981
21.8M
  x[22] = (ur >> 1) - vr;
982
21.8M
  x[23] = (ui >> 1) - vi;
983
984
  // xt1 = 14
985
  // xt2 = 30
986
21.8M
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
987
21.8M
  ur = x[14];
988
21.8M
  ui = x[15];
989
21.8M
  x[14] = (ur >> 1) + vr;
990
21.8M
  x[15] = (ui >> 1) - vi;
991
21.8M
  x[30] = (ur >> 1) - vr;
992
21.8M
  x[31] = (ui >> 1) + vi;
993
21.8M
}
994
#endif /* FUNCTION_fft_16 */
995
996
#ifndef FUNCTION_fft_32
997
static const FIXP_STP fft32_w32[6] = {
998
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
999
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
1000
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
1001
1.04G
#define W_PiFOURTH STC(0x5a82799a)
1002
1003
LNK_SECTION_CODE_L1
1004
37.2M
inline void fft_32(FIXP_DBL *const _x) {
1005
  /*
1006
   * 1+2 stage radix 4
1007
   */
1008
1009
  /////////////////////////////////////////////////////////////////////////////////////////
1010
37.2M
  {
1011
37.2M
    FIXP_DBL *const x = _x;
1012
37.2M
    FIXP_DBL vi, ui;
1013
37.2M
    FIXP_DBL vi2, ui2;
1014
37.2M
    FIXP_DBL vi3, ui3;
1015
37.2M
    FIXP_DBL vr, ur;
1016
37.2M
    FIXP_DBL vr2, ur2;
1017
37.2M
    FIXP_DBL vr3, ur3;
1018
37.2M
    FIXP_DBL vr4, ur4;
1019
1020
    // i = 0
1021
37.2M
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
1022
37.2M
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
1023
37.2M
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
1024
37.2M
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
1025
1026
37.2M
    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1027
37.2M
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1028
1029
37.2M
    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
1030
37.2M
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
1031
1032
37.2M
    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1033
37.2M
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1034
1035
37.2M
    vr -= x[32];             /* Re A - Re B */
1036
37.2M
    ur -= x[33];             /* Im A - Im B */
1037
37.2M
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
1038
37.2M
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
1039
1040
37.2M
    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
1041
37.2M
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
1042
1043
37.2M
    x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1044
37.2M
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1045
1046
37.2M
    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
1047
37.2M
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
1048
1049
37.2M
    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1050
37.2M
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1051
1052
    // i=16
1053
37.2M
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
1054
37.2M
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
1055
1056
37.2M
    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1057
37.2M
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1058
37.2M
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1059
37.2M
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1060
1061
37.2M
    vr2 -= x[36];            /* Re A - Re B */
1062
37.2M
    ur2 -= x[37];            /* Im A - Im B */
1063
37.2M
    vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
1064
37.2M
    ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
1065
1066
37.2M
    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
1067
37.2M
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
1068
1069
37.2M
    x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1070
37.2M
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1071
1072
37.2M
    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
1073
37.2M
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
1074
1075
37.2M
    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1076
37.2M
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1077
1078
    // i = 32
1079
1080
37.2M
    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1081
37.2M
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1082
37.2M
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1083
37.2M
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1084
1085
37.2M
    vr3 -= x[34];              /* Re A - Re B */
1086
37.2M
    ur3 -= x[35];              /* Im A - Im B */
1087
37.2M
    vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
1088
37.2M
    ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
1089
1090
37.2M
    x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1091
37.2M
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1092
1093
    // i=48
1094
1095
37.2M
    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1096
37.2M
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1097
37.2M
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1098
37.2M
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1099
1100
37.2M
    vr4 -= x[38]; /* Re A - Re B */
1101
37.2M
    ur4 -= x[39]; /* Im A - Im B */
1102
1103
37.2M
    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1104
37.2M
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1105
1106
37.2M
    vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
1107
37.2M
    ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
1108
1109
37.2M
    x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1110
37.2M
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1111
37.2M
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1112
37.2M
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1113
1114
    // i=8
1115
37.2M
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
1116
37.2M
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
1117
37.2M
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
1118
37.2M
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
1119
1120
37.2M
    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1121
37.2M
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1122
1123
37.2M
    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
1124
37.2M
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
1125
1126
37.2M
    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1127
37.2M
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1128
1129
37.2M
    vr -= x[40];             /* Re A - Re B */
1130
37.2M
    ur -= x[41];             /* Im A - Im B */
1131
37.2M
    vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
1132
37.2M
    ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
1133
1134
37.2M
    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
1135
37.2M
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
1136
1137
37.2M
    x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1138
37.2M
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1139
1140
37.2M
    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
1141
37.2M
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
1142
1143
37.2M
    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1144
37.2M
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1145
1146
    // i=24
1147
37.2M
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
1148
37.2M
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
1149
1150
37.2M
    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1151
37.2M
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1152
37.2M
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1153
37.2M
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1154
1155
37.2M
    vr2 -= x[44];            /* Re A - Re B */
1156
37.2M
    ur2 -= x[45];            /* Im A - Im B */
1157
37.2M
    vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
1158
37.2M
    ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
1159
1160
37.2M
    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
1161
37.2M
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
1162
1163
37.2M
    x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1164
37.2M
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1165
1166
37.2M
    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
1167
37.2M
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
1168
1169
37.2M
    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1170
37.2M
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1171
1172
    // i=40
1173
1174
37.2M
    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1175
37.2M
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1176
37.2M
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1177
37.2M
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1178
1179
37.2M
    vr3 -= x[42];              /* Re A - Re B */
1180
37.2M
    ur3 -= x[43];              /* Im A - Im B */
1181
37.2M
    vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
1182
37.2M
    ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
1183
1184
37.2M
    x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1185
37.2M
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1186
1187
    // i=56
1188
1189
37.2M
    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1190
37.2M
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1191
37.2M
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1192
37.2M
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1193
1194
37.2M
    vr4 -= x[46]; /* Re A - Re B */
1195
37.2M
    ur4 -= x[47]; /* Im A - Im B */
1196
1197
37.2M
    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1198
37.2M
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1199
1200
37.2M
    vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
1201
37.2M
    ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
1202
1203
37.2M
    x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1204
37.2M
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1205
37.2M
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1206
37.2M
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1207
37.2M
  }
1208
1209
37.2M
  {
1210
37.2M
    FIXP_DBL *xt = _x;
1211
1212
37.2M
    int j = 4;
1213
148M
    do {
1214
148M
      FIXP_DBL vi, ui, vr, ur;
1215
1216
148M
      vr = xt[8];
1217
148M
      vi = xt[9];
1218
148M
      ur = xt[0] >> 1;
1219
148M
      ui = xt[1] >> 1;
1220
148M
      xt[0] = ur + (vr >> 1);
1221
148M
      xt[1] = ui + (vi >> 1);
1222
148M
      xt[8] = ur - (vr >> 1);
1223
148M
      xt[9] = ui - (vi >> 1);
1224
1225
148M
      vr = xt[13];
1226
148M
      vi = xt[12];
1227
148M
      ur = xt[4] >> 1;
1228
148M
      ui = xt[5] >> 1;
1229
148M
      xt[4] = ur + (vr >> 1);
1230
148M
      xt[5] = ui - (vi >> 1);
1231
148M
      xt[12] = ur - (vr >> 1);
1232
148M
      xt[13] = ui + (vi >> 1);
1233
1234
148M
      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
1235
148M
      ur = xt[2];
1236
148M
      ui = xt[3];
1237
148M
      xt[2] = (ur >> 1) + vr;
1238
148M
      xt[3] = (ui >> 1) + vi;
1239
148M
      xt[10] = (ur >> 1) - vr;
1240
148M
      xt[11] = (ui >> 1) - vi;
1241
1242
148M
      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
1243
148M
      ur = xt[6];
1244
148M
      ui = xt[7];
1245
1246
148M
      xt[6] = (ur >> 1) + vr;
1247
148M
      xt[7] = (ui >> 1) - vi;
1248
148M
      xt[14] = (ur >> 1) - vr;
1249
148M
      xt[15] = (ui >> 1) + vi;
1250
148M
      xt += 16;
1251
148M
    } while (--j != 0);
1252
37.2M
  }
1253
1254
37.2M
  {
1255
37.2M
    FIXP_DBL *const x = _x;
1256
37.2M
    FIXP_DBL vi, ui, vr, ur;
1257
1258
37.2M
    vr = x[16];
1259
37.2M
    vi = x[17];
1260
37.2M
    ur = x[0] >> 1;
1261
37.2M
    ui = x[1] >> 1;
1262
37.2M
    x[0] = ur + (vr >> 1);
1263
37.2M
    x[1] = ui + (vi >> 1);
1264
37.2M
    x[16] = ur - (vr >> 1);
1265
37.2M
    x[17] = ui - (vi >> 1);
1266
1267
37.2M
    vi = x[24];
1268
37.2M
    vr = x[25];
1269
37.2M
    ur = x[8] >> 1;
1270
37.2M
    ui = x[9] >> 1;
1271
37.2M
    x[8] = ur + (vr >> 1);
1272
37.2M
    x[9] = ui - (vi >> 1);
1273
37.2M
    x[24] = ur - (vr >> 1);
1274
37.2M
    x[25] = ui + (vi >> 1);
1275
1276
37.2M
    vr = x[48];
1277
37.2M
    vi = x[49];
1278
37.2M
    ur = x[32] >> 1;
1279
37.2M
    ui = x[33] >> 1;
1280
37.2M
    x[32] = ur + (vr >> 1);
1281
37.2M
    x[33] = ui + (vi >> 1);
1282
37.2M
    x[48] = ur - (vr >> 1);
1283
37.2M
    x[49] = ui - (vi >> 1);
1284
1285
37.2M
    vi = x[56];
1286
37.2M
    vr = x[57];
1287
37.2M
    ur = x[40] >> 1;
1288
37.2M
    ui = x[41] >> 1;
1289
37.2M
    x[40] = ur + (vr >> 1);
1290
37.2M
    x[41] = ui - (vi >> 1);
1291
37.2M
    x[56] = ur - (vr >> 1);
1292
37.2M
    x[57] = ui + (vi >> 1);
1293
1294
37.2M
    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
1295
37.2M
    ur = x[2];
1296
37.2M
    ui = x[3];
1297
37.2M
    x[2] = (ur >> 1) + vr;
1298
37.2M
    x[3] = (ui >> 1) + vi;
1299
37.2M
    x[18] = (ur >> 1) - vr;
1300
37.2M
    x[19] = (ui >> 1) - vi;
1301
1302
37.2M
    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
1303
37.2M
    ur = x[10];
1304
37.2M
    ui = x[11];
1305
37.2M
    x[10] = (ur >> 1) + vr;
1306
37.2M
    x[11] = (ui >> 1) - vi;
1307
37.2M
    x[26] = (ur >> 1) - vr;
1308
37.2M
    x[27] = (ui >> 1) + vi;
1309
1310
37.2M
    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
1311
37.2M
    ur = x[34];
1312
37.2M
    ui = x[35];
1313
37.2M
    x[34] = (ur >> 1) + vr;
1314
37.2M
    x[35] = (ui >> 1) + vi;
1315
37.2M
    x[50] = (ur >> 1) - vr;
1316
37.2M
    x[51] = (ui >> 1) - vi;
1317
1318
37.2M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
1319
37.2M
    ur = x[42];
1320
37.2M
    ui = x[43];
1321
37.2M
    x[42] = (ur >> 1) + vr;
1322
37.2M
    x[43] = (ui >> 1) - vi;
1323
37.2M
    x[58] = (ur >> 1) - vr;
1324
37.2M
    x[59] = (ui >> 1) + vi;
1325
1326
37.2M
    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
1327
37.2M
    ur = x[4];
1328
37.2M
    ui = x[5];
1329
37.2M
    x[4] = (ur >> 1) + vr;
1330
37.2M
    x[5] = (ui >> 1) + vi;
1331
37.2M
    x[20] = (ur >> 1) - vr;
1332
37.2M
    x[21] = (ui >> 1) - vi;
1333
1334
37.2M
    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
1335
37.2M
    ur = x[12];
1336
37.2M
    ui = x[13];
1337
37.2M
    x[12] = (ur >> 1) + vr;
1338
37.2M
    x[13] = (ui >> 1) - vi;
1339
37.2M
    x[28] = (ur >> 1) - vr;
1340
37.2M
    x[29] = (ui >> 1) + vi;
1341
1342
37.2M
    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
1343
37.2M
    ur = x[36];
1344
37.2M
    ui = x[37];
1345
37.2M
    x[36] = (ur >> 1) + vr;
1346
37.2M
    x[37] = (ui >> 1) + vi;
1347
37.2M
    x[52] = (ur >> 1) - vr;
1348
37.2M
    x[53] = (ui >> 1) - vi;
1349
1350
37.2M
    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
1351
37.2M
    ur = x[44];
1352
37.2M
    ui = x[45];
1353
37.2M
    x[44] = (ur >> 1) + vr;
1354
37.2M
    x[45] = (ui >> 1) - vi;
1355
37.2M
    x[60] = (ur >> 1) - vr;
1356
37.2M
    x[61] = (ui >> 1) + vi;
1357
1358
37.2M
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
1359
37.2M
    ur = x[6];
1360
37.2M
    ui = x[7];
1361
37.2M
    x[6] = (ur >> 1) + vr;
1362
37.2M
    x[7] = (ui >> 1) + vi;
1363
37.2M
    x[22] = (ur >> 1) - vr;
1364
37.2M
    x[23] = (ui >> 1) - vi;
1365
1366
37.2M
    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
1367
37.2M
    ur = x[14];
1368
37.2M
    ui = x[15];
1369
37.2M
    x[14] = (ur >> 1) + vr;
1370
37.2M
    x[15] = (ui >> 1) - vi;
1371
37.2M
    x[30] = (ur >> 1) - vr;
1372
37.2M
    x[31] = (ui >> 1) + vi;
1373
1374
37.2M
    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
1375
37.2M
    ur = x[38];
1376
37.2M
    ui = x[39];
1377
37.2M
    x[38] = (ur >> 1) + vr;
1378
37.2M
    x[39] = (ui >> 1) + vi;
1379
37.2M
    x[54] = (ur >> 1) - vr;
1380
37.2M
    x[55] = (ui >> 1) - vi;
1381
1382
37.2M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
1383
37.2M
    ur = x[46];
1384
37.2M
    ui = x[47];
1385
1386
37.2M
    x[46] = (ur >> 1) + vr;
1387
37.2M
    x[47] = (ui >> 1) - vi;
1388
37.2M
    x[62] = (ur >> 1) - vr;
1389
37.2M
    x[63] = (ui >> 1) + vi;
1390
1391
37.2M
    vr = x[32];
1392
37.2M
    vi = x[33];
1393
37.2M
    ur = x[0] >> 1;
1394
37.2M
    ui = x[1] >> 1;
1395
37.2M
    x[0] = ur + (vr >> 1);
1396
37.2M
    x[1] = ui + (vi >> 1);
1397
37.2M
    x[32] = ur - (vr >> 1);
1398
37.2M
    x[33] = ui - (vi >> 1);
1399
1400
37.2M
    vi = x[48];
1401
37.2M
    vr = x[49];
1402
37.2M
    ur = x[16] >> 1;
1403
37.2M
    ui = x[17] >> 1;
1404
37.2M
    x[16] = ur + (vr >> 1);
1405
37.2M
    x[17] = ui - (vi >> 1);
1406
37.2M
    x[48] = ur - (vr >> 1);
1407
37.2M
    x[49] = ui + (vi >> 1);
1408
1409
37.2M
    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
1410
37.2M
    ur = x[2];
1411
37.2M
    ui = x[3];
1412
37.2M
    x[2] = (ur >> 1) + vr;
1413
37.2M
    x[3] = (ui >> 1) + vi;
1414
37.2M
    x[34] = (ur >> 1) - vr;
1415
37.2M
    x[35] = (ui >> 1) - vi;
1416
1417
37.2M
    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
1418
37.2M
    ur = x[18];
1419
37.2M
    ui = x[19];
1420
37.2M
    x[18] = (ur >> 1) + vr;
1421
37.2M
    x[19] = (ui >> 1) - vi;
1422
37.2M
    x[50] = (ur >> 1) - vr;
1423
37.2M
    x[51] = (ui >> 1) + vi;
1424
1425
37.2M
    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
1426
37.2M
    ur = x[4];
1427
37.2M
    ui = x[5];
1428
37.2M
    x[4] = (ur >> 1) + vr;
1429
37.2M
    x[5] = (ui >> 1) + vi;
1430
37.2M
    x[36] = (ur >> 1) - vr;
1431
37.2M
    x[37] = (ui >> 1) - vi;
1432
1433
37.2M
    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
1434
37.2M
    ur = x[20];
1435
37.2M
    ui = x[21];
1436
37.2M
    x[20] = (ur >> 1) + vr;
1437
37.2M
    x[21] = (ui >> 1) - vi;
1438
37.2M
    x[52] = (ur >> 1) - vr;
1439
37.2M
    x[53] = (ui >> 1) + vi;
1440
1441
37.2M
    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
1442
37.2M
    ur = x[6];
1443
37.2M
    ui = x[7];
1444
37.2M
    x[6] = (ur >> 1) + vr;
1445
37.2M
    x[7] = (ui >> 1) + vi;
1446
37.2M
    x[38] = (ur >> 1) - vr;
1447
37.2M
    x[39] = (ui >> 1) - vi;
1448
1449
37.2M
    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
1450
37.2M
    ur = x[22];
1451
37.2M
    ui = x[23];
1452
37.2M
    x[22] = (ur >> 1) + vr;
1453
37.2M
    x[23] = (ui >> 1) - vi;
1454
37.2M
    x[54] = (ur >> 1) - vr;
1455
37.2M
    x[55] = (ui >> 1) + vi;
1456
1457
37.2M
    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
1458
37.2M
    ur = x[8];
1459
37.2M
    ui = x[9];
1460
37.2M
    x[8] = (ur >> 1) + vr;
1461
37.2M
    x[9] = (ui >> 1) + vi;
1462
37.2M
    x[40] = (ur >> 1) - vr;
1463
37.2M
    x[41] = (ui >> 1) - vi;
1464
1465
37.2M
    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
1466
37.2M
    ur = x[24];
1467
37.2M
    ui = x[25];
1468
37.2M
    x[24] = (ur >> 1) + vr;
1469
37.2M
    x[25] = (ui >> 1) - vi;
1470
37.2M
    x[56] = (ur >> 1) - vr;
1471
37.2M
    x[57] = (ui >> 1) + vi;
1472
1473
37.2M
    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
1474
37.2M
    ur = x[10];
1475
37.2M
    ui = x[11];
1476
1477
37.2M
    x[10] = (ur >> 1) + vr;
1478
37.2M
    x[11] = (ui >> 1) + vi;
1479
37.2M
    x[42] = (ur >> 1) - vr;
1480
37.2M
    x[43] = (ui >> 1) - vi;
1481
1482
37.2M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
1483
37.2M
    ur = x[26];
1484
37.2M
    ui = x[27];
1485
37.2M
    x[26] = (ur >> 1) + vr;
1486
37.2M
    x[27] = (ui >> 1) - vi;
1487
37.2M
    x[58] = (ur >> 1) - vr;
1488
37.2M
    x[59] = (ui >> 1) + vi;
1489
1490
37.2M
    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1491
37.2M
    ur = x[12];
1492
37.2M
    ui = x[13];
1493
37.2M
    x[12] = (ur >> 1) + vr;
1494
37.2M
    x[13] = (ui >> 1) + vi;
1495
37.2M
    x[44] = (ur >> 1) - vr;
1496
37.2M
    x[45] = (ui >> 1) - vi;
1497
1498
37.2M
    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1499
37.2M
    ur = x[28];
1500
37.2M
    ui = x[29];
1501
37.2M
    x[28] = (ur >> 1) + vr;
1502
37.2M
    x[29] = (ui >> 1) - vi;
1503
37.2M
    x[60] = (ur >> 1) - vr;
1504
37.2M
    x[61] = (ui >> 1) + vi;
1505
1506
37.2M
    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1507
37.2M
    ur = x[14];
1508
37.2M
    ui = x[15];
1509
37.2M
    x[14] = (ur >> 1) + vr;
1510
37.2M
    x[15] = (ui >> 1) + vi;
1511
37.2M
    x[46] = (ur >> 1) - vr;
1512
37.2M
    x[47] = (ui >> 1) - vi;
1513
1514
37.2M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1515
37.2M
    ur = x[30];
1516
37.2M
    ui = x[31];
1517
37.2M
    x[30] = (ur >> 1) + vr;
1518
37.2M
    x[31] = (ui >> 1) - vi;
1519
37.2M
    x[62] = (ur >> 1) - vr;
1520
37.2M
    x[63] = (ui >> 1) + vi;
1521
37.2M
  }
1522
37.2M
}
1523
#endif /* #ifndef FUNCTION_fft_32 */
1524
1525
/**
1526
 * \brief Apply rotation vectors to a data buffer.
1527
 * \param cl length of each row of input data.
1528
 * \param l total length of input data.
1529
 * \param pVecRe real part of rotation coefficient vector.
1530
 * \param pVecIm imaginary part of rotation coefficient vector.
1531
 */
1532
1533
/*
1534
   This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535
   (-1.0) instead. At the end, the sign of the result is inverted
1536
*/
1537
#define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
1540
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541
                                        const int l, const FIXP_STB *pVecRe,
1542
4.40M
                                        const FIXP_STB *pVecIm) {
1543
4.40M
  FIXP_DBL re, im;
1544
4.40M
  FIXP_STB vre, vim;
1545
1546
4.40M
  int i, c;
1547
1548
19.3M
  for (i = 0; i < cl; i++) {
1549
14.9M
    re = pData[2 * i];
1550
14.9M
    im = pData[2 * i + 1];
1551
1552
14.9M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1553
14.9M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554
14.9M
  }
1555
28.1M
  for (; i < l; i += cl) {
1556
23.7M
    re = pData[2 * i];
1557
23.7M
    im = pData[2 * i + 1];
1558
1559
23.7M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1560
23.7M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562
144M
    for (c = i + 1; c < i + cl; c++) {
1563
120M
      re = pData[2 * c] >> 1;
1564
120M
      im = pData[2 * c + 1] >> 1;
1565
120M
      vre = *pVecRe++;
1566
120M
      vim = *pVecIm++;
1567
1568
120M
      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569
120M
    }
1570
23.7M
  }
1571
4.40M
}
1572
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574
/* select either switch case of function pointer. */
1575
//#define FFT_TWO_STAGE_SWITCH_CASE
1576
#ifndef FUNCTION_fftN2_func
1577
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578
                              const int dim1, const int dim2,
1579
                              void (*const fft1)(FIXP_DBL *),
1580
                              void (*const fft2)(FIXP_DBL *),
1581
                              const FIXP_STB *RotVectorReal,
1582
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583
4.40M
                              FIXP_DBL *aDst2) {
1584
  /* The real part of the input samples are at the addresses with even indices
1585
  and the imaginary part of the input samples are at the addresses with odd
1586
  indices. The output samples are stored at the address of pInput
1587
  */
1588
4.40M
  FIXP_DBL *pSrc, *pDst, *pDstOut;
1589
4.40M
  int i;
1590
1591
4.40M
  FDK_ASSERT(length == dim1 * dim2);
1592
1593
  /* Perform dim2 times the fft of length dim1. The input samples are at the
1594
  address of pSrc and the output samples are at the address of pDst. The input
1595
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596
  the output samples are stored consecutively.
1597
  */
1598
4.40M
  pSrc = pInput;
1599
4.40M
  pDst = aDst;
1600
32.5M
  for (i = 0; i < dim2; i++) {
1601
187M
    for (int j = 0; j < dim1; j++) {
1602
159M
      pDst[2 * j] = pSrc[2 * j * dim2];
1603
159M
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604
159M
    }
1605
1606
      /* fft of size dim1 */
1607
28.1M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1608
28.1M
    fft1(pDst);
1609
#else
1610
    switch (dim1) {
1611
      case 2:
1612
        fft2(pDst);
1613
        break;
1614
      case 3:
1615
        fft3(pDst);
1616
        break;
1617
      case 4:
1618
        fft_4(pDst);
1619
        break;
1620
      /* case 5: fft5(pDst); break; */
1621
      /* case 8: fft_8(pDst); break; */
1622
      case 12:
1623
        fft12(pDst);
1624
        break;
1625
      /* case 15: fft15(pDst); break; */
1626
      case 16:
1627
        fft_16(pDst);
1628
        break;
1629
      case 32:
1630
        fft_32(pDst);
1631
        break;
1632
        /*case 64: fft_64(pDst); break;*/
1633
        /* case 128: fft_128(pDst); break; */
1634
    }
1635
#endif
1636
28.1M
    pSrc += 2;
1637
28.1M
    pDst = pDst + 2 * dim1;
1638
28.1M
  }
1639
1640
  /* Perform the modulation of the output of the fft of length dim1 */
1641
4.40M
  pSrc = aDst;
1642
4.40M
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644
  /* Perform dim1 times the fft of length dim2. The input samples are at the
1645
  address of aDst and the output samples are at the address of pInput. The input
1646
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647
  the output samples are stored consecutively at the address of pInput.
1648
  */
1649
4.40M
  pSrc = aDst;
1650
4.40M
  pDst = aDst2;
1651
4.40M
  pDstOut = pInput;
1652
19.3M
  for (i = 0; i < dim1; i++) {
1653
174M
    for (int j = 0; j < dim2; j++) {
1654
159M
      pDst[2 * j] = pSrc[2 * j * dim1];
1655
159M
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656
159M
    }
1657
1658
14.9M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1659
14.9M
    fft2(pDst);
1660
#else
1661
    switch (dim2) {
1662
      case 4:
1663
        fft_4(pDst);
1664
        break;
1665
      case 9:
1666
        fft9(pDst);
1667
        break;
1668
      case 12:
1669
        fft12(pDst);
1670
        break;
1671
      case 15:
1672
        fft15(pDst);
1673
        break;
1674
      case 16:
1675
        fft_16(pDst);
1676
        break;
1677
      case 32:
1678
        fft_32(pDst);
1679
        break;
1680
    }
1681
#endif
1682
1683
174M
    for (int j = 0; j < dim2; j++) {
1684
159M
      pDstOut[2 * j * dim1] = pDst[2 * j];
1685
159M
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686
159M
    }
1687
14.9M
    pSrc += 2;
1688
14.9M
    pDstOut += 2;
1689
14.9M
  }
1690
4.40M
}
1691
#endif /* FUNCTION_fftN2_function */
1692
1693
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
1694
              RotVectorReal, RotVectorImag)                                \
1695
4.40M
  {                                                                        \
1696
4.40M
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
1697
4.40M
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
1698
4.40M
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
1699
4.40M
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
1700
4.40M
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
1701
4.40M
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
1702
4.40M
  }
1703
1704
  /*!
1705
   *
1706
   *  \brief  complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707
   *  \param  pInput contains the input signal prescaled right by 2
1708
   *          pInput contains the output signal scaled by SCALEFACTOR<#length>
1709
   *          The output signal does not have any fixed headroom
1710
   *  \return void
1711
   *
1712
   */
1713
1714
#ifndef FUNCTION_fft6
1715
3.10M
static inline void fft6(FIXP_DBL *pInput) {
1716
3.10M
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717
3.10M
}
1718
#endif /* #ifndef FUNCTION_fft6 */
1719
1720
#ifndef FUNCTION_fft12
1721
static inline void fft12(FIXP_DBL *pInput) {
1722
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723
        RotVectorImag12); /* 16,58 */
1724
}
1725
#endif /* #ifndef FUNCTION_fft12 */
1726
1727
#ifndef FUNCTION_fft20
1728
225k
static inline void fft20(FIXP_DBL *pInput) {
1729
225k
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730
225k
        RotVectorImag20);
1731
225k
}
1732
#endif /* FUNCTION_fft20 */
1733
1734
9.74k
static inline void fft24(FIXP_DBL *pInput) {
1735
9.74k
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736
9.74k
        RotVectorImag24); /* 16,73 */
1737
9.74k
}
1738
1739
448k
static inline void fft48(FIXP_DBL *pInput) {
1740
448k
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741
448k
        RotVectorImag48); /* 16,32 */
1742
448k
}
1743
1744
#ifndef FUNCTION_fft60
1745
217k
static inline void fft60(FIXP_DBL *pInput) {
1746
217k
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747
217k
        RotVectorImag60); /* 15,51 */
1748
217k
}
1749
#endif /* FUNCTION_fft60 */
1750
1751
#ifndef FUNCTION_fft80
1752
251
static inline void fft80(FIXP_DBL *pInput) {
1753
251
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754
251
        RotVectorImag80); /*  */
1755
251
}
1756
#endif
1757
1758
#ifndef FUNCTION_fft96
1759
76.5k
static inline void fft96(FIXP_DBL *pInput) {
1760
76.5k
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761
76.5k
        RotVectorImag96); /* 15,47 */
1762
76.5k
}
1763
#endif /* FUNCTION_fft96*/
1764
1765
#ifndef FUNCTION_fft120
1766
3.94k
static inline void fft120(FIXP_DBL *pInput) {
1767
3.94k
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768
3.94k
        RotVectorImag120);
1769
3.94k
}
1770
#endif /* FUNCTION_fft120 */
1771
1772
#ifndef FUNCTION_fft192
1773
10.5k
static inline void fft192(FIXP_DBL *pInput) {
1774
10.5k
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775
10.5k
        RotVectorImag192); /* 15,50 */
1776
10.5k
}
1777
#endif
1778
1779
#ifndef FUNCTION_fft240
1780
193k
static inline void fft240(FIXP_DBL *pInput) {
1781
193k
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782
193k
        RotVectorImag240); /* 15.44 */
1783
193k
}
1784
#endif
1785
1786
#ifndef FUNCTION_fft384
1787
98.2k
static inline void fft384(FIXP_DBL *pInput) {
1788
98.2k
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789
98.2k
        RotVectorImag384); /* 16.02 */
1790
98.2k
}
1791
#endif /* FUNCTION_fft384 */
1792
1793
#ifndef FUNCTION_fft480
1794
15.8k
static inline void fft480(FIXP_DBL *pInput) {
1795
15.8k
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796
15.8k
        RotVectorImag480); /* 15.84 */
1797
15.8k
}
1798
#endif /* FUNCTION_fft480 */
1799
1800
81.3M
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802
81.3M
  C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804
81.3M
  if (length == 32) {
1805
35.5M
    fft_32(pInput);
1806
35.5M
    *pScalefactor += SCALEFACTOR32;
1807
45.7M
  } else {
1808
45.7M
    switch (length) {
1809
18.8M
      case 16:
1810
18.8M
        fft_16(pInput);
1811
18.8M
        *pScalefactor += SCALEFACTOR16;
1812
18.8M
        break;
1813
7.87M
      case 8:
1814
7.87M
        fft_8(pInput);
1815
7.87M
        *pScalefactor += SCALEFACTOR8;
1816
7.87M
        break;
1817
0
      case 2:
1818
0
        fft2(pInput);
1819
0
        *pScalefactor += SCALEFACTOR2;
1820
0
        break;
1821
0
      case 3:
1822
0
        fft3(pInput);
1823
0
        *pScalefactor += SCALEFACTOR3;
1824
0
        break;
1825
909k
      case 4:
1826
909k
        fft_4(pInput);
1827
909k
        *pScalefactor += SCALEFACTOR4;
1828
909k
        break;
1829
0
      case 5:
1830
0
        fft5(pInput);
1831
0
        *pScalefactor += SCALEFACTOR5;
1832
0
        break;
1833
3.10M
      case 6:
1834
3.10M
        fft6(pInput);
1835
3.10M
        *pScalefactor += SCALEFACTOR6;
1836
3.10M
        break;
1837
225k
      case 10:
1838
225k
        fft10(pInput);
1839
225k
        *pScalefactor += SCALEFACTOR10;
1840
225k
        break;
1841
12.7M
      case 12:
1842
12.7M
        fft12(pInput);
1843
12.7M
        *pScalefactor += SCALEFACTOR12;
1844
12.7M
        break;
1845
0
      case 15:
1846
0
        fft15(pInput);
1847
0
        *pScalefactor += SCALEFACTOR15;
1848
0
        break;
1849
225k
      case 20:
1850
225k
        fft20(pInput);
1851
225k
        *pScalefactor += SCALEFACTOR20;
1852
225k
        break;
1853
9.74k
      case 24:
1854
9.74k
        fft24(pInput);
1855
9.74k
        *pScalefactor += SCALEFACTOR24;
1856
9.74k
        break;
1857
448k
      case 48:
1858
448k
        fft48(pInput);
1859
448k
        *pScalefactor += SCALEFACTOR48;
1860
448k
        break;
1861
217k
      case 60:
1862
217k
        fft60(pInput);
1863
217k
        *pScalefactor += SCALEFACTOR60;
1864
217k
        break;
1865
405k
      case 64:
1866
405k
        dit_fft(pInput, 6, SineTable512, 512);
1867
405k
        *pScalefactor += SCALEFACTOR64;
1868
405k
        break;
1869
251
      case 80:
1870
251
        fft80(pInput);
1871
251
        *pScalefactor += SCALEFACTOR80;
1872
251
        break;
1873
76.5k
      case 96:
1874
76.5k
        fft96(pInput);
1875
76.5k
        *pScalefactor += SCALEFACTOR96;
1876
76.5k
        break;
1877
3.94k
      case 120:
1878
3.94k
        fft120(pInput);
1879
3.94k
        *pScalefactor += SCALEFACTOR120;
1880
3.94k
        break;
1881
75.4k
      case 128:
1882
75.4k
        dit_fft(pInput, 7, SineTable512, 512);
1883
75.4k
        *pScalefactor += SCALEFACTOR128;
1884
75.4k
        break;
1885
10.5k
      case 192:
1886
10.5k
        fft192(pInput);
1887
10.5k
        *pScalefactor += SCALEFACTOR192;
1888
10.5k
        break;
1889
193k
      case 240:
1890
193k
        fft240(pInput);
1891
193k
        *pScalefactor += SCALEFACTOR240;
1892
193k
        break;
1893
78.5k
      case 256:
1894
78.5k
        dit_fft(pInput, 8, SineTable512, 512);
1895
78.5k
        *pScalefactor += SCALEFACTOR256;
1896
78.5k
        break;
1897
98.2k
      case 384:
1898
98.2k
        fft384(pInput);
1899
98.2k
        *pScalefactor += SCALEFACTOR384;
1900
98.2k
        break;
1901
15.8k
      case 480:
1902
15.8k
        fft480(pInput);
1903
15.8k
        *pScalefactor += SCALEFACTOR480;
1904
15.8k
        break;
1905
167k
      case 512:
1906
167k
        dit_fft(pInput, 9, SineTable512, 512);
1907
167k
        *pScalefactor += SCALEFACTOR512;
1908
167k
        break;
1909
0
      default:
1910
0
        FDK_ASSERT(0); /* FFT length not supported! */
1911
0
        break;
1912
45.7M
    }
1913
45.7M
  }
1914
81.3M
}
1915
1916
0
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917
0
  switch (length) {
1918
0
    default:
1919
0
      FDK_ASSERT(0); /* IFFT length not supported! */
1920
0
      break;
1921
0
  }
1922
0
}