Coverage Report

Created: 2025-12-14 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aac/libFDK/src/fft.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4
© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5
Forschung e.V. All rights reserved.
6
7
 1.    INTRODUCTION
8
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10
scheme for digital audio. This FDK AAC Codec software is intended to be used on
11
a wide variety of Android devices.
12
13
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14
general perceptual audio codecs. AAC-ELD is considered the best-performing
15
full-bandwidth communications codec by independent studies and is widely
16
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17
specifications.
18
19
Patent licenses for necessary patent claims for the FDK AAC Codec (including
20
those of Fraunhofer) may be obtained through Via Licensing
21
(www.vialicensing.com) or through the respective patent owners individually for
22
the purpose of encoding or decoding bit streams in products that are compliant
23
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24
Android devices already license these patent claims through Via Licensing or
25
directly from the patent owners, and therefore FDK AAC Codec software may
26
already be covered under those patent licenses when it is used for those
27
licensed purposes only.
28
29
Commercially-licensed AAC software libraries, including floating-point versions
30
with enhanced sound quality, are also available from Fraunhofer. Users are
31
encouraged to check the Fraunhofer website for additional applications
32
information and documentation.
33
34
2.    COPYRIGHT LICENSE
35
36
Redistribution and use in source and binary forms, with or without modification,
37
are permitted without payment of copyright license fees provided that you
38
satisfy the following conditions:
39
40
You must retain the complete text of this software license in redistributions of
41
the FDK AAC Codec or your modifications thereto in source code form.
42
43
You must retain the complete text of this software license in the documentation
44
and/or other materials provided with redistributions of the FDK AAC Codec or
45
your modifications thereto in binary form. You must make available free of
46
charge copies of the complete source code of the FDK AAC Codec and your
47
modifications thereto to recipients of copies in binary form.
48
49
The name of Fraunhofer may not be used to endorse or promote products derived
50
from this library without prior written permission.
51
52
You may not charge copyright license fees for anyone to use, copy or distribute
53
the FDK AAC Codec software or your modifications thereto.
54
55
Your modified versions of the FDK AAC Codec must carry prominent notices stating
56
that you changed the software and the date of any change. For modified versions
57
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59
AAC Codec Library for Android."
60
61
3.    NO PATENT LICENSE
62
63
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65
Fraunhofer provides no warranty of patent non-infringement with respect to this
66
software.
67
68
You may use this FDK AAC Codec software or modifications thereto only for
69
purposes that are authorized by appropriate patent licenses.
70
71
4.    DISCLAIMER
72
73
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75
including but not limited to the implied warranties of merchantability and
76
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78
or consequential damages, including but not limited to procurement of substitute
79
goods or services; loss of use, data, or profits, or business interruption,
80
however caused and on any theory of liability, whether in contract, strict
81
liability, or tort (including negligence), arising in any way out of the use of
82
this software, even if advised of the possibility of such damage.
83
84
5.    CONTACT INFORMATION
85
86
Fraunhofer Institute for Integrated Circuits IIS
87
Attention: Audio and Multimedia Departments - FDK AAC LL
88
Am Wolfsmantel 33
89
91058 Erlangen, Germany
90
91
www.iis.fraunhofer.de/amm
92
amm-info@iis.fraunhofer.de
93
----------------------------------------------------------------------------- */
94
95
/******************* Library for basic calculation routines ********************
96
97
   Author(s):   Josef Hoepfl, DSP Solutions
98
99
   Description: Fix point FFT
100
101
*******************************************************************************/
102
103
#include "fft_rad2.h"
104
#include "FDK_tools_rom.h"
105
106
270M
#define W_PiFOURTH STC(0x5a82799a)
107
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
108
#ifndef SUMDIFF_PIFOURTH
109
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
110
694M
  {                                       \
111
694M
    FIXP_DBL wa, wb;                      \
112
694M
    wa = fMultDiv2(a, W_PiFOURTH);        \
113
694M
    wb = fMultDiv2(b, W_PiFOURTH);        \
114
694M
    diff = wb - wa;                       \
115
694M
    sum = wb + wa;                        \
116
694M
  }
117
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
118
  {                                               \
119
    FIXP_SGL wa, wb;                              \
120
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
121
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
122
    diff = wb - wa;                               \
123
    sum = wb + wa;                                \
124
  }
125
#endif
126
127
#define SCALEFACTOR2048 10
128
#define SCALEFACTOR1024 9
129
184k
#define SCALEFACTOR512 8
130
83.9k
#define SCALEFACTOR256 7
131
79.5k
#define SCALEFACTOR128 6
132
432k
#define SCALEFACTOR64 5
133
38.4M
#define SCALEFACTOR32 4
134
19.9M
#define SCALEFACTOR16 3
135
7.85M
#define SCALEFACTOR8 2
136
1.82M
#define SCALEFACTOR4 1
137
3.35M
#define SCALEFACTOR2 1
138
139
3.42M
#define SCALEFACTOR3 1
140
207k
#define SCALEFACTOR5 1
141
3.34M
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
142
#define SCALEFACTOR7 2
143
#define SCALEFACTOR9 2
144
4.34M
#define SCALEFACTOR10 5
145
14.0M
#define SCALEFACTOR12 3
146
470k
#define SCALEFACTOR15 3
147
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
148
207k
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
149
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
150
10.2k
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
151
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
152
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
153
480k
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
154
272k
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
155
187
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
156
82.5k
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
157
2.79k
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
158
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
159
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
160
11.3k
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
161
178k
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
162
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
163
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
164
102k
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
165
16.3k
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167
#include "fft.h"
168
169
#ifndef FUNCTION_fft2
170
171
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172
 * with factor 0.5 */
173
10.1M
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174
10.1M
  FIXP_DBL r1, i1;
175
10.1M
  FIXP_DBL r2, i2;
176
177
  /* real part */
178
10.1M
  r1 = pDat[2];
179
10.1M
  r2 = pDat[0];
180
181
  /* imaginary part */
182
10.1M
  i1 = pDat[3];
183
10.1M
  i2 = pDat[1];
184
185
  /* real part */
186
10.1M
  pDat[0] = (r2 + r1) >> 1;
187
10.1M
  pDat[2] = (r2 - r1) >> 1;
188
189
  /* imaginary part */
190
10.1M
  pDat[1] = (i2 + i1) >> 1;
191
10.1M
  pDat[3] = (i2 - i1) >> 1;
192
10.1M
}
193
#endif /* FUNCTION_fft2 */
194
195
18.6M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
196
197
#ifndef FUNCTION_fft3
198
/* Performs the FFT of length 3 according to the algorithm after winograd. */
199
9.33M
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200
9.33M
  FIXP_DBL r1, r2;
201
9.33M
  FIXP_DBL s1, s2;
202
9.33M
  FIXP_DBL pD;
203
204
  /* real part */
205
9.33M
  r1 = pDat[2] + pDat[4];
206
9.33M
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207
9.33M
  pD = pDat[0] >> 1;
208
9.33M
  pDat[0] = pD + (r1 >> 1);
209
9.33M
  r1 = pD - (r1 >> 2);
210
211
  /* imaginary part */
212
9.33M
  s1 = pDat[3] + pDat[5];
213
9.33M
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214
9.33M
  pD = pDat[1] >> 1;
215
9.33M
  pDat[1] = pD + (s1 >> 1);
216
9.33M
  s1 = pD - (s1 >> 2);
217
218
  /* combination */
219
9.33M
  pDat[2] = r1 - s2;
220
9.33M
  pDat[4] = r1 + s2;
221
9.33M
  pDat[3] = s1 + r2;
222
9.33M
  pDat[5] = s1 - r2;
223
9.33M
}
224
#endif /* #ifndef FUNCTION_fft3 */
225
226
143M
#define F5C(x) STC(x)
227
228
28.6M
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
229
28.6M
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
230
28.6M
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
231
28.6M
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
232
28.6M
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
233
234
/* performs the FFT of length 5 according to the algorithm after winograd */
235
/* This version works with a prescale of 2 instead of 3 */
236
14.3M
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237
14.3M
  FIXP_DBL r1, r2, r3, r4;
238
14.3M
  FIXP_DBL s1, s2, s3, s4;
239
14.3M
  FIXP_DBL t;
240
241
  /* real part */
242
14.3M
  r1 = (pDat[2] + pDat[8]) >> 1;
243
14.3M
  r4 = (pDat[2] - pDat[8]) >> 1;
244
14.3M
  r3 = (pDat[4] + pDat[6]) >> 1;
245
14.3M
  r2 = (pDat[4] - pDat[6]) >> 1;
246
14.3M
  t = fMult((r1 - r3), C54);
247
14.3M
  r1 = r1 + r3;
248
14.3M
  pDat[0] = (pDat[0] >> 1) + r1;
249
  /* Bit shift left because of the constant C55 which was scaled with the factor
250
     0.5 because of the representation of the values as fracts */
251
14.3M
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252
14.3M
  r3 = r1 - t;
253
14.3M
  r1 = r1 + t;
254
14.3M
  t = fMult((r4 + r2), C51);
255
  /* Bit shift left because of the constant C55 which was scaled with the factor
256
     0.5 because of the representation of the values as fracts */
257
14.3M
  r4 = t + (fMultDiv2(r4, C52) << (2));
258
14.3M
  r2 = t + fMult(r2, C53);
259
260
  /* imaginary part */
261
14.3M
  s1 = (pDat[3] + pDat[9]) >> 1;
262
14.3M
  s4 = (pDat[3] - pDat[9]) >> 1;
263
14.3M
  s3 = (pDat[5] + pDat[7]) >> 1;
264
14.3M
  s2 = (pDat[5] - pDat[7]) >> 1;
265
14.3M
  t = fMult((s1 - s3), C54);
266
14.3M
  s1 = s1 + s3;
267
14.3M
  pDat[1] = (pDat[1] >> 1) + s1;
268
  /* Bit shift left because of the constant C55 which was scaled with the factor
269
     0.5 because of the representation of the values as fracts */
270
14.3M
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271
14.3M
  s3 = s1 - t;
272
14.3M
  s1 = s1 + t;
273
14.3M
  t = fMult((s4 + s2), C51);
274
  /* Bit shift left because of the constant C55 which was scaled with the factor
275
     0.5 because of the representation of the values as fracts */
276
14.3M
  s4 = t + (fMultDiv2(s4, C52) << (2));
277
14.3M
  s2 = t + fMult(s2, C53);
278
279
  /* combination */
280
14.3M
  pDat[2] = r1 + s2;
281
14.3M
  pDat[8] = r1 - s2;
282
14.3M
  pDat[4] = r3 - s4;
283
14.3M
  pDat[6] = r3 + s4;
284
285
14.3M
  pDat[3] = s1 - r2;
286
14.3M
  pDat[9] = s1 + r2;
287
14.3M
  pDat[5] = s3 + r4;
288
14.3M
  pDat[7] = s3 - r4;
289
14.3M
}
290
291
4.14M
#define F5C(x) STC(x)
292
293
828k
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
294
828k
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
295
828k
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
296
828k
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
297
828k
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
298
/**
299
 * \brief    Function performs a complex 10-point FFT
300
 *           The FFT is performed inplace. The result of the FFT
301
 *           is scaled by SCALEFACTOR10 bits.
302
 *
303
 *           WOPS FLC version:                    1093 cycles
304
 *           WOPS with 32x16 bit multiplications:  196 cycles
305
 *
306
 * \param    [i/o] re    real input / output
307
 * \param    [i/o] im    imag input / output
308
 * \param    [i  ] s     stride real and imag input / output
309
 *
310
 * \return   void
311
 */
312
static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313
207k
{
314
207k
  FIXP_DBL t;
315
207k
  FIXP_DBL x0, x1, x2, x3, x4;
316
207k
  FIXP_DBL r1, r2, r3, r4;
317
207k
  FIXP_DBL s1, s2, s3, s4;
318
207k
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319
207k
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321
207k
  const int s = 1;  // stride factor
322
323
  /* 2 fft5 stages */
324
325
  /* real part */
326
207k
  x0 = (x[s * 0] >> SCALEFACTOR10);
327
207k
  x1 = (x[s * 4] >> SCALEFACTOR10);
328
207k
  x2 = (x[s * 8] >> SCALEFACTOR10);
329
207k
  x3 = (x[s * 12] >> SCALEFACTOR10);
330
207k
  x4 = (x[s * 16] >> SCALEFACTOR10);
331
332
207k
  r1 = (x3 + x2);
333
207k
  r4 = (x3 - x2);
334
207k
  r3 = (x1 + x4);
335
207k
  r2 = (x1 - x4);
336
207k
  t = fMult((r1 - r3), C54);
337
207k
  r1 = (r1 + r3);
338
207k
  y00 = (x0 + r1);
339
207k
  r1 = (y00 + ((fMult(r1, C55) << 1)));
340
207k
  r3 = (r1 - t);
341
207k
  r1 = (r1 + t);
342
207k
  t = fMult((r4 + r2), C51);
343
207k
  r4 = (t + (fMult(r4, C52) << 1));
344
207k
  r2 = (t + fMult(r2, C53));
345
346
  /* imaginary part */
347
207k
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348
207k
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349
207k
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350
207k
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351
207k
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353
207k
  s1 = (x3 + x2);
354
207k
  s4 = (x3 - x2);
355
207k
  s3 = (x1 + x4);
356
207k
  s2 = (x1 - x4);
357
207k
  t = fMult((s1 - s3), C54);
358
207k
  s1 = (s1 + s3);
359
207k
  y01 = (x0 + s1);
360
207k
  s1 = (y01 + (fMult(s1, C55) << 1));
361
207k
  s3 = (s1 - t);
362
207k
  s1 = (s1 + t);
363
207k
  t = fMult((s4 + s2), C51);
364
207k
  s4 = (t + (fMult(s4, C52) << 1));
365
207k
  s2 = (t + fMult(s2, C53));
366
367
  /* combination */
368
207k
  y04 = (r1 + s2);
369
207k
  y16 = (r1 - s2);
370
207k
  y08 = (r3 - s4);
371
207k
  y12 = (r3 + s4);
372
373
207k
  y05 = (s1 - r2);
374
207k
  y17 = (s1 + r2);
375
207k
  y09 = (s3 + r4);
376
207k
  y13 = (s3 - r4);
377
378
  /* real part */
379
207k
  x0 = (x[s * 10] >> SCALEFACTOR10);
380
207k
  x1 = (x[s * 2] >> SCALEFACTOR10);
381
207k
  x2 = (x[s * 6] >> SCALEFACTOR10);
382
207k
  x3 = (x[s * 14] >> SCALEFACTOR10);
383
207k
  x4 = (x[s * 18] >> SCALEFACTOR10);
384
385
207k
  r1 = (x1 + x4);
386
207k
  r4 = (x1 - x4);
387
207k
  r3 = (x3 + x2);
388
207k
  r2 = (x3 - x2);
389
207k
  t = fMult((r1 - r3), C54);
390
207k
  r1 = (r1 + r3);
391
207k
  y02 = (x0 + r1);
392
207k
  r1 = (y02 + ((fMult(r1, C55) << 1)));
393
207k
  r3 = (r1 - t);
394
207k
  r1 = (r1 + t);
395
207k
  t = fMult(((r4 + r2)), C51);
396
207k
  r4 = (t + (fMult(r4, C52) << 1));
397
207k
  r2 = (t + fMult(r2, C53));
398
399
  /* imaginary part */
400
207k
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401
207k
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402
207k
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403
207k
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404
207k
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406
207k
  s1 = (x1 + x4);
407
207k
  s4 = (x1 - x4);
408
207k
  s3 = (x3 + x2);
409
207k
  s2 = (x3 - x2);
410
207k
  t = fMult((s1 - s3), C54);
411
207k
  s1 = (s1 + s3);
412
207k
  y03 = (x0 + s1);
413
207k
  s1 = (y03 + (fMult(s1, C55) << 1));
414
207k
  s3 = (s1 - t);
415
207k
  s1 = (s1 + t);
416
207k
  t = fMult((s4 + s2), C51);
417
207k
  s4 = (t + (fMult(s4, C52) << 1));
418
207k
  s2 = (t + fMult(s2, C53));
419
420
  /* combination */
421
207k
  y06 = (r1 + s2);
422
207k
  y18 = (r1 - s2);
423
207k
  y10 = (r3 - s4);
424
207k
  y14 = (r3 + s4);
425
426
207k
  y07 = (s1 - r2);
427
207k
  y19 = (s1 + r2);
428
207k
  y11 = (s3 + r4);
429
207k
  y15 = (s3 - r4);
430
431
  /* 5 fft2 stages */
432
207k
  x[s * 0] = (y00 + y02);
433
207k
  x[s * 0 + 1] = (y01 + y03);
434
207k
  x[s * 10] = (y00 - y02);
435
207k
  x[s * 10 + 1] = (y01 - y03);
436
437
207k
  x[s * 4] = (y04 + y06);
438
207k
  x[s * 4 + 1] = (y05 + y07);
439
207k
  x[s * 14] = (y04 - y06);
440
207k
  x[s * 14 + 1] = (y05 - y07);
441
442
207k
  x[s * 8] = (y08 + y10);
443
207k
  x[s * 8 + 1] = (y09 + y11);
444
207k
  x[s * 18] = (y08 - y10);
445
207k
  x[s * 18 + 1] = (y09 - y11);
446
447
207k
  x[s * 12] = (y12 + y14);
448
207k
  x[s * 12 + 1] = (y13 + y15);
449
207k
  x[s * 2] = (y12 - y14);
450
207k
  x[s * 2 + 1] = (y13 - y15);
451
452
207k
  x[s * 16] = (y16 + y18);
453
207k
  x[s * 16 + 1] = (y17 + y19);
454
207k
  x[s * 6] = (y16 - y18);
455
207k
  x[s * 6 + 1] = (y17 - y19);
456
207k
}
457
458
#ifndef FUNCTION_fft12
459
#define FUNCTION_fft12
460
461
#undef C31
462
195M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
463
464
18.8M
static inline void fft12(FIXP_DBL *pInput) {
465
18.8M
  FIXP_DBL aDst[24];
466
18.8M
  FIXP_DBL *pSrc, *pDst;
467
18.8M
  int i;
468
469
18.8M
  pSrc = pInput;
470
18.8M
  pDst = aDst;
471
18.8M
  FIXP_DBL r1, r2, s1, s2, pD;
472
473
  /* First 3*2 samples are shifted right by 2 before output */
474
18.8M
  r1 = pSrc[8] + pSrc[16];
475
18.8M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
476
18.8M
  pD = pSrc[0] >> 1;
477
18.8M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
478
18.8M
  r1 = pD - (r1 >> 2);
479
480
  /* imaginary part */
481
18.8M
  s1 = pSrc[9] + pSrc[17];
482
18.8M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
483
18.8M
  pD = pSrc[1] >> 1;
484
18.8M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
485
18.8M
  s1 = pD - (s1 >> 2);
486
487
  /* combination */
488
18.8M
  pDst[2] = (r1 - s2) >> 1;
489
18.8M
  pDst[3] = (s1 + r2) >> 1;
490
18.8M
  pDst[4] = (r1 + s2) >> 1;
491
18.8M
  pDst[5] = (s1 - r2) >> 1;
492
18.8M
  pSrc += 2;
493
18.8M
  pDst += 6;
494
495
18.8M
  const FIXP_STB *pVecRe = RotVectorReal12;
496
18.8M
  const FIXP_STB *pVecIm = RotVectorImag12;
497
18.8M
  FIXP_DBL re, im;
498
18.8M
  FIXP_STB vre, vim;
499
56.5M
  for (i = 0; i < 2; i++) {
500
    /* sample 0,1 are shifted right by 2 before output */
501
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
502
     * output */
503
504
37.7M
    r1 = pSrc[8] + pSrc[16];
505
37.7M
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
506
37.7M
    pD = pSrc[0] >> 1;
507
37.7M
    pDst[0] = (pD + (r1 >> 1)) >> 1;
508
37.7M
    r1 = pD - (r1 >> 2);
509
510
    /* imaginary part */
511
37.7M
    s1 = pSrc[9] + pSrc[17];
512
37.7M
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
513
37.7M
    pD = pSrc[1] >> 1;
514
37.7M
    pDst[1] = (pD + (s1 >> 1)) >> 1;
515
37.7M
    s1 = pD - (s1 >> 2);
516
517
    /* combination */
518
37.7M
    re = (r1 - s2) >> 0;
519
37.7M
    im = (s1 + r2) >> 0;
520
37.7M
    vre = *pVecRe++;
521
37.7M
    vim = *pVecIm++;
522
37.7M
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
523
524
37.7M
    re = (r1 + s2) >> 0;
525
37.7M
    im = (s1 - r2) >> 0;
526
37.7M
    vre = *pVecRe++;
527
37.7M
    vim = *pVecIm++;
528
37.7M
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
529
530
37.7M
    pDst += 6;
531
37.7M
    pSrc += 2;
532
37.7M
  }
533
  /* sample 0,1 are shifted right by 2 before output */
534
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
535
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
536
18.8M
  r1 = pSrc[8] + pSrc[16];
537
18.8M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
538
18.8M
  pD = pSrc[0] >> 1;
539
18.8M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
540
18.8M
  r1 = pD - (r1 >> 2);
541
542
  /* imaginary part */
543
18.8M
  s1 = pSrc[9] + pSrc[17];
544
18.8M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
545
18.8M
  pD = pSrc[1] >> 1;
546
18.8M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
547
18.8M
  s1 = pD - (s1 >> 2);
548
549
  /* combination */
550
18.8M
  pDst[2] = (s1 + r2) >> 1;
551
18.8M
  pDst[3] = (s2 - r1) >> 1;
552
18.8M
  pDst[4] = -((r1 + s2) >> 1);
553
18.8M
  pDst[5] = (r2 - s1) >> 1;
554
555
  /* Perform 3 times the fft of length 4. The input samples are at the address
556
  of aDst and the output samples are at the address of pInput. The input vector
557
  for the fft of length 4 is built of the interleaved samples in aDst, the
558
  output samples are stored consecutively at the address of pInput.
559
  */
560
18.8M
  pSrc = aDst;
561
18.8M
  pDst = pInput;
562
75.4M
  for (i = 0; i < 3; i++) {
563
    /* inline FFT4 merged with incoming resorting loop */
564
56.5M
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
565
566
56.5M
    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
567
56.5M
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
568
56.5M
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
569
56.5M
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
570
571
56.5M
    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
572
56.5M
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
573
574
56.5M
    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
575
56.5M
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */
576
577
56.5M
    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
578
56.5M
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
579
580
56.5M
    a10 = a10 - pSrc[18]; /* Re C - Re D */
581
56.5M
    a30 = a30 - pSrc[19]; /* Im C - Im D */
582
583
56.5M
    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
584
56.5M
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
585
56.5M
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
586
56.5M
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
587
588
56.5M
    pSrc += 2;
589
56.5M
    pDst += 2;
590
56.5M
  }
591
18.8M
}
592
#endif /* FUNCTION_fft12 */
593
594
#ifndef FUNCTION_fft15
595
596
166M
#define N3 3
597
94.4M
#define N5 5
598
53.9M
#define N6 6
599
166M
#define N15 15
600
601
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
602
 * length 5. */
603
4.49M
static inline void fft15(FIXP_DBL *pInput) {
604
4.49M
  FIXP_DBL aDst[2 * N15];
605
4.49M
  FIXP_DBL aDst1[2 * N15];
606
4.49M
  int i, k, l;
607
608
  /* Sort input vector for fft's of length 3
609
  input3(0:2)   = [input(0) input(5) input(10)];
610
  input3(3:5)   = [input(3) input(8) input(13)];
611
  input3(6:8)   = [input(6) input(11) input(1)];
612
  input3(9:11)  = [input(9) input(14) input(4)];
613
  input3(12:14) = [input(12) input(2) input(7)]; */
614
4.49M
  {
615
4.49M
    const FIXP_DBL *pSrc = pInput;
616
4.49M
    FIXP_DBL *RESTRICT pDst = aDst;
617
    /* Merge 3 loops into one, skip call of fft3 */
618
26.9M
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
619
22.4M
      pDst[k + 0] = pSrc[l];
620
22.4M
      pDst[k + 1] = pSrc[l + 1];
621
22.4M
      l += 2 * N5;
622
22.4M
      if (l >= (2 * N15)) l -= (2 * N15);
623
624
22.4M
      pDst[k + 2] = pSrc[l];
625
22.4M
      pDst[k + 3] = pSrc[l + 1];
626
22.4M
      l += 2 * N5;
627
22.4M
      if (l >= (2 * N15)) l -= (2 * N15);
628
22.4M
      pDst[k + 4] = pSrc[l];
629
22.4M
      pDst[k + 5] = pSrc[l + 1];
630
22.4M
      l += (2 * N5) + (2 * N3);
631
22.4M
      if (l >= (2 * N15)) l -= (2 * N15);
632
633
      /* fft3 merged with shift right by 2 loop */
634
22.4M
      FIXP_DBL r1, r2, r3;
635
22.4M
      FIXP_DBL s1, s2;
636
      /* real part */
637
22.4M
      r1 = pDst[k + 2] + pDst[k + 4];
638
22.4M
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
639
22.4M
      s1 = pDst[k + 0];
640
22.4M
      pDst[k + 0] = (s1 + r1) >> 2;
641
22.4M
      r1 = s1 - (r1 >> 1);
642
643
      /* imaginary part */
644
22.4M
      s1 = pDst[k + 3] + pDst[k + 5];
645
22.4M
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
646
22.4M
      r3 = pDst[k + 1];
647
22.4M
      pDst[k + 1] = (r3 + s1) >> 2;
648
22.4M
      s1 = r3 - (s1 >> 1);
649
650
      /* combination */
651
22.4M
      pDst[k + 2] = (r1 - s2) >> 2;
652
22.4M
      pDst[k + 4] = (r1 + s2) >> 2;
653
22.4M
      pDst[k + 3] = (s1 + r2) >> 2;
654
22.4M
      pDst[k + 5] = (s1 - r2) >> 2;
655
22.4M
    }
656
4.49M
  }
657
  /* Sort input vector for fft's of length 5
658
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
659
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
660
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
661
  /* Merge 2 loops into one, brings about 10% */
662
4.49M
  {
663
4.49M
    const FIXP_DBL *pSrc = aDst;
664
4.49M
    FIXP_DBL *RESTRICT pDst = aDst1;
665
17.9M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
666
13.4M
      l = 2 * i;
667
13.4M
      pDst[k + 0] = pSrc[l + 0];
668
13.4M
      pDst[k + 1] = pSrc[l + 1];
669
13.4M
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
670
13.4M
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
671
13.4M
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
672
13.4M
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
673
13.4M
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
674
13.4M
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
675
13.4M
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
676
13.4M
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
677
13.4M
      fft5(&pDst[k]);
678
13.4M
    }
679
4.49M
  }
680
  /* Sort output vector of length 15
681
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
682
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
683
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
684
  /* optimize clumsy loop, brings about 5% */
685
4.49M
  {
686
4.49M
    const FIXP_DBL *pSrc = aDst1;
687
4.49M
    FIXP_DBL *RESTRICT pDst = pInput;
688
17.9M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
689
13.4M
      pDst[k + 0] = pSrc[l];
690
13.4M
      pDst[k + 1] = pSrc[l + 1];
691
13.4M
      l += (2 * N6);
692
13.4M
      if (l >= (2 * N15)) l -= (2 * N15);
693
13.4M
      pDst[k + 2] = pSrc[l];
694
13.4M
      pDst[k + 3] = pSrc[l + 1];
695
13.4M
      l += (2 * N6);
696
13.4M
      if (l >= (2 * N15)) l -= (2 * N15);
697
13.4M
      pDst[k + 4] = pSrc[l];
698
13.4M
      pDst[k + 5] = pSrc[l + 1];
699
13.4M
      l += (2 * N6);
700
13.4M
      if (l >= (2 * N15)) l -= (2 * N15);
701
13.4M
      pDst[k + 6] = pSrc[l];
702
13.4M
      pDst[k + 7] = pSrc[l + 1];
703
13.4M
      l += (2 * N6);
704
13.4M
      if (l >= (2 * N15)) l -= (2 * N15);
705
13.4M
      pDst[k + 8] = pSrc[l];
706
13.4M
      pDst[k + 9] = pSrc[l + 1];
707
13.4M
      l += 2; /* no modulo check needed, it cannot occur */
708
13.4M
    }
709
4.49M
  }
710
4.49M
}
711
#endif /* FUNCTION_fft15 */
712
713
/*
714
 Select shift placement.
715
 Some processors like ARM may shift "for free" in combination with an addition
716
 or substraction, but others don't so either combining shift with +/- or reduce
717
 the total amount or shift operations is optimal
718
 */
719
#if !defined(__arm__)
720
1.00G
#define SHIFT_A >> 1
721
#define SHIFT_B
722
#else
723
#define SHIFT_A
724
#define SHIFT_B >> 1
725
#endif
726
727
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728
                         */
729
730
/* This defines prevents this array to be declared twice, if 16-bit fft is
731
 * enabled too */
732
#define FUNCTION_DATA_fft_16_w16
733
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
734
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
736
LNK_SECTION_CODE_L1
737
22.5M
inline void fft_16(FIXP_DBL *RESTRICT x) {
738
22.5M
  FIXP_DBL vr, ur;
739
22.5M
  FIXP_DBL vr2, ur2;
740
22.5M
  FIXP_DBL vr3, ur3;
741
22.5M
  FIXP_DBL vr4, ur4;
742
22.5M
  FIXP_DBL vi, ui;
743
22.5M
  FIXP_DBL vi2, ui2;
744
22.5M
  FIXP_DBL vi3, ui3;
745
746
22.5M
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
747
22.5M
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
748
22.5M
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
749
22.5M
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
750
22.5M
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
751
22.5M
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
752
753
22.5M
  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
754
22.5M
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
755
756
22.5M
  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
757
22.5M
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
758
22.5M
  vr -= x[16];              /* Re A - Re B */
759
22.5M
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
760
22.5M
  ur -= x[17];              /* Im A - Im B */
761
22.5M
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
762
763
22.5M
  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
764
22.5M
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
765
766
22.5M
  x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
767
22.5M
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
768
769
22.5M
  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
770
22.5M
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
771
772
22.5M
  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
773
22.5M
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
774
775
22.5M
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
776
22.5M
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
777
22.5M
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
778
22.5M
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
779
22.5M
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
780
22.5M
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
781
22.5M
  vr2 -= x[20];                            /* Re A - Re B */
782
22.5M
  ur2 -= x[21];                            /* Im A - Im B */
783
22.5M
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
784
22.5M
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
785
786
22.5M
  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
787
22.5M
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
788
789
22.5M
  x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
790
22.5M
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
791
792
22.5M
  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
793
22.5M
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
794
795
22.5M
  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
796
22.5M
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
797
798
22.5M
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
799
22.5M
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
800
22.5M
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
801
22.5M
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
802
22.5M
  vr3 -= x[18];               /* Re A - Re B */
803
22.5M
  ur3 -= x[19];               /* Im A - Im B */
804
22.5M
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
805
22.5M
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
806
22.5M
  x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
807
22.5M
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
808
809
22.5M
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
810
22.5M
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
811
22.5M
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
812
22.5M
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
813
22.5M
  vr4 -= x[22];                /* Re A - Re B */
814
22.5M
  ur4 -= x[23];                /* Im A - Im B */
815
816
22.5M
  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
817
22.5M
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
818
819
22.5M
  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
820
22.5M
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
821
22.5M
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
822
22.5M
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
823
22.5M
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
824
22.5M
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
825
826
  // xt1 =  0
827
  // xt2 =  8
828
22.5M
  vr = x[8];
829
22.5M
  vi = x[9];
830
22.5M
  ur = x[0] >> 1;
831
22.5M
  ui = x[1] >> 1;
832
22.5M
  x[0] = ur + (vr >> 1);
833
22.5M
  x[1] = ui + (vi >> 1);
834
22.5M
  x[8] = ur - (vr >> 1);
835
22.5M
  x[9] = ui - (vi >> 1);
836
837
  // xt1 =  4
838
  // xt2 = 12
839
22.5M
  vr = x[13];
840
22.5M
  vi = x[12];
841
22.5M
  ur = x[4] >> 1;
842
22.5M
  ui = x[5] >> 1;
843
22.5M
  x[4] = ur + (vr >> 1);
844
22.5M
  x[5] = ui - (vi >> 1);
845
22.5M
  x[12] = ur - (vr >> 1);
846
22.5M
  x[13] = ui + (vi >> 1);
847
848
  // xt1 = 16
849
  // xt2 = 24
850
22.5M
  vr = x[24];
851
22.5M
  vi = x[25];
852
22.5M
  ur = x[16] >> 1;
853
22.5M
  ui = x[17] >> 1;
854
22.5M
  x[16] = ur + (vr >> 1);
855
22.5M
  x[17] = ui + (vi >> 1);
856
22.5M
  x[24] = ur - (vr >> 1);
857
22.5M
  x[25] = ui - (vi >> 1);
858
859
  // xt1 = 20
860
  // xt2 = 28
861
22.5M
  vr = x[29];
862
22.5M
  vi = x[28];
863
22.5M
  ur = x[20] >> 1;
864
22.5M
  ui = x[21] >> 1;
865
22.5M
  x[20] = ur + (vr >> 1);
866
22.5M
  x[21] = ui - (vi >> 1);
867
22.5M
  x[28] = ur - (vr >> 1);
868
22.5M
  x[29] = ui + (vi >> 1);
869
870
  // xt1 =  2
871
  // xt2 = 10
872
22.5M
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
873
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
874
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
875
22.5M
  ur = x[2];
876
22.5M
  ui = x[3];
877
22.5M
  x[2] = (ur >> 1) + vr;
878
22.5M
  x[3] = (ui >> 1) + vi;
879
22.5M
  x[10] = (ur >> 1) - vr;
880
22.5M
  x[11] = (ui >> 1) - vi;
881
882
  // xt1 =  6
883
  // xt2 = 14
884
22.5M
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
885
22.5M
  ur = x[6];
886
22.5M
  ui = x[7];
887
22.5M
  x[6] = (ur >> 1) + vr;
888
22.5M
  x[7] = (ui >> 1) - vi;
889
22.5M
  x[14] = (ur >> 1) - vr;
890
22.5M
  x[15] = (ui >> 1) + vi;
891
892
  // xt1 = 18
893
  // xt2 = 26
894
22.5M
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
895
22.5M
  ur = x[18];
896
22.5M
  ui = x[19];
897
22.5M
  x[18] = (ur >> 1) + vr;
898
22.5M
  x[19] = (ui >> 1) + vi;
899
22.5M
  x[26] = (ur >> 1) - vr;
900
22.5M
  x[27] = (ui >> 1) - vi;
901
902
  // xt1 = 22
903
  // xt2 = 30
904
22.5M
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
905
22.5M
  ur = x[22];
906
22.5M
  ui = x[23];
907
22.5M
  x[22] = (ur >> 1) + vr;
908
22.5M
  x[23] = (ui >> 1) - vi;
909
22.5M
  x[30] = (ur >> 1) - vr;
910
22.5M
  x[31] = (ui >> 1) + vi;
911
912
  // xt1 =  0
913
  // xt2 = 16
914
22.5M
  vr = x[16];
915
22.5M
  vi = x[17];
916
22.5M
  ur = x[0] >> 1;
917
22.5M
  ui = x[1] >> 1;
918
22.5M
  x[0] = ur + (vr >> 1);
919
22.5M
  x[1] = ui + (vi >> 1);
920
22.5M
  x[16] = ur - (vr >> 1);
921
22.5M
  x[17] = ui - (vi >> 1);
922
923
  // xt1 =  8
924
  // xt2 = 24
925
22.5M
  vi = x[24];
926
22.5M
  vr = x[25];
927
22.5M
  ur = x[8] >> 1;
928
22.5M
  ui = x[9] >> 1;
929
22.5M
  x[8] = ur + (vr >> 1);
930
22.5M
  x[9] = ui - (vi >> 1);
931
22.5M
  x[24] = ur - (vr >> 1);
932
22.5M
  x[25] = ui + (vi >> 1);
933
934
  // xt1 =  2
935
  // xt2 = 18
936
22.5M
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
937
22.5M
  ur = x[2];
938
22.5M
  ui = x[3];
939
22.5M
  x[2] = (ur >> 1) + vr;
940
22.5M
  x[3] = (ui >> 1) + vi;
941
22.5M
  x[18] = (ur >> 1) - vr;
942
22.5M
  x[19] = (ui >> 1) - vi;
943
944
  // xt1 = 10
945
  // xt2 = 26
946
22.5M
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
947
22.5M
  ur = x[10];
948
22.5M
  ui = x[11];
949
22.5M
  x[10] = (ur >> 1) + vr;
950
22.5M
  x[11] = (ui >> 1) - vi;
951
22.5M
  x[26] = (ur >> 1) - vr;
952
22.5M
  x[27] = (ui >> 1) + vi;
953
954
  // xt1 =  4
955
  // xt2 = 20
956
22.5M
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
957
22.5M
  ur = x[4];
958
22.5M
  ui = x[5];
959
22.5M
  x[4] = (ur >> 1) + vr;
960
22.5M
  x[5] = (ui >> 1) + vi;
961
22.5M
  x[20] = (ur >> 1) - vr;
962
22.5M
  x[21] = (ui >> 1) - vi;
963
964
  // xt1 = 12
965
  // xt2 = 28
966
22.5M
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
967
22.5M
  ur = x[12];
968
22.5M
  ui = x[13];
969
22.5M
  x[12] = (ur >> 1) + vr;
970
22.5M
  x[13] = (ui >> 1) - vi;
971
22.5M
  x[28] = (ur >> 1) - vr;
972
22.5M
  x[29] = (ui >> 1) + vi;
973
974
  // xt1 =  6
975
  // xt2 = 22
976
22.5M
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
977
22.5M
  ur = x[6];
978
22.5M
  ui = x[7];
979
22.5M
  x[6] = (ur >> 1) + vr;
980
22.5M
  x[7] = (ui >> 1) + vi;
981
22.5M
  x[22] = (ur >> 1) - vr;
982
22.5M
  x[23] = (ui >> 1) - vi;
983
984
  // xt1 = 14
985
  // xt2 = 30
986
22.5M
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
987
22.5M
  ur = x[14];
988
22.5M
  ui = x[15];
989
22.5M
  x[14] = (ur >> 1) + vr;
990
22.5M
  x[15] = (ui >> 1) - vi;
991
22.5M
  x[30] = (ur >> 1) - vr;
992
22.5M
  x[31] = (ui >> 1) + vi;
993
22.5M
}
994
#endif /* FUNCTION_fft_16 */
995
996
#ifndef FUNCTION_fft_32
997
static const FIXP_STP fft32_w32[6] = {
998
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
999
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
1000
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
1001
1.11G
#define W_PiFOURTH STC(0x5a82799a)
1002
1003
LNK_SECTION_CODE_L1
1004
39.9M
inline void fft_32(FIXP_DBL *const _x) {
1005
  /*
1006
   * 1+2 stage radix 4
1007
   */
1008
1009
  /////////////////////////////////////////////////////////////////////////////////////////
1010
39.9M
  {
1011
39.9M
    FIXP_DBL *const x = _x;
1012
39.9M
    FIXP_DBL vi, ui;
1013
39.9M
    FIXP_DBL vi2, ui2;
1014
39.9M
    FIXP_DBL vi3, ui3;
1015
39.9M
    FIXP_DBL vr, ur;
1016
39.9M
    FIXP_DBL vr2, ur2;
1017
39.9M
    FIXP_DBL vr3, ur3;
1018
39.9M
    FIXP_DBL vr4, ur4;
1019
1020
    // i = 0
1021
39.9M
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
1022
39.9M
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
1023
39.9M
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
1024
39.9M
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
1025
1026
39.9M
    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1027
39.9M
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1028
1029
39.9M
    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
1030
39.9M
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
1031
1032
39.9M
    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1033
39.9M
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1034
1035
39.9M
    vr -= x[32];             /* Re A - Re B */
1036
39.9M
    ur -= x[33];             /* Im A - Im B */
1037
39.9M
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
1038
39.9M
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
1039
1040
39.9M
    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
1041
39.9M
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
1042
1043
39.9M
    x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1044
39.9M
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1045
1046
39.9M
    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
1047
39.9M
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
1048
1049
39.9M
    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1050
39.9M
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1051
1052
    // i=16
1053
39.9M
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
1054
39.9M
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
1055
1056
39.9M
    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1057
39.9M
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1058
39.9M
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1059
39.9M
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1060
1061
39.9M
    vr2 -= x[36];            /* Re A - Re B */
1062
39.9M
    ur2 -= x[37];            /* Im A - Im B */
1063
39.9M
    vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
1064
39.9M
    ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
1065
1066
39.9M
    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
1067
39.9M
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
1068
1069
39.9M
    x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1070
39.9M
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1071
1072
39.9M
    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
1073
39.9M
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
1074
1075
39.9M
    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1076
39.9M
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1077
1078
    // i = 32
1079
1080
39.9M
    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1081
39.9M
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1082
39.9M
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1083
39.9M
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1084
1085
39.9M
    vr3 -= x[34];              /* Re A - Re B */
1086
39.9M
    ur3 -= x[35];              /* Im A - Im B */
1087
39.9M
    vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
1088
39.9M
    ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
1089
1090
39.9M
    x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1091
39.9M
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1092
1093
    // i=48
1094
1095
39.9M
    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1096
39.9M
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1097
39.9M
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1098
39.9M
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1099
1100
39.9M
    vr4 -= x[38]; /* Re A - Re B */
1101
39.9M
    ur4 -= x[39]; /* Im A - Im B */
1102
1103
39.9M
    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1104
39.9M
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1105
1106
39.9M
    vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
1107
39.9M
    ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
1108
1109
39.9M
    x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1110
39.9M
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1111
39.9M
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1112
39.9M
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1113
1114
    // i=8
1115
39.9M
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
1116
39.9M
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
1117
39.9M
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
1118
39.9M
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
1119
1120
39.9M
    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1121
39.9M
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1122
1123
39.9M
    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
1124
39.9M
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
1125
1126
39.9M
    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1127
39.9M
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1128
1129
39.9M
    vr -= x[40];             /* Re A - Re B */
1130
39.9M
    ur -= x[41];             /* Im A - Im B */
1131
39.9M
    vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
1132
39.9M
    ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
1133
1134
39.9M
    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
1135
39.9M
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
1136
1137
39.9M
    x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1138
39.9M
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1139
1140
39.9M
    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
1141
39.9M
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
1142
1143
39.9M
    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1144
39.9M
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1145
1146
    // i=24
1147
39.9M
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
1148
39.9M
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
1149
1150
39.9M
    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1151
39.9M
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1152
39.9M
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1153
39.9M
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1154
1155
39.9M
    vr2 -= x[44];            /* Re A - Re B */
1156
39.9M
    ur2 -= x[45];            /* Im A - Im B */
1157
39.9M
    vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
1158
39.9M
    ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
1159
1160
39.9M
    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
1161
39.9M
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
1162
1163
39.9M
    x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1164
39.9M
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1165
1166
39.9M
    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
1167
39.9M
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
1168
1169
39.9M
    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1170
39.9M
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1171
1172
    // i=40
1173
1174
39.9M
    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1175
39.9M
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1176
39.9M
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1177
39.9M
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1178
1179
39.9M
    vr3 -= x[42];              /* Re A - Re B */
1180
39.9M
    ur3 -= x[43];              /* Im A - Im B */
1181
39.9M
    vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
1182
39.9M
    ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
1183
1184
39.9M
    x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1185
39.9M
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1186
1187
    // i=56
1188
1189
39.9M
    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1190
39.9M
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1191
39.9M
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1192
39.9M
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1193
1194
39.9M
    vr4 -= x[46]; /* Re A - Re B */
1195
39.9M
    ur4 -= x[47]; /* Im A - Im B */
1196
1197
39.9M
    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1198
39.9M
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1199
1200
39.9M
    vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
1201
39.9M
    ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
1202
1203
39.9M
    x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1204
39.9M
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1205
39.9M
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1206
39.9M
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1207
39.9M
  }
1208
1209
39.9M
  {
1210
39.9M
    FIXP_DBL *xt = _x;
1211
1212
39.9M
    int j = 4;
1213
159M
    do {
1214
159M
      FIXP_DBL vi, ui, vr, ur;
1215
1216
159M
      vr = xt[8];
1217
159M
      vi = xt[9];
1218
159M
      ur = xt[0] >> 1;
1219
159M
      ui = xt[1] >> 1;
1220
159M
      xt[0] = ur + (vr >> 1);
1221
159M
      xt[1] = ui + (vi >> 1);
1222
159M
      xt[8] = ur - (vr >> 1);
1223
159M
      xt[9] = ui - (vi >> 1);
1224
1225
159M
      vr = xt[13];
1226
159M
      vi = xt[12];
1227
159M
      ur = xt[4] >> 1;
1228
159M
      ui = xt[5] >> 1;
1229
159M
      xt[4] = ur + (vr >> 1);
1230
159M
      xt[5] = ui - (vi >> 1);
1231
159M
      xt[12] = ur - (vr >> 1);
1232
159M
      xt[13] = ui + (vi >> 1);
1233
1234
159M
      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
1235
159M
      ur = xt[2];
1236
159M
      ui = xt[3];
1237
159M
      xt[2] = (ur >> 1) + vr;
1238
159M
      xt[3] = (ui >> 1) + vi;
1239
159M
      xt[10] = (ur >> 1) - vr;
1240
159M
      xt[11] = (ui >> 1) - vi;
1241
1242
159M
      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
1243
159M
      ur = xt[6];
1244
159M
      ui = xt[7];
1245
1246
159M
      xt[6] = (ur >> 1) + vr;
1247
159M
      xt[7] = (ui >> 1) - vi;
1248
159M
      xt[14] = (ur >> 1) - vr;
1249
159M
      xt[15] = (ui >> 1) + vi;
1250
159M
      xt += 16;
1251
159M
    } while (--j != 0);
1252
39.9M
  }
1253
1254
39.9M
  {
1255
39.9M
    FIXP_DBL *const x = _x;
1256
39.9M
    FIXP_DBL vi, ui, vr, ur;
1257
1258
39.9M
    vr = x[16];
1259
39.9M
    vi = x[17];
1260
39.9M
    ur = x[0] >> 1;
1261
39.9M
    ui = x[1] >> 1;
1262
39.9M
    x[0] = ur + (vr >> 1);
1263
39.9M
    x[1] = ui + (vi >> 1);
1264
39.9M
    x[16] = ur - (vr >> 1);
1265
39.9M
    x[17] = ui - (vi >> 1);
1266
1267
39.9M
    vi = x[24];
1268
39.9M
    vr = x[25];
1269
39.9M
    ur = x[8] >> 1;
1270
39.9M
    ui = x[9] >> 1;
1271
39.9M
    x[8] = ur + (vr >> 1);
1272
39.9M
    x[9] = ui - (vi >> 1);
1273
39.9M
    x[24] = ur - (vr >> 1);
1274
39.9M
    x[25] = ui + (vi >> 1);
1275
1276
39.9M
    vr = x[48];
1277
39.9M
    vi = x[49];
1278
39.9M
    ur = x[32] >> 1;
1279
39.9M
    ui = x[33] >> 1;
1280
39.9M
    x[32] = ur + (vr >> 1);
1281
39.9M
    x[33] = ui + (vi >> 1);
1282
39.9M
    x[48] = ur - (vr >> 1);
1283
39.9M
    x[49] = ui - (vi >> 1);
1284
1285
39.9M
    vi = x[56];
1286
39.9M
    vr = x[57];
1287
39.9M
    ur = x[40] >> 1;
1288
39.9M
    ui = x[41] >> 1;
1289
39.9M
    x[40] = ur + (vr >> 1);
1290
39.9M
    x[41] = ui - (vi >> 1);
1291
39.9M
    x[56] = ur - (vr >> 1);
1292
39.9M
    x[57] = ui + (vi >> 1);
1293
1294
39.9M
    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
1295
39.9M
    ur = x[2];
1296
39.9M
    ui = x[3];
1297
39.9M
    x[2] = (ur >> 1) + vr;
1298
39.9M
    x[3] = (ui >> 1) + vi;
1299
39.9M
    x[18] = (ur >> 1) - vr;
1300
39.9M
    x[19] = (ui >> 1) - vi;
1301
1302
39.9M
    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
1303
39.9M
    ur = x[10];
1304
39.9M
    ui = x[11];
1305
39.9M
    x[10] = (ur >> 1) + vr;
1306
39.9M
    x[11] = (ui >> 1) - vi;
1307
39.9M
    x[26] = (ur >> 1) - vr;
1308
39.9M
    x[27] = (ui >> 1) + vi;
1309
1310
39.9M
    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
1311
39.9M
    ur = x[34];
1312
39.9M
    ui = x[35];
1313
39.9M
    x[34] = (ur >> 1) + vr;
1314
39.9M
    x[35] = (ui >> 1) + vi;
1315
39.9M
    x[50] = (ur >> 1) - vr;
1316
39.9M
    x[51] = (ui >> 1) - vi;
1317
1318
39.9M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
1319
39.9M
    ur = x[42];
1320
39.9M
    ui = x[43];
1321
39.9M
    x[42] = (ur >> 1) + vr;
1322
39.9M
    x[43] = (ui >> 1) - vi;
1323
39.9M
    x[58] = (ur >> 1) - vr;
1324
39.9M
    x[59] = (ui >> 1) + vi;
1325
1326
39.9M
    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
1327
39.9M
    ur = x[4];
1328
39.9M
    ui = x[5];
1329
39.9M
    x[4] = (ur >> 1) + vr;
1330
39.9M
    x[5] = (ui >> 1) + vi;
1331
39.9M
    x[20] = (ur >> 1) - vr;
1332
39.9M
    x[21] = (ui >> 1) - vi;
1333
1334
39.9M
    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
1335
39.9M
    ur = x[12];
1336
39.9M
    ui = x[13];
1337
39.9M
    x[12] = (ur >> 1) + vr;
1338
39.9M
    x[13] = (ui >> 1) - vi;
1339
39.9M
    x[28] = (ur >> 1) - vr;
1340
39.9M
    x[29] = (ui >> 1) + vi;
1341
1342
39.9M
    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
1343
39.9M
    ur = x[36];
1344
39.9M
    ui = x[37];
1345
39.9M
    x[36] = (ur >> 1) + vr;
1346
39.9M
    x[37] = (ui >> 1) + vi;
1347
39.9M
    x[52] = (ur >> 1) - vr;
1348
39.9M
    x[53] = (ui >> 1) - vi;
1349
1350
39.9M
    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
1351
39.9M
    ur = x[44];
1352
39.9M
    ui = x[45];
1353
39.9M
    x[44] = (ur >> 1) + vr;
1354
39.9M
    x[45] = (ui >> 1) - vi;
1355
39.9M
    x[60] = (ur >> 1) - vr;
1356
39.9M
    x[61] = (ui >> 1) + vi;
1357
1358
39.9M
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
1359
39.9M
    ur = x[6];
1360
39.9M
    ui = x[7];
1361
39.9M
    x[6] = (ur >> 1) + vr;
1362
39.9M
    x[7] = (ui >> 1) + vi;
1363
39.9M
    x[22] = (ur >> 1) - vr;
1364
39.9M
    x[23] = (ui >> 1) - vi;
1365
1366
39.9M
    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
1367
39.9M
    ur = x[14];
1368
39.9M
    ui = x[15];
1369
39.9M
    x[14] = (ur >> 1) + vr;
1370
39.9M
    x[15] = (ui >> 1) - vi;
1371
39.9M
    x[30] = (ur >> 1) - vr;
1372
39.9M
    x[31] = (ui >> 1) + vi;
1373
1374
39.9M
    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
1375
39.9M
    ur = x[38];
1376
39.9M
    ui = x[39];
1377
39.9M
    x[38] = (ur >> 1) + vr;
1378
39.9M
    x[39] = (ui >> 1) + vi;
1379
39.9M
    x[54] = (ur >> 1) - vr;
1380
39.9M
    x[55] = (ui >> 1) - vi;
1381
1382
39.9M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
1383
39.9M
    ur = x[46];
1384
39.9M
    ui = x[47];
1385
1386
39.9M
    x[46] = (ur >> 1) + vr;
1387
39.9M
    x[47] = (ui >> 1) - vi;
1388
39.9M
    x[62] = (ur >> 1) - vr;
1389
39.9M
    x[63] = (ui >> 1) + vi;
1390
1391
39.9M
    vr = x[32];
1392
39.9M
    vi = x[33];
1393
39.9M
    ur = x[0] >> 1;
1394
39.9M
    ui = x[1] >> 1;
1395
39.9M
    x[0] = ur + (vr >> 1);
1396
39.9M
    x[1] = ui + (vi >> 1);
1397
39.9M
    x[32] = ur - (vr >> 1);
1398
39.9M
    x[33] = ui - (vi >> 1);
1399
1400
39.9M
    vi = x[48];
1401
39.9M
    vr = x[49];
1402
39.9M
    ur = x[16] >> 1;
1403
39.9M
    ui = x[17] >> 1;
1404
39.9M
    x[16] = ur + (vr >> 1);
1405
39.9M
    x[17] = ui - (vi >> 1);
1406
39.9M
    x[48] = ur - (vr >> 1);
1407
39.9M
    x[49] = ui + (vi >> 1);
1408
1409
39.9M
    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
1410
39.9M
    ur = x[2];
1411
39.9M
    ui = x[3];
1412
39.9M
    x[2] = (ur >> 1) + vr;
1413
39.9M
    x[3] = (ui >> 1) + vi;
1414
39.9M
    x[34] = (ur >> 1) - vr;
1415
39.9M
    x[35] = (ui >> 1) - vi;
1416
1417
39.9M
    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
1418
39.9M
    ur = x[18];
1419
39.9M
    ui = x[19];
1420
39.9M
    x[18] = (ur >> 1) + vr;
1421
39.9M
    x[19] = (ui >> 1) - vi;
1422
39.9M
    x[50] = (ur >> 1) - vr;
1423
39.9M
    x[51] = (ui >> 1) + vi;
1424
1425
39.9M
    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
1426
39.9M
    ur = x[4];
1427
39.9M
    ui = x[5];
1428
39.9M
    x[4] = (ur >> 1) + vr;
1429
39.9M
    x[5] = (ui >> 1) + vi;
1430
39.9M
    x[36] = (ur >> 1) - vr;
1431
39.9M
    x[37] = (ui >> 1) - vi;
1432
1433
39.9M
    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
1434
39.9M
    ur = x[20];
1435
39.9M
    ui = x[21];
1436
39.9M
    x[20] = (ur >> 1) + vr;
1437
39.9M
    x[21] = (ui >> 1) - vi;
1438
39.9M
    x[52] = (ur >> 1) - vr;
1439
39.9M
    x[53] = (ui >> 1) + vi;
1440
1441
39.9M
    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
1442
39.9M
    ur = x[6];
1443
39.9M
    ui = x[7];
1444
39.9M
    x[6] = (ur >> 1) + vr;
1445
39.9M
    x[7] = (ui >> 1) + vi;
1446
39.9M
    x[38] = (ur >> 1) - vr;
1447
39.9M
    x[39] = (ui >> 1) - vi;
1448
1449
39.9M
    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
1450
39.9M
    ur = x[22];
1451
39.9M
    ui = x[23];
1452
39.9M
    x[22] = (ur >> 1) + vr;
1453
39.9M
    x[23] = (ui >> 1) - vi;
1454
39.9M
    x[54] = (ur >> 1) - vr;
1455
39.9M
    x[55] = (ui >> 1) + vi;
1456
1457
39.9M
    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
1458
39.9M
    ur = x[8];
1459
39.9M
    ui = x[9];
1460
39.9M
    x[8] = (ur >> 1) + vr;
1461
39.9M
    x[9] = (ui >> 1) + vi;
1462
39.9M
    x[40] = (ur >> 1) - vr;
1463
39.9M
    x[41] = (ui >> 1) - vi;
1464
1465
39.9M
    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
1466
39.9M
    ur = x[24];
1467
39.9M
    ui = x[25];
1468
39.9M
    x[24] = (ur >> 1) + vr;
1469
39.9M
    x[25] = (ui >> 1) - vi;
1470
39.9M
    x[56] = (ur >> 1) - vr;
1471
39.9M
    x[57] = (ui >> 1) + vi;
1472
1473
39.9M
    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
1474
39.9M
    ur = x[10];
1475
39.9M
    ui = x[11];
1476
1477
39.9M
    x[10] = (ur >> 1) + vr;
1478
39.9M
    x[11] = (ui >> 1) + vi;
1479
39.9M
    x[42] = (ur >> 1) - vr;
1480
39.9M
    x[43] = (ui >> 1) - vi;
1481
1482
39.9M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
1483
39.9M
    ur = x[26];
1484
39.9M
    ui = x[27];
1485
39.9M
    x[26] = (ur >> 1) + vr;
1486
39.9M
    x[27] = (ui >> 1) - vi;
1487
39.9M
    x[58] = (ur >> 1) - vr;
1488
39.9M
    x[59] = (ui >> 1) + vi;
1489
1490
39.9M
    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1491
39.9M
    ur = x[12];
1492
39.9M
    ui = x[13];
1493
39.9M
    x[12] = (ur >> 1) + vr;
1494
39.9M
    x[13] = (ui >> 1) + vi;
1495
39.9M
    x[44] = (ur >> 1) - vr;
1496
39.9M
    x[45] = (ui >> 1) - vi;
1497
1498
39.9M
    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1499
39.9M
    ur = x[28];
1500
39.9M
    ui = x[29];
1501
39.9M
    x[28] = (ur >> 1) + vr;
1502
39.9M
    x[29] = (ui >> 1) - vi;
1503
39.9M
    x[60] = (ur >> 1) - vr;
1504
39.9M
    x[61] = (ui >> 1) + vi;
1505
1506
39.9M
    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1507
39.9M
    ur = x[14];
1508
39.9M
    ui = x[15];
1509
39.9M
    x[14] = (ur >> 1) + vr;
1510
39.9M
    x[15] = (ui >> 1) + vi;
1511
39.9M
    x[46] = (ur >> 1) - vr;
1512
39.9M
    x[47] = (ui >> 1) - vi;
1513
1514
39.9M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1515
39.9M
    ur = x[30];
1516
39.9M
    ui = x[31];
1517
39.9M
    x[30] = (ur >> 1) + vr;
1518
39.9M
    x[31] = (ui >> 1) - vi;
1519
39.9M
    x[62] = (ur >> 1) - vr;
1520
39.9M
    x[63] = (ui >> 1) + vi;
1521
39.9M
  }
1522
39.9M
}
1523
#endif /* #ifndef FUNCTION_fft_32 */
1524
1525
/**
1526
 * \brief Apply rotation vectors to a data buffer.
1527
 * \param cl length of each row of input data.
1528
 * \param l total length of input data.
1529
 * \param pVecRe real part of rotation coefficient vector.
1530
 * \param pVecIm imaginary part of rotation coefficient vector.
1531
 */
1532
1533
/*
1534
   This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535
   (-1.0) instead. At the end, the sign of the result is inverted
1536
*/
1537
#define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
1540
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541
                                        const int l, const FIXP_STB *pVecRe,
1542
4.71M
                                        const FIXP_STB *pVecIm) {
1543
4.71M
  FIXP_DBL re, im;
1544
4.71M
  FIXP_STB vre, vim;
1545
1546
4.71M
  int i, c;
1547
1548
20.3M
  for (i = 0; i < cl; i++) {
1549
15.6M
    re = pData[2 * i];
1550
15.6M
    im = pData[2 * i + 1];
1551
1552
15.6M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1553
15.6M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554
15.6M
  }
1555
30.0M
  for (; i < l; i += cl) {
1556
25.3M
    re = pData[2 * i];
1557
25.3M
    im = pData[2 * i + 1];
1558
1559
25.3M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1560
25.3M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562
149M
    for (c = i + 1; c < i + cl; c++) {
1563
123M
      re = pData[2 * c] >> 1;
1564
123M
      im = pData[2 * c + 1] >> 1;
1565
123M
      vre = *pVecRe++;
1566
123M
      vim = *pVecIm++;
1567
1568
123M
      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569
123M
    }
1570
25.3M
  }
1571
4.71M
}
1572
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574
/* select either switch case of function pointer. */
1575
//#define FFT_TWO_STAGE_SWITCH_CASE
1576
#ifndef FUNCTION_fftN2_func
1577
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578
                              const int dim1, const int dim2,
1579
                              void (*const fft1)(FIXP_DBL *),
1580
                              void (*const fft2)(FIXP_DBL *),
1581
                              const FIXP_STB *RotVectorReal,
1582
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583
4.71M
                              FIXP_DBL *aDst2) {
1584
  /* The real part of the input samples are at the addresses with even indices
1585
  and the imaginary part of the input samples are at the addresses with odd
1586
  indices. The output samples are stored at the address of pInput
1587
  */
1588
4.71M
  FIXP_DBL *pSrc, *pDst, *pDstOut;
1589
4.71M
  int i;
1590
1591
4.71M
  FDK_ASSERT(length == dim1 * dim2);
1592
1593
  /* Perform dim2 times the fft of length dim1. The input samples are at the
1594
  address of pSrc and the output samples are at the address of pDst. The input
1595
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596
  the output samples are stored consecutively.
1597
  */
1598
4.71M
  pSrc = pInput;
1599
4.71M
  pDst = aDst;
1600
34.8M
  for (i = 0; i < dim2; i++) {
1601
194M
    for (int j = 0; j < dim1; j++) {
1602
164M
      pDst[2 * j] = pSrc[2 * j * dim2];
1603
164M
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604
164M
    }
1605
1606
      /* fft of size dim1 */
1607
30.0M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1608
30.0M
    fft1(pDst);
1609
#else
1610
    switch (dim1) {
1611
      case 2:
1612
        fft2(pDst);
1613
        break;
1614
      case 3:
1615
        fft3(pDst);
1616
        break;
1617
      case 4:
1618
        fft_4(pDst);
1619
        break;
1620
      /* case 5: fft5(pDst); break; */
1621
      /* case 8: fft_8(pDst); break; */
1622
      case 12:
1623
        fft12(pDst);
1624
        break;
1625
      /* case 15: fft15(pDst); break; */
1626
      case 16:
1627
        fft_16(pDst);
1628
        break;
1629
      case 32:
1630
        fft_32(pDst);
1631
        break;
1632
        /*case 64: fft_64(pDst); break;*/
1633
        /* case 128: fft_128(pDst); break; */
1634
    }
1635
#endif
1636
30.0M
    pSrc += 2;
1637
30.0M
    pDst = pDst + 2 * dim1;
1638
30.0M
  }
1639
1640
  /* Perform the modulation of the output of the fft of length dim1 */
1641
4.71M
  pSrc = aDst;
1642
4.71M
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644
  /* Perform dim1 times the fft of length dim2. The input samples are at the
1645
  address of aDst and the output samples are at the address of pInput. The input
1646
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647
  the output samples are stored consecutively at the address of pInput.
1648
  */
1649
4.71M
  pSrc = aDst;
1650
4.71M
  pDst = aDst2;
1651
4.71M
  pDstOut = pInput;
1652
20.3M
  for (i = 0; i < dim1; i++) {
1653
180M
    for (int j = 0; j < dim2; j++) {
1654
164M
      pDst[2 * j] = pSrc[2 * j * dim1];
1655
164M
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656
164M
    }
1657
1658
15.6M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1659
15.6M
    fft2(pDst);
1660
#else
1661
    switch (dim2) {
1662
      case 4:
1663
        fft_4(pDst);
1664
        break;
1665
      case 9:
1666
        fft9(pDst);
1667
        break;
1668
      case 12:
1669
        fft12(pDst);
1670
        break;
1671
      case 15:
1672
        fft15(pDst);
1673
        break;
1674
      case 16:
1675
        fft_16(pDst);
1676
        break;
1677
      case 32:
1678
        fft_32(pDst);
1679
        break;
1680
    }
1681
#endif
1682
1683
180M
    for (int j = 0; j < dim2; j++) {
1684
164M
      pDstOut[2 * j * dim1] = pDst[2 * j];
1685
164M
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686
164M
    }
1687
15.6M
    pSrc += 2;
1688
15.6M
    pDstOut += 2;
1689
15.6M
  }
1690
4.71M
}
1691
#endif /* FUNCTION_fftN2_function */
1692
1693
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
1694
              RotVectorReal, RotVectorImag)                                \
1695
4.71M
  {                                                                        \
1696
4.71M
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
1697
4.71M
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
1698
4.71M
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
1699
4.71M
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
1700
4.71M
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
1701
4.71M
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
1702
4.71M
  }
1703
1704
  /*!
1705
   *
1706
   *  \brief  complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707
   *  \param  pInput contains the input signal prescaled right by 2
1708
   *          pInput contains the output signal scaled by SCALEFACTOR<#length>
1709
   *          The output signal does not have any fixed headroom
1710
   *  \return void
1711
   *
1712
   */
1713
1714
#ifndef FUNCTION_fft6
1715
3.34M
static inline void fft6(FIXP_DBL *pInput) {
1716
3.34M
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717
3.34M
}
1718
#endif /* #ifndef FUNCTION_fft6 */
1719
1720
#ifndef FUNCTION_fft12
1721
static inline void fft12(FIXP_DBL *pInput) {
1722
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723
        RotVectorImag12); /* 16,58 */
1724
}
1725
#endif /* #ifndef FUNCTION_fft12 */
1726
1727
#ifndef FUNCTION_fft20
1728
207k
static inline void fft20(FIXP_DBL *pInput) {
1729
207k
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730
207k
        RotVectorImag20);
1731
207k
}
1732
#endif /* FUNCTION_fft20 */
1733
1734
10.2k
static inline void fft24(FIXP_DBL *pInput) {
1735
10.2k
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736
10.2k
        RotVectorImag24); /* 16,73 */
1737
10.2k
}
1738
1739
480k
static inline void fft48(FIXP_DBL *pInput) {
1740
480k
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741
480k
        RotVectorImag48); /* 16,32 */
1742
480k
}
1743
1744
#ifndef FUNCTION_fft60
1745
272k
static inline void fft60(FIXP_DBL *pInput) {
1746
272k
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747
272k
        RotVectorImag60); /* 15,51 */
1748
272k
}
1749
#endif /* FUNCTION_fft60 */
1750
1751
#ifndef FUNCTION_fft80
1752
187
static inline void fft80(FIXP_DBL *pInput) {
1753
187
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754
187
        RotVectorImag80); /*  */
1755
187
}
1756
#endif
1757
1758
#ifndef FUNCTION_fft96
1759
82.5k
static inline void fft96(FIXP_DBL *pInput) {
1760
82.5k
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761
82.5k
        RotVectorImag96); /* 15,47 */
1762
82.5k
}
1763
#endif /* FUNCTION_fft96*/
1764
1765
#ifndef FUNCTION_fft120
1766
2.79k
static inline void fft120(FIXP_DBL *pInput) {
1767
2.79k
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768
2.79k
        RotVectorImag120);
1769
2.79k
}
1770
#endif /* FUNCTION_fft120 */
1771
1772
#ifndef FUNCTION_fft192
1773
11.3k
static inline void fft192(FIXP_DBL *pInput) {
1774
11.3k
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775
11.3k
        RotVectorImag192); /* 15,50 */
1776
11.3k
}
1777
#endif
1778
1779
#ifndef FUNCTION_fft240
1780
178k
static inline void fft240(FIXP_DBL *pInput) {
1781
178k
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782
178k
        RotVectorImag240); /* 15.44 */
1783
178k
}
1784
#endif
1785
1786
#ifndef FUNCTION_fft384
1787
102k
static inline void fft384(FIXP_DBL *pInput) {
1788
102k
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789
102k
        RotVectorImag384); /* 16.02 */
1790
102k
}
1791
#endif /* FUNCTION_fft384 */
1792
1793
#ifndef FUNCTION_fft480
1794
16.3k
static inline void fft480(FIXP_DBL *pInput) {
1795
16.3k
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796
16.3k
        RotVectorImag480); /* 15.84 */
1797
16.3k
}
1798
#endif /* FUNCTION_fft480 */
1799
1800
85.8M
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802
85.8M
  C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804
85.8M
  if (length == 32) {
1805
38.2M
    fft_32(pInput);
1806
38.2M
    *pScalefactor += SCALEFACTOR32;
1807
47.6M
  } else {
1808
47.6M
    switch (length) {
1809
19.7M
      case 16:
1810
19.7M
        fft_16(pInput);
1811
19.7M
        *pScalefactor += SCALEFACTOR16;
1812
19.7M
        break;
1813
7.85M
      case 8:
1814
7.85M
        fft_8(pInput);
1815
7.85M
        *pScalefactor += SCALEFACTOR8;
1816
7.85M
        break;
1817
0
      case 2:
1818
0
        fft2(pInput);
1819
0
        *pScalefactor += SCALEFACTOR2;
1820
0
        break;
1821
0
      case 3:
1822
0
        fft3(pInput);
1823
0
        *pScalefactor += SCALEFACTOR3;
1824
0
        break;
1825
866k
      case 4:
1826
866k
        fft_4(pInput);
1827
866k
        *pScalefactor += SCALEFACTOR4;
1828
866k
        break;
1829
0
      case 5:
1830
0
        fft5(pInput);
1831
0
        *pScalefactor += SCALEFACTOR5;
1832
0
        break;
1833
3.34M
      case 6:
1834
3.34M
        fft6(pInput);
1835
3.34M
        *pScalefactor += SCALEFACTOR6;
1836
3.34M
        break;
1837
207k
      case 10:
1838
207k
        fft10(pInput);
1839
207k
        *pScalefactor += SCALEFACTOR10;
1840
207k
        break;
1841
13.4M
      case 12:
1842
13.4M
        fft12(pInput);
1843
13.4M
        *pScalefactor += SCALEFACTOR12;
1844
13.4M
        break;
1845
0
      case 15:
1846
0
        fft15(pInput);
1847
0
        *pScalefactor += SCALEFACTOR15;
1848
0
        break;
1849
207k
      case 20:
1850
207k
        fft20(pInput);
1851
207k
        *pScalefactor += SCALEFACTOR20;
1852
207k
        break;
1853
10.2k
      case 24:
1854
10.2k
        fft24(pInput);
1855
10.2k
        *pScalefactor += SCALEFACTOR24;
1856
10.2k
        break;
1857
480k
      case 48:
1858
480k
        fft48(pInput);
1859
480k
        *pScalefactor += SCALEFACTOR48;
1860
480k
        break;
1861
272k
      case 60:
1862
272k
        fft60(pInput);
1863
272k
        *pScalefactor += SCALEFACTOR60;
1864
272k
        break;
1865
432k
      case 64:
1866
432k
        dit_fft(pInput, 6, SineTable512, 512);
1867
432k
        *pScalefactor += SCALEFACTOR64;
1868
432k
        break;
1869
187
      case 80:
1870
187
        fft80(pInput);
1871
187
        *pScalefactor += SCALEFACTOR80;
1872
187
        break;
1873
82.5k
      case 96:
1874
82.5k
        fft96(pInput);
1875
82.5k
        *pScalefactor += SCALEFACTOR96;
1876
82.5k
        break;
1877
2.79k
      case 120:
1878
2.79k
        fft120(pInput);
1879
2.79k
        *pScalefactor += SCALEFACTOR120;
1880
2.79k
        break;
1881
79.5k
      case 128:
1882
79.5k
        dit_fft(pInput, 7, SineTable512, 512);
1883
79.5k
        *pScalefactor += SCALEFACTOR128;
1884
79.5k
        break;
1885
11.3k
      case 192:
1886
11.3k
        fft192(pInput);
1887
11.3k
        *pScalefactor += SCALEFACTOR192;
1888
11.3k
        break;
1889
178k
      case 240:
1890
178k
        fft240(pInput);
1891
178k
        *pScalefactor += SCALEFACTOR240;
1892
178k
        break;
1893
83.9k
      case 256:
1894
83.9k
        dit_fft(pInput, 8, SineTable512, 512);
1895
83.9k
        *pScalefactor += SCALEFACTOR256;
1896
83.9k
        break;
1897
102k
      case 384:
1898
102k
        fft384(pInput);
1899
102k
        *pScalefactor += SCALEFACTOR384;
1900
102k
        break;
1901
16.3k
      case 480:
1902
16.3k
        fft480(pInput);
1903
16.3k
        *pScalefactor += SCALEFACTOR480;
1904
16.3k
        break;
1905
184k
      case 512:
1906
184k
        dit_fft(pInput, 9, SineTable512, 512);
1907
184k
        *pScalefactor += SCALEFACTOR512;
1908
184k
        break;
1909
0
      default:
1910
0
        FDK_ASSERT(0); /* FFT length not supported! */
1911
0
        break;
1912
47.6M
    }
1913
47.6M
  }
1914
85.8M
}
1915
1916
0
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917
0
  switch (length) {
1918
0
    default:
1919
0
      FDK_ASSERT(0); /* IFFT length not supported! */
1920
0
      break;
1921
0
  }
1922
0
}