Coverage Report

Created: 2025-07-11 06:54

/src/aac/libFDK/src/fft.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -----------------------------------------------------------------------------
2
Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4
© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5
Forschung e.V. All rights reserved.
6
7
 1.    INTRODUCTION
8
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10
scheme for digital audio. This FDK AAC Codec software is intended to be used on
11
a wide variety of Android devices.
12
13
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14
general perceptual audio codecs. AAC-ELD is considered the best-performing
15
full-bandwidth communications codec by independent studies and is widely
16
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17
specifications.
18
19
Patent licenses for necessary patent claims for the FDK AAC Codec (including
20
those of Fraunhofer) may be obtained through Via Licensing
21
(www.vialicensing.com) or through the respective patent owners individually for
22
the purpose of encoding or decoding bit streams in products that are compliant
23
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24
Android devices already license these patent claims through Via Licensing or
25
directly from the patent owners, and therefore FDK AAC Codec software may
26
already be covered under those patent licenses when it is used for those
27
licensed purposes only.
28
29
Commercially-licensed AAC software libraries, including floating-point versions
30
with enhanced sound quality, are also available from Fraunhofer. Users are
31
encouraged to check the Fraunhofer website for additional applications
32
information and documentation.
33
34
2.    COPYRIGHT LICENSE
35
36
Redistribution and use in source and binary forms, with or without modification,
37
are permitted without payment of copyright license fees provided that you
38
satisfy the following conditions:
39
40
You must retain the complete text of this software license in redistributions of
41
the FDK AAC Codec or your modifications thereto in source code form.
42
43
You must retain the complete text of this software license in the documentation
44
and/or other materials provided with redistributions of the FDK AAC Codec or
45
your modifications thereto in binary form. You must make available free of
46
charge copies of the complete source code of the FDK AAC Codec and your
47
modifications thereto to recipients of copies in binary form.
48
49
The name of Fraunhofer may not be used to endorse or promote products derived
50
from this library without prior written permission.
51
52
You may not charge copyright license fees for anyone to use, copy or distribute
53
the FDK AAC Codec software or your modifications thereto.
54
55
Your modified versions of the FDK AAC Codec must carry prominent notices stating
56
that you changed the software and the date of any change. For modified versions
57
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59
AAC Codec Library for Android."
60
61
3.    NO PATENT LICENSE
62
63
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65
Fraunhofer provides no warranty of patent non-infringement with respect to this
66
software.
67
68
You may use this FDK AAC Codec software or modifications thereto only for
69
purposes that are authorized by appropriate patent licenses.
70
71
4.    DISCLAIMER
72
73
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75
including but not limited to the implied warranties of merchantability and
76
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78
or consequential damages, including but not limited to procurement of substitute
79
goods or services; loss of use, data, or profits, or business interruption,
80
however caused and on any theory of liability, whether in contract, strict
81
liability, or tort (including negligence), arising in any way out of the use of
82
this software, even if advised of the possibility of such damage.
83
84
5.    CONTACT INFORMATION
85
86
Fraunhofer Institute for Integrated Circuits IIS
87
Attention: Audio and Multimedia Departments - FDK AAC LL
88
Am Wolfsmantel 33
89
91058 Erlangen, Germany
90
91
www.iis.fraunhofer.de/amm
92
amm-info@iis.fraunhofer.de
93
----------------------------------------------------------------------------- */
94
95
/******************* Library for basic calculation routines ********************
96
97
   Author(s):   Josef Hoepfl, DSP Solutions
98
99
   Description: Fix point FFT
100
101
*******************************************************************************/
102
103
#include "fft_rad2.h"
104
#include "FDK_tools_rom.h"
105
106
287M
#define W_PiFOURTH STC(0x5a82799a)
107
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
108
#ifndef SUMDIFF_PIFOURTH
109
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
110
694M
  {                                       \
111
694M
    FIXP_DBL wa, wb;                      \
112
694M
    wa = fMultDiv2(a, W_PiFOURTH);        \
113
694M
    wb = fMultDiv2(b, W_PiFOURTH);        \
114
694M
    diff = wb - wa;                       \
115
694M
    sum = wb + wa;                        \
116
694M
  }
117
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
118
  {                                               \
119
    FIXP_SGL wa, wb;                              \
120
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
121
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
122
    diff = wb - wa;                               \
123
    sum = wb + wa;                                \
124
  }
125
#endif
126
127
#define SCALEFACTOR2048 10
128
#define SCALEFACTOR1024 9
129
174k
#define SCALEFACTOR512 8
130
74.1k
#define SCALEFACTOR256 7
131
81.0k
#define SCALEFACTOR128 6
132
422k
#define SCALEFACTOR64 5
133
37.9M
#define SCALEFACTOR32 4
134
20.9M
#define SCALEFACTOR16 3
135
8.26M
#define SCALEFACTOR8 2
136
1.74M
#define SCALEFACTOR4 1
137
3.55M
#define SCALEFACTOR2 1
138
139
3.61M
#define SCALEFACTOR3 1
140
114k
#define SCALEFACTOR5 1
141
3.55M
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
142
#define SCALEFACTOR7 2
143
#define SCALEFACTOR9 2
144
2.39M
#define SCALEFACTOR10 5
145
13.0M
#define SCALEFACTOR12 3
146
472k
#define SCALEFACTOR15 3
147
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
148
114k
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
149
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
150
6.20k
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
151
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
152
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
153
376k
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
154
245k
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
155
187
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
156
68.1k
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
157
3.83k
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
158
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
159
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
160
7.80k
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
161
207k
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
162
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
163
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
164
93.2k
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
165
15.7k
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167
#include "fft.h"
168
169
#ifndef FUNCTION_fft2
170
171
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172
 * with factor 0.5 */
173
10.7M
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174
10.7M
  FIXP_DBL r1, i1;
175
10.7M
  FIXP_DBL r2, i2;
176
177
  /* real part */
178
10.7M
  r1 = pDat[2];
179
10.7M
  r2 = pDat[0];
180
181
  /* imaginary part */
182
10.7M
  i1 = pDat[3];
183
10.7M
  i2 = pDat[1];
184
185
  /* real part */
186
10.7M
  pDat[0] = (r2 + r1) >> 1;
187
10.7M
  pDat[2] = (r2 - r1) >> 1;
188
189
  /* imaginary part */
190
10.7M
  pDat[1] = (i2 + i1) >> 1;
191
10.7M
  pDat[3] = (i2 - i1) >> 1;
192
10.7M
}
193
#endif /* FUNCTION_fft2 */
194
195
18.5M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
196
197
#ifndef FUNCTION_fft3
198
/* Performs the FFT of length 3 according to the algorithm after winograd. */
199
9.28M
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200
9.28M
  FIXP_DBL r1, r2;
201
9.28M
  FIXP_DBL s1, s2;
202
9.28M
  FIXP_DBL pD;
203
204
  /* real part */
205
9.28M
  r1 = pDat[2] + pDat[4];
206
9.28M
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207
9.28M
  pD = pDat[0] >> 1;
208
9.28M
  pDat[0] = pD + (r1 >> 1);
209
9.28M
  r1 = pD - (r1 >> 2);
210
211
  /* imaginary part */
212
9.28M
  s1 = pDat[3] + pDat[5];
213
9.28M
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214
9.28M
  pD = pDat[1] >> 1;
215
9.28M
  pDat[1] = pD + (s1 >> 1);
216
9.28M
  s1 = pD - (s1 >> 2);
217
218
  /* combination */
219
9.28M
  pDat[2] = r1 - s2;
220
9.28M
  pDat[4] = r1 + s2;
221
9.28M
  pDat[3] = s1 + r2;
222
9.28M
  pDat[5] = s1 - r2;
223
9.28M
}
224
#endif /* #ifndef FUNCTION_fft3 */
225
226
149M
#define F5C(x) STC(x)
227
228
29.9M
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
229
29.9M
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
230
29.9M
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
231
29.9M
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
232
29.9M
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
233
234
/* performs the FFT of length 5 according to the algorithm after winograd */
235
/* This version works with a prescale of 2 instead of 3 */
236
14.9M
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237
14.9M
  FIXP_DBL r1, r2, r3, r4;
238
14.9M
  FIXP_DBL s1, s2, s3, s4;
239
14.9M
  FIXP_DBL t;
240
241
  /* real part */
242
14.9M
  r1 = (pDat[2] + pDat[8]) >> 1;
243
14.9M
  r4 = (pDat[2] - pDat[8]) >> 1;
244
14.9M
  r3 = (pDat[4] + pDat[6]) >> 1;
245
14.9M
  r2 = (pDat[4] - pDat[6]) >> 1;
246
14.9M
  t = fMult((r1 - r3), C54);
247
14.9M
  r1 = r1 + r3;
248
14.9M
  pDat[0] = (pDat[0] >> 1) + r1;
249
  /* Bit shift left because of the constant C55 which was scaled with the factor
250
     0.5 because of the representation of the values as fracts */
251
14.9M
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252
14.9M
  r3 = r1 - t;
253
14.9M
  r1 = r1 + t;
254
14.9M
  t = fMult((r4 + r2), C51);
255
  /* Bit shift left because of the constant C55 which was scaled with the factor
256
     0.5 because of the representation of the values as fracts */
257
14.9M
  r4 = t + (fMultDiv2(r4, C52) << (2));
258
14.9M
  r2 = t + fMult(r2, C53);
259
260
  /* imaginary part */
261
14.9M
  s1 = (pDat[3] + pDat[9]) >> 1;
262
14.9M
  s4 = (pDat[3] - pDat[9]) >> 1;
263
14.9M
  s3 = (pDat[5] + pDat[7]) >> 1;
264
14.9M
  s2 = (pDat[5] - pDat[7]) >> 1;
265
14.9M
  t = fMult((s1 - s3), C54);
266
14.9M
  s1 = s1 + s3;
267
14.9M
  pDat[1] = (pDat[1] >> 1) + s1;
268
  /* Bit shift left because of the constant C55 which was scaled with the factor
269
     0.5 because of the representation of the values as fracts */
270
14.9M
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271
14.9M
  s3 = s1 - t;
272
14.9M
  s1 = s1 + t;
273
14.9M
  t = fMult((s4 + s2), C51);
274
  /* Bit shift left because of the constant C55 which was scaled with the factor
275
     0.5 because of the representation of the values as fracts */
276
14.9M
  s4 = t + (fMultDiv2(s4, C52) << (2));
277
14.9M
  s2 = t + fMult(s2, C53);
278
279
  /* combination */
280
14.9M
  pDat[2] = r1 + s2;
281
14.9M
  pDat[8] = r1 - s2;
282
14.9M
  pDat[4] = r3 - s4;
283
14.9M
  pDat[6] = r3 + s4;
284
285
14.9M
  pDat[3] = s1 - r2;
286
14.9M
  pDat[9] = s1 + r2;
287
14.9M
  pDat[5] = s3 + r4;
288
14.9M
  pDat[7] = s3 - r4;
289
14.9M
}
290
291
2.28M
#define F5C(x) STC(x)
292
293
457k
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
294
457k
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
295
457k
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
296
457k
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
297
457k
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
298
/**
299
 * \brief    Function performs a complex 10-point FFT
300
 *           The FFT is performed inplace. The result of the FFT
301
 *           is scaled by SCALEFACTOR10 bits.
302
 *
303
 *           WOPS FLC version:                    1093 cycles
304
 *           WOPS with 32x16 bit multiplications:  196 cycles
305
 *
306
 * \param    [i/o] re    real input / output
307
 * \param    [i/o] im    imag input / output
308
 * \param    [i  ] s     stride real and imag input / output
309
 *
310
 * \return   void
311
 */
312
static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313
114k
{
314
114k
  FIXP_DBL t;
315
114k
  FIXP_DBL x0, x1, x2, x3, x4;
316
114k
  FIXP_DBL r1, r2, r3, r4;
317
114k
  FIXP_DBL s1, s2, s3, s4;
318
114k
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319
114k
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321
114k
  const int s = 1;  // stride factor
322
323
  /* 2 fft5 stages */
324
325
  /* real part */
326
114k
  x0 = (x[s * 0] >> SCALEFACTOR10);
327
114k
  x1 = (x[s * 4] >> SCALEFACTOR10);
328
114k
  x2 = (x[s * 8] >> SCALEFACTOR10);
329
114k
  x3 = (x[s * 12] >> SCALEFACTOR10);
330
114k
  x4 = (x[s * 16] >> SCALEFACTOR10);
331
332
114k
  r1 = (x3 + x2);
333
114k
  r4 = (x3 - x2);
334
114k
  r3 = (x1 + x4);
335
114k
  r2 = (x1 - x4);
336
114k
  t = fMult((r1 - r3), C54);
337
114k
  r1 = (r1 + r3);
338
114k
  y00 = (x0 + r1);
339
114k
  r1 = (y00 + ((fMult(r1, C55) << 1)));
340
114k
  r3 = (r1 - t);
341
114k
  r1 = (r1 + t);
342
114k
  t = fMult((r4 + r2), C51);
343
114k
  r4 = (t + (fMult(r4, C52) << 1));
344
114k
  r2 = (t + fMult(r2, C53));
345
346
  /* imaginary part */
347
114k
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348
114k
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349
114k
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350
114k
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351
114k
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353
114k
  s1 = (x3 + x2);
354
114k
  s4 = (x3 - x2);
355
114k
  s3 = (x1 + x4);
356
114k
  s2 = (x1 - x4);
357
114k
  t = fMult((s1 - s3), C54);
358
114k
  s1 = (s1 + s3);
359
114k
  y01 = (x0 + s1);
360
114k
  s1 = (y01 + (fMult(s1, C55) << 1));
361
114k
  s3 = (s1 - t);
362
114k
  s1 = (s1 + t);
363
114k
  t = fMult((s4 + s2), C51);
364
114k
  s4 = (t + (fMult(s4, C52) << 1));
365
114k
  s2 = (t + fMult(s2, C53));
366
367
  /* combination */
368
114k
  y04 = (r1 + s2);
369
114k
  y16 = (r1 - s2);
370
114k
  y08 = (r3 - s4);
371
114k
  y12 = (r3 + s4);
372
373
114k
  y05 = (s1 - r2);
374
114k
  y17 = (s1 + r2);
375
114k
  y09 = (s3 + r4);
376
114k
  y13 = (s3 - r4);
377
378
  /* real part */
379
114k
  x0 = (x[s * 10] >> SCALEFACTOR10);
380
114k
  x1 = (x[s * 2] >> SCALEFACTOR10);
381
114k
  x2 = (x[s * 6] >> SCALEFACTOR10);
382
114k
  x3 = (x[s * 14] >> SCALEFACTOR10);
383
114k
  x4 = (x[s * 18] >> SCALEFACTOR10);
384
385
114k
  r1 = (x1 + x4);
386
114k
  r4 = (x1 - x4);
387
114k
  r3 = (x3 + x2);
388
114k
  r2 = (x3 - x2);
389
114k
  t = fMult((r1 - r3), C54);
390
114k
  r1 = (r1 + r3);
391
114k
  y02 = (x0 + r1);
392
114k
  r1 = (y02 + ((fMult(r1, C55) << 1)));
393
114k
  r3 = (r1 - t);
394
114k
  r1 = (r1 + t);
395
114k
  t = fMult(((r4 + r2)), C51);
396
114k
  r4 = (t + (fMult(r4, C52) << 1));
397
114k
  r2 = (t + fMult(r2, C53));
398
399
  /* imaginary part */
400
114k
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401
114k
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402
114k
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403
114k
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404
114k
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406
114k
  s1 = (x1 + x4);
407
114k
  s4 = (x1 - x4);
408
114k
  s3 = (x3 + x2);
409
114k
  s2 = (x3 - x2);
410
114k
  t = fMult((s1 - s3), C54);
411
114k
  s1 = (s1 + s3);
412
114k
  y03 = (x0 + s1);
413
114k
  s1 = (y03 + (fMult(s1, C55) << 1));
414
114k
  s3 = (s1 - t);
415
114k
  s1 = (s1 + t);
416
114k
  t = fMult((s4 + s2), C51);
417
114k
  s4 = (t + (fMult(s4, C52) << 1));
418
114k
  s2 = (t + fMult(s2, C53));
419
420
  /* combination */
421
114k
  y06 = (r1 + s2);
422
114k
  y18 = (r1 - s2);
423
114k
  y10 = (r3 - s4);
424
114k
  y14 = (r3 + s4);
425
426
114k
  y07 = (s1 - r2);
427
114k
  y19 = (s1 + r2);
428
114k
  y11 = (s3 + r4);
429
114k
  y15 = (s3 - r4);
430
431
  /* 5 fft2 stages */
432
114k
  x[s * 0] = (y00 + y02);
433
114k
  x[s * 0 + 1] = (y01 + y03);
434
114k
  x[s * 10] = (y00 - y02);
435
114k
  x[s * 10 + 1] = (y01 - y03);
436
437
114k
  x[s * 4] = (y04 + y06);
438
114k
  x[s * 4 + 1] = (y05 + y07);
439
114k
  x[s * 14] = (y04 - y06);
440
114k
  x[s * 14 + 1] = (y05 - y07);
441
442
114k
  x[s * 8] = (y08 + y10);
443
114k
  x[s * 8 + 1] = (y09 + y11);
444
114k
  x[s * 18] = (y08 - y10);
445
114k
  x[s * 18 + 1] = (y09 - y11);
446
447
114k
  x[s * 12] = (y12 + y14);
448
114k
  x[s * 12 + 1] = (y13 + y15);
449
114k
  x[s * 2] = (y12 - y14);
450
114k
  x[s * 2 + 1] = (y13 - y15);
451
452
114k
  x[s * 16] = (y16 + y18);
453
114k
  x[s * 16 + 1] = (y17 + y19);
454
114k
  x[s * 6] = (y16 - y18);
455
114k
  x[s * 6 + 1] = (y17 - y19);
456
114k
}
457
458
#ifndef FUNCTION_fft12
459
#define FUNCTION_fft12
460
461
#undef C31
462
185M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
463
464
17.1M
static inline void fft12(FIXP_DBL *pInput) {
465
17.1M
  FIXP_DBL aDst[24];
466
17.1M
  FIXP_DBL *pSrc, *pDst;
467
17.1M
  int i;
468
469
17.1M
  pSrc = pInput;
470
17.1M
  pDst = aDst;
471
17.1M
  FIXP_DBL r1, r2, s1, s2, pD;
472
473
  /* First 3*2 samples are shifted right by 2 before output */
474
17.1M
  r1 = pSrc[8] + pSrc[16];
475
17.1M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
476
17.1M
  pD = pSrc[0] >> 1;
477
17.1M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
478
17.1M
  r1 = pD - (r1 >> 2);
479
480
  /* imaginary part */
481
17.1M
  s1 = pSrc[9] + pSrc[17];
482
17.1M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
483
17.1M
  pD = pSrc[1] >> 1;
484
17.1M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
485
17.1M
  s1 = pD - (s1 >> 2);
486
487
  /* combination */
488
17.1M
  pDst[2] = (r1 - s2) >> 1;
489
17.1M
  pDst[3] = (s1 + r2) >> 1;
490
17.1M
  pDst[4] = (r1 + s2) >> 1;
491
17.1M
  pDst[5] = (s1 - r2) >> 1;
492
17.1M
  pSrc += 2;
493
17.1M
  pDst += 6;
494
495
17.1M
  const FIXP_STB *pVecRe = RotVectorReal12;
496
17.1M
  const FIXP_STB *pVecIm = RotVectorImag12;
497
17.1M
  FIXP_DBL re, im;
498
17.1M
  FIXP_STB vre, vim;
499
51.5M
  for (i = 0; i < 2; i++) {
500
    /* sample 0,1 are shifted right by 2 before output */
501
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
502
     * output */
503
504
34.3M
    r1 = pSrc[8] + pSrc[16];
505
34.3M
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
506
34.3M
    pD = pSrc[0] >> 1;
507
34.3M
    pDst[0] = (pD + (r1 >> 1)) >> 1;
508
34.3M
    r1 = pD - (r1 >> 2);
509
510
    /* imaginary part */
511
34.3M
    s1 = pSrc[9] + pSrc[17];
512
34.3M
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
513
34.3M
    pD = pSrc[1] >> 1;
514
34.3M
    pDst[1] = (pD + (s1 >> 1)) >> 1;
515
34.3M
    s1 = pD - (s1 >> 2);
516
517
    /* combination */
518
34.3M
    re = (r1 - s2) >> 0;
519
34.3M
    im = (s1 + r2) >> 0;
520
34.3M
    vre = *pVecRe++;
521
34.3M
    vim = *pVecIm++;
522
34.3M
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
523
524
34.3M
    re = (r1 + s2) >> 0;
525
34.3M
    im = (s1 - r2) >> 0;
526
34.3M
    vre = *pVecRe++;
527
34.3M
    vim = *pVecIm++;
528
34.3M
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
529
530
34.3M
    pDst += 6;
531
34.3M
    pSrc += 2;
532
34.3M
  }
533
  /* sample 0,1 are shifted right by 2 before output */
534
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
535
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
536
17.1M
  r1 = pSrc[8] + pSrc[16];
537
17.1M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
538
17.1M
  pD = pSrc[0] >> 1;
539
17.1M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
540
17.1M
  r1 = pD - (r1 >> 2);
541
542
  /* imaginary part */
543
17.1M
  s1 = pSrc[9] + pSrc[17];
544
17.1M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
545
17.1M
  pD = pSrc[1] >> 1;
546
17.1M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
547
17.1M
  s1 = pD - (s1 >> 2);
548
549
  /* combination */
550
17.1M
  pDst[2] = (s1 + r2) >> 1;
551
17.1M
  pDst[3] = (s2 - r1) >> 1;
552
17.1M
  pDst[4] = -((r1 + s2) >> 1);
553
17.1M
  pDst[5] = (r2 - s1) >> 1;
554
555
  /* Perform 3 times the fft of length 4. The input samples are at the address
556
  of aDst and the output samples are at the address of pInput. The input vector
557
  for the fft of length 4 is built of the interleaved samples in aDst, the
558
  output samples are stored consecutively at the address of pInput.
559
  */
560
17.1M
  pSrc = aDst;
561
17.1M
  pDst = pInput;
562
68.6M
  for (i = 0; i < 3; i++) {
563
    /* inline FFT4 merged with incoming resorting loop */
564
51.5M
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
565
566
51.5M
    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
567
51.5M
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
568
51.5M
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
569
51.5M
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
570
571
51.5M
    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
572
51.5M
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
573
574
51.5M
    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
575
51.5M
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */
576
577
51.5M
    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
578
51.5M
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
579
580
51.5M
    a10 = a10 - pSrc[18]; /* Re C - Re D */
581
51.5M
    a30 = a30 - pSrc[19]; /* Im C - Im D */
582
583
51.5M
    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
584
51.5M
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
585
51.5M
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
586
51.5M
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
587
588
51.5M
    pSrc += 2;
589
51.5M
    pDst += 2;
590
51.5M
  }
591
17.1M
}
592
#endif /* FUNCTION_fft12 */
593
594
#ifndef FUNCTION_fft15
595
596
179M
#define N3 3
597
101M
#define N5 5
598
58.0M
#define N6 6
599
179M
#define N15 15
600
601
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
602
 * length 5. */
603
4.84M
static inline void fft15(FIXP_DBL *pInput) {
604
4.84M
  FIXP_DBL aDst[2 * N15];
605
4.84M
  FIXP_DBL aDst1[2 * N15];
606
4.84M
  int i, k, l;
607
608
  /* Sort input vector for fft's of length 3
609
  input3(0:2)   = [input(0) input(5) input(10)];
610
  input3(3:5)   = [input(3) input(8) input(13)];
611
  input3(6:8)   = [input(6) input(11) input(1)];
612
  input3(9:11)  = [input(9) input(14) input(4)];
613
  input3(12:14) = [input(12) input(2) input(7)]; */
614
4.84M
  {
615
4.84M
    const FIXP_DBL *pSrc = pInput;
616
4.84M
    FIXP_DBL *RESTRICT pDst = aDst;
617
    /* Merge 3 loops into one, skip call of fft3 */
618
29.0M
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
619
24.2M
      pDst[k + 0] = pSrc[l];
620
24.2M
      pDst[k + 1] = pSrc[l + 1];
621
24.2M
      l += 2 * N5;
622
24.2M
      if (l >= (2 * N15)) l -= (2 * N15);
623
624
24.2M
      pDst[k + 2] = pSrc[l];
625
24.2M
      pDst[k + 3] = pSrc[l + 1];
626
24.2M
      l += 2 * N5;
627
24.2M
      if (l >= (2 * N15)) l -= (2 * N15);
628
24.2M
      pDst[k + 4] = pSrc[l];
629
24.2M
      pDst[k + 5] = pSrc[l + 1];
630
24.2M
      l += (2 * N5) + (2 * N3);
631
24.2M
      if (l >= (2 * N15)) l -= (2 * N15);
632
633
      /* fft3 merged with shift right by 2 loop */
634
24.2M
      FIXP_DBL r1, r2, r3;
635
24.2M
      FIXP_DBL s1, s2;
636
      /* real part */
637
24.2M
      r1 = pDst[k + 2] + pDst[k + 4];
638
24.2M
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
639
24.2M
      s1 = pDst[k + 0];
640
24.2M
      pDst[k + 0] = (s1 + r1) >> 2;
641
24.2M
      r1 = s1 - (r1 >> 1);
642
643
      /* imaginary part */
644
24.2M
      s1 = pDst[k + 3] + pDst[k + 5];
645
24.2M
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
646
24.2M
      r3 = pDst[k + 1];
647
24.2M
      pDst[k + 1] = (r3 + s1) >> 2;
648
24.2M
      s1 = r3 - (s1 >> 1);
649
650
      /* combination */
651
24.2M
      pDst[k + 2] = (r1 - s2) >> 2;
652
24.2M
      pDst[k + 4] = (r1 + s2) >> 2;
653
24.2M
      pDst[k + 3] = (s1 + r2) >> 2;
654
24.2M
      pDst[k + 5] = (s1 - r2) >> 2;
655
24.2M
    }
656
4.84M
  }
657
  /* Sort input vector for fft's of length 5
658
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
659
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
660
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
661
  /* Merge 2 loops into one, brings about 10% */
662
4.84M
  {
663
4.84M
    const FIXP_DBL *pSrc = aDst;
664
4.84M
    FIXP_DBL *RESTRICT pDst = aDst1;
665
19.3M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
666
14.5M
      l = 2 * i;
667
14.5M
      pDst[k + 0] = pSrc[l + 0];
668
14.5M
      pDst[k + 1] = pSrc[l + 1];
669
14.5M
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
670
14.5M
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
671
14.5M
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
672
14.5M
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
673
14.5M
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
674
14.5M
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
675
14.5M
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
676
14.5M
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
677
14.5M
      fft5(&pDst[k]);
678
14.5M
    }
679
4.84M
  }
680
  /* Sort output vector of length 15
681
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
682
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
683
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
684
  /* optimize clumsy loop, brings about 5% */
685
4.84M
  {
686
4.84M
    const FIXP_DBL *pSrc = aDst1;
687
4.84M
    FIXP_DBL *RESTRICT pDst = pInput;
688
19.3M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
689
14.5M
      pDst[k + 0] = pSrc[l];
690
14.5M
      pDst[k + 1] = pSrc[l + 1];
691
14.5M
      l += (2 * N6);
692
14.5M
      if (l >= (2 * N15)) l -= (2 * N15);
693
14.5M
      pDst[k + 2] = pSrc[l];
694
14.5M
      pDst[k + 3] = pSrc[l + 1];
695
14.5M
      l += (2 * N6);
696
14.5M
      if (l >= (2 * N15)) l -= (2 * N15);
697
14.5M
      pDst[k + 4] = pSrc[l];
698
14.5M
      pDst[k + 5] = pSrc[l + 1];
699
14.5M
      l += (2 * N6);
700
14.5M
      if (l >= (2 * N15)) l -= (2 * N15);
701
14.5M
      pDst[k + 6] = pSrc[l];
702
14.5M
      pDst[k + 7] = pSrc[l + 1];
703
14.5M
      l += (2 * N6);
704
14.5M
      if (l >= (2 * N15)) l -= (2 * N15);
705
14.5M
      pDst[k + 8] = pSrc[l];
706
14.5M
      pDst[k + 9] = pSrc[l + 1];
707
14.5M
      l += 2; /* no modulo check needed, it cannot occur */
708
14.5M
    }
709
4.84M
  }
710
4.84M
}
711
#endif /* FUNCTION_fft15 */
712
713
/*
714
 Select shift placement.
715
 Some processors like ARM may shift "for free" in combination with an addition
716
 or substraction, but others don't so either combining shift with +/- or reduce
717
 the total amount or shift operations is optimal
718
 */
719
#if !defined(__arm__)
720
1.01G
#define SHIFT_A >> 1
721
#define SHIFT_B
722
#else
723
#define SHIFT_A
724
#define SHIFT_B >> 1
725
#endif
726
727
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728
                         */
729
730
/* This defines prevents this array to be declared twice, if 16-bit fft is
731
 * enabled too */
732
#define FUNCTION_DATA_fft_16_w16
733
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
734
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
736
LNK_SECTION_CODE_L1
737
23.9M
inline void fft_16(FIXP_DBL *RESTRICT x) {
738
23.9M
  FIXP_DBL vr, ur;
739
23.9M
  FIXP_DBL vr2, ur2;
740
23.9M
  FIXP_DBL vr3, ur3;
741
23.9M
  FIXP_DBL vr4, ur4;
742
23.9M
  FIXP_DBL vi, ui;
743
23.9M
  FIXP_DBL vi2, ui2;
744
23.9M
  FIXP_DBL vi3, ui3;
745
746
23.9M
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
747
23.9M
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
748
23.9M
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
749
23.9M
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
750
23.9M
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
751
23.9M
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
752
753
23.9M
  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
754
23.9M
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
755
756
23.9M
  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
757
23.9M
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
758
23.9M
  vr -= x[16];              /* Re A - Re B */
759
23.9M
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
760
23.9M
  ur -= x[17];              /* Im A - Im B */
761
23.9M
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
762
763
23.9M
  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
764
23.9M
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
765
766
23.9M
  x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
767
23.9M
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
768
769
23.9M
  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
770
23.9M
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
771
772
23.9M
  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
773
23.9M
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
774
775
23.9M
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
776
23.9M
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
777
23.9M
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
778
23.9M
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
779
23.9M
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
780
23.9M
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
781
23.9M
  vr2 -= x[20];                            /* Re A - Re B */
782
23.9M
  ur2 -= x[21];                            /* Im A - Im B */
783
23.9M
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
784
23.9M
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
785
786
23.9M
  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
787
23.9M
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
788
789
23.9M
  x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
790
23.9M
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
791
792
23.9M
  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
793
23.9M
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
794
795
23.9M
  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
796
23.9M
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
797
798
23.9M
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
799
23.9M
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
800
23.9M
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
801
23.9M
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
802
23.9M
  vr3 -= x[18];               /* Re A - Re B */
803
23.9M
  ur3 -= x[19];               /* Im A - Im B */
804
23.9M
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
805
23.9M
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
806
23.9M
  x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
807
23.9M
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
808
809
23.9M
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
810
23.9M
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
811
23.9M
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
812
23.9M
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
813
23.9M
  vr4 -= x[22];                /* Re A - Re B */
814
23.9M
  ur4 -= x[23];                /* Im A - Im B */
815
816
23.9M
  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
817
23.9M
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
818
819
23.9M
  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
820
23.9M
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
821
23.9M
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
822
23.9M
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
823
23.9M
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
824
23.9M
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
825
826
  // xt1 =  0
827
  // xt2 =  8
828
23.9M
  vr = x[8];
829
23.9M
  vi = x[9];
830
23.9M
  ur = x[0] >> 1;
831
23.9M
  ui = x[1] >> 1;
832
23.9M
  x[0] = ur + (vr >> 1);
833
23.9M
  x[1] = ui + (vi >> 1);
834
23.9M
  x[8] = ur - (vr >> 1);
835
23.9M
  x[9] = ui - (vi >> 1);
836
837
  // xt1 =  4
838
  // xt2 = 12
839
23.9M
  vr = x[13];
840
23.9M
  vi = x[12];
841
23.9M
  ur = x[4] >> 1;
842
23.9M
  ui = x[5] >> 1;
843
23.9M
  x[4] = ur + (vr >> 1);
844
23.9M
  x[5] = ui - (vi >> 1);
845
23.9M
  x[12] = ur - (vr >> 1);
846
23.9M
  x[13] = ui + (vi >> 1);
847
848
  // xt1 = 16
849
  // xt2 = 24
850
23.9M
  vr = x[24];
851
23.9M
  vi = x[25];
852
23.9M
  ur = x[16] >> 1;
853
23.9M
  ui = x[17] >> 1;
854
23.9M
  x[16] = ur + (vr >> 1);
855
23.9M
  x[17] = ui + (vi >> 1);
856
23.9M
  x[24] = ur - (vr >> 1);
857
23.9M
  x[25] = ui - (vi >> 1);
858
859
  // xt1 = 20
860
  // xt2 = 28
861
23.9M
  vr = x[29];
862
23.9M
  vi = x[28];
863
23.9M
  ur = x[20] >> 1;
864
23.9M
  ui = x[21] >> 1;
865
23.9M
  x[20] = ur + (vr >> 1);
866
23.9M
  x[21] = ui - (vi >> 1);
867
23.9M
  x[28] = ur - (vr >> 1);
868
23.9M
  x[29] = ui + (vi >> 1);
869
870
  // xt1 =  2
871
  // xt2 = 10
872
23.9M
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
873
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
874
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
875
23.9M
  ur = x[2];
876
23.9M
  ui = x[3];
877
23.9M
  x[2] = (ur >> 1) + vr;
878
23.9M
  x[3] = (ui >> 1) + vi;
879
23.9M
  x[10] = (ur >> 1) - vr;
880
23.9M
  x[11] = (ui >> 1) - vi;
881
882
  // xt1 =  6
883
  // xt2 = 14
884
23.9M
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
885
23.9M
  ur = x[6];
886
23.9M
  ui = x[7];
887
23.9M
  x[6] = (ur >> 1) + vr;
888
23.9M
  x[7] = (ui >> 1) - vi;
889
23.9M
  x[14] = (ur >> 1) - vr;
890
23.9M
  x[15] = (ui >> 1) + vi;
891
892
  // xt1 = 18
893
  // xt2 = 26
894
23.9M
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
895
23.9M
  ur = x[18];
896
23.9M
  ui = x[19];
897
23.9M
  x[18] = (ur >> 1) + vr;
898
23.9M
  x[19] = (ui >> 1) + vi;
899
23.9M
  x[26] = (ur >> 1) - vr;
900
23.9M
  x[27] = (ui >> 1) - vi;
901
902
  // xt1 = 22
903
  // xt2 = 30
904
23.9M
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
905
23.9M
  ur = x[22];
906
23.9M
  ui = x[23];
907
23.9M
  x[22] = (ur >> 1) + vr;
908
23.9M
  x[23] = (ui >> 1) - vi;
909
23.9M
  x[30] = (ur >> 1) - vr;
910
23.9M
  x[31] = (ui >> 1) + vi;
911
912
  // xt1 =  0
913
  // xt2 = 16
914
23.9M
  vr = x[16];
915
23.9M
  vi = x[17];
916
23.9M
  ur = x[0] >> 1;
917
23.9M
  ui = x[1] >> 1;
918
23.9M
  x[0] = ur + (vr >> 1);
919
23.9M
  x[1] = ui + (vi >> 1);
920
23.9M
  x[16] = ur - (vr >> 1);
921
23.9M
  x[17] = ui - (vi >> 1);
922
923
  // xt1 =  8
924
  // xt2 = 24
925
23.9M
  vi = x[24];
926
23.9M
  vr = x[25];
927
23.9M
  ur = x[8] >> 1;
928
23.9M
  ui = x[9] >> 1;
929
23.9M
  x[8] = ur + (vr >> 1);
930
23.9M
  x[9] = ui - (vi >> 1);
931
23.9M
  x[24] = ur - (vr >> 1);
932
23.9M
  x[25] = ui + (vi >> 1);
933
934
  // xt1 =  2
935
  // xt2 = 18
936
23.9M
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
937
23.9M
  ur = x[2];
938
23.9M
  ui = x[3];
939
23.9M
  x[2] = (ur >> 1) + vr;
940
23.9M
  x[3] = (ui >> 1) + vi;
941
23.9M
  x[18] = (ur >> 1) - vr;
942
23.9M
  x[19] = (ui >> 1) - vi;
943
944
  // xt1 = 10
945
  // xt2 = 26
946
23.9M
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
947
23.9M
  ur = x[10];
948
23.9M
  ui = x[11];
949
23.9M
  x[10] = (ur >> 1) + vr;
950
23.9M
  x[11] = (ui >> 1) - vi;
951
23.9M
  x[26] = (ur >> 1) - vr;
952
23.9M
  x[27] = (ui >> 1) + vi;
953
954
  // xt1 =  4
955
  // xt2 = 20
956
23.9M
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
957
23.9M
  ur = x[4];
958
23.9M
  ui = x[5];
959
23.9M
  x[4] = (ur >> 1) + vr;
960
23.9M
  x[5] = (ui >> 1) + vi;
961
23.9M
  x[20] = (ur >> 1) - vr;
962
23.9M
  x[21] = (ui >> 1) - vi;
963
964
  // xt1 = 12
965
  // xt2 = 28
966
23.9M
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
967
23.9M
  ur = x[12];
968
23.9M
  ui = x[13];
969
23.9M
  x[12] = (ur >> 1) + vr;
970
23.9M
  x[13] = (ui >> 1) - vi;
971
23.9M
  x[28] = (ur >> 1) - vr;
972
23.9M
  x[29] = (ui >> 1) + vi;
973
974
  // xt1 =  6
975
  // xt2 = 22
976
23.9M
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
977
23.9M
  ur = x[6];
978
23.9M
  ui = x[7];
979
23.9M
  x[6] = (ur >> 1) + vr;
980
23.9M
  x[7] = (ui >> 1) + vi;
981
23.9M
  x[22] = (ur >> 1) - vr;
982
23.9M
  x[23] = (ui >> 1) - vi;
983
984
  // xt1 = 14
985
  // xt2 = 30
986
23.9M
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
987
23.9M
  ur = x[14];
988
23.9M
  ui = x[15];
989
23.9M
  x[14] = (ur >> 1) + vr;
990
23.9M
  x[15] = (ui >> 1) - vi;
991
23.9M
  x[30] = (ur >> 1) - vr;
992
23.9M
  x[31] = (ui >> 1) + vi;
993
23.9M
}
994
#endif /* FUNCTION_fft_16 */
995
996
#ifndef FUNCTION_fft_32
997
static const FIXP_STP fft32_w32[6] = {
998
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
999
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
1000
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
1001
1.10G
#define W_PiFOURTH STC(0x5a82799a)
1002
1003
LNK_SECTION_CODE_L1
1004
39.3M
inline void fft_32(FIXP_DBL *const _x) {
1005
  /*
1006
   * 1+2 stage radix 4
1007
   */
1008
1009
  /////////////////////////////////////////////////////////////////////////////////////////
1010
39.3M
  {
1011
39.3M
    FIXP_DBL *const x = _x;
1012
39.3M
    FIXP_DBL vi, ui;
1013
39.3M
    FIXP_DBL vi2, ui2;
1014
39.3M
    FIXP_DBL vi3, ui3;
1015
39.3M
    FIXP_DBL vr, ur;
1016
39.3M
    FIXP_DBL vr2, ur2;
1017
39.3M
    FIXP_DBL vr3, ur3;
1018
39.3M
    FIXP_DBL vr4, ur4;
1019
1020
    // i = 0
1021
39.3M
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
1022
39.3M
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
1023
39.3M
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
1024
39.3M
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
1025
1026
39.3M
    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1027
39.3M
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1028
1029
39.3M
    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
1030
39.3M
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
1031
1032
39.3M
    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1033
39.3M
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1034
1035
39.3M
    vr -= x[32];             /* Re A - Re B */
1036
39.3M
    ur -= x[33];             /* Im A - Im B */
1037
39.3M
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
1038
39.3M
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
1039
1040
39.3M
    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
1041
39.3M
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
1042
1043
39.3M
    x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1044
39.3M
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1045
1046
39.3M
    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
1047
39.3M
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
1048
1049
39.3M
    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1050
39.3M
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1051
1052
    // i=16
1053
39.3M
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
1054
39.3M
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
1055
1056
39.3M
    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1057
39.3M
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1058
39.3M
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1059
39.3M
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1060
1061
39.3M
    vr2 -= x[36];            /* Re A - Re B */
1062
39.3M
    ur2 -= x[37];            /* Im A - Im B */
1063
39.3M
    vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
1064
39.3M
    ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
1065
1066
39.3M
    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
1067
39.3M
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
1068
1069
39.3M
    x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1070
39.3M
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1071
1072
39.3M
    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
1073
39.3M
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
1074
1075
39.3M
    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1076
39.3M
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1077
1078
    // i = 32
1079
1080
39.3M
    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1081
39.3M
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1082
39.3M
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1083
39.3M
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1084
1085
39.3M
    vr3 -= x[34];              /* Re A - Re B */
1086
39.3M
    ur3 -= x[35];              /* Im A - Im B */
1087
39.3M
    vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
1088
39.3M
    ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
1089
1090
39.3M
    x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1091
39.3M
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1092
1093
    // i=48
1094
1095
39.3M
    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1096
39.3M
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1097
39.3M
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1098
39.3M
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1099
1100
39.3M
    vr4 -= x[38]; /* Re A - Re B */
1101
39.3M
    ur4 -= x[39]; /* Im A - Im B */
1102
1103
39.3M
    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1104
39.3M
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1105
1106
39.3M
    vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
1107
39.3M
    ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
1108
1109
39.3M
    x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1110
39.3M
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1111
39.3M
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1112
39.3M
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1113
1114
    // i=8
1115
39.3M
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
1116
39.3M
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
1117
39.3M
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
1118
39.3M
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
1119
1120
39.3M
    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1121
39.3M
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1122
1123
39.3M
    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
1124
39.3M
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
1125
1126
39.3M
    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1127
39.3M
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1128
1129
39.3M
    vr -= x[40];             /* Re A - Re B */
1130
39.3M
    ur -= x[41];             /* Im A - Im B */
1131
39.3M
    vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
1132
39.3M
    ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
1133
1134
39.3M
    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
1135
39.3M
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
1136
1137
39.3M
    x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1138
39.3M
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1139
1140
39.3M
    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
1141
39.3M
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
1142
1143
39.3M
    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1144
39.3M
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1145
1146
    // i=24
1147
39.3M
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
1148
39.3M
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
1149
1150
39.3M
    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1151
39.3M
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1152
39.3M
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1153
39.3M
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1154
1155
39.3M
    vr2 -= x[44];            /* Re A - Re B */
1156
39.3M
    ur2 -= x[45];            /* Im A - Im B */
1157
39.3M
    vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
1158
39.3M
    ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
1159
1160
39.3M
    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
1161
39.3M
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
1162
1163
39.3M
    x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1164
39.3M
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1165
1166
39.3M
    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
1167
39.3M
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
1168
1169
39.3M
    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1170
39.3M
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1171
1172
    // i=40
1173
1174
39.3M
    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1175
39.3M
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1176
39.3M
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1177
39.3M
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1178
1179
39.3M
    vr3 -= x[42];              /* Re A - Re B */
1180
39.3M
    ur3 -= x[43];              /* Im A - Im B */
1181
39.3M
    vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
1182
39.3M
    ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
1183
1184
39.3M
    x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1185
39.3M
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1186
1187
    // i=56
1188
1189
39.3M
    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1190
39.3M
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1191
39.3M
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1192
39.3M
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1193
1194
39.3M
    vr4 -= x[46]; /* Re A - Re B */
1195
39.3M
    ur4 -= x[47]; /* Im A - Im B */
1196
1197
39.3M
    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1198
39.3M
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1199
1200
39.3M
    vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
1201
39.3M
    ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
1202
1203
39.3M
    x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1204
39.3M
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1205
39.3M
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1206
39.3M
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1207
39.3M
  }
1208
1209
39.3M
  {
1210
39.3M
    FIXP_DBL *xt = _x;
1211
1212
39.3M
    int j = 4;
1213
157M
    do {
1214
157M
      FIXP_DBL vi, ui, vr, ur;
1215
1216
157M
      vr = xt[8];
1217
157M
      vi = xt[9];
1218
157M
      ur = xt[0] >> 1;
1219
157M
      ui = xt[1] >> 1;
1220
157M
      xt[0] = ur + (vr >> 1);
1221
157M
      xt[1] = ui + (vi >> 1);
1222
157M
      xt[8] = ur - (vr >> 1);
1223
157M
      xt[9] = ui - (vi >> 1);
1224
1225
157M
      vr = xt[13];
1226
157M
      vi = xt[12];
1227
157M
      ur = xt[4] >> 1;
1228
157M
      ui = xt[5] >> 1;
1229
157M
      xt[4] = ur + (vr >> 1);
1230
157M
      xt[5] = ui - (vi >> 1);
1231
157M
      xt[12] = ur - (vr >> 1);
1232
157M
      xt[13] = ui + (vi >> 1);
1233
1234
157M
      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
1235
157M
      ur = xt[2];
1236
157M
      ui = xt[3];
1237
157M
      xt[2] = (ur >> 1) + vr;
1238
157M
      xt[3] = (ui >> 1) + vi;
1239
157M
      xt[10] = (ur >> 1) - vr;
1240
157M
      xt[11] = (ui >> 1) - vi;
1241
1242
157M
      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
1243
157M
      ur = xt[6];
1244
157M
      ui = xt[7];
1245
1246
157M
      xt[6] = (ur >> 1) + vr;
1247
157M
      xt[7] = (ui >> 1) - vi;
1248
157M
      xt[14] = (ur >> 1) - vr;
1249
157M
      xt[15] = (ui >> 1) + vi;
1250
157M
      xt += 16;
1251
157M
    } while (--j != 0);
1252
39.3M
  }
1253
1254
39.3M
  {
1255
39.3M
    FIXP_DBL *const x = _x;
1256
39.3M
    FIXP_DBL vi, ui, vr, ur;
1257
1258
39.3M
    vr = x[16];
1259
39.3M
    vi = x[17];
1260
39.3M
    ur = x[0] >> 1;
1261
39.3M
    ui = x[1] >> 1;
1262
39.3M
    x[0] = ur + (vr >> 1);
1263
39.3M
    x[1] = ui + (vi >> 1);
1264
39.3M
    x[16] = ur - (vr >> 1);
1265
39.3M
    x[17] = ui - (vi >> 1);
1266
1267
39.3M
    vi = x[24];
1268
39.3M
    vr = x[25];
1269
39.3M
    ur = x[8] >> 1;
1270
39.3M
    ui = x[9] >> 1;
1271
39.3M
    x[8] = ur + (vr >> 1);
1272
39.3M
    x[9] = ui - (vi >> 1);
1273
39.3M
    x[24] = ur - (vr >> 1);
1274
39.3M
    x[25] = ui + (vi >> 1);
1275
1276
39.3M
    vr = x[48];
1277
39.3M
    vi = x[49];
1278
39.3M
    ur = x[32] >> 1;
1279
39.3M
    ui = x[33] >> 1;
1280
39.3M
    x[32] = ur + (vr >> 1);
1281
39.3M
    x[33] = ui + (vi >> 1);
1282
39.3M
    x[48] = ur - (vr >> 1);
1283
39.3M
    x[49] = ui - (vi >> 1);
1284
1285
39.3M
    vi = x[56];
1286
39.3M
    vr = x[57];
1287
39.3M
    ur = x[40] >> 1;
1288
39.3M
    ui = x[41] >> 1;
1289
39.3M
    x[40] = ur + (vr >> 1);
1290
39.3M
    x[41] = ui - (vi >> 1);
1291
39.3M
    x[56] = ur - (vr >> 1);
1292
39.3M
    x[57] = ui + (vi >> 1);
1293
1294
39.3M
    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
1295
39.3M
    ur = x[2];
1296
39.3M
    ui = x[3];
1297
39.3M
    x[2] = (ur >> 1) + vr;
1298
39.3M
    x[3] = (ui >> 1) + vi;
1299
39.3M
    x[18] = (ur >> 1) - vr;
1300
39.3M
    x[19] = (ui >> 1) - vi;
1301
1302
39.3M
    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
1303
39.3M
    ur = x[10];
1304
39.3M
    ui = x[11];
1305
39.3M
    x[10] = (ur >> 1) + vr;
1306
39.3M
    x[11] = (ui >> 1) - vi;
1307
39.3M
    x[26] = (ur >> 1) - vr;
1308
39.3M
    x[27] = (ui >> 1) + vi;
1309
1310
39.3M
    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
1311
39.3M
    ur = x[34];
1312
39.3M
    ui = x[35];
1313
39.3M
    x[34] = (ur >> 1) + vr;
1314
39.3M
    x[35] = (ui >> 1) + vi;
1315
39.3M
    x[50] = (ur >> 1) - vr;
1316
39.3M
    x[51] = (ui >> 1) - vi;
1317
1318
39.3M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
1319
39.3M
    ur = x[42];
1320
39.3M
    ui = x[43];
1321
39.3M
    x[42] = (ur >> 1) + vr;
1322
39.3M
    x[43] = (ui >> 1) - vi;
1323
39.3M
    x[58] = (ur >> 1) - vr;
1324
39.3M
    x[59] = (ui >> 1) + vi;
1325
1326
39.3M
    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
1327
39.3M
    ur = x[4];
1328
39.3M
    ui = x[5];
1329
39.3M
    x[4] = (ur >> 1) + vr;
1330
39.3M
    x[5] = (ui >> 1) + vi;
1331
39.3M
    x[20] = (ur >> 1) - vr;
1332
39.3M
    x[21] = (ui >> 1) - vi;
1333
1334
39.3M
    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
1335
39.3M
    ur = x[12];
1336
39.3M
    ui = x[13];
1337
39.3M
    x[12] = (ur >> 1) + vr;
1338
39.3M
    x[13] = (ui >> 1) - vi;
1339
39.3M
    x[28] = (ur >> 1) - vr;
1340
39.3M
    x[29] = (ui >> 1) + vi;
1341
1342
39.3M
    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
1343
39.3M
    ur = x[36];
1344
39.3M
    ui = x[37];
1345
39.3M
    x[36] = (ur >> 1) + vr;
1346
39.3M
    x[37] = (ui >> 1) + vi;
1347
39.3M
    x[52] = (ur >> 1) - vr;
1348
39.3M
    x[53] = (ui >> 1) - vi;
1349
1350
39.3M
    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
1351
39.3M
    ur = x[44];
1352
39.3M
    ui = x[45];
1353
39.3M
    x[44] = (ur >> 1) + vr;
1354
39.3M
    x[45] = (ui >> 1) - vi;
1355
39.3M
    x[60] = (ur >> 1) - vr;
1356
39.3M
    x[61] = (ui >> 1) + vi;
1357
1358
39.3M
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
1359
39.3M
    ur = x[6];
1360
39.3M
    ui = x[7];
1361
39.3M
    x[6] = (ur >> 1) + vr;
1362
39.3M
    x[7] = (ui >> 1) + vi;
1363
39.3M
    x[22] = (ur >> 1) - vr;
1364
39.3M
    x[23] = (ui >> 1) - vi;
1365
1366
39.3M
    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
1367
39.3M
    ur = x[14];
1368
39.3M
    ui = x[15];
1369
39.3M
    x[14] = (ur >> 1) + vr;
1370
39.3M
    x[15] = (ui >> 1) - vi;
1371
39.3M
    x[30] = (ur >> 1) - vr;
1372
39.3M
    x[31] = (ui >> 1) + vi;
1373
1374
39.3M
    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
1375
39.3M
    ur = x[38];
1376
39.3M
    ui = x[39];
1377
39.3M
    x[38] = (ur >> 1) + vr;
1378
39.3M
    x[39] = (ui >> 1) + vi;
1379
39.3M
    x[54] = (ur >> 1) - vr;
1380
39.3M
    x[55] = (ui >> 1) - vi;
1381
1382
39.3M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
1383
39.3M
    ur = x[46];
1384
39.3M
    ui = x[47];
1385
1386
39.3M
    x[46] = (ur >> 1) + vr;
1387
39.3M
    x[47] = (ui >> 1) - vi;
1388
39.3M
    x[62] = (ur >> 1) - vr;
1389
39.3M
    x[63] = (ui >> 1) + vi;
1390
1391
39.3M
    vr = x[32];
1392
39.3M
    vi = x[33];
1393
39.3M
    ur = x[0] >> 1;
1394
39.3M
    ui = x[1] >> 1;
1395
39.3M
    x[0] = ur + (vr >> 1);
1396
39.3M
    x[1] = ui + (vi >> 1);
1397
39.3M
    x[32] = ur - (vr >> 1);
1398
39.3M
    x[33] = ui - (vi >> 1);
1399
1400
39.3M
    vi = x[48];
1401
39.3M
    vr = x[49];
1402
39.3M
    ur = x[16] >> 1;
1403
39.3M
    ui = x[17] >> 1;
1404
39.3M
    x[16] = ur + (vr >> 1);
1405
39.3M
    x[17] = ui - (vi >> 1);
1406
39.3M
    x[48] = ur - (vr >> 1);
1407
39.3M
    x[49] = ui + (vi >> 1);
1408
1409
39.3M
    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
1410
39.3M
    ur = x[2];
1411
39.3M
    ui = x[3];
1412
39.3M
    x[2] = (ur >> 1) + vr;
1413
39.3M
    x[3] = (ui >> 1) + vi;
1414
39.3M
    x[34] = (ur >> 1) - vr;
1415
39.3M
    x[35] = (ui >> 1) - vi;
1416
1417
39.3M
    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
1418
39.3M
    ur = x[18];
1419
39.3M
    ui = x[19];
1420
39.3M
    x[18] = (ur >> 1) + vr;
1421
39.3M
    x[19] = (ui >> 1) - vi;
1422
39.3M
    x[50] = (ur >> 1) - vr;
1423
39.3M
    x[51] = (ui >> 1) + vi;
1424
1425
39.3M
    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
1426
39.3M
    ur = x[4];
1427
39.3M
    ui = x[5];
1428
39.3M
    x[4] = (ur >> 1) + vr;
1429
39.3M
    x[5] = (ui >> 1) + vi;
1430
39.3M
    x[36] = (ur >> 1) - vr;
1431
39.3M
    x[37] = (ui >> 1) - vi;
1432
1433
39.3M
    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
1434
39.3M
    ur = x[20];
1435
39.3M
    ui = x[21];
1436
39.3M
    x[20] = (ur >> 1) + vr;
1437
39.3M
    x[21] = (ui >> 1) - vi;
1438
39.3M
    x[52] = (ur >> 1) - vr;
1439
39.3M
    x[53] = (ui >> 1) + vi;
1440
1441
39.3M
    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
1442
39.3M
    ur = x[6];
1443
39.3M
    ui = x[7];
1444
39.3M
    x[6] = (ur >> 1) + vr;
1445
39.3M
    x[7] = (ui >> 1) + vi;
1446
39.3M
    x[38] = (ur >> 1) - vr;
1447
39.3M
    x[39] = (ui >> 1) - vi;
1448
1449
39.3M
    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
1450
39.3M
    ur = x[22];
1451
39.3M
    ui = x[23];
1452
39.3M
    x[22] = (ur >> 1) + vr;
1453
39.3M
    x[23] = (ui >> 1) - vi;
1454
39.3M
    x[54] = (ur >> 1) - vr;
1455
39.3M
    x[55] = (ui >> 1) + vi;
1456
1457
39.3M
    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
1458
39.3M
    ur = x[8];
1459
39.3M
    ui = x[9];
1460
39.3M
    x[8] = (ur >> 1) + vr;
1461
39.3M
    x[9] = (ui >> 1) + vi;
1462
39.3M
    x[40] = (ur >> 1) - vr;
1463
39.3M
    x[41] = (ui >> 1) - vi;
1464
1465
39.3M
    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
1466
39.3M
    ur = x[24];
1467
39.3M
    ui = x[25];
1468
39.3M
    x[24] = (ur >> 1) + vr;
1469
39.3M
    x[25] = (ui >> 1) - vi;
1470
39.3M
    x[56] = (ur >> 1) - vr;
1471
39.3M
    x[57] = (ui >> 1) + vi;
1472
1473
39.3M
    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
1474
39.3M
    ur = x[10];
1475
39.3M
    ui = x[11];
1476
1477
39.3M
    x[10] = (ur >> 1) + vr;
1478
39.3M
    x[11] = (ui >> 1) + vi;
1479
39.3M
    x[42] = (ur >> 1) - vr;
1480
39.3M
    x[43] = (ui >> 1) - vi;
1481
1482
39.3M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
1483
39.3M
    ur = x[26];
1484
39.3M
    ui = x[27];
1485
39.3M
    x[26] = (ur >> 1) + vr;
1486
39.3M
    x[27] = (ui >> 1) - vi;
1487
39.3M
    x[58] = (ur >> 1) - vr;
1488
39.3M
    x[59] = (ui >> 1) + vi;
1489
1490
39.3M
    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1491
39.3M
    ur = x[12];
1492
39.3M
    ui = x[13];
1493
39.3M
    x[12] = (ur >> 1) + vr;
1494
39.3M
    x[13] = (ui >> 1) + vi;
1495
39.3M
    x[44] = (ur >> 1) - vr;
1496
39.3M
    x[45] = (ui >> 1) - vi;
1497
1498
39.3M
    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1499
39.3M
    ur = x[28];
1500
39.3M
    ui = x[29];
1501
39.3M
    x[28] = (ur >> 1) + vr;
1502
39.3M
    x[29] = (ui >> 1) - vi;
1503
39.3M
    x[60] = (ur >> 1) - vr;
1504
39.3M
    x[61] = (ui >> 1) + vi;
1505
1506
39.3M
    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1507
39.3M
    ur = x[14];
1508
39.3M
    ui = x[15];
1509
39.3M
    x[14] = (ur >> 1) + vr;
1510
39.3M
    x[15] = (ui >> 1) + vi;
1511
39.3M
    x[46] = (ur >> 1) - vr;
1512
39.3M
    x[47] = (ui >> 1) - vi;
1513
1514
39.3M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1515
39.3M
    ur = x[30];
1516
39.3M
    ui = x[31];
1517
39.3M
    x[30] = (ur >> 1) + vr;
1518
39.3M
    x[31] = (ui >> 1) - vi;
1519
39.3M
    x[62] = (ur >> 1) - vr;
1520
39.3M
    x[63] = (ui >> 1) + vi;
1521
39.3M
  }
1522
39.3M
}
1523
#endif /* #ifndef FUNCTION_fft_32 */
1524
1525
/**
1526
 * \brief Apply rotation vectors to a data buffer.
1527
 * \param cl length of each row of input data.
1528
 * \param l total length of input data.
1529
 * \param pVecRe real part of rotation coefficient vector.
1530
 * \param pVecIm imaginary part of rotation coefficient vector.
1531
 */
1532
1533
/*
1534
   This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535
   (-1.0) instead. At the end, the sign of the result is inverted
1536
*/
1537
#define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
1540
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541
                                        const int l, const FIXP_STB *pVecRe,
1542
4.69M
                                        const FIXP_STB *pVecIm) {
1543
4.69M
  FIXP_DBL re, im;
1544
4.69M
  FIXP_STB vre, vim;
1545
1546
4.69M
  int i, c;
1547
1548
20.0M
  for (i = 0; i < cl; i++) {
1549
15.3M
    re = pData[2 * i];
1550
15.3M
    im = pData[2 * i + 1];
1551
1552
15.3M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1553
15.3M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554
15.3M
  }
1555
28.1M
  for (; i < l; i += cl) {
1556
23.4M
    re = pData[2 * i];
1557
23.4M
    im = pData[2 * i + 1];
1558
1559
23.4M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1560
23.4M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562
142M
    for (c = i + 1; c < i + cl; c++) {
1563
119M
      re = pData[2 * c] >> 1;
1564
119M
      im = pData[2 * c + 1] >> 1;
1565
119M
      vre = *pVecRe++;
1566
119M
      vim = *pVecIm++;
1567
1568
119M
      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569
119M
    }
1570
23.4M
  }
1571
4.69M
}
1572
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574
/* select either switch case of function pointer. */
1575
//#define FFT_TWO_STAGE_SWITCH_CASE
1576
#ifndef FUNCTION_fftN2_func
1577
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578
                              const int dim1, const int dim2,
1579
                              void (*const fft1)(FIXP_DBL *),
1580
                              void (*const fft2)(FIXP_DBL *),
1581
                              const FIXP_STB *RotVectorReal,
1582
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583
4.69M
                              FIXP_DBL *aDst2) {
1584
  /* The real part of the input samples are at the addresses with even indices
1585
  and the imaginary part of the input samples are at the addresses with odd
1586
  indices. The output samples are stored at the address of pInput
1587
  */
1588
4.69M
  FIXP_DBL *pSrc, *pDst, *pDstOut;
1589
4.69M
  int i;
1590
1591
4.69M
  FDK_ASSERT(length == dim1 * dim2);
1592
1593
  /* Perform dim2 times the fft of length dim1. The input samples are at the
1594
  address of pSrc and the output samples are at the address of pDst. The input
1595
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596
  the output samples are stored consecutively.
1597
  */
1598
4.69M
  pSrc = pInput;
1599
4.69M
  pDst = aDst;
1600
32.8M
  for (i = 0; i < dim2; i++) {
1601
186M
    for (int j = 0; j < dim1; j++) {
1602
158M
      pDst[2 * j] = pSrc[2 * j * dim2];
1603
158M
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604
158M
    }
1605
1606
      /* fft of size dim1 */
1607
28.1M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1608
28.1M
    fft1(pDst);
1609
#else
1610
    switch (dim1) {
1611
      case 2:
1612
        fft2(pDst);
1613
        break;
1614
      case 3:
1615
        fft3(pDst);
1616
        break;
1617
      case 4:
1618
        fft_4(pDst);
1619
        break;
1620
      /* case 5: fft5(pDst); break; */
1621
      /* case 8: fft_8(pDst); break; */
1622
      case 12:
1623
        fft12(pDst);
1624
        break;
1625
      /* case 15: fft15(pDst); break; */
1626
      case 16:
1627
        fft_16(pDst);
1628
        break;
1629
      case 32:
1630
        fft_32(pDst);
1631
        break;
1632
        /*case 64: fft_64(pDst); break;*/
1633
        /* case 128: fft_128(pDst); break; */
1634
    }
1635
#endif
1636
28.1M
    pSrc += 2;
1637
28.1M
    pDst = pDst + 2 * dim1;
1638
28.1M
  }
1639
1640
  /* Perform the modulation of the output of the fft of length dim1 */
1641
4.69M
  pSrc = aDst;
1642
4.69M
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644
  /* Perform dim1 times the fft of length dim2. The input samples are at the
1645
  address of aDst and the output samples are at the address of pInput. The input
1646
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647
  the output samples are stored consecutively at the address of pInput.
1648
  */
1649
4.69M
  pSrc = aDst;
1650
4.69M
  pDst = aDst2;
1651
4.69M
  pDstOut = pInput;
1652
20.0M
  for (i = 0; i < dim1; i++) {
1653
173M
    for (int j = 0; j < dim2; j++) {
1654
158M
      pDst[2 * j] = pSrc[2 * j * dim1];
1655
158M
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656
158M
    }
1657
1658
15.3M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1659
15.3M
    fft2(pDst);
1660
#else
1661
    switch (dim2) {
1662
      case 4:
1663
        fft_4(pDst);
1664
        break;
1665
      case 9:
1666
        fft9(pDst);
1667
        break;
1668
      case 12:
1669
        fft12(pDst);
1670
        break;
1671
      case 15:
1672
        fft15(pDst);
1673
        break;
1674
      case 16:
1675
        fft_16(pDst);
1676
        break;
1677
      case 32:
1678
        fft_32(pDst);
1679
        break;
1680
    }
1681
#endif
1682
1683
173M
    for (int j = 0; j < dim2; j++) {
1684
158M
      pDstOut[2 * j * dim1] = pDst[2 * j];
1685
158M
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686
158M
    }
1687
15.3M
    pSrc += 2;
1688
15.3M
    pDstOut += 2;
1689
15.3M
  }
1690
4.69M
}
1691
#endif /* FUNCTION_fftN2_function */
1692
1693
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
1694
              RotVectorReal, RotVectorImag)                                \
1695
4.69M
  {                                                                        \
1696
4.69M
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
1697
4.69M
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
1698
4.69M
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
1699
4.69M
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
1700
4.69M
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
1701
4.69M
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
1702
4.69M
  }
1703
1704
  /*!
1705
   *
1706
   *  \brief  complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707
   *  \param  pInput contains the input signal prescaled right by 2
1708
   *          pInput contains the output signal scaled by SCALEFACTOR<#length>
1709
   *          The output signal does not have any fixed headroom
1710
   *  \return void
1711
   *
1712
   */
1713
1714
#ifndef FUNCTION_fft6
1715
3.55M
static inline void fft6(FIXP_DBL *pInput) {
1716
3.55M
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717
3.55M
}
1718
#endif /* #ifndef FUNCTION_fft6 */
1719
1720
#ifndef FUNCTION_fft12
1721
static inline void fft12(FIXP_DBL *pInput) {
1722
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723
        RotVectorImag12); /* 16,58 */
1724
}
1725
#endif /* #ifndef FUNCTION_fft12 */
1726
1727
#ifndef FUNCTION_fft20
1728
114k
static inline void fft20(FIXP_DBL *pInput) {
1729
114k
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730
114k
        RotVectorImag20);
1731
114k
}
1732
#endif /* FUNCTION_fft20 */
1733
1734
6.20k
static inline void fft24(FIXP_DBL *pInput) {
1735
6.20k
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736
6.20k
        RotVectorImag24); /* 16,73 */
1737
6.20k
}
1738
1739
376k
static inline void fft48(FIXP_DBL *pInput) {
1740
376k
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741
376k
        RotVectorImag48); /* 16,32 */
1742
376k
}
1743
1744
#ifndef FUNCTION_fft60
1745
245k
static inline void fft60(FIXP_DBL *pInput) {
1746
245k
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747
245k
        RotVectorImag60); /* 15,51 */
1748
245k
}
1749
#endif /* FUNCTION_fft60 */
1750
1751
#ifndef FUNCTION_fft80
1752
187
static inline void fft80(FIXP_DBL *pInput) {
1753
187
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754
187
        RotVectorImag80); /*  */
1755
187
}
1756
#endif
1757
1758
#ifndef FUNCTION_fft96
1759
68.1k
static inline void fft96(FIXP_DBL *pInput) {
1760
68.1k
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761
68.1k
        RotVectorImag96); /* 15,47 */
1762
68.1k
}
1763
#endif /* FUNCTION_fft96*/
1764
1765
#ifndef FUNCTION_fft120
1766
3.83k
static inline void fft120(FIXP_DBL *pInput) {
1767
3.83k
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768
3.83k
        RotVectorImag120);
1769
3.83k
}
1770
#endif /* FUNCTION_fft120 */
1771
1772
#ifndef FUNCTION_fft192
1773
7.80k
static inline void fft192(FIXP_DBL *pInput) {
1774
7.80k
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775
7.80k
        RotVectorImag192); /* 15,50 */
1776
7.80k
}
1777
#endif
1778
1779
#ifndef FUNCTION_fft240
1780
207k
static inline void fft240(FIXP_DBL *pInput) {
1781
207k
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782
207k
        RotVectorImag240); /* 15.44 */
1783
207k
}
1784
#endif
1785
1786
#ifndef FUNCTION_fft384
1787
93.2k
static inline void fft384(FIXP_DBL *pInput) {
1788
93.2k
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789
93.2k
        RotVectorImag384); /* 16.02 */
1790
93.2k
}
1791
#endif /* FUNCTION_fft384 */
1792
1793
#ifndef FUNCTION_fft480
1794
15.7k
static inline void fft480(FIXP_DBL *pInput) {
1795
15.7k
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796
15.7k
        RotVectorImag480); /* 15.84 */
1797
15.7k
}
1798
#endif /* FUNCTION_fft480 */
1799
1800
85.8M
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802
85.8M
  C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804
85.8M
  if (length == 32) {
1805
37.7M
    fft_32(pInput);
1806
37.7M
    *pScalefactor += SCALEFACTOR32;
1807
48.1M
  } else {
1808
48.1M
    switch (length) {
1809
20.7M
      case 16:
1810
20.7M
        fft_16(pInput);
1811
20.7M
        *pScalefactor += SCALEFACTOR16;
1812
20.7M
        break;
1813
8.26M
      case 8:
1814
8.26M
        fft_8(pInput);
1815
8.26M
        *pScalefactor += SCALEFACTOR8;
1816
8.26M
        break;
1817
0
      case 2:
1818
0
        fft2(pInput);
1819
0
        *pScalefactor += SCALEFACTOR2;
1820
0
        break;
1821
0
      case 3:
1822
0
        fft3(pInput);
1823
0
        *pScalefactor += SCALEFACTOR3;
1824
0
        break;
1825
1.00M
      case 4:
1826
1.00M
        fft_4(pInput);
1827
1.00M
        *pScalefactor += SCALEFACTOR4;
1828
1.00M
        break;
1829
0
      case 5:
1830
0
        fft5(pInput);
1831
0
        *pScalefactor += SCALEFACTOR5;
1832
0
        break;
1833
3.55M
      case 6:
1834
3.55M
        fft6(pInput);
1835
3.55M
        *pScalefactor += SCALEFACTOR6;
1836
3.55M
        break;
1837
114k
      case 10:
1838
114k
        fft10(pInput);
1839
114k
        *pScalefactor += SCALEFACTOR10;
1840
114k
        break;
1841
12.5M
      case 12:
1842
12.5M
        fft12(pInput);
1843
12.5M
        *pScalefactor += SCALEFACTOR12;
1844
12.5M
        break;
1845
0
      case 15:
1846
0
        fft15(pInput);
1847
0
        *pScalefactor += SCALEFACTOR15;
1848
0
        break;
1849
114k
      case 20:
1850
114k
        fft20(pInput);
1851
114k
        *pScalefactor += SCALEFACTOR20;
1852
114k
        break;
1853
6.20k
      case 24:
1854
6.20k
        fft24(pInput);
1855
6.20k
        *pScalefactor += SCALEFACTOR24;
1856
6.20k
        break;
1857
376k
      case 48:
1858
376k
        fft48(pInput);
1859
376k
        *pScalefactor += SCALEFACTOR48;
1860
376k
        break;
1861
245k
      case 60:
1862
245k
        fft60(pInput);
1863
245k
        *pScalefactor += SCALEFACTOR60;
1864
245k
        break;
1865
422k
      case 64:
1866
422k
        dit_fft(pInput, 6, SineTable512, 512);
1867
422k
        *pScalefactor += SCALEFACTOR64;
1868
422k
        break;
1869
187
      case 80:
1870
187
        fft80(pInput);
1871
187
        *pScalefactor += SCALEFACTOR80;
1872
187
        break;
1873
68.1k
      case 96:
1874
68.1k
        fft96(pInput);
1875
68.1k
        *pScalefactor += SCALEFACTOR96;
1876
68.1k
        break;
1877
3.83k
      case 120:
1878
3.83k
        fft120(pInput);
1879
3.83k
        *pScalefactor += SCALEFACTOR120;
1880
3.83k
        break;
1881
81.0k
      case 128:
1882
81.0k
        dit_fft(pInput, 7, SineTable512, 512);
1883
81.0k
        *pScalefactor += SCALEFACTOR128;
1884
81.0k
        break;
1885
7.80k
      case 192:
1886
7.80k
        fft192(pInput);
1887
7.80k
        *pScalefactor += SCALEFACTOR192;
1888
7.80k
        break;
1889
207k
      case 240:
1890
207k
        fft240(pInput);
1891
207k
        *pScalefactor += SCALEFACTOR240;
1892
207k
        break;
1893
74.1k
      case 256:
1894
74.1k
        dit_fft(pInput, 8, SineTable512, 512);
1895
74.1k
        *pScalefactor += SCALEFACTOR256;
1896
74.1k
        break;
1897
93.2k
      case 384:
1898
93.2k
        fft384(pInput);
1899
93.2k
        *pScalefactor += SCALEFACTOR384;
1900
93.2k
        break;
1901
15.7k
      case 480:
1902
15.7k
        fft480(pInput);
1903
15.7k
        *pScalefactor += SCALEFACTOR480;
1904
15.7k
        break;
1905
174k
      case 512:
1906
174k
        dit_fft(pInput, 9, SineTable512, 512);
1907
174k
        *pScalefactor += SCALEFACTOR512;
1908
174k
        break;
1909
0
      default:
1910
0
        FDK_ASSERT(0); /* FFT length not supported! */
1911
0
        break;
1912
48.1M
    }
1913
48.1M
  }
1914
85.8M
}
1915
1916
0
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917
0
  switch (length) {
1918
0
    default:
1919
0
      FDK_ASSERT(0); /* IFFT length not supported! */
1920
0
      break;
1921
0
  }
1922
0
}