Coverage Report

Created: 2025-08-26 06:50

/src/aac/libFDK/src/fft.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -----------------------------------------------------------------------------
2
Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4
© Copyright  1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5
Forschung e.V. All rights reserved.
6
7
 1.    INTRODUCTION
8
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10
scheme for digital audio. This FDK AAC Codec software is intended to be used on
11
a wide variety of Android devices.
12
13
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14
general perceptual audio codecs. AAC-ELD is considered the best-performing
15
full-bandwidth communications codec by independent studies and is widely
16
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17
specifications.
18
19
Patent licenses for necessary patent claims for the FDK AAC Codec (including
20
those of Fraunhofer) may be obtained through Via Licensing
21
(www.vialicensing.com) or through the respective patent owners individually for
22
the purpose of encoding or decoding bit streams in products that are compliant
23
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24
Android devices already license these patent claims through Via Licensing or
25
directly from the patent owners, and therefore FDK AAC Codec software may
26
already be covered under those patent licenses when it is used for those
27
licensed purposes only.
28
29
Commercially-licensed AAC software libraries, including floating-point versions
30
with enhanced sound quality, are also available from Fraunhofer. Users are
31
encouraged to check the Fraunhofer website for additional applications
32
information and documentation.
33
34
2.    COPYRIGHT LICENSE
35
36
Redistribution and use in source and binary forms, with or without modification,
37
are permitted without payment of copyright license fees provided that you
38
satisfy the following conditions:
39
40
You must retain the complete text of this software license in redistributions of
41
the FDK AAC Codec or your modifications thereto in source code form.
42
43
You must retain the complete text of this software license in the documentation
44
and/or other materials provided with redistributions of the FDK AAC Codec or
45
your modifications thereto in binary form. You must make available free of
46
charge copies of the complete source code of the FDK AAC Codec and your
47
modifications thereto to recipients of copies in binary form.
48
49
The name of Fraunhofer may not be used to endorse or promote products derived
50
from this library without prior written permission.
51
52
You may not charge copyright license fees for anyone to use, copy or distribute
53
the FDK AAC Codec software or your modifications thereto.
54
55
Your modified versions of the FDK AAC Codec must carry prominent notices stating
56
that you changed the software and the date of any change. For modified versions
57
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59
AAC Codec Library for Android."
60
61
3.    NO PATENT LICENSE
62
63
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65
Fraunhofer provides no warranty of patent non-infringement with respect to this
66
software.
67
68
You may use this FDK AAC Codec software or modifications thereto only for
69
purposes that are authorized by appropriate patent licenses.
70
71
4.    DISCLAIMER
72
73
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75
including but not limited to the implied warranties of merchantability and
76
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78
or consequential damages, including but not limited to procurement of substitute
79
goods or services; loss of use, data, or profits, or business interruption,
80
however caused and on any theory of liability, whether in contract, strict
81
liability, or tort (including negligence), arising in any way out of the use of
82
this software, even if advised of the possibility of such damage.
83
84
5.    CONTACT INFORMATION
85
86
Fraunhofer Institute for Integrated Circuits IIS
87
Attention: Audio and Multimedia Departments - FDK AAC LL
88
Am Wolfsmantel 33
89
91058 Erlangen, Germany
90
91
www.iis.fraunhofer.de/amm
92
amm-info@iis.fraunhofer.de
93
----------------------------------------------------------------------------- */
94
95
/******************* Library for basic calculation routines ********************
96
97
   Author(s):   Josef Hoepfl, DSP Solutions
98
99
   Description: Fix point FFT
100
101
*******************************************************************************/
102
103
#include "fft_rad2.h"
104
#include "FDK_tools_rom.h"
105
106
293M
#define W_PiFOURTH STC(0x5a82799a)
107
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
108
#ifndef SUMDIFF_PIFOURTH
109
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
110
722M
  {                                       \
111
722M
    FIXP_DBL wa, wb;                      \
112
722M
    wa = fMultDiv2(a, W_PiFOURTH);        \
113
722M
    wb = fMultDiv2(b, W_PiFOURTH);        \
114
722M
    diff = wb - wa;                       \
115
722M
    sum = wb + wa;                        \
116
722M
  }
117
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
118
  {                                               \
119
    FIXP_SGL wa, wb;                              \
120
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
121
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
122
    diff = wb - wa;                               \
123
    sum = wb + wa;                                \
124
  }
125
#endif
126
127
#define SCALEFACTOR2048 10
128
#define SCALEFACTOR1024 9
129
194k
#define SCALEFACTOR512 8
130
84.8k
#define SCALEFACTOR256 7
131
82.5k
#define SCALEFACTOR128 6
132
485k
#define SCALEFACTOR64 5
133
39.7M
#define SCALEFACTOR32 4
134
21.3M
#define SCALEFACTOR16 3
135
10.6M
#define SCALEFACTOR8 2
136
2.34M
#define SCALEFACTOR4 1
137
4.02M
#define SCALEFACTOR2 1
138
139
4.09M
#define SCALEFACTOR3 1
140
148k
#define SCALEFACTOR5 1
141
4.01M
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
142
#define SCALEFACTOR7 2
143
#define SCALEFACTOR9 2
144
3.12M
#define SCALEFACTOR10 5
145
13.0M
#define SCALEFACTOR12 3
146
501k
#define SCALEFACTOR15 3
147
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
148
148k
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
149
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
150
6.55k
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
151
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
152
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
153
373k
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
154
265k
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
155
131
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
156
76.4k
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
157
3.38k
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
158
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
159
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
160
10.1k
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
161
215k
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
162
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
163
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
164
89.2k
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
165
16.7k
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167
#include "fft.h"
168
169
#ifndef FUNCTION_fft2
170
171
/* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172
 * with factor 0.5 */
173
12.1M
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174
12.1M
  FIXP_DBL r1, i1;
175
12.1M
  FIXP_DBL r2, i2;
176
177
  /* real part */
178
12.1M
  r1 = pDat[2];
179
12.1M
  r2 = pDat[0];
180
181
  /* imaginary part */
182
12.1M
  i1 = pDat[3];
183
12.1M
  i2 = pDat[1];
184
185
  /* real part */
186
12.1M
  pDat[0] = (r2 + r1) >> 1;
187
12.1M
  pDat[2] = (r2 - r1) >> 1;
188
189
  /* imaginary part */
190
12.1M
  pDat[1] = (i2 + i1) >> 1;
191
12.1M
  pDat[3] = (i2 - i1) >> 1;
192
12.1M
}
193
#endif /* FUNCTION_fft2 */
194
195
20.9M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
196
197
#ifndef FUNCTION_fft3
198
/* Performs the FFT of length 3 according to the algorithm after winograd. */
199
10.4M
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200
10.4M
  FIXP_DBL r1, r2;
201
10.4M
  FIXP_DBL s1, s2;
202
10.4M
  FIXP_DBL pD;
203
204
  /* real part */
205
10.4M
  r1 = pDat[2] + pDat[4];
206
10.4M
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207
10.4M
  pD = pDat[0] >> 1;
208
10.4M
  pDat[0] = pD + (r1 >> 1);
209
10.4M
  r1 = pD - (r1 >> 2);
210
211
  /* imaginary part */
212
10.4M
  s1 = pDat[3] + pDat[5];
213
10.4M
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214
10.4M
  pD = pDat[1] >> 1;
215
10.4M
  pDat[1] = pD + (s1 >> 1);
216
10.4M
  s1 = pD - (s1 >> 2);
217
218
  /* combination */
219
10.4M
  pDat[2] = r1 - s2;
220
10.4M
  pDat[4] = r1 + s2;
221
10.4M
  pDat[3] = s1 + r2;
222
10.4M
  pDat[5] = s1 - r2;
223
10.4M
}
224
#endif /* #ifndef FUNCTION_fft3 */
225
226
158M
#define F5C(x) STC(x)
227
228
31.6M
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
229
31.6M
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
230
31.6M
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
231
31.6M
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
232
31.6M
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
233
234
/* performs the FFT of length 5 according to the algorithm after winograd */
235
/* This version works with a prescale of 2 instead of 3 */
236
15.8M
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237
15.8M
  FIXP_DBL r1, r2, r3, r4;
238
15.8M
  FIXP_DBL s1, s2, s3, s4;
239
15.8M
  FIXP_DBL t;
240
241
  /* real part */
242
15.8M
  r1 = (pDat[2] + pDat[8]) >> 1;
243
15.8M
  r4 = (pDat[2] - pDat[8]) >> 1;
244
15.8M
  r3 = (pDat[4] + pDat[6]) >> 1;
245
15.8M
  r2 = (pDat[4] - pDat[6]) >> 1;
246
15.8M
  t = fMult((r1 - r3), C54);
247
15.8M
  r1 = r1 + r3;
248
15.8M
  pDat[0] = (pDat[0] >> 1) + r1;
249
  /* Bit shift left because of the constant C55 which was scaled with the factor
250
     0.5 because of the representation of the values as fracts */
251
15.8M
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252
15.8M
  r3 = r1 - t;
253
15.8M
  r1 = r1 + t;
254
15.8M
  t = fMult((r4 + r2), C51);
255
  /* Bit shift left because of the constant C55 which was scaled with the factor
256
     0.5 because of the representation of the values as fracts */
257
15.8M
  r4 = t + (fMultDiv2(r4, C52) << (2));
258
15.8M
  r2 = t + fMult(r2, C53);
259
260
  /* imaginary part */
261
15.8M
  s1 = (pDat[3] + pDat[9]) >> 1;
262
15.8M
  s4 = (pDat[3] - pDat[9]) >> 1;
263
15.8M
  s3 = (pDat[5] + pDat[7]) >> 1;
264
15.8M
  s2 = (pDat[5] - pDat[7]) >> 1;
265
15.8M
  t = fMult((s1 - s3), C54);
266
15.8M
  s1 = s1 + s3;
267
15.8M
  pDat[1] = (pDat[1] >> 1) + s1;
268
  /* Bit shift left because of the constant C55 which was scaled with the factor
269
     0.5 because of the representation of the values as fracts */
270
15.8M
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271
15.8M
  s3 = s1 - t;
272
15.8M
  s1 = s1 + t;
273
15.8M
  t = fMult((s4 + s2), C51);
274
  /* Bit shift left because of the constant C55 which was scaled with the factor
275
     0.5 because of the representation of the values as fracts */
276
15.8M
  s4 = t + (fMultDiv2(s4, C52) << (2));
277
15.8M
  s2 = t + fMult(s2, C53);
278
279
  /* combination */
280
15.8M
  pDat[2] = r1 + s2;
281
15.8M
  pDat[8] = r1 - s2;
282
15.8M
  pDat[4] = r3 - s4;
283
15.8M
  pDat[6] = r3 + s4;
284
285
15.8M
  pDat[3] = s1 - r2;
286
15.8M
  pDat[9] = s1 + r2;
287
15.8M
  pDat[5] = s3 + r4;
288
15.8M
  pDat[7] = s3 - r4;
289
15.8M
}
290
291
2.97M
#define F5C(x) STC(x)
292
293
594k
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
294
594k
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
295
594k
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
296
594k
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
297
594k
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
298
/**
299
 * \brief    Function performs a complex 10-point FFT
300
 *           The FFT is performed inplace. The result of the FFT
301
 *           is scaled by SCALEFACTOR10 bits.
302
 *
303
 *           WOPS FLC version:                    1093 cycles
304
 *           WOPS with 32x16 bit multiplications:  196 cycles
305
 *
306
 * \param    [i/o] re    real input / output
307
 * \param    [i/o] im    imag input / output
308
 * \param    [i  ] s     stride real and imag input / output
309
 *
310
 * \return   void
311
 */
312
static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313
148k
{
314
148k
  FIXP_DBL t;
315
148k
  FIXP_DBL x0, x1, x2, x3, x4;
316
148k
  FIXP_DBL r1, r2, r3, r4;
317
148k
  FIXP_DBL s1, s2, s3, s4;
318
148k
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319
148k
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321
148k
  const int s = 1;  // stride factor
322
323
  /* 2 fft5 stages */
324
325
  /* real part */
326
148k
  x0 = (x[s * 0] >> SCALEFACTOR10);
327
148k
  x1 = (x[s * 4] >> SCALEFACTOR10);
328
148k
  x2 = (x[s * 8] >> SCALEFACTOR10);
329
148k
  x3 = (x[s * 12] >> SCALEFACTOR10);
330
148k
  x4 = (x[s * 16] >> SCALEFACTOR10);
331
332
148k
  r1 = (x3 + x2);
333
148k
  r4 = (x3 - x2);
334
148k
  r3 = (x1 + x4);
335
148k
  r2 = (x1 - x4);
336
148k
  t = fMult((r1 - r3), C54);
337
148k
  r1 = (r1 + r3);
338
148k
  y00 = (x0 + r1);
339
148k
  r1 = (y00 + ((fMult(r1, C55) << 1)));
340
148k
  r3 = (r1 - t);
341
148k
  r1 = (r1 + t);
342
148k
  t = fMult((r4 + r2), C51);
343
148k
  r4 = (t + (fMult(r4, C52) << 1));
344
148k
  r2 = (t + fMult(r2, C53));
345
346
  /* imaginary part */
347
148k
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348
148k
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349
148k
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350
148k
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351
148k
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353
148k
  s1 = (x3 + x2);
354
148k
  s4 = (x3 - x2);
355
148k
  s3 = (x1 + x4);
356
148k
  s2 = (x1 - x4);
357
148k
  t = fMult((s1 - s3), C54);
358
148k
  s1 = (s1 + s3);
359
148k
  y01 = (x0 + s1);
360
148k
  s1 = (y01 + (fMult(s1, C55) << 1));
361
148k
  s3 = (s1 - t);
362
148k
  s1 = (s1 + t);
363
148k
  t = fMult((s4 + s2), C51);
364
148k
  s4 = (t + (fMult(s4, C52) << 1));
365
148k
  s2 = (t + fMult(s2, C53));
366
367
  /* combination */
368
148k
  y04 = (r1 + s2);
369
148k
  y16 = (r1 - s2);
370
148k
  y08 = (r3 - s4);
371
148k
  y12 = (r3 + s4);
372
373
148k
  y05 = (s1 - r2);
374
148k
  y17 = (s1 + r2);
375
148k
  y09 = (s3 + r4);
376
148k
  y13 = (s3 - r4);
377
378
  /* real part */
379
148k
  x0 = (x[s * 10] >> SCALEFACTOR10);
380
148k
  x1 = (x[s * 2] >> SCALEFACTOR10);
381
148k
  x2 = (x[s * 6] >> SCALEFACTOR10);
382
148k
  x3 = (x[s * 14] >> SCALEFACTOR10);
383
148k
  x4 = (x[s * 18] >> SCALEFACTOR10);
384
385
148k
  r1 = (x1 + x4);
386
148k
  r4 = (x1 - x4);
387
148k
  r3 = (x3 + x2);
388
148k
  r2 = (x3 - x2);
389
148k
  t = fMult((r1 - r3), C54);
390
148k
  r1 = (r1 + r3);
391
148k
  y02 = (x0 + r1);
392
148k
  r1 = (y02 + ((fMult(r1, C55) << 1)));
393
148k
  r3 = (r1 - t);
394
148k
  r1 = (r1 + t);
395
148k
  t = fMult(((r4 + r2)), C51);
396
148k
  r4 = (t + (fMult(r4, C52) << 1));
397
148k
  r2 = (t + fMult(r2, C53));
398
399
  /* imaginary part */
400
148k
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401
148k
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402
148k
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403
148k
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404
148k
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406
148k
  s1 = (x1 + x4);
407
148k
  s4 = (x1 - x4);
408
148k
  s3 = (x3 + x2);
409
148k
  s2 = (x3 - x2);
410
148k
  t = fMult((s1 - s3), C54);
411
148k
  s1 = (s1 + s3);
412
148k
  y03 = (x0 + s1);
413
148k
  s1 = (y03 + (fMult(s1, C55) << 1));
414
148k
  s3 = (s1 - t);
415
148k
  s1 = (s1 + t);
416
148k
  t = fMult((s4 + s2), C51);
417
148k
  s4 = (t + (fMult(s4, C52) << 1));
418
148k
  s2 = (t + fMult(s2, C53));
419
420
  /* combination */
421
148k
  y06 = (r1 + s2);
422
148k
  y18 = (r1 - s2);
423
148k
  y10 = (r3 - s4);
424
148k
  y14 = (r3 + s4);
425
426
148k
  y07 = (s1 - r2);
427
148k
  y19 = (s1 + r2);
428
148k
  y11 = (s3 + r4);
429
148k
  y15 = (s3 - r4);
430
431
  /* 5 fft2 stages */
432
148k
  x[s * 0] = (y00 + y02);
433
148k
  x[s * 0 + 1] = (y01 + y03);
434
148k
  x[s * 10] = (y00 - y02);
435
148k
  x[s * 10 + 1] = (y01 - y03);
436
437
148k
  x[s * 4] = (y04 + y06);
438
148k
  x[s * 4 + 1] = (y05 + y07);
439
148k
  x[s * 14] = (y04 - y06);
440
148k
  x[s * 14 + 1] = (y05 - y07);
441
442
148k
  x[s * 8] = (y08 + y10);
443
148k
  x[s * 8 + 1] = (y09 + y11);
444
148k
  x[s * 18] = (y08 - y10);
445
148k
  x[s * 18 + 1] = (y09 - y11);
446
447
148k
  x[s * 12] = (y12 + y14);
448
148k
  x[s * 12 + 1] = (y13 + y15);
449
148k
  x[s * 2] = (y12 - y14);
450
148k
  x[s * 2 + 1] = (y13 - y15);
451
452
148k
  x[s * 16] = (y16 + y18);
453
148k
  x[s * 16 + 1] = (y17 + y19);
454
148k
  x[s * 6] = (y16 - y18);
455
148k
  x[s * 6 + 1] = (y17 - y19);
456
148k
}
457
458
#ifndef FUNCTION_fft12
459
#define FUNCTION_fft12
460
461
#undef C31
462
187M
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
463
464
17.0M
static inline void fft12(FIXP_DBL *pInput) {
465
17.0M
  FIXP_DBL aDst[24];
466
17.0M
  FIXP_DBL *pSrc, *pDst;
467
17.0M
  int i;
468
469
17.0M
  pSrc = pInput;
470
17.0M
  pDst = aDst;
471
17.0M
  FIXP_DBL r1, r2, s1, s2, pD;
472
473
  /* First 3*2 samples are shifted right by 2 before output */
474
17.0M
  r1 = pSrc[8] + pSrc[16];
475
17.0M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
476
17.0M
  pD = pSrc[0] >> 1;
477
17.0M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
478
17.0M
  r1 = pD - (r1 >> 2);
479
480
  /* imaginary part */
481
17.0M
  s1 = pSrc[9] + pSrc[17];
482
17.0M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
483
17.0M
  pD = pSrc[1] >> 1;
484
17.0M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
485
17.0M
  s1 = pD - (s1 >> 2);
486
487
  /* combination */
488
17.0M
  pDst[2] = (r1 - s2) >> 1;
489
17.0M
  pDst[3] = (s1 + r2) >> 1;
490
17.0M
  pDst[4] = (r1 + s2) >> 1;
491
17.0M
  pDst[5] = (s1 - r2) >> 1;
492
17.0M
  pSrc += 2;
493
17.0M
  pDst += 6;
494
495
17.0M
  const FIXP_STB *pVecRe = RotVectorReal12;
496
17.0M
  const FIXP_STB *pVecIm = RotVectorImag12;
497
17.0M
  FIXP_DBL re, im;
498
17.0M
  FIXP_STB vre, vim;
499
51.1M
  for (i = 0; i < 2; i++) {
500
    /* sample 0,1 are shifted right by 2 before output */
501
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
502
     * output */
503
504
34.1M
    r1 = pSrc[8] + pSrc[16];
505
34.1M
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
506
34.1M
    pD = pSrc[0] >> 1;
507
34.1M
    pDst[0] = (pD + (r1 >> 1)) >> 1;
508
34.1M
    r1 = pD - (r1 >> 2);
509
510
    /* imaginary part */
511
34.1M
    s1 = pSrc[9] + pSrc[17];
512
34.1M
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
513
34.1M
    pD = pSrc[1] >> 1;
514
34.1M
    pDst[1] = (pD + (s1 >> 1)) >> 1;
515
34.1M
    s1 = pD - (s1 >> 2);
516
517
    /* combination */
518
34.1M
    re = (r1 - s2) >> 0;
519
34.1M
    im = (s1 + r2) >> 0;
520
34.1M
    vre = *pVecRe++;
521
34.1M
    vim = *pVecIm++;
522
34.1M
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
523
524
34.1M
    re = (r1 + s2) >> 0;
525
34.1M
    im = (s1 - r2) >> 0;
526
34.1M
    vre = *pVecRe++;
527
34.1M
    vim = *pVecIm++;
528
34.1M
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
529
530
34.1M
    pDst += 6;
531
34.1M
    pSrc += 2;
532
34.1M
  }
533
  /* sample 0,1 are shifted right by 2 before output */
534
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
535
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
536
17.0M
  r1 = pSrc[8] + pSrc[16];
537
17.0M
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
538
17.0M
  pD = pSrc[0] >> 1;
539
17.0M
  pDst[0] = (pD + (r1 >> 1)) >> 1;
540
17.0M
  r1 = pD - (r1 >> 2);
541
542
  /* imaginary part */
543
17.0M
  s1 = pSrc[9] + pSrc[17];
544
17.0M
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
545
17.0M
  pD = pSrc[1] >> 1;
546
17.0M
  pDst[1] = (pD + (s1 >> 1)) >> 1;
547
17.0M
  s1 = pD - (s1 >> 2);
548
549
  /* combination */
550
17.0M
  pDst[2] = (s1 + r2) >> 1;
551
17.0M
  pDst[3] = (s2 - r1) >> 1;
552
17.0M
  pDst[4] = -((r1 + s2) >> 1);
553
17.0M
  pDst[5] = (r2 - s1) >> 1;
554
555
  /* Perform 3 times the fft of length 4. The input samples are at the address
556
  of aDst and the output samples are at the address of pInput. The input vector
557
  for the fft of length 4 is built of the interleaved samples in aDst, the
558
  output samples are stored consecutively at the address of pInput.
559
  */
560
17.0M
  pSrc = aDst;
561
17.0M
  pDst = pInput;
562
68.2M
  for (i = 0; i < 3; i++) {
563
    /* inline FFT4 merged with incoming resorting loop */
564
51.1M
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
565
566
51.1M
    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
567
51.1M
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
568
51.1M
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
569
51.1M
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
570
571
51.1M
    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
572
51.1M
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
573
574
51.1M
    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
575
51.1M
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */
576
577
51.1M
    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
578
51.1M
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
579
580
51.1M
    a10 = a10 - pSrc[18]; /* Re C - Re D */
581
51.1M
    a30 = a30 - pSrc[19]; /* Im C - Im D */
582
583
51.1M
    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
584
51.1M
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
585
51.1M
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
586
51.1M
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
587
588
51.1M
    pSrc += 2;
589
51.1M
    pDst += 2;
590
51.1M
  }
591
17.0M
}
592
#endif /* FUNCTION_fft12 */
593
594
#ifndef FUNCTION_fft15
595
596
187M
#define N3 3
597
106M
#define N5 5
598
60.9M
#define N6 6
599
187M
#define N15 15
600
601
/* Performs the FFT of length 15. It is split into FFTs of length 3 and
602
 * length 5. */
603
5.07M
static inline void fft15(FIXP_DBL *pInput) {
604
5.07M
  FIXP_DBL aDst[2 * N15];
605
5.07M
  FIXP_DBL aDst1[2 * N15];
606
5.07M
  int i, k, l;
607
608
  /* Sort input vector for fft's of length 3
609
  input3(0:2)   = [input(0) input(5) input(10)];
610
  input3(3:5)   = [input(3) input(8) input(13)];
611
  input3(6:8)   = [input(6) input(11) input(1)];
612
  input3(9:11)  = [input(9) input(14) input(4)];
613
  input3(12:14) = [input(12) input(2) input(7)]; */
614
5.07M
  {
615
5.07M
    const FIXP_DBL *pSrc = pInput;
616
5.07M
    FIXP_DBL *RESTRICT pDst = aDst;
617
    /* Merge 3 loops into one, skip call of fft3 */
618
30.4M
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
619
25.3M
      pDst[k + 0] = pSrc[l];
620
25.3M
      pDst[k + 1] = pSrc[l + 1];
621
25.3M
      l += 2 * N5;
622
25.3M
      if (l >= (2 * N15)) l -= (2 * N15);
623
624
25.3M
      pDst[k + 2] = pSrc[l];
625
25.3M
      pDst[k + 3] = pSrc[l + 1];
626
25.3M
      l += 2 * N5;
627
25.3M
      if (l >= (2 * N15)) l -= (2 * N15);
628
25.3M
      pDst[k + 4] = pSrc[l];
629
25.3M
      pDst[k + 5] = pSrc[l + 1];
630
25.3M
      l += (2 * N5) + (2 * N3);
631
25.3M
      if (l >= (2 * N15)) l -= (2 * N15);
632
633
      /* fft3 merged with shift right by 2 loop */
634
25.3M
      FIXP_DBL r1, r2, r3;
635
25.3M
      FIXP_DBL s1, s2;
636
      /* real part */
637
25.3M
      r1 = pDst[k + 2] + pDst[k + 4];
638
25.3M
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
639
25.3M
      s1 = pDst[k + 0];
640
25.3M
      pDst[k + 0] = (s1 + r1) >> 2;
641
25.3M
      r1 = s1 - (r1 >> 1);
642
643
      /* imaginary part */
644
25.3M
      s1 = pDst[k + 3] + pDst[k + 5];
645
25.3M
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
646
25.3M
      r3 = pDst[k + 1];
647
25.3M
      pDst[k + 1] = (r3 + s1) >> 2;
648
25.3M
      s1 = r3 - (s1 >> 1);
649
650
      /* combination */
651
25.3M
      pDst[k + 2] = (r1 - s2) >> 2;
652
25.3M
      pDst[k + 4] = (r1 + s2) >> 2;
653
25.3M
      pDst[k + 3] = (s1 + r2) >> 2;
654
25.3M
      pDst[k + 5] = (s1 - r2) >> 2;
655
25.3M
    }
656
5.07M
  }
657
  /* Sort input vector for fft's of length 5
658
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
659
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
660
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
661
  /* Merge 2 loops into one, brings about 10% */
662
5.07M
  {
663
5.07M
    const FIXP_DBL *pSrc = aDst;
664
5.07M
    FIXP_DBL *RESTRICT pDst = aDst1;
665
20.3M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
666
15.2M
      l = 2 * i;
667
15.2M
      pDst[k + 0] = pSrc[l + 0];
668
15.2M
      pDst[k + 1] = pSrc[l + 1];
669
15.2M
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
670
15.2M
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
671
15.2M
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
672
15.2M
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
673
15.2M
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
674
15.2M
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
675
15.2M
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
676
15.2M
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
677
15.2M
      fft5(&pDst[k]);
678
15.2M
    }
679
5.07M
  }
680
  /* Sort output vector of length 15
681
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
682
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
683
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
684
  /* optimize clumsy loop, brings about 5% */
685
5.07M
  {
686
5.07M
    const FIXP_DBL *pSrc = aDst1;
687
5.07M
    FIXP_DBL *RESTRICT pDst = pInput;
688
20.3M
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
689
15.2M
      pDst[k + 0] = pSrc[l];
690
15.2M
      pDst[k + 1] = pSrc[l + 1];
691
15.2M
      l += (2 * N6);
692
15.2M
      if (l >= (2 * N15)) l -= (2 * N15);
693
15.2M
      pDst[k + 2] = pSrc[l];
694
15.2M
      pDst[k + 3] = pSrc[l + 1];
695
15.2M
      l += (2 * N6);
696
15.2M
      if (l >= (2 * N15)) l -= (2 * N15);
697
15.2M
      pDst[k + 4] = pSrc[l];
698
15.2M
      pDst[k + 5] = pSrc[l + 1];
699
15.2M
      l += (2 * N6);
700
15.2M
      if (l >= (2 * N15)) l -= (2 * N15);
701
15.2M
      pDst[k + 6] = pSrc[l];
702
15.2M
      pDst[k + 7] = pSrc[l + 1];
703
15.2M
      l += (2 * N6);
704
15.2M
      if (l >= (2 * N15)) l -= (2 * N15);
705
15.2M
      pDst[k + 8] = pSrc[l];
706
15.2M
      pDst[k + 9] = pSrc[l + 1];
707
15.2M
      l += 2; /* no modulo check needed, it cannot occur */
708
15.2M
    }
709
5.07M
  }
710
5.07M
}
711
#endif /* FUNCTION_fft15 */
712
713
/*
714
 Select shift placement.
715
 Some processors like ARM may shift "for free" in combination with an addition
716
 or substraction, but others don't so either combining shift with +/- or reduce
717
 the total amount or shift operations is optimal
718
 */
719
#if !defined(__arm__)
720
1.04G
#define SHIFT_A >> 1
721
#define SHIFT_B
722
#else
723
#define SHIFT_A
724
#define SHIFT_B >> 1
725
#endif
726
727
#ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728
                         */
729
730
/* This defines prevents this array to be declared twice, if 16-bit fft is
731
 * enabled too */
732
#define FUNCTION_DATA_fft_16_w16
733
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
734
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
736
LNK_SECTION_CODE_L1
737
24.4M
inline void fft_16(FIXP_DBL *RESTRICT x) {
738
24.4M
  FIXP_DBL vr, ur;
739
24.4M
  FIXP_DBL vr2, ur2;
740
24.4M
  FIXP_DBL vr3, ur3;
741
24.4M
  FIXP_DBL vr4, ur4;
742
24.4M
  FIXP_DBL vi, ui;
743
24.4M
  FIXP_DBL vi2, ui2;
744
24.4M
  FIXP_DBL vi3, ui3;
745
746
24.4M
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
747
24.4M
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
748
24.4M
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
749
24.4M
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
750
24.4M
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
751
24.4M
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
752
753
24.4M
  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
754
24.4M
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
755
756
24.4M
  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
757
24.4M
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
758
24.4M
  vr -= x[16];              /* Re A - Re B */
759
24.4M
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
760
24.4M
  ur -= x[17];              /* Im A - Im B */
761
24.4M
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
762
763
24.4M
  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
764
24.4M
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
765
766
24.4M
  x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
767
24.4M
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
768
769
24.4M
  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
770
24.4M
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
771
772
24.4M
  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
773
24.4M
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
774
775
24.4M
  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
776
24.4M
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
777
24.4M
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
778
24.4M
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
779
24.4M
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
780
24.4M
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
781
24.4M
  vr2 -= x[20];                            /* Re A - Re B */
782
24.4M
  ur2 -= x[21];                            /* Im A - Im B */
783
24.4M
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
784
24.4M
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
785
786
24.4M
  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
787
24.4M
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
788
789
24.4M
  x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
790
24.4M
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
791
792
24.4M
  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
793
24.4M
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
794
795
24.4M
  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
796
24.4M
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
797
798
24.4M
  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
799
24.4M
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
800
24.4M
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
801
24.4M
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
802
24.4M
  vr3 -= x[18];               /* Re A - Re B */
803
24.4M
  ur3 -= x[19];               /* Im A - Im B */
804
24.4M
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
805
24.4M
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
806
24.4M
  x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
807
24.4M
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
808
809
24.4M
  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
810
24.4M
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
811
24.4M
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
812
24.4M
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
813
24.4M
  vr4 -= x[22];                /* Re A - Re B */
814
24.4M
  ur4 -= x[23];                /* Im A - Im B */
815
816
24.4M
  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
817
24.4M
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
818
819
24.4M
  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
820
24.4M
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
821
24.4M
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
822
24.4M
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
823
24.4M
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
824
24.4M
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
825
826
  // xt1 =  0
827
  // xt2 =  8
828
24.4M
  vr = x[8];
829
24.4M
  vi = x[9];
830
24.4M
  ur = x[0] >> 1;
831
24.4M
  ui = x[1] >> 1;
832
24.4M
  x[0] = ur + (vr >> 1);
833
24.4M
  x[1] = ui + (vi >> 1);
834
24.4M
  x[8] = ur - (vr >> 1);
835
24.4M
  x[9] = ui - (vi >> 1);
836
837
  // xt1 =  4
838
  // xt2 = 12
839
24.4M
  vr = x[13];
840
24.4M
  vi = x[12];
841
24.4M
  ur = x[4] >> 1;
842
24.4M
  ui = x[5] >> 1;
843
24.4M
  x[4] = ur + (vr >> 1);
844
24.4M
  x[5] = ui - (vi >> 1);
845
24.4M
  x[12] = ur - (vr >> 1);
846
24.4M
  x[13] = ui + (vi >> 1);
847
848
  // xt1 = 16
849
  // xt2 = 24
850
24.4M
  vr = x[24];
851
24.4M
  vi = x[25];
852
24.4M
  ur = x[16] >> 1;
853
24.4M
  ui = x[17] >> 1;
854
24.4M
  x[16] = ur + (vr >> 1);
855
24.4M
  x[17] = ui + (vi >> 1);
856
24.4M
  x[24] = ur - (vr >> 1);
857
24.4M
  x[25] = ui - (vi >> 1);
858
859
  // xt1 = 20
860
  // xt2 = 28
861
24.4M
  vr = x[29];
862
24.4M
  vi = x[28];
863
24.4M
  ur = x[20] >> 1;
864
24.4M
  ui = x[21] >> 1;
865
24.4M
  x[20] = ur + (vr >> 1);
866
24.4M
  x[21] = ui - (vi >> 1);
867
24.4M
  x[28] = ur - (vr >> 1);
868
24.4M
  x[29] = ui + (vi >> 1);
869
870
  // xt1 =  2
871
  // xt2 = 10
872
24.4M
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
873
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
874
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
875
24.4M
  ur = x[2];
876
24.4M
  ui = x[3];
877
24.4M
  x[2] = (ur >> 1) + vr;
878
24.4M
  x[3] = (ui >> 1) + vi;
879
24.4M
  x[10] = (ur >> 1) - vr;
880
24.4M
  x[11] = (ui >> 1) - vi;
881
882
  // xt1 =  6
883
  // xt2 = 14
884
24.4M
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
885
24.4M
  ur = x[6];
886
24.4M
  ui = x[7];
887
24.4M
  x[6] = (ur >> 1) + vr;
888
24.4M
  x[7] = (ui >> 1) - vi;
889
24.4M
  x[14] = (ur >> 1) - vr;
890
24.4M
  x[15] = (ui >> 1) + vi;
891
892
  // xt1 = 18
893
  // xt2 = 26
894
24.4M
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
895
24.4M
  ur = x[18];
896
24.4M
  ui = x[19];
897
24.4M
  x[18] = (ur >> 1) + vr;
898
24.4M
  x[19] = (ui >> 1) + vi;
899
24.4M
  x[26] = (ur >> 1) - vr;
900
24.4M
  x[27] = (ui >> 1) - vi;
901
902
  // xt1 = 22
903
  // xt2 = 30
904
24.4M
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
905
24.4M
  ur = x[22];
906
24.4M
  ui = x[23];
907
24.4M
  x[22] = (ur >> 1) + vr;
908
24.4M
  x[23] = (ui >> 1) - vi;
909
24.4M
  x[30] = (ur >> 1) - vr;
910
24.4M
  x[31] = (ui >> 1) + vi;
911
912
  // xt1 =  0
913
  // xt2 = 16
914
24.4M
  vr = x[16];
915
24.4M
  vi = x[17];
916
24.4M
  ur = x[0] >> 1;
917
24.4M
  ui = x[1] >> 1;
918
24.4M
  x[0] = ur + (vr >> 1);
919
24.4M
  x[1] = ui + (vi >> 1);
920
24.4M
  x[16] = ur - (vr >> 1);
921
24.4M
  x[17] = ui - (vi >> 1);
922
923
  // xt1 =  8
924
  // xt2 = 24
925
24.4M
  vi = x[24];
926
24.4M
  vr = x[25];
927
24.4M
  ur = x[8] >> 1;
928
24.4M
  ui = x[9] >> 1;
929
24.4M
  x[8] = ur + (vr >> 1);
930
24.4M
  x[9] = ui - (vi >> 1);
931
24.4M
  x[24] = ur - (vr >> 1);
932
24.4M
  x[25] = ui + (vi >> 1);
933
934
  // xt1 =  2
935
  // xt2 = 18
936
24.4M
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
937
24.4M
  ur = x[2];
938
24.4M
  ui = x[3];
939
24.4M
  x[2] = (ur >> 1) + vr;
940
24.4M
  x[3] = (ui >> 1) + vi;
941
24.4M
  x[18] = (ur >> 1) - vr;
942
24.4M
  x[19] = (ui >> 1) - vi;
943
944
  // xt1 = 10
945
  // xt2 = 26
946
24.4M
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
947
24.4M
  ur = x[10];
948
24.4M
  ui = x[11];
949
24.4M
  x[10] = (ur >> 1) + vr;
950
24.4M
  x[11] = (ui >> 1) - vi;
951
24.4M
  x[26] = (ur >> 1) - vr;
952
24.4M
  x[27] = (ui >> 1) + vi;
953
954
  // xt1 =  4
955
  // xt2 = 20
956
24.4M
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
957
24.4M
  ur = x[4];
958
24.4M
  ui = x[5];
959
24.4M
  x[4] = (ur >> 1) + vr;
960
24.4M
  x[5] = (ui >> 1) + vi;
961
24.4M
  x[20] = (ur >> 1) - vr;
962
24.4M
  x[21] = (ui >> 1) - vi;
963
964
  // xt1 = 12
965
  // xt2 = 28
966
24.4M
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
967
24.4M
  ur = x[12];
968
24.4M
  ui = x[13];
969
24.4M
  x[12] = (ur >> 1) + vr;
970
24.4M
  x[13] = (ui >> 1) - vi;
971
24.4M
  x[28] = (ur >> 1) - vr;
972
24.4M
  x[29] = (ui >> 1) + vi;
973
974
  // xt1 =  6
975
  // xt2 = 22
976
24.4M
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
977
24.4M
  ur = x[6];
978
24.4M
  ui = x[7];
979
24.4M
  x[6] = (ur >> 1) + vr;
980
24.4M
  x[7] = (ui >> 1) + vi;
981
24.4M
  x[22] = (ur >> 1) - vr;
982
24.4M
  x[23] = (ui >> 1) - vi;
983
984
  // xt1 = 14
985
  // xt2 = 30
986
24.4M
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
987
24.4M
  ur = x[14];
988
24.4M
  ui = x[15];
989
24.4M
  x[14] = (ur >> 1) + vr;
990
24.4M
  x[15] = (ui >> 1) - vi;
991
24.4M
  x[30] = (ur >> 1) - vr;
992
24.4M
  x[31] = (ui >> 1) + vi;
993
24.4M
}
994
#endif /* FUNCTION_fft_16 */
995
996
#ifndef FUNCTION_fft_32
997
static const FIXP_STP fft32_w32[6] = {
998
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
999
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
1000
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
1001
1.15G
#define W_PiFOURTH STC(0x5a82799a)
1002
1003
LNK_SECTION_CODE_L1
1004
41.1M
inline void fft_32(FIXP_DBL *const _x) {
1005
  /*
1006
   * 1+2 stage radix 4
1007
   */
1008
1009
  /////////////////////////////////////////////////////////////////////////////////////////
1010
41.1M
  {
1011
41.1M
    FIXP_DBL *const x = _x;
1012
41.1M
    FIXP_DBL vi, ui;
1013
41.1M
    FIXP_DBL vi2, ui2;
1014
41.1M
    FIXP_DBL vi3, ui3;
1015
41.1M
    FIXP_DBL vr, ur;
1016
41.1M
    FIXP_DBL vr2, ur2;
1017
41.1M
    FIXP_DBL vr3, ur3;
1018
41.1M
    FIXP_DBL vr4, ur4;
1019
1020
    // i = 0
1021
41.1M
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
1022
41.1M
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
1023
41.1M
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
1024
41.1M
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
1025
1026
41.1M
    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1027
41.1M
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1028
1029
41.1M
    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
1030
41.1M
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
1031
1032
41.1M
    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1033
41.1M
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1034
1035
41.1M
    vr -= x[32];             /* Re A - Re B */
1036
41.1M
    ur -= x[33];             /* Im A - Im B */
1037
41.1M
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
1038
41.1M
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
1039
1040
41.1M
    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
1041
41.1M
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
1042
1043
41.1M
    x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1044
41.1M
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1045
1046
41.1M
    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
1047
41.1M
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
1048
1049
41.1M
    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1050
41.1M
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1051
1052
    // i=16
1053
41.1M
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
1054
41.1M
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
1055
1056
41.1M
    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1057
41.1M
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1058
41.1M
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1059
41.1M
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1060
1061
41.1M
    vr2 -= x[36];            /* Re A - Re B */
1062
41.1M
    ur2 -= x[37];            /* Im A - Im B */
1063
41.1M
    vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
1064
41.1M
    ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
1065
1066
41.1M
    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
1067
41.1M
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
1068
1069
41.1M
    x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1070
41.1M
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1071
1072
41.1M
    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
1073
41.1M
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
1074
1075
41.1M
    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1076
41.1M
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1077
1078
    // i = 32
1079
1080
41.1M
    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1081
41.1M
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1082
41.1M
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1083
41.1M
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1084
1085
41.1M
    vr3 -= x[34];              /* Re A - Re B */
1086
41.1M
    ur3 -= x[35];              /* Im A - Im B */
1087
41.1M
    vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
1088
41.1M
    ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
1089
1090
41.1M
    x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1091
41.1M
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1092
1093
    // i=48
1094
1095
41.1M
    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1096
41.1M
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1097
41.1M
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1098
41.1M
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1099
1100
41.1M
    vr4 -= x[38]; /* Re A - Re B */
1101
41.1M
    ur4 -= x[39]; /* Im A - Im B */
1102
1103
41.1M
    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1104
41.1M
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1105
1106
41.1M
    vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
1107
41.1M
    ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
1108
1109
41.1M
    x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1110
41.1M
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1111
41.1M
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1112
41.1M
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1113
1114
    // i=8
1115
41.1M
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
1116
41.1M
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
1117
41.1M
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
1118
41.1M
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
1119
1120
41.1M
    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1121
41.1M
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
1122
1123
41.1M
    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
1124
41.1M
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
1125
1126
41.1M
    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1127
41.1M
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1128
1129
41.1M
    vr -= x[40];             /* Re A - Re B */
1130
41.1M
    ur -= x[41];             /* Im A - Im B */
1131
41.1M
    vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
1132
41.1M
    ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
1133
1134
41.1M
    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
1135
41.1M
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
1136
1137
41.1M
    x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
1138
41.1M
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1139
1140
41.1M
    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
1141
41.1M
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
1142
1143
41.1M
    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1144
41.1M
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
1145
1146
    // i=24
1147
41.1M
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
1148
41.1M
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
1149
1150
41.1M
    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1151
41.1M
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1152
41.1M
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
1153
41.1M
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1154
1155
41.1M
    vr2 -= x[44];            /* Re A - Re B */
1156
41.1M
    ur2 -= x[45];            /* Im A - Im B */
1157
41.1M
    vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
1158
41.1M
    ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
1159
1160
41.1M
    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
1161
41.1M
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
1162
1163
41.1M
    x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
1164
41.1M
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
1165
1166
41.1M
    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
1167
41.1M
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
1168
1169
41.1M
    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
1170
41.1M
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
1171
1172
    // i=40
1173
1174
41.1M
    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1175
41.1M
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1176
41.1M
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
1177
41.1M
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1178
1179
41.1M
    vr3 -= x[42];              /* Re A - Re B */
1180
41.1M
    ur3 -= x[43];              /* Im A - Im B */
1181
41.1M
    vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
1182
41.1M
    ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
1183
1184
41.1M
    x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
1185
41.1M
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
1186
1187
    // i=56
1188
1189
41.1M
    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
1190
41.1M
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
1191
41.1M
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
1192
41.1M
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
1193
1194
41.1M
    vr4 -= x[46]; /* Re A - Re B */
1195
41.1M
    ur4 -= x[47]; /* Im A - Im B */
1196
1197
41.1M
    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
1198
41.1M
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
1199
1200
41.1M
    vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
1201
41.1M
    ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
1202
1203
41.1M
    x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
1204
41.1M
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
1205
41.1M
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
1206
41.1M
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
1207
41.1M
  }
1208
1209
41.1M
  {
1210
41.1M
    FIXP_DBL *xt = _x;
1211
1212
41.1M
    int j = 4;
1213
164M
    do {
1214
164M
      FIXP_DBL vi, ui, vr, ur;
1215
1216
164M
      vr = xt[8];
1217
164M
      vi = xt[9];
1218
164M
      ur = xt[0] >> 1;
1219
164M
      ui = xt[1] >> 1;
1220
164M
      xt[0] = ur + (vr >> 1);
1221
164M
      xt[1] = ui + (vi >> 1);
1222
164M
      xt[8] = ur - (vr >> 1);
1223
164M
      xt[9] = ui - (vi >> 1);
1224
1225
164M
      vr = xt[13];
1226
164M
      vi = xt[12];
1227
164M
      ur = xt[4] >> 1;
1228
164M
      ui = xt[5] >> 1;
1229
164M
      xt[4] = ur + (vr >> 1);
1230
164M
      xt[5] = ui - (vi >> 1);
1231
164M
      xt[12] = ur - (vr >> 1);
1232
164M
      xt[13] = ui + (vi >> 1);
1233
1234
164M
      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
1235
164M
      ur = xt[2];
1236
164M
      ui = xt[3];
1237
164M
      xt[2] = (ur >> 1) + vr;
1238
164M
      xt[3] = (ui >> 1) + vi;
1239
164M
      xt[10] = (ur >> 1) - vr;
1240
164M
      xt[11] = (ui >> 1) - vi;
1241
1242
164M
      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
1243
164M
      ur = xt[6];
1244
164M
      ui = xt[7];
1245
1246
164M
      xt[6] = (ur >> 1) + vr;
1247
164M
      xt[7] = (ui >> 1) - vi;
1248
164M
      xt[14] = (ur >> 1) - vr;
1249
164M
      xt[15] = (ui >> 1) + vi;
1250
164M
      xt += 16;
1251
164M
    } while (--j != 0);
1252
41.1M
  }
1253
1254
41.1M
  {
1255
41.1M
    FIXP_DBL *const x = _x;
1256
41.1M
    FIXP_DBL vi, ui, vr, ur;
1257
1258
41.1M
    vr = x[16];
1259
41.1M
    vi = x[17];
1260
41.1M
    ur = x[0] >> 1;
1261
41.1M
    ui = x[1] >> 1;
1262
41.1M
    x[0] = ur + (vr >> 1);
1263
41.1M
    x[1] = ui + (vi >> 1);
1264
41.1M
    x[16] = ur - (vr >> 1);
1265
41.1M
    x[17] = ui - (vi >> 1);
1266
1267
41.1M
    vi = x[24];
1268
41.1M
    vr = x[25];
1269
41.1M
    ur = x[8] >> 1;
1270
41.1M
    ui = x[9] >> 1;
1271
41.1M
    x[8] = ur + (vr >> 1);
1272
41.1M
    x[9] = ui - (vi >> 1);
1273
41.1M
    x[24] = ur - (vr >> 1);
1274
41.1M
    x[25] = ui + (vi >> 1);
1275
1276
41.1M
    vr = x[48];
1277
41.1M
    vi = x[49];
1278
41.1M
    ur = x[32] >> 1;
1279
41.1M
    ui = x[33] >> 1;
1280
41.1M
    x[32] = ur + (vr >> 1);
1281
41.1M
    x[33] = ui + (vi >> 1);
1282
41.1M
    x[48] = ur - (vr >> 1);
1283
41.1M
    x[49] = ui - (vi >> 1);
1284
1285
41.1M
    vi = x[56];
1286
41.1M
    vr = x[57];
1287
41.1M
    ur = x[40] >> 1;
1288
41.1M
    ui = x[41] >> 1;
1289
41.1M
    x[40] = ur + (vr >> 1);
1290
41.1M
    x[41] = ui - (vi >> 1);
1291
41.1M
    x[56] = ur - (vr >> 1);
1292
41.1M
    x[57] = ui + (vi >> 1);
1293
1294
41.1M
    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
1295
41.1M
    ur = x[2];
1296
41.1M
    ui = x[3];
1297
41.1M
    x[2] = (ur >> 1) + vr;
1298
41.1M
    x[3] = (ui >> 1) + vi;
1299
41.1M
    x[18] = (ur >> 1) - vr;
1300
41.1M
    x[19] = (ui >> 1) - vi;
1301
1302
41.1M
    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
1303
41.1M
    ur = x[10];
1304
41.1M
    ui = x[11];
1305
41.1M
    x[10] = (ur >> 1) + vr;
1306
41.1M
    x[11] = (ui >> 1) - vi;
1307
41.1M
    x[26] = (ur >> 1) - vr;
1308
41.1M
    x[27] = (ui >> 1) + vi;
1309
1310
41.1M
    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
1311
41.1M
    ur = x[34];
1312
41.1M
    ui = x[35];
1313
41.1M
    x[34] = (ur >> 1) + vr;
1314
41.1M
    x[35] = (ui >> 1) + vi;
1315
41.1M
    x[50] = (ur >> 1) - vr;
1316
41.1M
    x[51] = (ui >> 1) - vi;
1317
1318
41.1M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
1319
41.1M
    ur = x[42];
1320
41.1M
    ui = x[43];
1321
41.1M
    x[42] = (ur >> 1) + vr;
1322
41.1M
    x[43] = (ui >> 1) - vi;
1323
41.1M
    x[58] = (ur >> 1) - vr;
1324
41.1M
    x[59] = (ui >> 1) + vi;
1325
1326
41.1M
    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
1327
41.1M
    ur = x[4];
1328
41.1M
    ui = x[5];
1329
41.1M
    x[4] = (ur >> 1) + vr;
1330
41.1M
    x[5] = (ui >> 1) + vi;
1331
41.1M
    x[20] = (ur >> 1) - vr;
1332
41.1M
    x[21] = (ui >> 1) - vi;
1333
1334
41.1M
    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
1335
41.1M
    ur = x[12];
1336
41.1M
    ui = x[13];
1337
41.1M
    x[12] = (ur >> 1) + vr;
1338
41.1M
    x[13] = (ui >> 1) - vi;
1339
41.1M
    x[28] = (ur >> 1) - vr;
1340
41.1M
    x[29] = (ui >> 1) + vi;
1341
1342
41.1M
    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
1343
41.1M
    ur = x[36];
1344
41.1M
    ui = x[37];
1345
41.1M
    x[36] = (ur >> 1) + vr;
1346
41.1M
    x[37] = (ui >> 1) + vi;
1347
41.1M
    x[52] = (ur >> 1) - vr;
1348
41.1M
    x[53] = (ui >> 1) - vi;
1349
1350
41.1M
    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
1351
41.1M
    ur = x[44];
1352
41.1M
    ui = x[45];
1353
41.1M
    x[44] = (ur >> 1) + vr;
1354
41.1M
    x[45] = (ui >> 1) - vi;
1355
41.1M
    x[60] = (ur >> 1) - vr;
1356
41.1M
    x[61] = (ui >> 1) + vi;
1357
1358
41.1M
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
1359
41.1M
    ur = x[6];
1360
41.1M
    ui = x[7];
1361
41.1M
    x[6] = (ur >> 1) + vr;
1362
41.1M
    x[7] = (ui >> 1) + vi;
1363
41.1M
    x[22] = (ur >> 1) - vr;
1364
41.1M
    x[23] = (ui >> 1) - vi;
1365
1366
41.1M
    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
1367
41.1M
    ur = x[14];
1368
41.1M
    ui = x[15];
1369
41.1M
    x[14] = (ur >> 1) + vr;
1370
41.1M
    x[15] = (ui >> 1) - vi;
1371
41.1M
    x[30] = (ur >> 1) - vr;
1372
41.1M
    x[31] = (ui >> 1) + vi;
1373
1374
41.1M
    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
1375
41.1M
    ur = x[38];
1376
41.1M
    ui = x[39];
1377
41.1M
    x[38] = (ur >> 1) + vr;
1378
41.1M
    x[39] = (ui >> 1) + vi;
1379
41.1M
    x[54] = (ur >> 1) - vr;
1380
41.1M
    x[55] = (ui >> 1) - vi;
1381
1382
41.1M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
1383
41.1M
    ur = x[46];
1384
41.1M
    ui = x[47];
1385
1386
41.1M
    x[46] = (ur >> 1) + vr;
1387
41.1M
    x[47] = (ui >> 1) - vi;
1388
41.1M
    x[62] = (ur >> 1) - vr;
1389
41.1M
    x[63] = (ui >> 1) + vi;
1390
1391
41.1M
    vr = x[32];
1392
41.1M
    vi = x[33];
1393
41.1M
    ur = x[0] >> 1;
1394
41.1M
    ui = x[1] >> 1;
1395
41.1M
    x[0] = ur + (vr >> 1);
1396
41.1M
    x[1] = ui + (vi >> 1);
1397
41.1M
    x[32] = ur - (vr >> 1);
1398
41.1M
    x[33] = ui - (vi >> 1);
1399
1400
41.1M
    vi = x[48];
1401
41.1M
    vr = x[49];
1402
41.1M
    ur = x[16] >> 1;
1403
41.1M
    ui = x[17] >> 1;
1404
41.1M
    x[16] = ur + (vr >> 1);
1405
41.1M
    x[17] = ui - (vi >> 1);
1406
41.1M
    x[48] = ur - (vr >> 1);
1407
41.1M
    x[49] = ui + (vi >> 1);
1408
1409
41.1M
    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
1410
41.1M
    ur = x[2];
1411
41.1M
    ui = x[3];
1412
41.1M
    x[2] = (ur >> 1) + vr;
1413
41.1M
    x[3] = (ui >> 1) + vi;
1414
41.1M
    x[34] = (ur >> 1) - vr;
1415
41.1M
    x[35] = (ui >> 1) - vi;
1416
1417
41.1M
    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
1418
41.1M
    ur = x[18];
1419
41.1M
    ui = x[19];
1420
41.1M
    x[18] = (ur >> 1) + vr;
1421
41.1M
    x[19] = (ui >> 1) - vi;
1422
41.1M
    x[50] = (ur >> 1) - vr;
1423
41.1M
    x[51] = (ui >> 1) + vi;
1424
1425
41.1M
    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
1426
41.1M
    ur = x[4];
1427
41.1M
    ui = x[5];
1428
41.1M
    x[4] = (ur >> 1) + vr;
1429
41.1M
    x[5] = (ui >> 1) + vi;
1430
41.1M
    x[36] = (ur >> 1) - vr;
1431
41.1M
    x[37] = (ui >> 1) - vi;
1432
1433
41.1M
    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
1434
41.1M
    ur = x[20];
1435
41.1M
    ui = x[21];
1436
41.1M
    x[20] = (ur >> 1) + vr;
1437
41.1M
    x[21] = (ui >> 1) - vi;
1438
41.1M
    x[52] = (ur >> 1) - vr;
1439
41.1M
    x[53] = (ui >> 1) + vi;
1440
1441
41.1M
    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
1442
41.1M
    ur = x[6];
1443
41.1M
    ui = x[7];
1444
41.1M
    x[6] = (ur >> 1) + vr;
1445
41.1M
    x[7] = (ui >> 1) + vi;
1446
41.1M
    x[38] = (ur >> 1) - vr;
1447
41.1M
    x[39] = (ui >> 1) - vi;
1448
1449
41.1M
    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
1450
41.1M
    ur = x[22];
1451
41.1M
    ui = x[23];
1452
41.1M
    x[22] = (ur >> 1) + vr;
1453
41.1M
    x[23] = (ui >> 1) - vi;
1454
41.1M
    x[54] = (ur >> 1) - vr;
1455
41.1M
    x[55] = (ui >> 1) + vi;
1456
1457
41.1M
    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
1458
41.1M
    ur = x[8];
1459
41.1M
    ui = x[9];
1460
41.1M
    x[8] = (ur >> 1) + vr;
1461
41.1M
    x[9] = (ui >> 1) + vi;
1462
41.1M
    x[40] = (ur >> 1) - vr;
1463
41.1M
    x[41] = (ui >> 1) - vi;
1464
1465
41.1M
    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
1466
41.1M
    ur = x[24];
1467
41.1M
    ui = x[25];
1468
41.1M
    x[24] = (ur >> 1) + vr;
1469
41.1M
    x[25] = (ui >> 1) - vi;
1470
41.1M
    x[56] = (ur >> 1) - vr;
1471
41.1M
    x[57] = (ui >> 1) + vi;
1472
1473
41.1M
    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
1474
41.1M
    ur = x[10];
1475
41.1M
    ui = x[11];
1476
1477
41.1M
    x[10] = (ur >> 1) + vr;
1478
41.1M
    x[11] = (ui >> 1) + vi;
1479
41.1M
    x[42] = (ur >> 1) - vr;
1480
41.1M
    x[43] = (ui >> 1) - vi;
1481
1482
41.1M
    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
1483
41.1M
    ur = x[26];
1484
41.1M
    ui = x[27];
1485
41.1M
    x[26] = (ur >> 1) + vr;
1486
41.1M
    x[27] = (ui >> 1) - vi;
1487
41.1M
    x[58] = (ur >> 1) - vr;
1488
41.1M
    x[59] = (ui >> 1) + vi;
1489
1490
41.1M
    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1491
41.1M
    ur = x[12];
1492
41.1M
    ui = x[13];
1493
41.1M
    x[12] = (ur >> 1) + vr;
1494
41.1M
    x[13] = (ui >> 1) + vi;
1495
41.1M
    x[44] = (ur >> 1) - vr;
1496
41.1M
    x[45] = (ui >> 1) - vi;
1497
1498
41.1M
    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1499
41.1M
    ur = x[28];
1500
41.1M
    ui = x[29];
1501
41.1M
    x[28] = (ur >> 1) + vr;
1502
41.1M
    x[29] = (ui >> 1) - vi;
1503
41.1M
    x[60] = (ur >> 1) - vr;
1504
41.1M
    x[61] = (ui >> 1) + vi;
1505
1506
41.1M
    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1507
41.1M
    ur = x[14];
1508
41.1M
    ui = x[15];
1509
41.1M
    x[14] = (ur >> 1) + vr;
1510
41.1M
    x[15] = (ui >> 1) + vi;
1511
41.1M
    x[46] = (ur >> 1) - vr;
1512
41.1M
    x[47] = (ui >> 1) - vi;
1513
1514
41.1M
    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1515
41.1M
    ur = x[30];
1516
41.1M
    ui = x[31];
1517
41.1M
    x[30] = (ur >> 1) + vr;
1518
41.1M
    x[31] = (ui >> 1) - vi;
1519
41.1M
    x[62] = (ur >> 1) - vr;
1520
41.1M
    x[63] = (ui >> 1) + vi;
1521
41.1M
  }
1522
41.1M
}
1523
#endif /* #ifndef FUNCTION_fft_32 */
1524
1525
/**
1526
 * \brief Apply rotation vectors to a data buffer.
1527
 * \param cl length of each row of input data.
1528
 * \param l total length of input data.
1529
 * \param pVecRe real part of rotation coefficient vector.
1530
 * \param pVecIm imaginary part of rotation coefficient vector.
1531
 */
1532
1533
/*
1534
   This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535
   (-1.0) instead. At the end, the sign of the result is inverted
1536
*/
1537
#define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539
#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
1540
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541
                                        const int l, const FIXP_STB *pVecRe,
1542
5.22M
                                        const FIXP_STB *pVecIm) {
1543
5.22M
  FIXP_DBL re, im;
1544
5.22M
  FIXP_STB vre, vim;
1545
1546
5.22M
  int i, c;
1547
1548
21.8M
  for (i = 0; i < cl; i++) {
1549
16.6M
    re = pData[2 * i];
1550
16.6M
    im = pData[2 * i + 1];
1551
1552
16.6M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1553
16.6M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554
16.6M
  }
1555
30.2M
  for (; i < l; i += cl) {
1556
25.0M
    re = pData[2 * i];
1557
25.0M
    im = pData[2 * i + 1];
1558
1559
25.0M
    pData[2 * i] = re >> 2;     /* * 0.25 */
1560
25.0M
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562
148M
    for (c = i + 1; c < i + cl; c++) {
1563
123M
      re = pData[2 * c] >> 1;
1564
123M
      im = pData[2 * c + 1] >> 1;
1565
123M
      vre = *pVecRe++;
1566
123M
      vim = *pVecIm++;
1567
1568
123M
      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569
123M
    }
1570
25.0M
  }
1571
5.22M
}
1572
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574
/* select either switch case of function pointer. */
1575
//#define FFT_TWO_STAGE_SWITCH_CASE
1576
#ifndef FUNCTION_fftN2_func
1577
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578
                              const int dim1, const int dim2,
1579
                              void (*const fft1)(FIXP_DBL *),
1580
                              void (*const fft2)(FIXP_DBL *),
1581
                              const FIXP_STB *RotVectorReal,
1582
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583
5.22M
                              FIXP_DBL *aDst2) {
1584
  /* The real part of the input samples are at the addresses with even indices
1585
  and the imaginary part of the input samples are at the addresses with odd
1586
  indices. The output samples are stored at the address of pInput
1587
  */
1588
5.22M
  FIXP_DBL *pSrc, *pDst, *pDstOut;
1589
5.22M
  int i;
1590
1591
5.22M
  FDK_ASSERT(length == dim1 * dim2);
1592
1593
  /* Perform dim2 times the fft of length dim1. The input samples are at the
1594
  address of pSrc and the output samples are at the address of pDst. The input
1595
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596
  the output samples are stored consecutively.
1597
  */
1598
5.22M
  pSrc = pInput;
1599
5.22M
  pDst = aDst;
1600
35.5M
  for (i = 0; i < dim2; i++) {
1601
195M
    for (int j = 0; j < dim1; j++) {
1602
164M
      pDst[2 * j] = pSrc[2 * j * dim2];
1603
164M
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604
164M
    }
1605
1606
      /* fft of size dim1 */
1607
30.2M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1608
30.2M
    fft1(pDst);
1609
#else
1610
    switch (dim1) {
1611
      case 2:
1612
        fft2(pDst);
1613
        break;
1614
      case 3:
1615
        fft3(pDst);
1616
        break;
1617
      case 4:
1618
        fft_4(pDst);
1619
        break;
1620
      /* case 5: fft5(pDst); break; */
1621
      /* case 8: fft_8(pDst); break; */
1622
      case 12:
1623
        fft12(pDst);
1624
        break;
1625
      /* case 15: fft15(pDst); break; */
1626
      case 16:
1627
        fft_16(pDst);
1628
        break;
1629
      case 32:
1630
        fft_32(pDst);
1631
        break;
1632
        /*case 64: fft_64(pDst); break;*/
1633
        /* case 128: fft_128(pDst); break; */
1634
    }
1635
#endif
1636
30.2M
    pSrc += 2;
1637
30.2M
    pDst = pDst + 2 * dim1;
1638
30.2M
  }
1639
1640
  /* Perform the modulation of the output of the fft of length dim1 */
1641
5.22M
  pSrc = aDst;
1642
5.22M
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644
  /* Perform dim1 times the fft of length dim2. The input samples are at the
1645
  address of aDst and the output samples are at the address of pInput. The input
1646
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647
  the output samples are stored consecutively at the address of pInput.
1648
  */
1649
5.22M
  pSrc = aDst;
1650
5.22M
  pDst = aDst2;
1651
5.22M
  pDstOut = pInput;
1652
21.8M
  for (i = 0; i < dim1; i++) {
1653
181M
    for (int j = 0; j < dim2; j++) {
1654
164M
      pDst[2 * j] = pSrc[2 * j * dim1];
1655
164M
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656
164M
    }
1657
1658
16.6M
#ifndef FFT_TWO_STAGE_SWITCH_CASE
1659
16.6M
    fft2(pDst);
1660
#else
1661
    switch (dim2) {
1662
      case 4:
1663
        fft_4(pDst);
1664
        break;
1665
      case 9:
1666
        fft9(pDst);
1667
        break;
1668
      case 12:
1669
        fft12(pDst);
1670
        break;
1671
      case 15:
1672
        fft15(pDst);
1673
        break;
1674
      case 16:
1675
        fft_16(pDst);
1676
        break;
1677
      case 32:
1678
        fft_32(pDst);
1679
        break;
1680
    }
1681
#endif
1682
1683
181M
    for (int j = 0; j < dim2; j++) {
1684
164M
      pDstOut[2 * j * dim1] = pDst[2 * j];
1685
164M
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686
164M
    }
1687
16.6M
    pSrc += 2;
1688
16.6M
    pDstOut += 2;
1689
16.6M
  }
1690
5.22M
}
1691
#endif /* FUNCTION_fftN2_function */
1692
1693
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
1694
              RotVectorReal, RotVectorImag)                                \
1695
5.22M
  {                                                                        \
1696
5.22M
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
1697
5.22M
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
1698
5.22M
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
1699
5.22M
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
1700
5.22M
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
1701
5.22M
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
1702
5.22M
  }
1703
1704
  /*!
1705
   *
1706
   *  \brief  complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707
   *  \param  pInput contains the input signal prescaled right by 2
1708
   *          pInput contains the output signal scaled by SCALEFACTOR<#length>
1709
   *          The output signal does not have any fixed headroom
1710
   *  \return void
1711
   *
1712
   */
1713
1714
#ifndef FUNCTION_fft6
1715
4.01M
static inline void fft6(FIXP_DBL *pInput) {
1716
4.01M
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717
4.01M
}
1718
#endif /* #ifndef FUNCTION_fft6 */
1719
1720
#ifndef FUNCTION_fft12
1721
static inline void fft12(FIXP_DBL *pInput) {
1722
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723
        RotVectorImag12); /* 16,58 */
1724
}
1725
#endif /* #ifndef FUNCTION_fft12 */
1726
1727
#ifndef FUNCTION_fft20
1728
148k
static inline void fft20(FIXP_DBL *pInput) {
1729
148k
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730
148k
        RotVectorImag20);
1731
148k
}
1732
#endif /* FUNCTION_fft20 */
1733
1734
6.55k
static inline void fft24(FIXP_DBL *pInput) {
1735
6.55k
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736
6.55k
        RotVectorImag24); /* 16,73 */
1737
6.55k
}
1738
1739
373k
static inline void fft48(FIXP_DBL *pInput) {
1740
373k
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741
373k
        RotVectorImag48); /* 16,32 */
1742
373k
}
1743
1744
#ifndef FUNCTION_fft60
1745
265k
static inline void fft60(FIXP_DBL *pInput) {
1746
265k
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747
265k
        RotVectorImag60); /* 15,51 */
1748
265k
}
1749
#endif /* FUNCTION_fft60 */
1750
1751
#ifndef FUNCTION_fft80
1752
131
static inline void fft80(FIXP_DBL *pInput) {
1753
131
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754
131
        RotVectorImag80); /*  */
1755
131
}
1756
#endif
1757
1758
#ifndef FUNCTION_fft96
1759
76.4k
static inline void fft96(FIXP_DBL *pInput) {
1760
76.4k
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761
76.4k
        RotVectorImag96); /* 15,47 */
1762
76.4k
}
1763
#endif /* FUNCTION_fft96*/
1764
1765
#ifndef FUNCTION_fft120
1766
3.38k
static inline void fft120(FIXP_DBL *pInput) {
1767
3.38k
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768
3.38k
        RotVectorImag120);
1769
3.38k
}
1770
#endif /* FUNCTION_fft120 */
1771
1772
#ifndef FUNCTION_fft192
1773
10.1k
static inline void fft192(FIXP_DBL *pInput) {
1774
10.1k
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775
10.1k
        RotVectorImag192); /* 15,50 */
1776
10.1k
}
1777
#endif
1778
1779
#ifndef FUNCTION_fft240
1780
215k
static inline void fft240(FIXP_DBL *pInput) {
1781
215k
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782
215k
        RotVectorImag240); /* 15.44 */
1783
215k
}
1784
#endif
1785
1786
#ifndef FUNCTION_fft384
1787
89.2k
static inline void fft384(FIXP_DBL *pInput) {
1788
89.2k
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789
89.2k
        RotVectorImag384); /* 16.02 */
1790
89.2k
}
1791
#endif /* FUNCTION_fft384 */
1792
1793
#ifndef FUNCTION_fft480
1794
16.7k
static inline void fft480(FIXP_DBL *pInput) {
1795
16.7k
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796
16.7k
        RotVectorImag480); /* 15.84 */
1797
16.7k
}
1798
#endif /* FUNCTION_fft480 */
1799
1800
91.6M
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802
91.6M
  C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804
91.6M
  if (length == 32) {
1805
39.5M
    fft_32(pInput);
1806
39.5M
    *pScalefactor += SCALEFACTOR32;
1807
52.0M
  } else {
1808
52.0M
    switch (length) {
1809
21.1M
      case 16:
1810
21.1M
        fft_16(pInput);
1811
21.1M
        *pScalefactor += SCALEFACTOR16;
1812
21.1M
        break;
1813
10.6M
      case 8:
1814
10.6M
        fft_8(pInput);
1815
10.6M
        *pScalefactor += SCALEFACTOR8;
1816
10.6M
        break;
1817
0
      case 2:
1818
0
        fft2(pInput);
1819
0
        *pScalefactor += SCALEFACTOR2;
1820
0
        break;
1821
0
      case 3:
1822
0
        fft3(pInput);
1823
0
        *pScalefactor += SCALEFACTOR3;
1824
0
        break;
1825
1.56M
      case 4:
1826
1.56M
        fft_4(pInput);
1827
1.56M
        *pScalefactor += SCALEFACTOR4;
1828
1.56M
        break;
1829
0
      case 5:
1830
0
        fft5(pInput);
1831
0
        *pScalefactor += SCALEFACTOR5;
1832
0
        break;
1833
4.01M
      case 6:
1834
4.01M
        fft6(pInput);
1835
4.01M
        *pScalefactor += SCALEFACTOR6;
1836
4.01M
        break;
1837
148k
      case 10:
1838
148k
        fft10(pInput);
1839
148k
        *pScalefactor += SCALEFACTOR10;
1840
148k
        break;
1841
12.5M
      case 12:
1842
12.5M
        fft12(pInput);
1843
12.5M
        *pScalefactor += SCALEFACTOR12;
1844
12.5M
        break;
1845
0
      case 15:
1846
0
        fft15(pInput);
1847
0
        *pScalefactor += SCALEFACTOR15;
1848
0
        break;
1849
148k
      case 20:
1850
148k
        fft20(pInput);
1851
148k
        *pScalefactor += SCALEFACTOR20;
1852
148k
        break;
1853
6.55k
      case 24:
1854
6.55k
        fft24(pInput);
1855
6.55k
        *pScalefactor += SCALEFACTOR24;
1856
6.55k
        break;
1857
373k
      case 48:
1858
373k
        fft48(pInput);
1859
373k
        *pScalefactor += SCALEFACTOR48;
1860
373k
        break;
1861
265k
      case 60:
1862
265k
        fft60(pInput);
1863
265k
        *pScalefactor += SCALEFACTOR60;
1864
265k
        break;
1865
485k
      case 64:
1866
485k
        dit_fft(pInput, 6, SineTable512, 512);
1867
485k
        *pScalefactor += SCALEFACTOR64;
1868
485k
        break;
1869
131
      case 80:
1870
131
        fft80(pInput);
1871
131
        *pScalefactor += SCALEFACTOR80;
1872
131
        break;
1873
76.4k
      case 96:
1874
76.4k
        fft96(pInput);
1875
76.4k
        *pScalefactor += SCALEFACTOR96;
1876
76.4k
        break;
1877
3.38k
      case 120:
1878
3.38k
        fft120(pInput);
1879
3.38k
        *pScalefactor += SCALEFACTOR120;
1880
3.38k
        break;
1881
82.5k
      case 128:
1882
82.5k
        dit_fft(pInput, 7, SineTable512, 512);
1883
82.5k
        *pScalefactor += SCALEFACTOR128;
1884
82.5k
        break;
1885
10.1k
      case 192:
1886
10.1k
        fft192(pInput);
1887
10.1k
        *pScalefactor += SCALEFACTOR192;
1888
10.1k
        break;
1889
215k
      case 240:
1890
215k
        fft240(pInput);
1891
215k
        *pScalefactor += SCALEFACTOR240;
1892
215k
        break;
1893
84.8k
      case 256:
1894
84.8k
        dit_fft(pInput, 8, SineTable512, 512);
1895
84.8k
        *pScalefactor += SCALEFACTOR256;
1896
84.8k
        break;
1897
89.2k
      case 384:
1898
89.2k
        fft384(pInput);
1899
89.2k
        *pScalefactor += SCALEFACTOR384;
1900
89.2k
        break;
1901
16.7k
      case 480:
1902
16.7k
        fft480(pInput);
1903
16.7k
        *pScalefactor += SCALEFACTOR480;
1904
16.7k
        break;
1905
194k
      case 512:
1906
194k
        dit_fft(pInput, 9, SineTable512, 512);
1907
194k
        *pScalefactor += SCALEFACTOR512;
1908
194k
        break;
1909
0
      default:
1910
0
        FDK_ASSERT(0); /* FFT length not supported! */
1911
0
        break;
1912
52.0M
    }
1913
52.0M
  }
1914
91.6M
}
1915
1916
0
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917
0
  switch (length) {
1918
0
    default:
1919
0
      FDK_ASSERT(0); /* IFFT length not supported! */
1920
0
      break;
1921
0
  }
1922
0
}