/src/aac/libFDK/src/fft.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* ----------------------------------------------------------------------------- |
2 | | Software License for The Fraunhofer FDK AAC Codec Library for Android |
3 | | |
4 | | © Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten |
5 | | Forschung e.V. All rights reserved. |
6 | | |
7 | | 1. INTRODUCTION |
8 | | The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software |
9 | | that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding |
10 | | scheme for digital audio. This FDK AAC Codec software is intended to be used on |
11 | | a wide variety of Android devices. |
12 | | |
13 | | AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient |
14 | | general perceptual audio codecs. AAC-ELD is considered the best-performing |
15 | | full-bandwidth communications codec by independent studies and is widely |
16 | | deployed. AAC has been standardized by ISO and IEC as part of the MPEG |
17 | | specifications. |
18 | | |
19 | | Patent licenses for necessary patent claims for the FDK AAC Codec (including |
20 | | those of Fraunhofer) may be obtained through Via Licensing |
21 | | (www.vialicensing.com) or through the respective patent owners individually for |
22 | | the purpose of encoding or decoding bit streams in products that are compliant |
23 | | with the ISO/IEC MPEG audio standards. Please note that most manufacturers of |
24 | | Android devices already license these patent claims through Via Licensing or |
25 | | directly from the patent owners, and therefore FDK AAC Codec software may |
26 | | already be covered under those patent licenses when it is used for those |
27 | | licensed purposes only. |
28 | | |
29 | | Commercially-licensed AAC software libraries, including floating-point versions |
30 | | with enhanced sound quality, are also available from Fraunhofer. Users are |
31 | | encouraged to check the Fraunhofer website for additional applications |
32 | | information and documentation. |
33 | | |
34 | | 2. COPYRIGHT LICENSE |
35 | | |
36 | | Redistribution and use in source and binary forms, with or without modification, |
37 | | are permitted without payment of copyright license fees provided that you |
38 | | satisfy the following conditions: |
39 | | |
40 | | You must retain the complete text of this software license in redistributions of |
41 | | the FDK AAC Codec or your modifications thereto in source code form. |
42 | | |
43 | | You must retain the complete text of this software license in the documentation |
44 | | and/or other materials provided with redistributions of the FDK AAC Codec or |
45 | | your modifications thereto in binary form. You must make available free of |
46 | | charge copies of the complete source code of the FDK AAC Codec and your |
47 | | modifications thereto to recipients of copies in binary form. |
48 | | |
49 | | The name of Fraunhofer may not be used to endorse or promote products derived |
50 | | from this library without prior written permission. |
51 | | |
52 | | You may not charge copyright license fees for anyone to use, copy or distribute |
53 | | the FDK AAC Codec software or your modifications thereto. |
54 | | |
55 | | Your modified versions of the FDK AAC Codec must carry prominent notices stating |
56 | | that you changed the software and the date of any change. For modified versions |
57 | | of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android" |
58 | | must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK |
59 | | AAC Codec Library for Android." |
60 | | |
61 | | 3. NO PATENT LICENSE |
62 | | |
63 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without |
64 | | limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE. |
65 | | Fraunhofer provides no warranty of patent non-infringement with respect to this |
66 | | software. |
67 | | |
68 | | You may use this FDK AAC Codec software or modifications thereto only for |
69 | | purposes that are authorized by appropriate patent licenses. |
70 | | |
71 | | 4. DISCLAIMER |
72 | | |
73 | | This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright |
74 | | holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, |
75 | | including but not limited to the implied warranties of merchantability and |
76 | | fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
77 | | CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, |
78 | | or consequential damages, including but not limited to procurement of substitute |
79 | | goods or services; loss of use, data, or profits, or business interruption, |
80 | | however caused and on any theory of liability, whether in contract, strict |
81 | | liability, or tort (including negligence), arising in any way out of the use of |
82 | | this software, even if advised of the possibility of such damage. |
83 | | |
84 | | 5. CONTACT INFORMATION |
85 | | |
86 | | Fraunhofer Institute for Integrated Circuits IIS |
87 | | Attention: Audio and Multimedia Departments - FDK AAC LL |
88 | | Am Wolfsmantel 33 |
89 | | 91058 Erlangen, Germany |
90 | | |
91 | | www.iis.fraunhofer.de/amm |
92 | | amm-info@iis.fraunhofer.de |
93 | | ----------------------------------------------------------------------------- */ |
94 | | |
95 | | /******************* Library for basic calculation routines ******************** |
96 | | |
97 | | Author(s): Josef Hoepfl, DSP Solutions |
98 | | |
99 | | Description: Fix point FFT |
100 | | |
101 | | *******************************************************************************/ |
102 | | |
103 | | #include "fft_rad2.h" |
104 | | #include "FDK_tools_rom.h" |
105 | | |
106 | 287M | #define W_PiFOURTH STC(0x5a82799a) |
107 | | //#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a)) |
108 | | #ifndef SUMDIFF_PIFOURTH |
109 | | #define SUMDIFF_PIFOURTH(diff, sum, a, b) \ |
110 | 694M | { \ |
111 | 694M | FIXP_DBL wa, wb; \ |
112 | 694M | wa = fMultDiv2(a, W_PiFOURTH); \ |
113 | 694M | wb = fMultDiv2(b, W_PiFOURTH); \ |
114 | 694M | diff = wb - wa; \ |
115 | 694M | sum = wb + wa; \ |
116 | 694M | } |
117 | | #define SUMDIFF_PIFOURTH16(diff, sum, a, b) \ |
118 | | { \ |
119 | | FIXP_SGL wa, wb; \ |
120 | | wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \ |
121 | | wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \ |
122 | | diff = wb - wa; \ |
123 | | sum = wb + wa; \ |
124 | | } |
125 | | #endif |
126 | | |
127 | | #define SCALEFACTOR2048 10 |
128 | | #define SCALEFACTOR1024 9 |
129 | 174k | #define SCALEFACTOR512 8 |
130 | 74.1k | #define SCALEFACTOR256 7 |
131 | 81.0k | #define SCALEFACTOR128 6 |
132 | 422k | #define SCALEFACTOR64 5 |
133 | 37.9M | #define SCALEFACTOR32 4 |
134 | 20.9M | #define SCALEFACTOR16 3 |
135 | 8.26M | #define SCALEFACTOR8 2 |
136 | 1.74M | #define SCALEFACTOR4 1 |
137 | 3.55M | #define SCALEFACTOR2 1 |
138 | | |
139 | 3.61M | #define SCALEFACTOR3 1 |
140 | 114k | #define SCALEFACTOR5 1 |
141 | 3.55M | #define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2) |
142 | | #define SCALEFACTOR7 2 |
143 | | #define SCALEFACTOR9 2 |
144 | 2.39M | #define SCALEFACTOR10 5 |
145 | 13.0M | #define SCALEFACTOR12 3 |
146 | 472k | #define SCALEFACTOR15 3 |
147 | | #define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2) |
148 | 114k | #define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2) |
149 | | #define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2) |
150 | 6.20k | #define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2) |
151 | | #define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2) |
152 | | #define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2) |
153 | 376k | #define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2) |
154 | 245k | #define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2) |
155 | 187 | #define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2) |
156 | 68.1k | #define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2) |
157 | 3.83k | #define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2) |
158 | | #define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2) |
159 | | #define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2) |
160 | 7.80k | #define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2) |
161 | 207k | #define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2) |
162 | | #define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2) |
163 | | #define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2) |
164 | 93.2k | #define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2) |
165 | 15.7k | #define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2) |
166 | | |
167 | | #include "fft.h" |
168 | | |
169 | | #ifndef FUNCTION_fft2 |
170 | | |
171 | | /* Performs the FFT of length 2. Input vector unscaled, output vector scaled |
172 | | * with factor 0.5 */ |
173 | 10.7M | static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) { |
174 | 10.7M | FIXP_DBL r1, i1; |
175 | 10.7M | FIXP_DBL r2, i2; |
176 | | |
177 | | /* real part */ |
178 | 10.7M | r1 = pDat[2]; |
179 | 10.7M | r2 = pDat[0]; |
180 | | |
181 | | /* imaginary part */ |
182 | 10.7M | i1 = pDat[3]; |
183 | 10.7M | i2 = pDat[1]; |
184 | | |
185 | | /* real part */ |
186 | 10.7M | pDat[0] = (r2 + r1) >> 1; |
187 | 10.7M | pDat[2] = (r2 - r1) >> 1; |
188 | | |
189 | | /* imaginary part */ |
190 | 10.7M | pDat[1] = (i2 + i1) >> 1; |
191 | 10.7M | pDat[3] = (i2 - i1) >> 1; |
192 | 10.7M | } |
193 | | #endif /* FUNCTION_fft2 */ |
194 | | |
195 | 18.5M | #define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */ |
196 | | |
197 | | #ifndef FUNCTION_fft3 |
198 | | /* Performs the FFT of length 3 according to the algorithm after winograd. */ |
199 | 9.28M | static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) { |
200 | 9.28M | FIXP_DBL r1, r2; |
201 | 9.28M | FIXP_DBL s1, s2; |
202 | 9.28M | FIXP_DBL pD; |
203 | | |
204 | | /* real part */ |
205 | 9.28M | r1 = pDat[2] + pDat[4]; |
206 | 9.28M | r2 = fMultDiv2((pDat[2] - pDat[4]), C31); |
207 | 9.28M | pD = pDat[0] >> 1; |
208 | 9.28M | pDat[0] = pD + (r1 >> 1); |
209 | 9.28M | r1 = pD - (r1 >> 2); |
210 | | |
211 | | /* imaginary part */ |
212 | 9.28M | s1 = pDat[3] + pDat[5]; |
213 | 9.28M | s2 = fMultDiv2((pDat[3] - pDat[5]), C31); |
214 | 9.28M | pD = pDat[1] >> 1; |
215 | 9.28M | pDat[1] = pD + (s1 >> 1); |
216 | 9.28M | s1 = pD - (s1 >> 2); |
217 | | |
218 | | /* combination */ |
219 | 9.28M | pDat[2] = r1 - s2; |
220 | 9.28M | pDat[4] = r1 + s2; |
221 | 9.28M | pDat[3] = s1 + r2; |
222 | 9.28M | pDat[5] = s1 - r2; |
223 | 9.28M | } |
224 | | #endif /* #ifndef FUNCTION_fft3 */ |
225 | | |
226 | 149M | #define F5C(x) STC(x) |
227 | | |
228 | 29.9M | #define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */ |
229 | 29.9M | #define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */ |
230 | 29.9M | #define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */ |
231 | 29.9M | #define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */ |
232 | 29.9M | #define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */ |
233 | | |
234 | | /* performs the FFT of length 5 according to the algorithm after winograd */ |
235 | | /* This version works with a prescale of 2 instead of 3 */ |
236 | 14.9M | static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) { |
237 | 14.9M | FIXP_DBL r1, r2, r3, r4; |
238 | 14.9M | FIXP_DBL s1, s2, s3, s4; |
239 | 14.9M | FIXP_DBL t; |
240 | | |
241 | | /* real part */ |
242 | 14.9M | r1 = (pDat[2] + pDat[8]) >> 1; |
243 | 14.9M | r4 = (pDat[2] - pDat[8]) >> 1; |
244 | 14.9M | r3 = (pDat[4] + pDat[6]) >> 1; |
245 | 14.9M | r2 = (pDat[4] - pDat[6]) >> 1; |
246 | 14.9M | t = fMult((r1 - r3), C54); |
247 | 14.9M | r1 = r1 + r3; |
248 | 14.9M | pDat[0] = (pDat[0] >> 1) + r1; |
249 | | /* Bit shift left because of the constant C55 which was scaled with the factor |
250 | | 0.5 because of the representation of the values as fracts */ |
251 | 14.9M | r1 = pDat[0] + (fMultDiv2(r1, C55) << (2)); |
252 | 14.9M | r3 = r1 - t; |
253 | 14.9M | r1 = r1 + t; |
254 | 14.9M | t = fMult((r4 + r2), C51); |
255 | | /* Bit shift left because of the constant C55 which was scaled with the factor |
256 | | 0.5 because of the representation of the values as fracts */ |
257 | 14.9M | r4 = t + (fMultDiv2(r4, C52) << (2)); |
258 | 14.9M | r2 = t + fMult(r2, C53); |
259 | | |
260 | | /* imaginary part */ |
261 | 14.9M | s1 = (pDat[3] + pDat[9]) >> 1; |
262 | 14.9M | s4 = (pDat[3] - pDat[9]) >> 1; |
263 | 14.9M | s3 = (pDat[5] + pDat[7]) >> 1; |
264 | 14.9M | s2 = (pDat[5] - pDat[7]) >> 1; |
265 | 14.9M | t = fMult((s1 - s3), C54); |
266 | 14.9M | s1 = s1 + s3; |
267 | 14.9M | pDat[1] = (pDat[1] >> 1) + s1; |
268 | | /* Bit shift left because of the constant C55 which was scaled with the factor |
269 | | 0.5 because of the representation of the values as fracts */ |
270 | 14.9M | s1 = pDat[1] + (fMultDiv2(s1, C55) << (2)); |
271 | 14.9M | s3 = s1 - t; |
272 | 14.9M | s1 = s1 + t; |
273 | 14.9M | t = fMult((s4 + s2), C51); |
274 | | /* Bit shift left because of the constant C55 which was scaled with the factor |
275 | | 0.5 because of the representation of the values as fracts */ |
276 | 14.9M | s4 = t + (fMultDiv2(s4, C52) << (2)); |
277 | 14.9M | s2 = t + fMult(s2, C53); |
278 | | |
279 | | /* combination */ |
280 | 14.9M | pDat[2] = r1 + s2; |
281 | 14.9M | pDat[8] = r1 - s2; |
282 | 14.9M | pDat[4] = r3 - s4; |
283 | 14.9M | pDat[6] = r3 + s4; |
284 | | |
285 | 14.9M | pDat[3] = s1 - r2; |
286 | 14.9M | pDat[9] = s1 + r2; |
287 | 14.9M | pDat[5] = s3 + r4; |
288 | 14.9M | pDat[7] = s3 - r4; |
289 | 14.9M | } |
290 | | |
291 | 2.28M | #define F5C(x) STC(x) |
292 | | |
293 | 457k | #define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */ |
294 | 457k | #define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */ |
295 | 457k | #define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */ |
296 | 457k | #define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */ |
297 | 457k | #define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */ |
298 | | /** |
299 | | * \brief Function performs a complex 10-point FFT |
300 | | * The FFT is performed inplace. The result of the FFT |
301 | | * is scaled by SCALEFACTOR10 bits. |
302 | | * |
303 | | * WOPS FLC version: 1093 cycles |
304 | | * WOPS with 32x16 bit multiplications: 196 cycles |
305 | | * |
306 | | * \param [i/o] re real input / output |
307 | | * \param [i/o] im imag input / output |
308 | | * \param [i ] s stride real and imag input / output |
309 | | * |
310 | | * \return void |
311 | | */ |
312 | | static void fft10(FIXP_DBL *x) // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s) |
313 | 114k | { |
314 | 114k | FIXP_DBL t; |
315 | 114k | FIXP_DBL x0, x1, x2, x3, x4; |
316 | 114k | FIXP_DBL r1, r2, r3, r4; |
317 | 114k | FIXP_DBL s1, s2, s3, s4; |
318 | 114k | FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; |
319 | 114k | FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; |
320 | | |
321 | 114k | const int s = 1; // stride factor |
322 | | |
323 | | /* 2 fft5 stages */ |
324 | | |
325 | | /* real part */ |
326 | 114k | x0 = (x[s * 0] >> SCALEFACTOR10); |
327 | 114k | x1 = (x[s * 4] >> SCALEFACTOR10); |
328 | 114k | x2 = (x[s * 8] >> SCALEFACTOR10); |
329 | 114k | x3 = (x[s * 12] >> SCALEFACTOR10); |
330 | 114k | x4 = (x[s * 16] >> SCALEFACTOR10); |
331 | | |
332 | 114k | r1 = (x3 + x2); |
333 | 114k | r4 = (x3 - x2); |
334 | 114k | r3 = (x1 + x4); |
335 | 114k | r2 = (x1 - x4); |
336 | 114k | t = fMult((r1 - r3), C54); |
337 | 114k | r1 = (r1 + r3); |
338 | 114k | y00 = (x0 + r1); |
339 | 114k | r1 = (y00 + ((fMult(r1, C55) << 1))); |
340 | 114k | r3 = (r1 - t); |
341 | 114k | r1 = (r1 + t); |
342 | 114k | t = fMult((r4 + r2), C51); |
343 | 114k | r4 = (t + (fMult(r4, C52) << 1)); |
344 | 114k | r2 = (t + fMult(r2, C53)); |
345 | | |
346 | | /* imaginary part */ |
347 | 114k | x0 = (x[s * 0 + 1] >> SCALEFACTOR10); |
348 | 114k | x1 = (x[s * 4 + 1] >> SCALEFACTOR10); |
349 | 114k | x2 = (x[s * 8 + 1] >> SCALEFACTOR10); |
350 | 114k | x3 = (x[s * 12 + 1] >> SCALEFACTOR10); |
351 | 114k | x4 = (x[s * 16 + 1] >> SCALEFACTOR10); |
352 | | |
353 | 114k | s1 = (x3 + x2); |
354 | 114k | s4 = (x3 - x2); |
355 | 114k | s3 = (x1 + x4); |
356 | 114k | s2 = (x1 - x4); |
357 | 114k | t = fMult((s1 - s3), C54); |
358 | 114k | s1 = (s1 + s3); |
359 | 114k | y01 = (x0 + s1); |
360 | 114k | s1 = (y01 + (fMult(s1, C55) << 1)); |
361 | 114k | s3 = (s1 - t); |
362 | 114k | s1 = (s1 + t); |
363 | 114k | t = fMult((s4 + s2), C51); |
364 | 114k | s4 = (t + (fMult(s4, C52) << 1)); |
365 | 114k | s2 = (t + fMult(s2, C53)); |
366 | | |
367 | | /* combination */ |
368 | 114k | y04 = (r1 + s2); |
369 | 114k | y16 = (r1 - s2); |
370 | 114k | y08 = (r3 - s4); |
371 | 114k | y12 = (r3 + s4); |
372 | | |
373 | 114k | y05 = (s1 - r2); |
374 | 114k | y17 = (s1 + r2); |
375 | 114k | y09 = (s3 + r4); |
376 | 114k | y13 = (s3 - r4); |
377 | | |
378 | | /* real part */ |
379 | 114k | x0 = (x[s * 10] >> SCALEFACTOR10); |
380 | 114k | x1 = (x[s * 2] >> SCALEFACTOR10); |
381 | 114k | x2 = (x[s * 6] >> SCALEFACTOR10); |
382 | 114k | x3 = (x[s * 14] >> SCALEFACTOR10); |
383 | 114k | x4 = (x[s * 18] >> SCALEFACTOR10); |
384 | | |
385 | 114k | r1 = (x1 + x4); |
386 | 114k | r4 = (x1 - x4); |
387 | 114k | r3 = (x3 + x2); |
388 | 114k | r2 = (x3 - x2); |
389 | 114k | t = fMult((r1 - r3), C54); |
390 | 114k | r1 = (r1 + r3); |
391 | 114k | y02 = (x0 + r1); |
392 | 114k | r1 = (y02 + ((fMult(r1, C55) << 1))); |
393 | 114k | r3 = (r1 - t); |
394 | 114k | r1 = (r1 + t); |
395 | 114k | t = fMult(((r4 + r2)), C51); |
396 | 114k | r4 = (t + (fMult(r4, C52) << 1)); |
397 | 114k | r2 = (t + fMult(r2, C53)); |
398 | | |
399 | | /* imaginary part */ |
400 | 114k | x0 = (x[s * 10 + 1] >> SCALEFACTOR10); |
401 | 114k | x1 = (x[s * 2 + 1] >> SCALEFACTOR10); |
402 | 114k | x2 = (x[s * 6 + 1] >> SCALEFACTOR10); |
403 | 114k | x3 = (x[s * 14 + 1] >> SCALEFACTOR10); |
404 | 114k | x4 = (x[s * 18 + 1] >> SCALEFACTOR10); |
405 | | |
406 | 114k | s1 = (x1 + x4); |
407 | 114k | s4 = (x1 - x4); |
408 | 114k | s3 = (x3 + x2); |
409 | 114k | s2 = (x3 - x2); |
410 | 114k | t = fMult((s1 - s3), C54); |
411 | 114k | s1 = (s1 + s3); |
412 | 114k | y03 = (x0 + s1); |
413 | 114k | s1 = (y03 + (fMult(s1, C55) << 1)); |
414 | 114k | s3 = (s1 - t); |
415 | 114k | s1 = (s1 + t); |
416 | 114k | t = fMult((s4 + s2), C51); |
417 | 114k | s4 = (t + (fMult(s4, C52) << 1)); |
418 | 114k | s2 = (t + fMult(s2, C53)); |
419 | | |
420 | | /* combination */ |
421 | 114k | y06 = (r1 + s2); |
422 | 114k | y18 = (r1 - s2); |
423 | 114k | y10 = (r3 - s4); |
424 | 114k | y14 = (r3 + s4); |
425 | | |
426 | 114k | y07 = (s1 - r2); |
427 | 114k | y19 = (s1 + r2); |
428 | 114k | y11 = (s3 + r4); |
429 | 114k | y15 = (s3 - r4); |
430 | | |
431 | | /* 5 fft2 stages */ |
432 | 114k | x[s * 0] = (y00 + y02); |
433 | 114k | x[s * 0 + 1] = (y01 + y03); |
434 | 114k | x[s * 10] = (y00 - y02); |
435 | 114k | x[s * 10 + 1] = (y01 - y03); |
436 | | |
437 | 114k | x[s * 4] = (y04 + y06); |
438 | 114k | x[s * 4 + 1] = (y05 + y07); |
439 | 114k | x[s * 14] = (y04 - y06); |
440 | 114k | x[s * 14 + 1] = (y05 - y07); |
441 | | |
442 | 114k | x[s * 8] = (y08 + y10); |
443 | 114k | x[s * 8 + 1] = (y09 + y11); |
444 | 114k | x[s * 18] = (y08 - y10); |
445 | 114k | x[s * 18 + 1] = (y09 - y11); |
446 | | |
447 | 114k | x[s * 12] = (y12 + y14); |
448 | 114k | x[s * 12 + 1] = (y13 + y15); |
449 | 114k | x[s * 2] = (y12 - y14); |
450 | 114k | x[s * 2 + 1] = (y13 - y15); |
451 | | |
452 | 114k | x[s * 16] = (y16 + y18); |
453 | 114k | x[s * 16 + 1] = (y17 + y19); |
454 | 114k | x[s * 6] = (y16 - y18); |
455 | 114k | x[s * 6 + 1] = (y17 - y19); |
456 | 114k | } |
457 | | |
458 | | #ifndef FUNCTION_fft12 |
459 | | #define FUNCTION_fft12 |
460 | | |
461 | | #undef C31 |
462 | 185M | #define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */ |
463 | | |
464 | 17.1M | static inline void fft12(FIXP_DBL *pInput) { |
465 | 17.1M | FIXP_DBL aDst[24]; |
466 | 17.1M | FIXP_DBL *pSrc, *pDst; |
467 | 17.1M | int i; |
468 | | |
469 | 17.1M | pSrc = pInput; |
470 | 17.1M | pDst = aDst; |
471 | 17.1M | FIXP_DBL r1, r2, s1, s2, pD; |
472 | | |
473 | | /* First 3*2 samples are shifted right by 2 before output */ |
474 | 17.1M | r1 = pSrc[8] + pSrc[16]; |
475 | 17.1M | r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31); |
476 | 17.1M | pD = pSrc[0] >> 1; |
477 | 17.1M | pDst[0] = (pD + (r1 >> 1)) >> 1; |
478 | 17.1M | r1 = pD - (r1 >> 2); |
479 | | |
480 | | /* imaginary part */ |
481 | 17.1M | s1 = pSrc[9] + pSrc[17]; |
482 | 17.1M | s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31); |
483 | 17.1M | pD = pSrc[1] >> 1; |
484 | 17.1M | pDst[1] = (pD + (s1 >> 1)) >> 1; |
485 | 17.1M | s1 = pD - (s1 >> 2); |
486 | | |
487 | | /* combination */ |
488 | 17.1M | pDst[2] = (r1 - s2) >> 1; |
489 | 17.1M | pDst[3] = (s1 + r2) >> 1; |
490 | 17.1M | pDst[4] = (r1 + s2) >> 1; |
491 | 17.1M | pDst[5] = (s1 - r2) >> 1; |
492 | 17.1M | pSrc += 2; |
493 | 17.1M | pDst += 6; |
494 | | |
495 | 17.1M | const FIXP_STB *pVecRe = RotVectorReal12; |
496 | 17.1M | const FIXP_STB *pVecIm = RotVectorImag12; |
497 | 17.1M | FIXP_DBL re, im; |
498 | 17.1M | FIXP_STB vre, vim; |
499 | 51.5M | for (i = 0; i < 2; i++) { |
500 | | /* sample 0,1 are shifted right by 2 before output */ |
501 | | /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before |
502 | | * output */ |
503 | | |
504 | 34.3M | r1 = pSrc[8] + pSrc[16]; |
505 | 34.3M | r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31); |
506 | 34.3M | pD = pSrc[0] >> 1; |
507 | 34.3M | pDst[0] = (pD + (r1 >> 1)) >> 1; |
508 | 34.3M | r1 = pD - (r1 >> 2); |
509 | | |
510 | | /* imaginary part */ |
511 | 34.3M | s1 = pSrc[9] + pSrc[17]; |
512 | 34.3M | s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31); |
513 | 34.3M | pD = pSrc[1] >> 1; |
514 | 34.3M | pDst[1] = (pD + (s1 >> 1)) >> 1; |
515 | 34.3M | s1 = pD - (s1 >> 2); |
516 | | |
517 | | /* combination */ |
518 | 34.3M | re = (r1 - s2) >> 0; |
519 | 34.3M | im = (s1 + r2) >> 0; |
520 | 34.3M | vre = *pVecRe++; |
521 | 34.3M | vim = *pVecIm++; |
522 | 34.3M | cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim); |
523 | | |
524 | 34.3M | re = (r1 + s2) >> 0; |
525 | 34.3M | im = (s1 - r2) >> 0; |
526 | 34.3M | vre = *pVecRe++; |
527 | 34.3M | vim = *pVecIm++; |
528 | 34.3M | cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim); |
529 | | |
530 | 34.3M | pDst += 6; |
531 | 34.3M | pSrc += 2; |
532 | 34.3M | } |
533 | | /* sample 0,1 are shifted right by 2 before output */ |
534 | | /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */ |
535 | | /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */ |
536 | 17.1M | r1 = pSrc[8] + pSrc[16]; |
537 | 17.1M | r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31); |
538 | 17.1M | pD = pSrc[0] >> 1; |
539 | 17.1M | pDst[0] = (pD + (r1 >> 1)) >> 1; |
540 | 17.1M | r1 = pD - (r1 >> 2); |
541 | | |
542 | | /* imaginary part */ |
543 | 17.1M | s1 = pSrc[9] + pSrc[17]; |
544 | 17.1M | s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31); |
545 | 17.1M | pD = pSrc[1] >> 1; |
546 | 17.1M | pDst[1] = (pD + (s1 >> 1)) >> 1; |
547 | 17.1M | s1 = pD - (s1 >> 2); |
548 | | |
549 | | /* combination */ |
550 | 17.1M | pDst[2] = (s1 + r2) >> 1; |
551 | 17.1M | pDst[3] = (s2 - r1) >> 1; |
552 | 17.1M | pDst[4] = -((r1 + s2) >> 1); |
553 | 17.1M | pDst[5] = (r2 - s1) >> 1; |
554 | | |
555 | | /* Perform 3 times the fft of length 4. The input samples are at the address |
556 | | of aDst and the output samples are at the address of pInput. The input vector |
557 | | for the fft of length 4 is built of the interleaved samples in aDst, the |
558 | | output samples are stored consecutively at the address of pInput. |
559 | | */ |
560 | 17.1M | pSrc = aDst; |
561 | 17.1M | pDst = pInput; |
562 | 68.6M | for (i = 0; i < 3; i++) { |
563 | | /* inline FFT4 merged with incoming resorting loop */ |
564 | 51.5M | FIXP_DBL a00, a10, a20, a30, tmp0, tmp1; |
565 | | |
566 | 51.5M | a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */ |
567 | 51.5M | a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */ |
568 | 51.5M | a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */ |
569 | 51.5M | a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */ |
570 | | |
571 | 51.5M | pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */ |
572 | 51.5M | pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */ |
573 | | |
574 | 51.5M | tmp0 = a00 - pSrc[12]; /* Re A - Re B */ |
575 | 51.5M | tmp1 = a20 - pSrc[13]; /* Im A - Im B */ |
576 | | |
577 | 51.5M | pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */ |
578 | 51.5M | pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */ |
579 | | |
580 | 51.5M | a10 = a10 - pSrc[18]; /* Re C - Re D */ |
581 | 51.5M | a30 = a30 - pSrc[19]; /* Im C - Im D */ |
582 | | |
583 | 51.5M | pDst[6] = tmp0 + a30; /* Re B' = Re A - Re B + Im C - Im D */ |
584 | 51.5M | pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */ |
585 | 51.5M | pDst[7] = tmp1 - a10; /* Im B' = Im A - Im B - Re C + Re D */ |
586 | 51.5M | pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */ |
587 | | |
588 | 51.5M | pSrc += 2; |
589 | 51.5M | pDst += 2; |
590 | 51.5M | } |
591 | 17.1M | } |
592 | | #endif /* FUNCTION_fft12 */ |
593 | | |
594 | | #ifndef FUNCTION_fft15 |
595 | | |
596 | 179M | #define N3 3 |
597 | 101M | #define N5 5 |
598 | 58.0M | #define N6 6 |
599 | 179M | #define N15 15 |
600 | | |
601 | | /* Performs the FFT of length 15. It is split into FFTs of length 3 and |
602 | | * length 5. */ |
603 | 4.84M | static inline void fft15(FIXP_DBL *pInput) { |
604 | 4.84M | FIXP_DBL aDst[2 * N15]; |
605 | 4.84M | FIXP_DBL aDst1[2 * N15]; |
606 | 4.84M | int i, k, l; |
607 | | |
608 | | /* Sort input vector for fft's of length 3 |
609 | | input3(0:2) = [input(0) input(5) input(10)]; |
610 | | input3(3:5) = [input(3) input(8) input(13)]; |
611 | | input3(6:8) = [input(6) input(11) input(1)]; |
612 | | input3(9:11) = [input(9) input(14) input(4)]; |
613 | | input3(12:14) = [input(12) input(2) input(7)]; */ |
614 | 4.84M | { |
615 | 4.84M | const FIXP_DBL *pSrc = pInput; |
616 | 4.84M | FIXP_DBL *RESTRICT pDst = aDst; |
617 | | /* Merge 3 loops into one, skip call of fft3 */ |
618 | 29.0M | for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) { |
619 | 24.2M | pDst[k + 0] = pSrc[l]; |
620 | 24.2M | pDst[k + 1] = pSrc[l + 1]; |
621 | 24.2M | l += 2 * N5; |
622 | 24.2M | if (l >= (2 * N15)) l -= (2 * N15); |
623 | | |
624 | 24.2M | pDst[k + 2] = pSrc[l]; |
625 | 24.2M | pDst[k + 3] = pSrc[l + 1]; |
626 | 24.2M | l += 2 * N5; |
627 | 24.2M | if (l >= (2 * N15)) l -= (2 * N15); |
628 | 24.2M | pDst[k + 4] = pSrc[l]; |
629 | 24.2M | pDst[k + 5] = pSrc[l + 1]; |
630 | 24.2M | l += (2 * N5) + (2 * N3); |
631 | 24.2M | if (l >= (2 * N15)) l -= (2 * N15); |
632 | | |
633 | | /* fft3 merged with shift right by 2 loop */ |
634 | 24.2M | FIXP_DBL r1, r2, r3; |
635 | 24.2M | FIXP_DBL s1, s2; |
636 | | /* real part */ |
637 | 24.2M | r1 = pDst[k + 2] + pDst[k + 4]; |
638 | 24.2M | r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31); |
639 | 24.2M | s1 = pDst[k + 0]; |
640 | 24.2M | pDst[k + 0] = (s1 + r1) >> 2; |
641 | 24.2M | r1 = s1 - (r1 >> 1); |
642 | | |
643 | | /* imaginary part */ |
644 | 24.2M | s1 = pDst[k + 3] + pDst[k + 5]; |
645 | 24.2M | s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31); |
646 | 24.2M | r3 = pDst[k + 1]; |
647 | 24.2M | pDst[k + 1] = (r3 + s1) >> 2; |
648 | 24.2M | s1 = r3 - (s1 >> 1); |
649 | | |
650 | | /* combination */ |
651 | 24.2M | pDst[k + 2] = (r1 - s2) >> 2; |
652 | 24.2M | pDst[k + 4] = (r1 + s2) >> 2; |
653 | 24.2M | pDst[k + 3] = (s1 + r2) >> 2; |
654 | 24.2M | pDst[k + 5] = (s1 - r2) >> 2; |
655 | 24.2M | } |
656 | 4.84M | } |
657 | | /* Sort input vector for fft's of length 5 |
658 | | input5(0:4) = [output3(0) output3(3) output3(6) output3(9) output3(12)]; |
659 | | input5(5:9) = [output3(1) output3(4) output3(7) output3(10) output3(13)]; |
660 | | input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */ |
661 | | /* Merge 2 loops into one, brings about 10% */ |
662 | 4.84M | { |
663 | 4.84M | const FIXP_DBL *pSrc = aDst; |
664 | 4.84M | FIXP_DBL *RESTRICT pDst = aDst1; |
665 | 19.3M | for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) { |
666 | 14.5M | l = 2 * i; |
667 | 14.5M | pDst[k + 0] = pSrc[l + 0]; |
668 | 14.5M | pDst[k + 1] = pSrc[l + 1]; |
669 | 14.5M | pDst[k + 2] = pSrc[l + 0 + (2 * N3)]; |
670 | 14.5M | pDst[k + 3] = pSrc[l + 1 + (2 * N3)]; |
671 | 14.5M | pDst[k + 4] = pSrc[l + 0 + (4 * N3)]; |
672 | 14.5M | pDst[k + 5] = pSrc[l + 1 + (4 * N3)]; |
673 | 14.5M | pDst[k + 6] = pSrc[l + 0 + (6 * N3)]; |
674 | 14.5M | pDst[k + 7] = pSrc[l + 1 + (6 * N3)]; |
675 | 14.5M | pDst[k + 8] = pSrc[l + 0 + (8 * N3)]; |
676 | 14.5M | pDst[k + 9] = pSrc[l + 1 + (8 * N3)]; |
677 | 14.5M | fft5(&pDst[k]); |
678 | 14.5M | } |
679 | 4.84M | } |
680 | | /* Sort output vector of length 15 |
681 | | output = [out5(0) out5(6) out5(12) out5(3) out5(9) |
682 | | out5(10) out5(1) out5(7) out5(13) out5(4) |
683 | | out5(5) out5(11) out5(2) out5(8) out5(14)]; */ |
684 | | /* optimize clumsy loop, brings about 5% */ |
685 | 4.84M | { |
686 | 4.84M | const FIXP_DBL *pSrc = aDst1; |
687 | 4.84M | FIXP_DBL *RESTRICT pDst = pInput; |
688 | 19.3M | for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) { |
689 | 14.5M | pDst[k + 0] = pSrc[l]; |
690 | 14.5M | pDst[k + 1] = pSrc[l + 1]; |
691 | 14.5M | l += (2 * N6); |
692 | 14.5M | if (l >= (2 * N15)) l -= (2 * N15); |
693 | 14.5M | pDst[k + 2] = pSrc[l]; |
694 | 14.5M | pDst[k + 3] = pSrc[l + 1]; |
695 | 14.5M | l += (2 * N6); |
696 | 14.5M | if (l >= (2 * N15)) l -= (2 * N15); |
697 | 14.5M | pDst[k + 4] = pSrc[l]; |
698 | 14.5M | pDst[k + 5] = pSrc[l + 1]; |
699 | 14.5M | l += (2 * N6); |
700 | 14.5M | if (l >= (2 * N15)) l -= (2 * N15); |
701 | 14.5M | pDst[k + 6] = pSrc[l]; |
702 | 14.5M | pDst[k + 7] = pSrc[l + 1]; |
703 | 14.5M | l += (2 * N6); |
704 | 14.5M | if (l >= (2 * N15)) l -= (2 * N15); |
705 | 14.5M | pDst[k + 8] = pSrc[l]; |
706 | 14.5M | pDst[k + 9] = pSrc[l + 1]; |
707 | 14.5M | l += 2; /* no modulo check needed, it cannot occur */ |
708 | 14.5M | } |
709 | 4.84M | } |
710 | 4.84M | } |
711 | | #endif /* FUNCTION_fft15 */ |
712 | | |
713 | | /* |
714 | | Select shift placement. |
715 | | Some processors like ARM may shift "for free" in combination with an addition |
716 | | or substraction, but others don't so either combining shift with +/- or reduce |
717 | | the total amount or shift operations is optimal |
718 | | */ |
719 | | #if !defined(__arm__) |
720 | 1.01G | #define SHIFT_A >> 1 |
721 | | #define SHIFT_B |
722 | | #else |
723 | | #define SHIFT_A |
724 | | #define SHIFT_B >> 1 |
725 | | #endif |
726 | | |
727 | | #ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \ |
728 | | */ |
729 | | |
730 | | /* This defines prevents this array to be declared twice, if 16-bit fft is |
731 | | * enabled too */ |
732 | | #define FUNCTION_DATA_fft_16_w16 |
733 | | static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d), |
734 | | STCP(0x30fbc54d, 0x7641af3d)}; |
735 | | |
736 | | LNK_SECTION_CODE_L1 |
737 | 23.9M | inline void fft_16(FIXP_DBL *RESTRICT x) { |
738 | 23.9M | FIXP_DBL vr, ur; |
739 | 23.9M | FIXP_DBL vr2, ur2; |
740 | 23.9M | FIXP_DBL vr3, ur3; |
741 | 23.9M | FIXP_DBL vr4, ur4; |
742 | 23.9M | FIXP_DBL vi, ui; |
743 | 23.9M | FIXP_DBL vi2, ui2; |
744 | 23.9M | FIXP_DBL vi3, ui3; |
745 | | |
746 | 23.9M | vr = (x[0] >> 1) + (x[16] >> 1); /* Re A + Re B */ |
747 | 23.9M | ur = (x[1] >> 1) + (x[17] >> 1); /* Im A + Im B */ |
748 | 23.9M | vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */ |
749 | 23.9M | ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */ |
750 | 23.9M | x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
751 | 23.9M | x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ |
752 | | |
753 | 23.9M | vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */ |
754 | 23.9M | ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */ |
755 | | |
756 | 23.9M | x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
757 | 23.9M | x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
758 | 23.9M | vr -= x[16]; /* Re A - Re B */ |
759 | 23.9M | vi = (vi SHIFT_B)-x[24]; /* Re C - Re D */ |
760 | 23.9M | ur -= x[17]; /* Im A - Im B */ |
761 | 23.9M | ui = (ui SHIFT_B)-x[25]; /* Im C - Im D */ |
762 | | |
763 | 23.9M | vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */ |
764 | 23.9M | ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */ |
765 | | |
766 | 23.9M | x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ |
767 | 23.9M | x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ |
768 | | |
769 | 23.9M | vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */ |
770 | 23.9M | ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */ |
771 | | |
772 | 23.9M | x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ |
773 | 23.9M | x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ |
774 | | |
775 | 23.9M | vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */ |
776 | 23.9M | ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */ |
777 | 23.9M | x[8] = vr2 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
778 | 23.9M | x[9] = ur2 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ |
779 | 23.9M | x[12] = vr2 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
780 | 23.9M | x[13] = ur2 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
781 | 23.9M | vr2 -= x[20]; /* Re A - Re B */ |
782 | 23.9M | ur2 -= x[21]; /* Im A - Im B */ |
783 | 23.9M | vi2 = (vi2 SHIFT_B)-x[28]; /* Re C - Re D */ |
784 | 23.9M | ui2 = (ui2 SHIFT_B)-x[29]; /* Im C - Im D */ |
785 | | |
786 | 23.9M | vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */ |
787 | 23.9M | ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */ |
788 | | |
789 | 23.9M | x[10] = ui2 + vr2; /* Re B' = Im C - Im D + Re A - Re B */ |
790 | 23.9M | x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ |
791 | | |
792 | 23.9M | vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */ |
793 | 23.9M | ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */ |
794 | | |
795 | 23.9M | x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ |
796 | 23.9M | x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */ |
797 | | |
798 | 23.9M | x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
799 | 23.9M | x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */ |
800 | 23.9M | x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
801 | 23.9M | x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
802 | 23.9M | vr3 -= x[18]; /* Re A - Re B */ |
803 | 23.9M | ur3 -= x[19]; /* Im A - Im B */ |
804 | 23.9M | vi = (vi SHIFT_B)-x[26]; /* Re C - Re D */ |
805 | 23.9M | ui = (ui SHIFT_B)-x[27]; /* Im C - Im D */ |
806 | 23.9M | x[18] = ui + vr3; /* Re B' = Im C - Im D + Re A - Re B */ |
807 | 23.9M | x[19] = ur3 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ |
808 | | |
809 | 23.9M | x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
810 | 23.9M | x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
811 | 23.9M | x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ |
812 | 23.9M | x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
813 | 23.9M | vr4 -= x[22]; /* Re A - Re B */ |
814 | 23.9M | ur4 -= x[23]; /* Im A - Im B */ |
815 | | |
816 | 23.9M | x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ |
817 | 23.9M | x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */ |
818 | | |
819 | 23.9M | vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */ |
820 | 23.9M | ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */ |
821 | 23.9M | x[26] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ |
822 | 23.9M | x[30] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ |
823 | 23.9M | x[27] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ |
824 | 23.9M | x[31] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ |
825 | | |
826 | | // xt1 = 0 |
827 | | // xt2 = 8 |
828 | 23.9M | vr = x[8]; |
829 | 23.9M | vi = x[9]; |
830 | 23.9M | ur = x[0] >> 1; |
831 | 23.9M | ui = x[1] >> 1; |
832 | 23.9M | x[0] = ur + (vr >> 1); |
833 | 23.9M | x[1] = ui + (vi >> 1); |
834 | 23.9M | x[8] = ur - (vr >> 1); |
835 | 23.9M | x[9] = ui - (vi >> 1); |
836 | | |
837 | | // xt1 = 4 |
838 | | // xt2 = 12 |
839 | 23.9M | vr = x[13]; |
840 | 23.9M | vi = x[12]; |
841 | 23.9M | ur = x[4] >> 1; |
842 | 23.9M | ui = x[5] >> 1; |
843 | 23.9M | x[4] = ur + (vr >> 1); |
844 | 23.9M | x[5] = ui - (vi >> 1); |
845 | 23.9M | x[12] = ur - (vr >> 1); |
846 | 23.9M | x[13] = ui + (vi >> 1); |
847 | | |
848 | | // xt1 = 16 |
849 | | // xt2 = 24 |
850 | 23.9M | vr = x[24]; |
851 | 23.9M | vi = x[25]; |
852 | 23.9M | ur = x[16] >> 1; |
853 | 23.9M | ui = x[17] >> 1; |
854 | 23.9M | x[16] = ur + (vr >> 1); |
855 | 23.9M | x[17] = ui + (vi >> 1); |
856 | 23.9M | x[24] = ur - (vr >> 1); |
857 | 23.9M | x[25] = ui - (vi >> 1); |
858 | | |
859 | | // xt1 = 20 |
860 | | // xt2 = 28 |
861 | 23.9M | vr = x[29]; |
862 | 23.9M | vi = x[28]; |
863 | 23.9M | ur = x[20] >> 1; |
864 | 23.9M | ui = x[21] >> 1; |
865 | 23.9M | x[20] = ur + (vr >> 1); |
866 | 23.9M | x[21] = ui - (vi >> 1); |
867 | 23.9M | x[28] = ur - (vr >> 1); |
868 | 23.9M | x[29] = ui + (vi >> 1); |
869 | | |
870 | | // xt1 = 2 |
871 | | // xt2 = 10 |
872 | 23.9M | SUMDIFF_PIFOURTH(vi, vr, x[10], x[11]) |
873 | | // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH); |
874 | | // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH); |
875 | 23.9M | ur = x[2]; |
876 | 23.9M | ui = x[3]; |
877 | 23.9M | x[2] = (ur >> 1) + vr; |
878 | 23.9M | x[3] = (ui >> 1) + vi; |
879 | 23.9M | x[10] = (ur >> 1) - vr; |
880 | 23.9M | x[11] = (ui >> 1) - vi; |
881 | | |
882 | | // xt1 = 6 |
883 | | // xt2 = 14 |
884 | 23.9M | SUMDIFF_PIFOURTH(vr, vi, x[14], x[15]) |
885 | 23.9M | ur = x[6]; |
886 | 23.9M | ui = x[7]; |
887 | 23.9M | x[6] = (ur >> 1) + vr; |
888 | 23.9M | x[7] = (ui >> 1) - vi; |
889 | 23.9M | x[14] = (ur >> 1) - vr; |
890 | 23.9M | x[15] = (ui >> 1) + vi; |
891 | | |
892 | | // xt1 = 18 |
893 | | // xt2 = 26 |
894 | 23.9M | SUMDIFF_PIFOURTH(vi, vr, x[26], x[27]) |
895 | 23.9M | ur = x[18]; |
896 | 23.9M | ui = x[19]; |
897 | 23.9M | x[18] = (ur >> 1) + vr; |
898 | 23.9M | x[19] = (ui >> 1) + vi; |
899 | 23.9M | x[26] = (ur >> 1) - vr; |
900 | 23.9M | x[27] = (ui >> 1) - vi; |
901 | | |
902 | | // xt1 = 22 |
903 | | // xt2 = 30 |
904 | 23.9M | SUMDIFF_PIFOURTH(vr, vi, x[30], x[31]) |
905 | 23.9M | ur = x[22]; |
906 | 23.9M | ui = x[23]; |
907 | 23.9M | x[22] = (ur >> 1) + vr; |
908 | 23.9M | x[23] = (ui >> 1) - vi; |
909 | 23.9M | x[30] = (ur >> 1) - vr; |
910 | 23.9M | x[31] = (ui >> 1) + vi; |
911 | | |
912 | | // xt1 = 0 |
913 | | // xt2 = 16 |
914 | 23.9M | vr = x[16]; |
915 | 23.9M | vi = x[17]; |
916 | 23.9M | ur = x[0] >> 1; |
917 | 23.9M | ui = x[1] >> 1; |
918 | 23.9M | x[0] = ur + (vr >> 1); |
919 | 23.9M | x[1] = ui + (vi >> 1); |
920 | 23.9M | x[16] = ur - (vr >> 1); |
921 | 23.9M | x[17] = ui - (vi >> 1); |
922 | | |
923 | | // xt1 = 8 |
924 | | // xt2 = 24 |
925 | 23.9M | vi = x[24]; |
926 | 23.9M | vr = x[25]; |
927 | 23.9M | ur = x[8] >> 1; |
928 | 23.9M | ui = x[9] >> 1; |
929 | 23.9M | x[8] = ur + (vr >> 1); |
930 | 23.9M | x[9] = ui - (vi >> 1); |
931 | 23.9M | x[24] = ur - (vr >> 1); |
932 | 23.9M | x[25] = ui + (vi >> 1); |
933 | | |
934 | | // xt1 = 2 |
935 | | // xt2 = 18 |
936 | 23.9M | cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]); |
937 | 23.9M | ur = x[2]; |
938 | 23.9M | ui = x[3]; |
939 | 23.9M | x[2] = (ur >> 1) + vr; |
940 | 23.9M | x[3] = (ui >> 1) + vi; |
941 | 23.9M | x[18] = (ur >> 1) - vr; |
942 | 23.9M | x[19] = (ui >> 1) - vi; |
943 | | |
944 | | // xt1 = 10 |
945 | | // xt2 = 26 |
946 | 23.9M | cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]); |
947 | 23.9M | ur = x[10]; |
948 | 23.9M | ui = x[11]; |
949 | 23.9M | x[10] = (ur >> 1) + vr; |
950 | 23.9M | x[11] = (ui >> 1) - vi; |
951 | 23.9M | x[26] = (ur >> 1) - vr; |
952 | 23.9M | x[27] = (ui >> 1) + vi; |
953 | | |
954 | | // xt1 = 4 |
955 | | // xt2 = 20 |
956 | 23.9M | SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) |
957 | 23.9M | ur = x[4]; |
958 | 23.9M | ui = x[5]; |
959 | 23.9M | x[4] = (ur >> 1) + vr; |
960 | 23.9M | x[5] = (ui >> 1) + vi; |
961 | 23.9M | x[20] = (ur >> 1) - vr; |
962 | 23.9M | x[21] = (ui >> 1) - vi; |
963 | | |
964 | | // xt1 = 12 |
965 | | // xt2 = 28 |
966 | 23.9M | SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) |
967 | 23.9M | ur = x[12]; |
968 | 23.9M | ui = x[13]; |
969 | 23.9M | x[12] = (ur >> 1) + vr; |
970 | 23.9M | x[13] = (ui >> 1) - vi; |
971 | 23.9M | x[28] = (ur >> 1) - vr; |
972 | 23.9M | x[29] = (ui >> 1) + vi; |
973 | | |
974 | | // xt1 = 6 |
975 | | // xt2 = 22 |
976 | 23.9M | cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]); |
977 | 23.9M | ur = x[6]; |
978 | 23.9M | ui = x[7]; |
979 | 23.9M | x[6] = (ur >> 1) + vr; |
980 | 23.9M | x[7] = (ui >> 1) + vi; |
981 | 23.9M | x[22] = (ur >> 1) - vr; |
982 | 23.9M | x[23] = (ui >> 1) - vi; |
983 | | |
984 | | // xt1 = 14 |
985 | | // xt2 = 30 |
986 | 23.9M | cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]); |
987 | 23.9M | ur = x[14]; |
988 | 23.9M | ui = x[15]; |
989 | 23.9M | x[14] = (ur >> 1) + vr; |
990 | 23.9M | x[15] = (ui >> 1) - vi; |
991 | 23.9M | x[30] = (ur >> 1) - vr; |
992 | 23.9M | x[31] = (ui >> 1) + vi; |
993 | 23.9M | } |
994 | | #endif /* FUNCTION_fft_16 */ |
995 | | |
996 | | #ifndef FUNCTION_fft_32 |
997 | | static const FIXP_STP fft32_w32[6] = { |
998 | | STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), |
999 | | STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7), |
1000 | | STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)}; |
1001 | 1.10G | #define W_PiFOURTH STC(0x5a82799a) |
1002 | | |
1003 | | LNK_SECTION_CODE_L1 |
1004 | 39.3M | inline void fft_32(FIXP_DBL *const _x) { |
1005 | | /* |
1006 | | * 1+2 stage radix 4 |
1007 | | */ |
1008 | | |
1009 | | ///////////////////////////////////////////////////////////////////////////////////////// |
1010 | 39.3M | { |
1011 | 39.3M | FIXP_DBL *const x = _x; |
1012 | 39.3M | FIXP_DBL vi, ui; |
1013 | 39.3M | FIXP_DBL vi2, ui2; |
1014 | 39.3M | FIXP_DBL vi3, ui3; |
1015 | 39.3M | FIXP_DBL vr, ur; |
1016 | 39.3M | FIXP_DBL vr2, ur2; |
1017 | 39.3M | FIXP_DBL vr3, ur3; |
1018 | 39.3M | FIXP_DBL vr4, ur4; |
1019 | | |
1020 | | // i = 0 |
1021 | 39.3M | vr = (x[0] + x[32]) >> 1; /* Re A + Re B */ |
1022 | 39.3M | ur = (x[1] + x[33]) >> 1; /* Im A + Im B */ |
1023 | 39.3M | vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */ |
1024 | 39.3M | ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */ |
1025 | | |
1026 | 39.3M | x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1027 | 39.3M | x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ |
1028 | | |
1029 | 39.3M | vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */ |
1030 | 39.3M | ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */ |
1031 | | |
1032 | 39.3M | x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1033 | 39.3M | x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1034 | | |
1035 | 39.3M | vr -= x[32]; /* Re A - Re B */ |
1036 | 39.3M | ur -= x[33]; /* Im A - Im B */ |
1037 | 39.3M | vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */ |
1038 | 39.3M | ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */ |
1039 | | |
1040 | 39.3M | vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */ |
1041 | 39.3M | ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */ |
1042 | | |
1043 | 39.3M | x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ |
1044 | 39.3M | x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ |
1045 | | |
1046 | 39.3M | vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */ |
1047 | 39.3M | ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */ |
1048 | | |
1049 | 39.3M | x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ |
1050 | 39.3M | x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ |
1051 | | |
1052 | | // i=16 |
1053 | 39.3M | vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */ |
1054 | 39.3M | ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */ |
1055 | | |
1056 | 39.3M | x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1057 | 39.3M | x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */ |
1058 | 39.3M | x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1059 | 39.3M | x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1060 | | |
1061 | 39.3M | vr2 -= x[36]; /* Re A - Re B */ |
1062 | 39.3M | ur2 -= x[37]; /* Im A - Im B */ |
1063 | 39.3M | vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */ |
1064 | 39.3M | ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */ |
1065 | | |
1066 | 39.3M | vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */ |
1067 | 39.3M | ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */ |
1068 | | |
1069 | 39.3M | x[18] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */ |
1070 | 39.3M | x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ |
1071 | | |
1072 | 39.3M | vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */ |
1073 | 39.3M | ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */ |
1074 | | |
1075 | 39.3M | x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ |
1076 | 39.3M | x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */ |
1077 | | |
1078 | | // i = 32 |
1079 | | |
1080 | 39.3M | x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1081 | 39.3M | x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ |
1082 | 39.3M | x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1083 | 39.3M | x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1084 | | |
1085 | 39.3M | vr3 -= x[34]; /* Re A - Re B */ |
1086 | 39.3M | ur3 -= x[35]; /* Im A - Im B */ |
1087 | 39.3M | vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */ |
1088 | 39.3M | ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */ |
1089 | | |
1090 | 39.3M | x[34] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */ |
1091 | 39.3M | x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ |
1092 | | |
1093 | | // i=48 |
1094 | | |
1095 | 39.3M | x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1096 | 39.3M | x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1097 | 39.3M | x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ |
1098 | 39.3M | x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1099 | | |
1100 | 39.3M | vr4 -= x[38]; /* Re A - Re B */ |
1101 | 39.3M | ur4 -= x[39]; /* Im A - Im B */ |
1102 | | |
1103 | 39.3M | x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ |
1104 | 39.3M | x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */ |
1105 | | |
1106 | 39.3M | vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */ |
1107 | 39.3M | ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */ |
1108 | | |
1109 | 39.3M | x[50] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ |
1110 | 39.3M | x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ |
1111 | 39.3M | x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ |
1112 | 39.3M | x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ |
1113 | | |
1114 | | // i=8 |
1115 | 39.3M | vr = (x[8] + x[40]) >> 1; /* Re A + Re B */ |
1116 | 39.3M | ur = (x[9] + x[41]) >> 1; /* Im A + Im B */ |
1117 | 39.3M | vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */ |
1118 | 39.3M | ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */ |
1119 | | |
1120 | 39.3M | x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1121 | 39.3M | x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ |
1122 | | |
1123 | 39.3M | vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */ |
1124 | 39.3M | ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */ |
1125 | | |
1126 | 39.3M | x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1127 | 39.3M | x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1128 | | |
1129 | 39.3M | vr -= x[40]; /* Re A - Re B */ |
1130 | 39.3M | ur -= x[41]; /* Im A - Im B */ |
1131 | 39.3M | vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */ |
1132 | 39.3M | ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */ |
1133 | | |
1134 | 39.3M | vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */ |
1135 | 39.3M | ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */ |
1136 | | |
1137 | 39.3M | x[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ |
1138 | 39.3M | x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ |
1139 | | |
1140 | 39.3M | vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */ |
1141 | 39.3M | ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */ |
1142 | | |
1143 | 39.3M | x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ |
1144 | 39.3M | x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ |
1145 | | |
1146 | | // i=24 |
1147 | 39.3M | vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */ |
1148 | 39.3M | ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */ |
1149 | | |
1150 | 39.3M | x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1151 | 39.3M | x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1152 | 39.3M | x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */ |
1153 | 39.3M | x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1154 | | |
1155 | 39.3M | vr2 -= x[44]; /* Re A - Re B */ |
1156 | 39.3M | ur2 -= x[45]; /* Im A - Im B */ |
1157 | 39.3M | vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */ |
1158 | 39.3M | ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */ |
1159 | | |
1160 | 39.3M | vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */ |
1161 | 39.3M | ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */ |
1162 | | |
1163 | 39.3M | x[26] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */ |
1164 | 39.3M | x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ |
1165 | | |
1166 | 39.3M | vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */ |
1167 | 39.3M | ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */ |
1168 | | |
1169 | 39.3M | x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ |
1170 | 39.3M | x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */ |
1171 | | |
1172 | | // i=40 |
1173 | | |
1174 | 39.3M | x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1175 | 39.3M | x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1176 | 39.3M | x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ |
1177 | 39.3M | x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1178 | | |
1179 | 39.3M | vr3 -= x[42]; /* Re A - Re B */ |
1180 | 39.3M | ur3 -= x[43]; /* Im A - Im B */ |
1181 | 39.3M | vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */ |
1182 | 39.3M | ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */ |
1183 | | |
1184 | 39.3M | x[42] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */ |
1185 | 39.3M | x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ |
1186 | | |
1187 | | // i=56 |
1188 | | |
1189 | 39.3M | x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ |
1190 | 39.3M | x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ |
1191 | 39.3M | x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ |
1192 | 39.3M | x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ |
1193 | | |
1194 | 39.3M | vr4 -= x[46]; /* Re A - Re B */ |
1195 | 39.3M | ur4 -= x[47]; /* Im A - Im B */ |
1196 | | |
1197 | 39.3M | x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ |
1198 | 39.3M | x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */ |
1199 | | |
1200 | 39.3M | vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */ |
1201 | 39.3M | ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */ |
1202 | | |
1203 | 39.3M | x[58] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ |
1204 | 39.3M | x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ |
1205 | 39.3M | x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ |
1206 | 39.3M | x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ |
1207 | 39.3M | } |
1208 | | |
1209 | 39.3M | { |
1210 | 39.3M | FIXP_DBL *xt = _x; |
1211 | | |
1212 | 39.3M | int j = 4; |
1213 | 157M | do { |
1214 | 157M | FIXP_DBL vi, ui, vr, ur; |
1215 | | |
1216 | 157M | vr = xt[8]; |
1217 | 157M | vi = xt[9]; |
1218 | 157M | ur = xt[0] >> 1; |
1219 | 157M | ui = xt[1] >> 1; |
1220 | 157M | xt[0] = ur + (vr >> 1); |
1221 | 157M | xt[1] = ui + (vi >> 1); |
1222 | 157M | xt[8] = ur - (vr >> 1); |
1223 | 157M | xt[9] = ui - (vi >> 1); |
1224 | | |
1225 | 157M | vr = xt[13]; |
1226 | 157M | vi = xt[12]; |
1227 | 157M | ur = xt[4] >> 1; |
1228 | 157M | ui = xt[5] >> 1; |
1229 | 157M | xt[4] = ur + (vr >> 1); |
1230 | 157M | xt[5] = ui - (vi >> 1); |
1231 | 157M | xt[12] = ur - (vr >> 1); |
1232 | 157M | xt[13] = ui + (vi >> 1); |
1233 | | |
1234 | 157M | SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11]) |
1235 | 157M | ur = xt[2]; |
1236 | 157M | ui = xt[3]; |
1237 | 157M | xt[2] = (ur >> 1) + vr; |
1238 | 157M | xt[3] = (ui >> 1) + vi; |
1239 | 157M | xt[10] = (ur >> 1) - vr; |
1240 | 157M | xt[11] = (ui >> 1) - vi; |
1241 | | |
1242 | 157M | SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15]) |
1243 | 157M | ur = xt[6]; |
1244 | 157M | ui = xt[7]; |
1245 | | |
1246 | 157M | xt[6] = (ur >> 1) + vr; |
1247 | 157M | xt[7] = (ui >> 1) - vi; |
1248 | 157M | xt[14] = (ur >> 1) - vr; |
1249 | 157M | xt[15] = (ui >> 1) + vi; |
1250 | 157M | xt += 16; |
1251 | 157M | } while (--j != 0); |
1252 | 39.3M | } |
1253 | | |
1254 | 39.3M | { |
1255 | 39.3M | FIXP_DBL *const x = _x; |
1256 | 39.3M | FIXP_DBL vi, ui, vr, ur; |
1257 | | |
1258 | 39.3M | vr = x[16]; |
1259 | 39.3M | vi = x[17]; |
1260 | 39.3M | ur = x[0] >> 1; |
1261 | 39.3M | ui = x[1] >> 1; |
1262 | 39.3M | x[0] = ur + (vr >> 1); |
1263 | 39.3M | x[1] = ui + (vi >> 1); |
1264 | 39.3M | x[16] = ur - (vr >> 1); |
1265 | 39.3M | x[17] = ui - (vi >> 1); |
1266 | | |
1267 | 39.3M | vi = x[24]; |
1268 | 39.3M | vr = x[25]; |
1269 | 39.3M | ur = x[8] >> 1; |
1270 | 39.3M | ui = x[9] >> 1; |
1271 | 39.3M | x[8] = ur + (vr >> 1); |
1272 | 39.3M | x[9] = ui - (vi >> 1); |
1273 | 39.3M | x[24] = ur - (vr >> 1); |
1274 | 39.3M | x[25] = ui + (vi >> 1); |
1275 | | |
1276 | 39.3M | vr = x[48]; |
1277 | 39.3M | vi = x[49]; |
1278 | 39.3M | ur = x[32] >> 1; |
1279 | 39.3M | ui = x[33] >> 1; |
1280 | 39.3M | x[32] = ur + (vr >> 1); |
1281 | 39.3M | x[33] = ui + (vi >> 1); |
1282 | 39.3M | x[48] = ur - (vr >> 1); |
1283 | 39.3M | x[49] = ui - (vi >> 1); |
1284 | | |
1285 | 39.3M | vi = x[56]; |
1286 | 39.3M | vr = x[57]; |
1287 | 39.3M | ur = x[40] >> 1; |
1288 | 39.3M | ui = x[41] >> 1; |
1289 | 39.3M | x[40] = ur + (vr >> 1); |
1290 | 39.3M | x[41] = ui - (vi >> 1); |
1291 | 39.3M | x[56] = ur - (vr >> 1); |
1292 | 39.3M | x[57] = ui + (vi >> 1); |
1293 | | |
1294 | 39.3M | cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]); |
1295 | 39.3M | ur = x[2]; |
1296 | 39.3M | ui = x[3]; |
1297 | 39.3M | x[2] = (ur >> 1) + vr; |
1298 | 39.3M | x[3] = (ui >> 1) + vi; |
1299 | 39.3M | x[18] = (ur >> 1) - vr; |
1300 | 39.3M | x[19] = (ui >> 1) - vi; |
1301 | | |
1302 | 39.3M | cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]); |
1303 | 39.3M | ur = x[10]; |
1304 | 39.3M | ui = x[11]; |
1305 | 39.3M | x[10] = (ur >> 1) + vr; |
1306 | 39.3M | x[11] = (ui >> 1) - vi; |
1307 | 39.3M | x[26] = (ur >> 1) - vr; |
1308 | 39.3M | x[27] = (ui >> 1) + vi; |
1309 | | |
1310 | 39.3M | cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]); |
1311 | 39.3M | ur = x[34]; |
1312 | 39.3M | ui = x[35]; |
1313 | 39.3M | x[34] = (ur >> 1) + vr; |
1314 | 39.3M | x[35] = (ui >> 1) + vi; |
1315 | 39.3M | x[50] = (ur >> 1) - vr; |
1316 | 39.3M | x[51] = (ui >> 1) - vi; |
1317 | | |
1318 | 39.3M | cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]); |
1319 | 39.3M | ur = x[42]; |
1320 | 39.3M | ui = x[43]; |
1321 | 39.3M | x[42] = (ur >> 1) + vr; |
1322 | 39.3M | x[43] = (ui >> 1) - vi; |
1323 | 39.3M | x[58] = (ur >> 1) - vr; |
1324 | 39.3M | x[59] = (ui >> 1) + vi; |
1325 | | |
1326 | 39.3M | SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) |
1327 | 39.3M | ur = x[4]; |
1328 | 39.3M | ui = x[5]; |
1329 | 39.3M | x[4] = (ur >> 1) + vr; |
1330 | 39.3M | x[5] = (ui >> 1) + vi; |
1331 | 39.3M | x[20] = (ur >> 1) - vr; |
1332 | 39.3M | x[21] = (ui >> 1) - vi; |
1333 | | |
1334 | 39.3M | SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) |
1335 | 39.3M | ur = x[12]; |
1336 | 39.3M | ui = x[13]; |
1337 | 39.3M | x[12] = (ur >> 1) + vr; |
1338 | 39.3M | x[13] = (ui >> 1) - vi; |
1339 | 39.3M | x[28] = (ur >> 1) - vr; |
1340 | 39.3M | x[29] = (ui >> 1) + vi; |
1341 | | |
1342 | 39.3M | SUMDIFF_PIFOURTH(vi, vr, x[52], x[53]) |
1343 | 39.3M | ur = x[36]; |
1344 | 39.3M | ui = x[37]; |
1345 | 39.3M | x[36] = (ur >> 1) + vr; |
1346 | 39.3M | x[37] = (ui >> 1) + vi; |
1347 | 39.3M | x[52] = (ur >> 1) - vr; |
1348 | 39.3M | x[53] = (ui >> 1) - vi; |
1349 | | |
1350 | 39.3M | SUMDIFF_PIFOURTH(vr, vi, x[60], x[61]) |
1351 | 39.3M | ur = x[44]; |
1352 | 39.3M | ui = x[45]; |
1353 | 39.3M | x[44] = (ur >> 1) + vr; |
1354 | 39.3M | x[45] = (ui >> 1) - vi; |
1355 | 39.3M | x[60] = (ur >> 1) - vr; |
1356 | 39.3M | x[61] = (ui >> 1) + vi; |
1357 | | |
1358 | 39.3M | cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]); |
1359 | 39.3M | ur = x[6]; |
1360 | 39.3M | ui = x[7]; |
1361 | 39.3M | x[6] = (ur >> 1) + vr; |
1362 | 39.3M | x[7] = (ui >> 1) + vi; |
1363 | 39.3M | x[22] = (ur >> 1) - vr; |
1364 | 39.3M | x[23] = (ui >> 1) - vi; |
1365 | | |
1366 | 39.3M | cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]); |
1367 | 39.3M | ur = x[14]; |
1368 | 39.3M | ui = x[15]; |
1369 | 39.3M | x[14] = (ur >> 1) + vr; |
1370 | 39.3M | x[15] = (ui >> 1) - vi; |
1371 | 39.3M | x[30] = (ur >> 1) - vr; |
1372 | 39.3M | x[31] = (ui >> 1) + vi; |
1373 | | |
1374 | 39.3M | cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]); |
1375 | 39.3M | ur = x[38]; |
1376 | 39.3M | ui = x[39]; |
1377 | 39.3M | x[38] = (ur >> 1) + vr; |
1378 | 39.3M | x[39] = (ui >> 1) + vi; |
1379 | 39.3M | x[54] = (ur >> 1) - vr; |
1380 | 39.3M | x[55] = (ui >> 1) - vi; |
1381 | | |
1382 | 39.3M | cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]); |
1383 | 39.3M | ur = x[46]; |
1384 | 39.3M | ui = x[47]; |
1385 | | |
1386 | 39.3M | x[46] = (ur >> 1) + vr; |
1387 | 39.3M | x[47] = (ui >> 1) - vi; |
1388 | 39.3M | x[62] = (ur >> 1) - vr; |
1389 | 39.3M | x[63] = (ui >> 1) + vi; |
1390 | | |
1391 | 39.3M | vr = x[32]; |
1392 | 39.3M | vi = x[33]; |
1393 | 39.3M | ur = x[0] >> 1; |
1394 | 39.3M | ui = x[1] >> 1; |
1395 | 39.3M | x[0] = ur + (vr >> 1); |
1396 | 39.3M | x[1] = ui + (vi >> 1); |
1397 | 39.3M | x[32] = ur - (vr >> 1); |
1398 | 39.3M | x[33] = ui - (vi >> 1); |
1399 | | |
1400 | 39.3M | vi = x[48]; |
1401 | 39.3M | vr = x[49]; |
1402 | 39.3M | ur = x[16] >> 1; |
1403 | 39.3M | ui = x[17] >> 1; |
1404 | 39.3M | x[16] = ur + (vr >> 1); |
1405 | 39.3M | x[17] = ui - (vi >> 1); |
1406 | 39.3M | x[48] = ur - (vr >> 1); |
1407 | 39.3M | x[49] = ui + (vi >> 1); |
1408 | | |
1409 | 39.3M | cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]); |
1410 | 39.3M | ur = x[2]; |
1411 | 39.3M | ui = x[3]; |
1412 | 39.3M | x[2] = (ur >> 1) + vr; |
1413 | 39.3M | x[3] = (ui >> 1) + vi; |
1414 | 39.3M | x[34] = (ur >> 1) - vr; |
1415 | 39.3M | x[35] = (ui >> 1) - vi; |
1416 | | |
1417 | 39.3M | cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]); |
1418 | 39.3M | ur = x[18]; |
1419 | 39.3M | ui = x[19]; |
1420 | 39.3M | x[18] = (ur >> 1) + vr; |
1421 | 39.3M | x[19] = (ui >> 1) - vi; |
1422 | 39.3M | x[50] = (ur >> 1) - vr; |
1423 | 39.3M | x[51] = (ui >> 1) + vi; |
1424 | | |
1425 | 39.3M | cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]); |
1426 | 39.3M | ur = x[4]; |
1427 | 39.3M | ui = x[5]; |
1428 | 39.3M | x[4] = (ur >> 1) + vr; |
1429 | 39.3M | x[5] = (ui >> 1) + vi; |
1430 | 39.3M | x[36] = (ur >> 1) - vr; |
1431 | 39.3M | x[37] = (ui >> 1) - vi; |
1432 | | |
1433 | 39.3M | cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]); |
1434 | 39.3M | ur = x[20]; |
1435 | 39.3M | ui = x[21]; |
1436 | 39.3M | x[20] = (ur >> 1) + vr; |
1437 | 39.3M | x[21] = (ui >> 1) - vi; |
1438 | 39.3M | x[52] = (ur >> 1) - vr; |
1439 | 39.3M | x[53] = (ui >> 1) + vi; |
1440 | | |
1441 | 39.3M | cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]); |
1442 | 39.3M | ur = x[6]; |
1443 | 39.3M | ui = x[7]; |
1444 | 39.3M | x[6] = (ur >> 1) + vr; |
1445 | 39.3M | x[7] = (ui >> 1) + vi; |
1446 | 39.3M | x[38] = (ur >> 1) - vr; |
1447 | 39.3M | x[39] = (ui >> 1) - vi; |
1448 | | |
1449 | 39.3M | cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]); |
1450 | 39.3M | ur = x[22]; |
1451 | 39.3M | ui = x[23]; |
1452 | 39.3M | x[22] = (ur >> 1) + vr; |
1453 | 39.3M | x[23] = (ui >> 1) - vi; |
1454 | 39.3M | x[54] = (ur >> 1) - vr; |
1455 | 39.3M | x[55] = (ui >> 1) + vi; |
1456 | | |
1457 | 39.3M | SUMDIFF_PIFOURTH(vi, vr, x[40], x[41]) |
1458 | 39.3M | ur = x[8]; |
1459 | 39.3M | ui = x[9]; |
1460 | 39.3M | x[8] = (ur >> 1) + vr; |
1461 | 39.3M | x[9] = (ui >> 1) + vi; |
1462 | 39.3M | x[40] = (ur >> 1) - vr; |
1463 | 39.3M | x[41] = (ui >> 1) - vi; |
1464 | | |
1465 | 39.3M | SUMDIFF_PIFOURTH(vr, vi, x[56], x[57]) |
1466 | 39.3M | ur = x[24]; |
1467 | 39.3M | ui = x[25]; |
1468 | 39.3M | x[24] = (ur >> 1) + vr; |
1469 | 39.3M | x[25] = (ui >> 1) - vi; |
1470 | 39.3M | x[56] = (ur >> 1) - vr; |
1471 | 39.3M | x[57] = (ui >> 1) + vi; |
1472 | | |
1473 | 39.3M | cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]); |
1474 | 39.3M | ur = x[10]; |
1475 | 39.3M | ui = x[11]; |
1476 | | |
1477 | 39.3M | x[10] = (ur >> 1) + vr; |
1478 | 39.3M | x[11] = (ui >> 1) + vi; |
1479 | 39.3M | x[42] = (ur >> 1) - vr; |
1480 | 39.3M | x[43] = (ui >> 1) - vi; |
1481 | | |
1482 | 39.3M | cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]); |
1483 | 39.3M | ur = x[26]; |
1484 | 39.3M | ui = x[27]; |
1485 | 39.3M | x[26] = (ur >> 1) + vr; |
1486 | 39.3M | x[27] = (ui >> 1) - vi; |
1487 | 39.3M | x[58] = (ur >> 1) - vr; |
1488 | 39.3M | x[59] = (ui >> 1) + vi; |
1489 | | |
1490 | 39.3M | cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]); |
1491 | 39.3M | ur = x[12]; |
1492 | 39.3M | ui = x[13]; |
1493 | 39.3M | x[12] = (ur >> 1) + vr; |
1494 | 39.3M | x[13] = (ui >> 1) + vi; |
1495 | 39.3M | x[44] = (ur >> 1) - vr; |
1496 | 39.3M | x[45] = (ui >> 1) - vi; |
1497 | | |
1498 | 39.3M | cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]); |
1499 | 39.3M | ur = x[28]; |
1500 | 39.3M | ui = x[29]; |
1501 | 39.3M | x[28] = (ur >> 1) + vr; |
1502 | 39.3M | x[29] = (ui >> 1) - vi; |
1503 | 39.3M | x[60] = (ur >> 1) - vr; |
1504 | 39.3M | x[61] = (ui >> 1) + vi; |
1505 | | |
1506 | 39.3M | cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]); |
1507 | 39.3M | ur = x[14]; |
1508 | 39.3M | ui = x[15]; |
1509 | 39.3M | x[14] = (ur >> 1) + vr; |
1510 | 39.3M | x[15] = (ui >> 1) + vi; |
1511 | 39.3M | x[46] = (ur >> 1) - vr; |
1512 | 39.3M | x[47] = (ui >> 1) - vi; |
1513 | | |
1514 | 39.3M | cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]); |
1515 | 39.3M | ur = x[30]; |
1516 | 39.3M | ui = x[31]; |
1517 | 39.3M | x[30] = (ur >> 1) + vr; |
1518 | 39.3M | x[31] = (ui >> 1) - vi; |
1519 | 39.3M | x[62] = (ur >> 1) - vr; |
1520 | 39.3M | x[63] = (ui >> 1) + vi; |
1521 | 39.3M | } |
1522 | 39.3M | } |
1523 | | #endif /* #ifndef FUNCTION_fft_32 */ |
1524 | | |
1525 | | /** |
1526 | | * \brief Apply rotation vectors to a data buffer. |
1527 | | * \param cl length of each row of input data. |
1528 | | * \param l total length of input data. |
1529 | | * \param pVecRe real part of rotation coefficient vector. |
1530 | | * \param pVecIm imaginary part of rotation coefficient vector. |
1531 | | */ |
1532 | | |
1533 | | /* |
1534 | | This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000 |
1535 | | (-1.0) instead. At the end, the sign of the result is inverted |
1536 | | */ |
1537 | | #define noFFT_APPLY_ROT_VECTOR_HQ |
1538 | | |
1539 | | #ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL |
1540 | | static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, |
1541 | | const int l, const FIXP_STB *pVecRe, |
1542 | 4.69M | const FIXP_STB *pVecIm) { |
1543 | 4.69M | FIXP_DBL re, im; |
1544 | 4.69M | FIXP_STB vre, vim; |
1545 | | |
1546 | 4.69M | int i, c; |
1547 | | |
1548 | 20.0M | for (i = 0; i < cl; i++) { |
1549 | 15.3M | re = pData[2 * i]; |
1550 | 15.3M | im = pData[2 * i + 1]; |
1551 | | |
1552 | 15.3M | pData[2 * i] = re >> 2; /* * 0.25 */ |
1553 | 15.3M | pData[2 * i + 1] = im >> 2; /* * 0.25 */ |
1554 | 15.3M | } |
1555 | 28.1M | for (; i < l; i += cl) { |
1556 | 23.4M | re = pData[2 * i]; |
1557 | 23.4M | im = pData[2 * i + 1]; |
1558 | | |
1559 | 23.4M | pData[2 * i] = re >> 2; /* * 0.25 */ |
1560 | 23.4M | pData[2 * i + 1] = im >> 2; /* * 0.25 */ |
1561 | | |
1562 | 142M | for (c = i + 1; c < i + cl; c++) { |
1563 | 119M | re = pData[2 * c] >> 1; |
1564 | 119M | im = pData[2 * c + 1] >> 1; |
1565 | 119M | vre = *pVecRe++; |
1566 | 119M | vim = *pVecIm++; |
1567 | | |
1568 | 119M | cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim); |
1569 | 119M | } |
1570 | 23.4M | } |
1571 | 4.69M | } |
1572 | | #endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */ |
1573 | | |
1574 | | /* select either switch case of function pointer. */ |
1575 | | //#define FFT_TWO_STAGE_SWITCH_CASE |
1576 | | #ifndef FUNCTION_fftN2_func |
1577 | | static inline void fftN2_func(FIXP_DBL *pInput, const int length, |
1578 | | const int dim1, const int dim2, |
1579 | | void (*const fft1)(FIXP_DBL *), |
1580 | | void (*const fft2)(FIXP_DBL *), |
1581 | | const FIXP_STB *RotVectorReal, |
1582 | | const FIXP_STB *RotVectorImag, FIXP_DBL *aDst, |
1583 | 4.69M | FIXP_DBL *aDst2) { |
1584 | | /* The real part of the input samples are at the addresses with even indices |
1585 | | and the imaginary part of the input samples are at the addresses with odd |
1586 | | indices. The output samples are stored at the address of pInput |
1587 | | */ |
1588 | 4.69M | FIXP_DBL *pSrc, *pDst, *pDstOut; |
1589 | 4.69M | int i; |
1590 | | |
1591 | 4.69M | FDK_ASSERT(length == dim1 * dim2); |
1592 | | |
1593 | | /* Perform dim2 times the fft of length dim1. The input samples are at the |
1594 | | address of pSrc and the output samples are at the address of pDst. The input |
1595 | | vector for the fft of length dim1 is built of the interleaved samples in pSrc, |
1596 | | the output samples are stored consecutively. |
1597 | | */ |
1598 | 4.69M | pSrc = pInput; |
1599 | 4.69M | pDst = aDst; |
1600 | 32.8M | for (i = 0; i < dim2; i++) { |
1601 | 186M | for (int j = 0; j < dim1; j++) { |
1602 | 158M | pDst[2 * j] = pSrc[2 * j * dim2]; |
1603 | 158M | pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1]; |
1604 | 158M | } |
1605 | | |
1606 | | /* fft of size dim1 */ |
1607 | 28.1M | #ifndef FFT_TWO_STAGE_SWITCH_CASE |
1608 | 28.1M | fft1(pDst); |
1609 | | #else |
1610 | | switch (dim1) { |
1611 | | case 2: |
1612 | | fft2(pDst); |
1613 | | break; |
1614 | | case 3: |
1615 | | fft3(pDst); |
1616 | | break; |
1617 | | case 4: |
1618 | | fft_4(pDst); |
1619 | | break; |
1620 | | /* case 5: fft5(pDst); break; */ |
1621 | | /* case 8: fft_8(pDst); break; */ |
1622 | | case 12: |
1623 | | fft12(pDst); |
1624 | | break; |
1625 | | /* case 15: fft15(pDst); break; */ |
1626 | | case 16: |
1627 | | fft_16(pDst); |
1628 | | break; |
1629 | | case 32: |
1630 | | fft_32(pDst); |
1631 | | break; |
1632 | | /*case 64: fft_64(pDst); break;*/ |
1633 | | /* case 128: fft_128(pDst); break; */ |
1634 | | } |
1635 | | #endif |
1636 | 28.1M | pSrc += 2; |
1637 | 28.1M | pDst = pDst + 2 * dim1; |
1638 | 28.1M | } |
1639 | | |
1640 | | /* Perform the modulation of the output of the fft of length dim1 */ |
1641 | 4.69M | pSrc = aDst; |
1642 | 4.69M | fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag); |
1643 | | |
1644 | | /* Perform dim1 times the fft of length dim2. The input samples are at the |
1645 | | address of aDst and the output samples are at the address of pInput. The input |
1646 | | vector for the fft of length dim2 is built of the interleaved samples in aDst, |
1647 | | the output samples are stored consecutively at the address of pInput. |
1648 | | */ |
1649 | 4.69M | pSrc = aDst; |
1650 | 4.69M | pDst = aDst2; |
1651 | 4.69M | pDstOut = pInput; |
1652 | 20.0M | for (i = 0; i < dim1; i++) { |
1653 | 173M | for (int j = 0; j < dim2; j++) { |
1654 | 158M | pDst[2 * j] = pSrc[2 * j * dim1]; |
1655 | 158M | pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1]; |
1656 | 158M | } |
1657 | | |
1658 | 15.3M | #ifndef FFT_TWO_STAGE_SWITCH_CASE |
1659 | 15.3M | fft2(pDst); |
1660 | | #else |
1661 | | switch (dim2) { |
1662 | | case 4: |
1663 | | fft_4(pDst); |
1664 | | break; |
1665 | | case 9: |
1666 | | fft9(pDst); |
1667 | | break; |
1668 | | case 12: |
1669 | | fft12(pDst); |
1670 | | break; |
1671 | | case 15: |
1672 | | fft15(pDst); |
1673 | | break; |
1674 | | case 16: |
1675 | | fft_16(pDst); |
1676 | | break; |
1677 | | case 32: |
1678 | | fft_32(pDst); |
1679 | | break; |
1680 | | } |
1681 | | #endif |
1682 | | |
1683 | 173M | for (int j = 0; j < dim2; j++) { |
1684 | 158M | pDstOut[2 * j * dim1] = pDst[2 * j]; |
1685 | 158M | pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1]; |
1686 | 158M | } |
1687 | 15.3M | pSrc += 2; |
1688 | 15.3M | pDstOut += 2; |
1689 | 15.3M | } |
1690 | 4.69M | } |
1691 | | #endif /* FUNCTION_fftN2_function */ |
1692 | | |
1693 | | #define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \ |
1694 | | RotVectorReal, RotVectorImag) \ |
1695 | 4.69M | { \ |
1696 | 4.69M | C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length) \ |
1697 | 4.69M | C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2) \ |
1698 | 4.69M | fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2, \ |
1699 | 4.69M | RotVectorReal, RotVectorImag, aDst, aDst2); \ |
1700 | 4.69M | C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2) \ |
1701 | 4.69M | C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length) \ |
1702 | 4.69M | } |
1703 | | |
1704 | | /*! |
1705 | | * |
1706 | | * \brief complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480 |
1707 | | * \param pInput contains the input signal prescaled right by 2 |
1708 | | * pInput contains the output signal scaled by SCALEFACTOR<#length> |
1709 | | * The output signal does not have any fixed headroom |
1710 | | * \return void |
1711 | | * |
1712 | | */ |
1713 | | |
1714 | | #ifndef FUNCTION_fft6 |
1715 | 3.55M | static inline void fft6(FIXP_DBL *pInput) { |
1716 | 3.55M | fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6); |
1717 | 3.55M | } |
1718 | | #endif /* #ifndef FUNCTION_fft6 */ |
1719 | | |
1720 | | #ifndef FUNCTION_fft12 |
1721 | | static inline void fft12(FIXP_DBL *pInput) { |
1722 | | fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12, |
1723 | | RotVectorImag12); /* 16,58 */ |
1724 | | } |
1725 | | #endif /* #ifndef FUNCTION_fft12 */ |
1726 | | |
1727 | | #ifndef FUNCTION_fft20 |
1728 | 114k | static inline void fft20(FIXP_DBL *pInput) { |
1729 | 114k | fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20, |
1730 | 114k | RotVectorImag20); |
1731 | 114k | } |
1732 | | #endif /* FUNCTION_fft20 */ |
1733 | | |
1734 | 6.20k | static inline void fft24(FIXP_DBL *pInput) { |
1735 | 6.20k | fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24, |
1736 | 6.20k | RotVectorImag24); /* 16,73 */ |
1737 | 6.20k | } |
1738 | | |
1739 | 376k | static inline void fft48(FIXP_DBL *pInput) { |
1740 | 376k | fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48, |
1741 | 376k | RotVectorImag48); /* 16,32 */ |
1742 | 376k | } |
1743 | | |
1744 | | #ifndef FUNCTION_fft60 |
1745 | 245k | static inline void fft60(FIXP_DBL *pInput) { |
1746 | 245k | fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60, |
1747 | 245k | RotVectorImag60); /* 15,51 */ |
1748 | 245k | } |
1749 | | #endif /* FUNCTION_fft60 */ |
1750 | | |
1751 | | #ifndef FUNCTION_fft80 |
1752 | 187 | static inline void fft80(FIXP_DBL *pInput) { |
1753 | 187 | fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80, |
1754 | 187 | RotVectorImag80); /* */ |
1755 | 187 | } |
1756 | | #endif |
1757 | | |
1758 | | #ifndef FUNCTION_fft96 |
1759 | 68.1k | static inline void fft96(FIXP_DBL *pInput) { |
1760 | 68.1k | fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96, |
1761 | 68.1k | RotVectorImag96); /* 15,47 */ |
1762 | 68.1k | } |
1763 | | #endif /* FUNCTION_fft96*/ |
1764 | | |
1765 | | #ifndef FUNCTION_fft120 |
1766 | 3.83k | static inline void fft120(FIXP_DBL *pInput) { |
1767 | 3.83k | fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120, |
1768 | 3.83k | RotVectorImag120); |
1769 | 3.83k | } |
1770 | | #endif /* FUNCTION_fft120 */ |
1771 | | |
1772 | | #ifndef FUNCTION_fft192 |
1773 | 7.80k | static inline void fft192(FIXP_DBL *pInput) { |
1774 | 7.80k | fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192, |
1775 | 7.80k | RotVectorImag192); /* 15,50 */ |
1776 | 7.80k | } |
1777 | | #endif |
1778 | | |
1779 | | #ifndef FUNCTION_fft240 |
1780 | 207k | static inline void fft240(FIXP_DBL *pInput) { |
1781 | 207k | fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240, |
1782 | 207k | RotVectorImag240); /* 15.44 */ |
1783 | 207k | } |
1784 | | #endif |
1785 | | |
1786 | | #ifndef FUNCTION_fft384 |
1787 | 93.2k | static inline void fft384(FIXP_DBL *pInput) { |
1788 | 93.2k | fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384, |
1789 | 93.2k | RotVectorImag384); /* 16.02 */ |
1790 | 93.2k | } |
1791 | | #endif /* FUNCTION_fft384 */ |
1792 | | |
1793 | | #ifndef FUNCTION_fft480 |
1794 | 15.7k | static inline void fft480(FIXP_DBL *pInput) { |
1795 | 15.7k | fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480, |
1796 | 15.7k | RotVectorImag480); /* 15.84 */ |
1797 | 15.7k | } |
1798 | | #endif /* FUNCTION_fft480 */ |
1799 | | |
1800 | 85.8M | void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) { |
1801 | | /* Ensure, that the io-ptr is always (at least 8-byte) aligned */ |
1802 | 85.8M | C_ALLOC_ALIGNED_CHECK(pInput); |
1803 | | |
1804 | 85.8M | if (length == 32) { |
1805 | 37.7M | fft_32(pInput); |
1806 | 37.7M | *pScalefactor += SCALEFACTOR32; |
1807 | 48.1M | } else { |
1808 | 48.1M | switch (length) { |
1809 | 20.7M | case 16: |
1810 | 20.7M | fft_16(pInput); |
1811 | 20.7M | *pScalefactor += SCALEFACTOR16; |
1812 | 20.7M | break; |
1813 | 8.26M | case 8: |
1814 | 8.26M | fft_8(pInput); |
1815 | 8.26M | *pScalefactor += SCALEFACTOR8; |
1816 | 8.26M | break; |
1817 | 0 | case 2: |
1818 | 0 | fft2(pInput); |
1819 | 0 | *pScalefactor += SCALEFACTOR2; |
1820 | 0 | break; |
1821 | 0 | case 3: |
1822 | 0 | fft3(pInput); |
1823 | 0 | *pScalefactor += SCALEFACTOR3; |
1824 | 0 | break; |
1825 | 1.00M | case 4: |
1826 | 1.00M | fft_4(pInput); |
1827 | 1.00M | *pScalefactor += SCALEFACTOR4; |
1828 | 1.00M | break; |
1829 | 0 | case 5: |
1830 | 0 | fft5(pInput); |
1831 | 0 | *pScalefactor += SCALEFACTOR5; |
1832 | 0 | break; |
1833 | 3.55M | case 6: |
1834 | 3.55M | fft6(pInput); |
1835 | 3.55M | *pScalefactor += SCALEFACTOR6; |
1836 | 3.55M | break; |
1837 | 114k | case 10: |
1838 | 114k | fft10(pInput); |
1839 | 114k | *pScalefactor += SCALEFACTOR10; |
1840 | 114k | break; |
1841 | 12.5M | case 12: |
1842 | 12.5M | fft12(pInput); |
1843 | 12.5M | *pScalefactor += SCALEFACTOR12; |
1844 | 12.5M | break; |
1845 | 0 | case 15: |
1846 | 0 | fft15(pInput); |
1847 | 0 | *pScalefactor += SCALEFACTOR15; |
1848 | 0 | break; |
1849 | 114k | case 20: |
1850 | 114k | fft20(pInput); |
1851 | 114k | *pScalefactor += SCALEFACTOR20; |
1852 | 114k | break; |
1853 | 6.20k | case 24: |
1854 | 6.20k | fft24(pInput); |
1855 | 6.20k | *pScalefactor += SCALEFACTOR24; |
1856 | 6.20k | break; |
1857 | 376k | case 48: |
1858 | 376k | fft48(pInput); |
1859 | 376k | *pScalefactor += SCALEFACTOR48; |
1860 | 376k | break; |
1861 | 245k | case 60: |
1862 | 245k | fft60(pInput); |
1863 | 245k | *pScalefactor += SCALEFACTOR60; |
1864 | 245k | break; |
1865 | 422k | case 64: |
1866 | 422k | dit_fft(pInput, 6, SineTable512, 512); |
1867 | 422k | *pScalefactor += SCALEFACTOR64; |
1868 | 422k | break; |
1869 | 187 | case 80: |
1870 | 187 | fft80(pInput); |
1871 | 187 | *pScalefactor += SCALEFACTOR80; |
1872 | 187 | break; |
1873 | 68.1k | case 96: |
1874 | 68.1k | fft96(pInput); |
1875 | 68.1k | *pScalefactor += SCALEFACTOR96; |
1876 | 68.1k | break; |
1877 | 3.83k | case 120: |
1878 | 3.83k | fft120(pInput); |
1879 | 3.83k | *pScalefactor += SCALEFACTOR120; |
1880 | 3.83k | break; |
1881 | 81.0k | case 128: |
1882 | 81.0k | dit_fft(pInput, 7, SineTable512, 512); |
1883 | 81.0k | *pScalefactor += SCALEFACTOR128; |
1884 | 81.0k | break; |
1885 | 7.80k | case 192: |
1886 | 7.80k | fft192(pInput); |
1887 | 7.80k | *pScalefactor += SCALEFACTOR192; |
1888 | 7.80k | break; |
1889 | 207k | case 240: |
1890 | 207k | fft240(pInput); |
1891 | 207k | *pScalefactor += SCALEFACTOR240; |
1892 | 207k | break; |
1893 | 74.1k | case 256: |
1894 | 74.1k | dit_fft(pInput, 8, SineTable512, 512); |
1895 | 74.1k | *pScalefactor += SCALEFACTOR256; |
1896 | 74.1k | break; |
1897 | 93.2k | case 384: |
1898 | 93.2k | fft384(pInput); |
1899 | 93.2k | *pScalefactor += SCALEFACTOR384; |
1900 | 93.2k | break; |
1901 | 15.7k | case 480: |
1902 | 15.7k | fft480(pInput); |
1903 | 15.7k | *pScalefactor += SCALEFACTOR480; |
1904 | 15.7k | break; |
1905 | 174k | case 512: |
1906 | 174k | dit_fft(pInput, 9, SineTable512, 512); |
1907 | 174k | *pScalefactor += SCALEFACTOR512; |
1908 | 174k | break; |
1909 | 0 | default: |
1910 | 0 | FDK_ASSERT(0); /* FFT length not supported! */ |
1911 | 0 | break; |
1912 | 48.1M | } |
1913 | 48.1M | } |
1914 | 85.8M | } |
1915 | | |
1916 | 0 | void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) { |
1917 | 0 | switch (length) { |
1918 | 0 | default: |
1919 | 0 | FDK_ASSERT(0); /* IFFT length not supported! */ |
1920 | 0 | break; |
1921 | 0 | } |
1922 | 0 | } |