/src/libxaac/common/ixheaac_esbr_fft.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | #include <stdio.h> |
22 | | #include <stdlib.h> |
23 | | #include "ixheaac_type_def.h" |
24 | | #include "ixheaac_constants.h" |
25 | | #include "ixheaac_basic_ops32.h" |
26 | | |
/* Alias for the inline specifier; expands to the `__inline` spelling.
 * It is not referenced in the part of the file reviewed here. */
27 | | #define PLATFORM_INLINE __inline |
28 | | |
/*
 * DIG_REV(i, m, j): base-4 digit reversal used for FFT input ordering.
 *
 * The value of i is copied into an unsigned temporary, then adjacent 2-bit
 * groups, adjacent nibbles and adjacent bytes are swapped in turn.  The net
 * effect is that the eight 2-bit groups inside each 16-bit half of the word
 * appear in reverse order.  The result is shifted right by m and assigned
 * to j.  i and m are each evaluated once.
 */
#define DIG_REV(i, m, j)                                             \
  do {                                                               \
    unsigned dig_rev_word_ = (i);                                    \
    /* swap neighbouring 2-bit groups */                             \
    dig_rev_word_ = ((dig_rev_word_ << 2) & 0xCCCCCCCCu) |           \
                    ((dig_rev_word_ >> 2) & 0x33333333u);            \
    /* swap neighbouring nibbles */                                  \
    dig_rev_word_ = ((dig_rev_word_ << 4) & 0xF0F0F0F0u) |           \
                    ((dig_rev_word_ >> 4) & 0x0F0F0F0Fu);            \
    /* swap neighbouring bytes */                                    \
    dig_rev_word_ = ((dig_rev_word_ << 8) & 0xFF00FF00u) |           \
                    ((dig_rev_word_ >> 8) & 0x00FF00FFu);            \
    (j) = dig_rev_word_ >> (m);                                      \
  } while (0)
37 | | |
/* Constant tables defined in another translation unit.
 *
 * ixheaac_twiddle_table_fft_float: the FFT routines in this file read it in
 * pairs, entry k together with entry k + 257, and use the two values as the
 * two coefficients of one complex twiddle rotation.
 *
 * ixheaac_twidle_tbl_48 / ixheaac_twidle_tbl_24: not referenced in the part
 * of the file reviewed here; presumably twiddles for other transform
 * lengths -- TODO confirm against their users. */
38 | | extern const FLOAT32 ixheaac_twiddle_table_fft_float[514]; |
39 | | extern const FLOAT32 ixheaac_twidle_tbl_48[64]; |
40 | | extern const FLOAT32 ixheaac_twidle_tbl_24[32]; |
41 | | |
42 | 29.9M | void ixheaac_real_synth_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) { |
43 | 29.9M | WORD32 i, j, k, n_stages, h2; |
44 | 29.9M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
45 | 29.9M | WORD32 del, nodespacing, in_loop_cnt; |
46 | 29.9M | WORD32 not_power_4; |
47 | 29.9M | WORD32 dig_rev_shift; |
48 | 29.9M | const FLOAT32 *ptr_w; |
49 | | |
50 | 29.9M | dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16; |
51 | 29.9M | n_stages = 30 - ixheaac_norm32(npoints); |
52 | 29.9M | not_power_4 = n_stages & 1; |
53 | | |
54 | 29.9M | n_stages = n_stages >> 1; |
55 | | |
56 | 29.9M | ptr_w = ixheaac_twiddle_table_fft_float; |
57 | | |
58 | 117M | for (i = 0; i < npoints; i += 4) { |
59 | 87.3M | FLOAT32 *inp = ptr_x; |
60 | | |
61 | 87.3M | DIG_REV(i, dig_rev_shift, h2); |
62 | 87.3M | if (not_power_4) { |
63 | 62.5M | h2 += 1; |
64 | 62.5M | h2 &= ~1; |
65 | 62.5M | } |
66 | 87.3M | inp += (h2 >> 1); |
67 | | |
68 | 87.3M | x0r = *inp; |
69 | 87.3M | inp += (npoints >> 2); |
70 | | |
71 | 87.3M | x1r = *inp; |
72 | 87.3M | inp += (npoints >> 2); |
73 | | |
74 | 87.3M | x2r = *inp; |
75 | 87.3M | inp += (npoints >> 2); |
76 | | |
77 | 87.3M | x3r = *inp; |
78 | | |
79 | 87.3M | x0r = x0r + x2r; |
80 | 87.3M | x2r = x0r - (x2r * 2); |
81 | 87.3M | x1r = x1r + x3r; |
82 | 87.3M | x3r = x1r - (x3r * 2); |
83 | 87.3M | x0r = x0r + x1r; |
84 | 87.3M | x1r = x0r - (x1r * 2); |
85 | | |
86 | 87.3M | *ptr_y++ = x0r; |
87 | 87.3M | *ptr_y++ = 0; |
88 | 87.3M | *ptr_y++ = x2r; |
89 | 87.3M | *ptr_y++ = x3r; |
90 | 87.3M | *ptr_y++ = x1r; |
91 | 87.3M | *ptr_y++ = 0; |
92 | 87.3M | *ptr_y++ = x2r; |
93 | 87.3M | *ptr_y++ = -x3r; |
94 | 87.3M | } |
95 | 29.9M | ptr_y -= 2 * npoints; |
96 | 29.9M | del = 4; |
97 | 29.9M | nodespacing = 64; |
98 | 29.9M | in_loop_cnt = npoints >> 4; |
99 | 38.6M | for (i = n_stages - 1; i > 0; i--) { |
100 | 8.72M | const FLOAT32 *twiddles = ptr_w; |
101 | 8.72M | FLOAT32 *data = ptr_y; |
102 | 8.72M | FLOAT32 W1, W2, W3, W4, W5, W6; |
103 | 8.72M | WORD32 sec_loop_cnt; |
104 | | |
105 | 19.9M | for (k = in_loop_cnt; k != 0; k--) { |
106 | 11.2M | x0r = (*data); |
107 | 11.2M | x0i = (*(data + 1)); |
108 | 11.2M | data += ((SIZE_T)del << 1); |
109 | | |
110 | 11.2M | x1r = (*data); |
111 | 11.2M | x1i = (*(data + 1)); |
112 | 11.2M | data += ((SIZE_T)del << 1); |
113 | | |
114 | 11.2M | x2r = (*data); |
115 | 11.2M | x2i = (*(data + 1)); |
116 | 11.2M | data += ((SIZE_T)del << 1); |
117 | | |
118 | 11.2M | x3r = (*data); |
119 | 11.2M | x3i = (*(data + 1)); |
120 | 11.2M | data -= 3 * ((SIZE_T)del << 1); |
121 | | |
122 | 11.2M | x0r = x0r + x2r; |
123 | 11.2M | x0i = x0i + x2i; |
124 | 11.2M | x2r = x0r - (x2r * 2); |
125 | 11.2M | x2i = x0i - (x2i * 2); |
126 | 11.2M | x1r = x1r + x3r; |
127 | 11.2M | x1i = x1i + x3i; |
128 | 11.2M | x3r = x1r - (x3r * 2); |
129 | 11.2M | x3i = x1i - (x3i * 2); |
130 | | |
131 | 11.2M | x0r = x0r + x1r; |
132 | 11.2M | x0i = x0i + x1i; |
133 | 11.2M | x1r = x0r - (x1r * 2); |
134 | 11.2M | x1i = x0i - (x1i * 2); |
135 | 11.2M | x2r = x2r - x3i; |
136 | 11.2M | x2i = x2i + x3r; |
137 | 11.2M | x3i = x2r + (x3i * 2); |
138 | 11.2M | x3r = x2i - (x3r * 2); |
139 | | |
140 | 11.2M | *data = x0r; |
141 | 11.2M | *(data + 1) = x0i; |
142 | 11.2M | data += ((SIZE_T)del << 1); |
143 | | |
144 | 11.2M | *data = x2r; |
145 | 11.2M | *(data + 1) = x2i; |
146 | 11.2M | data += ((SIZE_T)del << 1); |
147 | | |
148 | 11.2M | *data = x1r; |
149 | 11.2M | *(data + 1) = x1i; |
150 | 11.2M | data += ((SIZE_T)del << 1); |
151 | | |
152 | 11.2M | *data = x3i; |
153 | 11.2M | *(data + 1) = x3r; |
154 | 11.2M | data += ((SIZE_T)del << 1); |
155 | 11.2M | } |
156 | 8.72M | data = ptr_y + 2; |
157 | | |
158 | 8.72M | sec_loop_cnt = (nodespacing * del); |
159 | 8.72M | sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + |
160 | 8.72M | (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - |
161 | 8.72M | (sec_loop_cnt / 256); |
162 | | |
163 | 17.4M | for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { |
164 | 8.72M | W1 = *(twiddles + j); |
165 | 8.72M | W4 = *(twiddles + j + 257); |
166 | 8.72M | W2 = *(twiddles + ((SIZE_T)j << 1)); |
167 | 8.72M | W5 = *(twiddles + ((SIZE_T)j << 1) + 257); |
168 | 8.72M | W3 = *(twiddles + j + ((SIZE_T)j << 1)); |
169 | 8.72M | W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257); |
170 | | |
171 | 19.9M | for (k = in_loop_cnt; k != 0; k--) { |
172 | 11.2M | FLOAT32 tmp; |
173 | 11.2M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
174 | | |
175 | 11.2M | data += ((SIZE_T)del << 1); |
176 | | |
177 | 11.2M | x1r = *data; |
178 | 11.2M | x1i = *(data + 1); |
179 | 11.2M | data += ((SIZE_T)del << 1); |
180 | | |
181 | 11.2M | x2r = *data; |
182 | 11.2M | x2i = *(data + 1); |
183 | 11.2M | data += ((SIZE_T)del << 1); |
184 | | |
185 | 11.2M | x3r = *data; |
186 | 11.2M | x3i = *(data + 1); |
187 | 11.2M | data -= 3 * ((SIZE_T)del << 1); |
188 | | |
189 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
190 | 11.2M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
191 | 11.2M | x1r = tmp; |
192 | | |
193 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
194 | 11.2M | x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2); |
195 | 11.2M | x2r = tmp; |
196 | | |
197 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6)); |
198 | 11.2M | x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3); |
199 | 11.2M | x3r = tmp; |
200 | | |
201 | 11.2M | x0r = (*data); |
202 | 11.2M | x0i = (*(data + 1)); |
203 | | |
204 | 11.2M | x0r = x0r + (x2r); |
205 | 11.2M | x0i = x0i + (x2i); |
206 | 11.2M | x2r = x0r - (x2r * 2); |
207 | 11.2M | x2i = x0i - (x2i * 2); |
208 | 11.2M | x1r = x1r + x3r; |
209 | 11.2M | x1i = x1i + x3i; |
210 | 11.2M | x3r = x1r - (x3r * 2); |
211 | 11.2M | x3i = x1i - (x3i * 2); |
212 | | |
213 | 11.2M | x0r = x0r + (x1r); |
214 | 11.2M | x0i = x0i + (x1i); |
215 | 11.2M | x1r = x0r - (x1r * 2); |
216 | 11.2M | x1i = x0i - (x1i * 2); |
217 | 11.2M | x2r = x2r - (x3i); |
218 | 11.2M | x2i = x2i + (x3r); |
219 | 11.2M | x3i = x2r + (x3i * 2); |
220 | 11.2M | x3r = x2i - (x3r * 2); |
221 | | |
222 | 11.2M | *data = x0r; |
223 | 11.2M | *(data + 1) = x0i; |
224 | 11.2M | data += ((SIZE_T)del << 1); |
225 | | |
226 | 11.2M | *data = x2r; |
227 | 11.2M | *(data + 1) = x2i; |
228 | 11.2M | data += ((SIZE_T)del << 1); |
229 | | |
230 | 11.2M | *data = x1r; |
231 | 11.2M | *(data + 1) = x1i; |
232 | 11.2M | data += ((SIZE_T)del << 1); |
233 | | |
234 | 11.2M | *data = x3i; |
235 | 11.2M | *(data + 1) = x3r; |
236 | 11.2M | data += ((SIZE_T)del << 1); |
237 | 11.2M | } |
238 | 8.72M | data -= 2 * npoints; |
239 | 8.72M | data += 2; |
240 | 8.72M | } |
241 | 17.4M | for (; j <= (nodespacing * del) >> 1; j += nodespacing) { |
242 | 8.72M | W1 = *(twiddles + j); |
243 | 8.72M | W4 = *(twiddles + j + 257); |
244 | 8.72M | W2 = *(twiddles + ((SIZE_T)j << 1)); |
245 | 8.72M | W5 = *(twiddles + ((SIZE_T)j << 1) + 257); |
246 | 8.72M | W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); |
247 | 8.72M | W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); |
248 | | |
249 | 19.9M | for (k = in_loop_cnt; k != 0; k--) { |
250 | 11.2M | FLOAT32 tmp; |
251 | 11.2M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
252 | | |
253 | 11.2M | data += ((SIZE_T)del << 1); |
254 | | |
255 | 11.2M | x1r = *data; |
256 | 11.2M | x1i = *(data + 1); |
257 | 11.2M | data += ((SIZE_T)del << 1); |
258 | | |
259 | 11.2M | x2r = *data; |
260 | 11.2M | x2i = *(data + 1); |
261 | 11.2M | data += ((SIZE_T)del << 1); |
262 | | |
263 | 11.2M | x3r = *data; |
264 | 11.2M | x3i = *(data + 1); |
265 | 11.2M | data -= 3 * ((SIZE_T)del << 1); |
266 | | |
267 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
268 | 11.2M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
269 | 11.2M | x1r = tmp; |
270 | | |
271 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
272 | 11.2M | x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2); |
273 | 11.2M | x2r = tmp; |
274 | | |
275 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3)); |
276 | 11.2M | x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6)); |
277 | 11.2M | x3r = tmp; |
278 | | |
279 | 11.2M | x0r = (*data); |
280 | 11.2M | x0i = (*(data + 1)); |
281 | | |
282 | 11.2M | x0r = x0r + (x2r); |
283 | 11.2M | x0i = x0i + (x2i); |
284 | 11.2M | x2r = x0r - (x2r * 2); |
285 | 11.2M | x2i = x0i - (x2i * 2); |
286 | 11.2M | x1r = x1r + x3r; |
287 | 11.2M | x1i = x1i + x3i; |
288 | 11.2M | x3r = x1r - (x3r * 2); |
289 | 11.2M | x3i = x1i - (x3i * 2); |
290 | | |
291 | 11.2M | x0r = x0r + (x1r); |
292 | 11.2M | x0i = x0i + (x1i); |
293 | 11.2M | x1r = x0r - (x1r * 2); |
294 | 11.2M | x1i = x0i - (x1i * 2); |
295 | 11.2M | x2r = x2r - (x3i); |
296 | 11.2M | x2i = x2i + (x3r); |
297 | 11.2M | x3i = x2r + (x3i * 2); |
298 | 11.2M | x3r = x2i - (x3r * 2); |
299 | | |
300 | 11.2M | *data = x0r; |
301 | 11.2M | *(data + 1) = x0i; |
302 | 11.2M | data += ((SIZE_T)del << 1); |
303 | | |
304 | 11.2M | *data = x2r; |
305 | 11.2M | *(data + 1) = x2i; |
306 | 11.2M | data += ((SIZE_T)del << 1); |
307 | | |
308 | 11.2M | *data = x1r; |
309 | 11.2M | *(data + 1) = x1i; |
310 | 11.2M | data += ((SIZE_T)del << 1); |
311 | | |
312 | 11.2M | *data = x3i; |
313 | 11.2M | *(data + 1) = x3r; |
314 | 11.2M | data += ((SIZE_T)del << 1); |
315 | 11.2M | } |
316 | 8.72M | data -= 2 * npoints; |
317 | 8.72M | data += 2; |
318 | 8.72M | } |
319 | 8.72M | for (; j <= sec_loop_cnt * 2; j += nodespacing) { |
320 | 0 | W1 = *(twiddles + j); |
321 | 0 | W4 = *(twiddles + j + 257); |
322 | 0 | W2 = *(twiddles + ((SIZE_T)j << 1) - 256); |
323 | 0 | W5 = *(twiddles + ((SIZE_T)j << 1) + 1); |
324 | 0 | W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); |
325 | 0 | W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); |
326 | |
|
327 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
328 | 0 | FLOAT32 tmp; |
329 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
330 | |
|
331 | 0 | data += ((SIZE_T)del << 1); |
332 | |
|
333 | 0 | x1r = *data; |
334 | 0 | x1i = *(data + 1); |
335 | 0 | data += ((SIZE_T)del << 1); |
336 | |
|
337 | 0 | x2r = *data; |
338 | 0 | x2i = *(data + 1); |
339 | 0 | data += ((SIZE_T)del << 1); |
340 | |
|
341 | 0 | x3r = *data; |
342 | 0 | x3i = *(data + 1); |
343 | 0 | data -= 3 * ((SIZE_T)del << 1); |
344 | |
|
345 | 0 | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
346 | 0 | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
347 | 0 | x1r = tmp; |
348 | |
|
349 | 0 | tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2)); |
350 | 0 | x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
351 | 0 | x2r = tmp; |
352 | |
|
353 | 0 | tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3)); |
354 | 0 | x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6)); |
355 | 0 | x3r = tmp; |
356 | |
|
357 | 0 | x0r = (*data); |
358 | 0 | x0i = (*(data + 1)); |
359 | |
|
360 | 0 | x0r = x0r + (x2r); |
361 | 0 | x0i = x0i + (x2i); |
362 | 0 | x2r = x0r - (x2r * 2); |
363 | 0 | x2i = x0i - (x2i * 2); |
364 | 0 | x1r = x1r + x3r; |
365 | 0 | x1i = x1i + x3i; |
366 | 0 | x3r = x1r - (x3r * 2); |
367 | 0 | x3i = x1i - (x3i * 2); |
368 | |
|
369 | 0 | x0r = x0r + (x1r); |
370 | 0 | x0i = x0i + (x1i); |
371 | 0 | x1r = x0r - (x1r * 2); |
372 | 0 | x1i = x0i - (x1i * 2); |
373 | 0 | x2r = x2r - (x3i); |
374 | 0 | x2i = x2i + (x3r); |
375 | 0 | x3i = x2r + (x3i * 2); |
376 | 0 | x3r = x2i - (x3r * 2); |
377 | |
|
378 | 0 | *data = x0r; |
379 | 0 | *(data + 1) = x0i; |
380 | 0 | data += ((SIZE_T)del << 1); |
381 | |
|
382 | 0 | *data = x2r; |
383 | 0 | *(data + 1) = x2i; |
384 | 0 | data += ((SIZE_T)del << 1); |
385 | |
|
386 | 0 | *data = x1r; |
387 | 0 | *(data + 1) = x1i; |
388 | 0 | data += ((SIZE_T)del << 1); |
389 | |
|
390 | 0 | *data = x3i; |
391 | 0 | *(data + 1) = x3r; |
392 | 0 | data += ((SIZE_T)del << 1); |
393 | 0 | } |
394 | 0 | data -= 2 * npoints; |
395 | 0 | data += 2; |
396 | 0 | } |
397 | 17.4M | for (; j < nodespacing * del; j += nodespacing) { |
398 | 8.72M | W1 = *(twiddles + j); |
399 | 8.72M | W4 = *(twiddles + j + 257); |
400 | 8.72M | W2 = *(twiddles + ((SIZE_T)j << 1) - 256); |
401 | 8.72M | W5 = *(twiddles + ((SIZE_T)j << 1) + 1); |
402 | 8.72M | W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512); |
403 | 8.72M | W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257); |
404 | | |
405 | 19.9M | for (k = in_loop_cnt; k != 0; k--) { |
406 | 11.2M | FLOAT32 tmp; |
407 | 11.2M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
408 | | |
409 | 11.2M | data += ((SIZE_T)del << 1); |
410 | | |
411 | 11.2M | x1r = *data; |
412 | 11.2M | x1i = *(data + 1); |
413 | 11.2M | data += ((SIZE_T)del << 1); |
414 | | |
415 | 11.2M | x2r = *data; |
416 | 11.2M | x2i = *(data + 1); |
417 | 11.2M | data += ((SIZE_T)del << 1); |
418 | | |
419 | 11.2M | x3r = *data; |
420 | 11.2M | x3i = *(data + 1); |
421 | 11.2M | data -= 3 * ((SIZE_T)del << 1); |
422 | | |
423 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
424 | 11.2M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
425 | 11.2M | x1r = tmp; |
426 | | |
427 | 11.2M | tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2)); |
428 | 11.2M | x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
429 | 11.2M | x2r = tmp; |
430 | | |
431 | 11.2M | tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6)); |
432 | 11.2M | x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3); |
433 | 11.2M | x3r = tmp; |
434 | | |
435 | 11.2M | x0r = (*data); |
436 | 11.2M | x0i = (*(data + 1)); |
437 | | |
438 | 11.2M | x0r = x0r + (x2r); |
439 | 11.2M | x0i = x0i + (x2i); |
440 | 11.2M | x2r = x0r - (x2r * 2); |
441 | 11.2M | x2i = x0i - (x2i * 2); |
442 | 11.2M | x1r = x1r + x3r; |
443 | 11.2M | x1i = x1i - x3i; |
444 | 11.2M | x3r = x1r - (x3r * 2); |
445 | 11.2M | x3i = x1i + (x3i * 2); |
446 | | |
447 | 11.2M | x0r = x0r + (x1r); |
448 | 11.2M | x0i = x0i + (x1i); |
449 | 11.2M | x1r = x0r - (x1r * 2); |
450 | 11.2M | x1i = x0i - (x1i * 2); |
451 | 11.2M | x2r = x2r - (x3i); |
452 | 11.2M | x2i = x2i + (x3r); |
453 | 11.2M | x3i = x2r + (x3i * 2); |
454 | 11.2M | x3r = x2i - (x3r * 2); |
455 | | |
456 | 11.2M | *data = x0r; |
457 | 11.2M | *(data + 1) = x0i; |
458 | 11.2M | data += ((SIZE_T)del << 1); |
459 | | |
460 | 11.2M | *data = x2r; |
461 | 11.2M | *(data + 1) = x2i; |
462 | 11.2M | data += ((SIZE_T)del << 1); |
463 | | |
464 | 11.2M | *data = x1r; |
465 | 11.2M | *(data + 1) = x1i; |
466 | 11.2M | data += ((SIZE_T)del << 1); |
467 | | |
468 | 11.2M | *data = x3i; |
469 | 11.2M | *(data + 1) = x3r; |
470 | 11.2M | data += ((SIZE_T)del << 1); |
471 | 11.2M | } |
472 | 8.72M | data -= 2 * npoints; |
473 | 8.72M | data += 2; |
474 | 8.72M | } |
475 | 8.72M | nodespacing >>= 2; |
476 | 8.72M | del <<= 2; |
477 | 8.72M | in_loop_cnt >>= 2; |
478 | 8.72M | } |
479 | | |
480 | 29.9M | if (not_power_4) { |
481 | 23.7M | const FLOAT32 *twiddles = ptr_w; |
482 | 23.7M | nodespacing <<= 1; |
483 | | |
484 | 86.3M | for (j = del / 2; j != 0; j--) { |
485 | 62.5M | FLOAT32 W1 = *twiddles; |
486 | 62.5M | FLOAT32 W4 = *(twiddles + 257); |
487 | 62.5M | FLOAT32 tmp; |
488 | 62.5M | twiddles += nodespacing; |
489 | | |
490 | 62.5M | x0r = *ptr_y; |
491 | 62.5M | x0i = *(ptr_y + 1); |
492 | 62.5M | ptr_y += ((SIZE_T)del << 1); |
493 | | |
494 | 62.5M | x1r = *ptr_y; |
495 | 62.5M | x1i = *(ptr_y + 1); |
496 | | |
497 | 62.5M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
498 | 62.5M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
499 | 62.5M | x1r = tmp; |
500 | | |
501 | 62.5M | *ptr_y = (x0r) - (x1r); |
502 | 62.5M | *(ptr_y + 1) = (x0i) - (x1i); |
503 | 62.5M | ptr_y -= ((SIZE_T)del << 1); |
504 | | |
505 | 62.5M | *ptr_y = (x0r) + (x1r); |
506 | 62.5M | *(ptr_y + 1) = (x0i) + (x1i); |
507 | 62.5M | ptr_y += 2; |
508 | 62.5M | } |
509 | 23.7M | twiddles = ptr_w; |
510 | 86.3M | for (j = del / 2; j != 0; j--) { |
511 | 62.5M | FLOAT32 W1 = *twiddles; |
512 | 62.5M | FLOAT32 W4 = *(twiddles + 257); |
513 | 62.5M | FLOAT32 tmp; |
514 | 62.5M | twiddles += nodespacing; |
515 | | |
516 | 62.5M | x0r = *ptr_y; |
517 | 62.5M | x0i = *(ptr_y + 1); |
518 | 62.5M | ptr_y += ((SIZE_T)del << 1); |
519 | | |
520 | 62.5M | x1r = *ptr_y; |
521 | 62.5M | x1i = *(ptr_y + 1); |
522 | 62.5M | tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1)); |
523 | 62.5M | x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
524 | 62.5M | x1r = tmp; |
525 | | |
526 | 62.5M | *ptr_y = (x0r) - (x1r); |
527 | 62.5M | *(ptr_y + 1) = (x0i) - (x1i); |
528 | 62.5M | ptr_y -= ((SIZE_T)del << 1); |
529 | | |
530 | 62.5M | *ptr_y = (x0r) + (x1r); |
531 | 62.5M | *(ptr_y + 1) = (x0i) + (x1i); |
532 | 62.5M | ptr_y += 2; |
533 | 62.5M | } |
534 | 23.7M | } |
535 | 29.9M | } |
536 | | |
537 | 13.6M | void ixheaac_cmplx_anal_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) { |
538 | 13.6M | WORD32 i, j, k, n_stages, h2; |
539 | 13.6M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
540 | 13.6M | WORD32 del, nodespacing, in_loop_cnt; |
541 | 13.6M | WORD32 not_power_4; |
542 | 13.6M | WORD32 dig_rev_shift; |
543 | 13.6M | const FLOAT32 *ptr_w; |
544 | | |
545 | 13.6M | dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16; |
546 | 13.6M | n_stages = 30 - ixheaac_norm32(npoints); |
547 | 13.6M | not_power_4 = n_stages & 1; |
548 | | |
549 | 13.6M | n_stages = n_stages >> 1; |
550 | | |
551 | 13.6M | ptr_w = ixheaac_twiddle_table_fft_float; |
552 | | |
553 | 94.1M | for (i = 0; i < npoints; i += 4) { |
554 | 80.4M | FLOAT32 *inp = ptr_x; |
555 | | |
556 | 80.4M | DIG_REV(i, dig_rev_shift, h2); |
557 | 80.4M | if (not_power_4) { |
558 | 24.8M | h2 += 1; |
559 | 24.8M | h2 &= ~1; |
560 | 24.8M | } |
561 | 80.4M | inp += (h2); |
562 | | |
563 | 80.4M | x0r = *inp; |
564 | 80.4M | x0i = *(inp + 1); |
565 | 80.4M | inp += (npoints >> 1); |
566 | | |
567 | 80.4M | x1r = *inp; |
568 | 80.4M | x1i = *(inp + 1); |
569 | 80.4M | inp += (npoints >> 1); |
570 | | |
571 | 80.4M | x2r = *inp; |
572 | 80.4M | x2i = *(inp + 1); |
573 | 80.4M | inp += (npoints >> 1); |
574 | | |
575 | 80.4M | x3r = *inp; |
576 | 80.4M | x3i = *(inp + 1); |
577 | | |
578 | 80.4M | x0r = x0r + x2r; |
579 | 80.4M | x0i = x0i + x2i; |
580 | 80.4M | x2r = x0r - (x2r * 2); |
581 | 80.4M | x2i = x0i - (x2i * 2); |
582 | 80.4M | x1r = x1r + x3r; |
583 | 80.4M | x1i = x1i + x3i; |
584 | 80.4M | x3r = x1r - (x3r * 2); |
585 | 80.4M | x3i = x1i - (x3i * 2); |
586 | | |
587 | 80.4M | x0r = x0r + x1r; |
588 | 80.4M | x0i = x0i + x1i; |
589 | 80.4M | x1r = x0r - (x1r * 2); |
590 | 80.4M | x1i = x0i - (x1i * 2); |
591 | 80.4M | x2r = x2r - x3i; |
592 | 80.4M | x2i = x2i + x3r; |
593 | 80.4M | x3i = x2r + (x3i * 2); |
594 | 80.4M | x3r = x2i - (x3r * 2); |
595 | | |
596 | 80.4M | *ptr_y++ = x0r; |
597 | 80.4M | *ptr_y++ = x0i; |
598 | 80.4M | *ptr_y++ = x2r; |
599 | 80.4M | *ptr_y++ = x2i; |
600 | 80.4M | *ptr_y++ = x1r; |
601 | 80.4M | *ptr_y++ = x1i; |
602 | 80.4M | *ptr_y++ = x3i; |
603 | 80.4M | *ptr_y++ = x3r; |
604 | 80.4M | } |
605 | 13.6M | ptr_y -= 2 * npoints; |
606 | 13.6M | del = 4; |
607 | 13.6M | nodespacing = 64; |
608 | 13.6M | in_loop_cnt = npoints >> 4; |
609 | 28.4M | for (i = n_stages - 1; i > 0; i--) { |
610 | 14.7M | const FLOAT32 *twiddles = ptr_w; |
611 | 14.7M | FLOAT32 *data = ptr_y; |
612 | 14.7M | FLOAT32 W1, W2, W3, W4, W5, W6; |
613 | 14.7M | WORD32 sec_loop_cnt; |
614 | | |
615 | 36.0M | for (k = in_loop_cnt; k != 0; k--) { |
616 | 21.2M | x0r = (*data); |
617 | 21.2M | x0i = (*(data + 1)); |
618 | 21.2M | data += ((SIZE_T)del << 1); |
619 | | |
620 | 21.2M | x1r = (*data); |
621 | 21.2M | x1i = (*(data + 1)); |
622 | 21.2M | data += ((SIZE_T)del << 1); |
623 | | |
624 | 21.2M | x2r = (*data); |
625 | 21.2M | x2i = (*(data + 1)); |
626 | 21.2M | data += ((SIZE_T)del << 1); |
627 | | |
628 | 21.2M | x3r = (*data); |
629 | 21.2M | x3i = (*(data + 1)); |
630 | 21.2M | data -= 3 * ((SIZE_T)del << 1); |
631 | | |
632 | 21.2M | x0r = x0r + x2r; |
633 | 21.2M | x0i = x0i + x2i; |
634 | 21.2M | x2r = x0r - (x2r * 2); |
635 | 21.2M | x2i = x0i - (x2i * 2); |
636 | 21.2M | x1r = x1r + x3r; |
637 | 21.2M | x1i = x1i + x3i; |
638 | 21.2M | x3r = x1r - (x3r * 2); |
639 | 21.2M | x3i = x1i - (x3i * 2); |
640 | | |
641 | 21.2M | x0r = x0r + x1r; |
642 | 21.2M | x0i = x0i + x1i; |
643 | 21.2M | x1r = x0r - (x1r * 2); |
644 | 21.2M | x1i = x0i - (x1i * 2); |
645 | 21.2M | x2r = x2r - x3i; |
646 | 21.2M | x2i = x2i + x3r; |
647 | 21.2M | x3i = x2r + (x3i * 2); |
648 | 21.2M | x3r = x2i - (x3r * 2); |
649 | | |
650 | 21.2M | *data = x0r; |
651 | 21.2M | *(data + 1) = x0i; |
652 | 21.2M | data += ((SIZE_T)del << 1); |
653 | | |
654 | 21.2M | *data = x2r; |
655 | 21.2M | *(data + 1) = x2i; |
656 | 21.2M | data += ((SIZE_T)del << 1); |
657 | | |
658 | 21.2M | *data = x1r; |
659 | 21.2M | *(data + 1) = x1i; |
660 | 21.2M | data += ((SIZE_T)del << 1); |
661 | | |
662 | 21.2M | *data = x3i; |
663 | 21.2M | *(data + 1) = x3r; |
664 | 21.2M | data += ((SIZE_T)del << 1); |
665 | 21.2M | } |
666 | 14.7M | data = ptr_y + 2; |
667 | | |
668 | 14.7M | sec_loop_cnt = (nodespacing * del); |
669 | 14.7M | sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + |
670 | 14.7M | (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - |
671 | 14.7M | (sec_loop_cnt / 256); |
672 | | |
673 | 34.0M | for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { |
674 | 19.2M | W1 = *(twiddles + j); |
675 | 19.2M | W4 = *(twiddles + j + 257); |
676 | 19.2M | W2 = *(twiddles + ((SIZE_T)j << 1)); |
677 | 19.2M | W5 = *(twiddles + ((SIZE_T)j << 1) + 257); |
678 | 19.2M | W3 = *(twiddles + j + ((SIZE_T)j << 1)); |
679 | 19.2M | W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257); |
680 | | |
681 | 44.9M | for (k = in_loop_cnt; k != 0; k--) { |
682 | 25.6M | FLOAT32 tmp; |
683 | 25.6M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
684 | | |
685 | 25.6M | data += ((SIZE_T)del << 1); |
686 | | |
687 | 25.6M | x1r = *data; |
688 | 25.6M | x1i = *(data + 1); |
689 | 25.6M | data += ((SIZE_T)del << 1); |
690 | | |
691 | 25.6M | x2r = *data; |
692 | 25.6M | x2i = *(data + 1); |
693 | 25.6M | data += ((SIZE_T)del << 1); |
694 | | |
695 | 25.6M | x3r = *data; |
696 | 25.6M | x3i = *(data + 1); |
697 | 25.6M | data -= 3 * ((SIZE_T)del << 1); |
698 | | |
699 | 25.6M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
700 | 25.6M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
701 | 25.6M | x1r = tmp; |
702 | | |
703 | 25.6M | tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
704 | 25.6M | x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2); |
705 | 25.6M | x2r = tmp; |
706 | | |
707 | 25.6M | tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6)); |
708 | 25.6M | x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3); |
709 | 25.6M | x3r = tmp; |
710 | | |
711 | 25.6M | x0r = (*data); |
712 | 25.6M | x0i = (*(data + 1)); |
713 | | |
714 | 25.6M | x0r = x0r + (x2r); |
715 | 25.6M | x0i = x0i + (x2i); |
716 | 25.6M | x2r = x0r - (x2r * 2); |
717 | 25.6M | x2i = x0i - (x2i * 2); |
718 | 25.6M | x1r = x1r + x3r; |
719 | 25.6M | x1i = x1i + x3i; |
720 | 25.6M | x3r = x1r - (x3r * 2); |
721 | 25.6M | x3i = x1i - (x3i * 2); |
722 | | |
723 | 25.6M | x0r = x0r + (x1r); |
724 | 25.6M | x0i = x0i + (x1i); |
725 | 25.6M | x1r = x0r - (x1r * 2); |
726 | 25.6M | x1i = x0i - (x1i * 2); |
727 | 25.6M | x2r = x2r - (x3i); |
728 | 25.6M | x2i = x2i + (x3r); |
729 | 25.6M | x3i = x2r + (x3i * 2); |
730 | 25.6M | x3r = x2i - (x3r * 2); |
731 | | |
732 | 25.6M | *data = x0r; |
733 | 25.6M | *(data + 1) = x0i; |
734 | 25.6M | data += ((SIZE_T)del << 1); |
735 | | |
736 | 25.6M | *data = x2r; |
737 | 25.6M | *(data + 1) = x2i; |
738 | 25.6M | data += ((SIZE_T)del << 1); |
739 | | |
740 | 25.6M | *data = x1r; |
741 | 25.6M | *(data + 1) = x1i; |
742 | 25.6M | data += ((SIZE_T)del << 1); |
743 | | |
744 | 25.6M | *data = x3i; |
745 | 25.6M | *(data + 1) = x3r; |
746 | 25.6M | data += ((SIZE_T)del << 1); |
747 | 25.6M | } |
748 | 19.2M | data -= 2 * npoints; |
749 | 19.2M | data += 2; |
750 | 19.2M | } |
751 | 31.8M | for (; j <= (nodespacing * del) >> 1; j += nodespacing) { |
752 | 17.0M | W1 = *(twiddles + j); |
753 | 17.0M | W4 = *(twiddles + j + 257); |
754 | 17.0M | W2 = *(twiddles + ((SIZE_T)j << 1)); |
755 | 17.0M | W5 = *(twiddles + ((SIZE_T)j << 1) + 257); |
756 | 17.0M | W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); |
757 | 17.0M | W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); |
758 | | |
759 | 40.4M | for (k = in_loop_cnt; k != 0; k--) { |
760 | 23.4M | FLOAT32 tmp; |
761 | 23.4M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
762 | | |
763 | 23.4M | data += ((SIZE_T)del << 1); |
764 | | |
765 | 23.4M | x1r = *data; |
766 | 23.4M | x1i = *(data + 1); |
767 | 23.4M | data += ((SIZE_T)del << 1); |
768 | | |
769 | 23.4M | x2r = *data; |
770 | 23.4M | x2i = *(data + 1); |
771 | 23.4M | data += ((SIZE_T)del << 1); |
772 | | |
773 | 23.4M | x3r = *data; |
774 | 23.4M | x3i = *(data + 1); |
775 | 23.4M | data -= 3 * ((SIZE_T)del << 1); |
776 | | |
777 | 23.4M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
778 | 23.4M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
779 | 23.4M | x1r = tmp; |
780 | | |
781 | 23.4M | tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
782 | 23.4M | x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2); |
783 | 23.4M | x2r = tmp; |
784 | | |
785 | 23.4M | tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3)); |
786 | 23.4M | x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6)); |
787 | 23.4M | x3r = tmp; |
788 | | |
789 | 23.4M | x0r = (*data); |
790 | 23.4M | x0i = (*(data + 1)); |
791 | | |
792 | 23.4M | x0r = x0r + (x2r); |
793 | 23.4M | x0i = x0i + (x2i); |
794 | 23.4M | x2r = x0r - (x2r * 2); |
795 | 23.4M | x2i = x0i - (x2i * 2); |
796 | 23.4M | x1r = x1r + x3r; |
797 | 23.4M | x1i = x1i + x3i; |
798 | 23.4M | x3r = x1r - (x3r * 2); |
799 | 23.4M | x3i = x1i - (x3i * 2); |
800 | | |
801 | 23.4M | x0r = x0r + (x1r); |
802 | 23.4M | x0i = x0i + (x1i); |
803 | 23.4M | x1r = x0r - (x1r * 2); |
804 | 23.4M | x1i = x0i - (x1i * 2); |
805 | 23.4M | x2r = x2r - (x3i); |
806 | 23.4M | x2i = x2i + (x3r); |
807 | 23.4M | x3i = x2r + (x3i * 2); |
808 | 23.4M | x3r = x2i - (x3r * 2); |
809 | | |
810 | 23.4M | *data = x0r; |
811 | 23.4M | *(data + 1) = x0i; |
812 | 23.4M | data += ((SIZE_T)del << 1); |
813 | | |
814 | 23.4M | *data = x2r; |
815 | 23.4M | *(data + 1) = x2i; |
816 | 23.4M | data += ((SIZE_T)del << 1); |
817 | | |
818 | 23.4M | *data = x1r; |
819 | 23.4M | *(data + 1) = x1i; |
820 | 23.4M | data += ((SIZE_T)del << 1); |
821 | | |
822 | 23.4M | *data = x3i; |
823 | 23.4M | *(data + 1) = x3r; |
824 | 23.4M | data += ((SIZE_T)del << 1); |
825 | 23.4M | } |
826 | 17.0M | data -= 2 * npoints; |
827 | 17.0M | data += 2; |
828 | 17.0M | } |
829 | 17.0M | for (; j <= sec_loop_cnt * 2; j += nodespacing) { |
830 | 2.22M | W1 = *(twiddles + j); |
831 | 2.22M | W4 = *(twiddles + j + 257); |
832 | 2.22M | W2 = *(twiddles + ((SIZE_T)j << 1) - 256); |
833 | 2.22M | W5 = *(twiddles + ((SIZE_T)j << 1) + 1); |
834 | 2.22M | W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); |
835 | 2.22M | W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); |
836 | | |
837 | 4.44M | for (k = in_loop_cnt; k != 0; k--) { |
838 | 2.22M | FLOAT32 tmp; |
839 | 2.22M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
840 | | |
841 | 2.22M | data += ((SIZE_T)del << 1); |
842 | | |
843 | 2.22M | x1r = *data; |
844 | 2.22M | x1i = *(data + 1); |
845 | 2.22M | data += ((SIZE_T)del << 1); |
846 | | |
847 | 2.22M | x2r = *data; |
848 | 2.22M | x2i = *(data + 1); |
849 | 2.22M | data += ((SIZE_T)del << 1); |
850 | | |
851 | 2.22M | x3r = *data; |
852 | 2.22M | x3i = *(data + 1); |
853 | 2.22M | data -= 3 * ((SIZE_T)del << 1); |
854 | | |
855 | 2.22M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
856 | 2.22M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
857 | 2.22M | x1r = tmp; |
858 | | |
859 | 2.22M | tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2)); |
860 | 2.22M | x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
861 | 2.22M | x2r = tmp; |
862 | | |
863 | 2.22M | tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3)); |
864 | 2.22M | x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6)); |
865 | 2.22M | x3r = tmp; |
866 | | |
867 | 2.22M | x0r = (*data); |
868 | 2.22M | x0i = (*(data + 1)); |
869 | | |
870 | 2.22M | x0r = x0r + (x2r); |
871 | 2.22M | x0i = x0i + (x2i); |
872 | 2.22M | x2r = x0r - (x2r * 2); |
873 | 2.22M | x2i = x0i - (x2i * 2); |
874 | 2.22M | x1r = x1r + x3r; |
875 | 2.22M | x1i = x1i + x3i; |
876 | 2.22M | x3r = x1r - (x3r * 2); |
877 | 2.22M | x3i = x1i - (x3i * 2); |
878 | | |
879 | 2.22M | x0r = x0r + (x1r); |
880 | 2.22M | x0i = x0i + (x1i); |
881 | 2.22M | x1r = x0r - (x1r * 2); |
882 | 2.22M | x1i = x0i - (x1i * 2); |
883 | 2.22M | x2r = x2r - (x3i); |
884 | 2.22M | x2i = x2i + (x3r); |
885 | 2.22M | x3i = x2r + (x3i * 2); |
886 | 2.22M | x3r = x2i - (x3r * 2); |
887 | | |
888 | 2.22M | *data = x0r; |
889 | 2.22M | *(data + 1) = x0i; |
890 | 2.22M | data += ((SIZE_T)del << 1); |
891 | | |
892 | 2.22M | *data = x2r; |
893 | 2.22M | *(data + 1) = x2i; |
894 | 2.22M | data += ((SIZE_T)del << 1); |
895 | | |
896 | 2.22M | *data = x1r; |
897 | 2.22M | *(data + 1) = x1i; |
898 | 2.22M | data += ((SIZE_T)del << 1); |
899 | | |
900 | 2.22M | *data = x3i; |
901 | 2.22M | *(data + 1) = x3r; |
902 | 2.22M | data += ((SIZE_T)del << 1); |
903 | 2.22M | } |
904 | 2.22M | data -= 2 * npoints; |
905 | 2.22M | data += 2; |
906 | 2.22M | } |
907 | 34.0M | for (; j < nodespacing * del; j += nodespacing) { |
908 | 19.2M | W1 = *(twiddles + j); |
909 | 19.2M | W4 = *(twiddles + j + 257); |
910 | 19.2M | W2 = *(twiddles + ((SIZE_T)j << 1) - 256); |
911 | 19.2M | W5 = *(twiddles + ((SIZE_T)j << 1) + 1); |
912 | 19.2M | W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512); |
913 | 19.2M | W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257); |
914 | | |
915 | 44.9M | for (k = in_loop_cnt; k != 0; k--) { |
916 | 25.6M | FLOAT32 tmp; |
917 | 25.6M | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
918 | | |
919 | 25.6M | data += ((SIZE_T)del << 1); |
920 | | |
921 | 25.6M | x1r = *data; |
922 | 25.6M | x1i = *(data + 1); |
923 | 25.6M | data += ((SIZE_T)del << 1); |
924 | | |
925 | 25.6M | x2r = *data; |
926 | 25.6M | x2i = *(data + 1); |
927 | 25.6M | data += ((SIZE_T)del << 1); |
928 | | |
929 | 25.6M | x3r = *data; |
930 | 25.6M | x3i = *(data + 1); |
931 | 25.6M | data -= 3 * ((SIZE_T)del << 1); |
932 | | |
933 | 25.6M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
934 | 25.6M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
935 | 25.6M | x1r = tmp; |
936 | | |
937 | 25.6M | tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2)); |
938 | 25.6M | x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5)); |
939 | 25.6M | x2r = tmp; |
940 | | |
941 | 25.6M | tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6)); |
942 | 25.6M | x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3); |
943 | 25.6M | x3r = tmp; |
944 | | |
945 | 25.6M | x0r = (*data); |
946 | 25.6M | x0i = (*(data + 1)); |
947 | | |
948 | 25.6M | x0r = x0r + (x2r); |
949 | 25.6M | x0i = x0i + (x2i); |
950 | 25.6M | x2r = x0r - (x2r * 2); |
951 | 25.6M | x2i = x0i - (x2i * 2); |
952 | 25.6M | x1r = x1r + x3r; |
953 | 25.6M | x1i = x1i - x3i; |
954 | 25.6M | x3r = x1r - (x3r * 2); |
955 | 25.6M | x3i = x1i + (x3i * 2); |
956 | | |
957 | 25.6M | x0r = x0r + (x1r); |
958 | 25.6M | x0i = x0i + (x1i); |
959 | 25.6M | x1r = x0r - (x1r * 2); |
960 | 25.6M | x1i = x0i - (x1i * 2); |
961 | 25.6M | x2r = x2r - (x3i); |
962 | 25.6M | x2i = x2i + (x3r); |
963 | 25.6M | x3i = x2r + (x3i * 2); |
964 | 25.6M | x3r = x2i - (x3r * 2); |
965 | | |
966 | 25.6M | *data = x0r; |
967 | 25.6M | *(data + 1) = x0i; |
968 | 25.6M | data += ((SIZE_T)del << 1); |
969 | | |
970 | 25.6M | *data = x2r; |
971 | 25.6M | *(data + 1) = x2i; |
972 | 25.6M | data += ((SIZE_T)del << 1); |
973 | | |
974 | 25.6M | *data = x1r; |
975 | 25.6M | *(data + 1) = x1i; |
976 | 25.6M | data += ((SIZE_T)del << 1); |
977 | | |
978 | 25.6M | *data = x3i; |
979 | 25.6M | *(data + 1) = x3r; |
980 | 25.6M | data += ((SIZE_T)del << 1); |
981 | 25.6M | } |
982 | 19.2M | data -= 2 * npoints; |
983 | 19.2M | data += 2; |
984 | 19.2M | } |
985 | 14.7M | nodespacing >>= 2; |
986 | 14.7M | del <<= 2; |
987 | 14.7M | in_loop_cnt >>= 2; |
988 | 14.7M | } |
989 | | |
990 | 13.6M | if (not_power_4) { |
991 | 3.10M | const FLOAT32 *twiddles = ptr_w; |
992 | 3.10M | nodespacing <<= 1; |
993 | | |
994 | 27.9M | for (j = del / 2; j != 0; j--) { |
995 | 24.8M | FLOAT32 W1 = *twiddles; |
996 | 24.8M | FLOAT32 W4 = *(twiddles + 257); |
997 | 24.8M | FLOAT32 tmp; |
998 | 24.8M | twiddles += nodespacing; |
999 | | |
1000 | 24.8M | x0r = *ptr_y; |
1001 | 24.8M | x0i = *(ptr_y + 1); |
1002 | 24.8M | ptr_y += ((SIZE_T)del << 1); |
1003 | | |
1004 | 24.8M | x1r = *ptr_y; |
1005 | 24.8M | x1i = *(ptr_y + 1); |
1006 | | |
1007 | 24.8M | tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
1008 | 24.8M | x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1); |
1009 | 24.8M | x1r = tmp; |
1010 | | |
1011 | 24.8M | *ptr_y = (x0r) - (x1r); |
1012 | 24.8M | *(ptr_y + 1) = (x0i) - (x1i); |
1013 | 24.8M | ptr_y -= ((SIZE_T)del << 1); |
1014 | | |
1015 | 24.8M | *ptr_y = (x0r) + (x1r); |
1016 | 24.8M | *(ptr_y + 1) = (x0i) + (x1i); |
1017 | 24.8M | ptr_y += 2; |
1018 | 24.8M | } |
1019 | 3.10M | twiddles = ptr_w; |
1020 | 27.9M | for (j = del / 2; j != 0; j--) { |
1021 | 24.8M | FLOAT32 W1 = *twiddles; |
1022 | 24.8M | FLOAT32 W4 = *(twiddles + 257); |
1023 | 24.8M | FLOAT32 tmp; |
1024 | 24.8M | twiddles += nodespacing; |
1025 | | |
1026 | 24.8M | x0r = *ptr_y; |
1027 | 24.8M | x0i = *(ptr_y + 1); |
1028 | 24.8M | ptr_y += ((SIZE_T)del << 1); |
1029 | | |
1030 | 24.8M | x1r = *ptr_y; |
1031 | 24.8M | x1i = *(ptr_y + 1); |
1032 | | |
1033 | 24.8M | tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1)); |
1034 | 24.8M | x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4)); |
1035 | 24.8M | x1r = tmp; |
1036 | | |
1037 | 24.8M | *ptr_y = (x0r) - (x1r); |
1038 | 24.8M | *(ptr_y + 1) = (x0i) - (x1i); |
1039 | 24.8M | ptr_y -= ((SIZE_T)del << 1); |
1040 | | |
1041 | 24.8M | *ptr_y = (x0r) + (x1r); |
1042 | 24.8M | *(ptr_y + 1) = (x0i) + (x1i); |
1043 | 24.8M | ptr_y += 2; |
1044 | 24.8M | } |
1045 | 3.10M | } |
1046 | 13.6M | } |
1047 | | |
1048 | 106M | static PLATFORM_INLINE void ixheaac_aac_ld_dec_fft_3_float(FLOAT32 *inp, FLOAT32 *op) { |
1049 | 106M | FLOAT32 add_r, sub_r; |
1050 | 106M | FLOAT32 add_i, sub_i; |
1051 | 106M | FLOAT32 temp_real, temp_imag, temp; |
1052 | | |
1053 | 106M | FLOAT32 p1, p2, p3, p4; |
1054 | | |
1055 | 106M | FLOAT32 sinmu; |
1056 | 106M | sinmu = -0.866025403784439f; |
1057 | | |
1058 | 106M | temp_real = inp[0] + inp[2]; |
1059 | 106M | temp_imag = inp[1] + inp[3]; |
1060 | | |
1061 | 106M | add_r = inp[2] + inp[4]; |
1062 | 106M | add_i = inp[3] + inp[5]; |
1063 | | |
1064 | 106M | sub_r = inp[2] - inp[4]; |
1065 | 106M | sub_i = inp[3] - inp[5]; |
1066 | | |
1067 | 106M | p1 = add_r / 2.0f; |
1068 | 106M | p4 = add_i / 2.0f; |
1069 | 106M | p2 = sub_i * sinmu; |
1070 | 106M | p3 = sub_r * sinmu; |
1071 | | |
1072 | 106M | temp = inp[0] - p1; |
1073 | | |
1074 | 106M | op[0] = temp_real + inp[4]; |
1075 | 106M | op[1] = temp_imag + inp[5]; |
1076 | 106M | op[2] = temp + p2; |
1077 | 106M | op[3] = (inp[1] - p3) - p4; |
1078 | 106M | op[4] = temp - p2; |
1079 | 106M | op[5] = (inp[1] + p3) - p4; |
1080 | | |
1081 | 106M | return; |
1082 | 106M | } |
1083 | | |
1084 | 7.04M | void ixheaac_real_synth_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) { |
1085 | 7.04M | WORD32 i, j; |
1086 | 7.04M | FLOAT32 x_3[8]; |
1087 | 7.04M | FLOAT32 y_3[16]; |
1088 | 7.04M | FLOAT32 y[48]; |
1089 | 7.04M | FLOAT32 x[48]; |
1090 | 7.04M | FLOAT32 *ptr_y = y; |
1091 | 7.04M | FLOAT32 *y_p3 = y; |
1092 | 7.04M | FLOAT32 *x_p3 = x; |
1093 | | |
1094 | 28.1M | for (i = 0; i < 3; i += 1) { |
1095 | 190M | for (j = 0; j < (npoints / 3); j++) { |
1096 | 169M | x_3[j] = x_in[3 * j + i]; |
1097 | 169M | } |
1098 | | |
1099 | 21.1M | ixheaac_real_synth_fft_p2(x_3, y_3, 8); |
1100 | | |
1101 | 190M | for (j = 0; j < 16; j += 2) { |
1102 | 169M | x[3 * j + 2 * i] = y_3[j]; |
1103 | 169M | x[3 * j + 2 * i + 1] = y_3[j + 1]; |
1104 | 169M | } |
1105 | 21.1M | } |
1106 | | |
1107 | 7.04M | { |
1108 | 7.04M | FLOAT32 *wr; |
1109 | 7.04M | FLOAT32 tmp; |
1110 | 7.04M | FLOAT32 *x_tw = x; |
1111 | 7.04M | wr = (FLOAT32 *)ixheaac_twidle_tbl_24; |
1112 | 7.04M | x_tw += 2; |
1113 | | |
1114 | 63.4M | for (i = 0; i < (npoints / 3); i++) { |
1115 | 56.3M | tmp = ((*x_tw) * (*wr) + (*(x_tw + 1)) * (*(wr + 1))); |
1116 | 56.3M | *(x_tw + 1) = (-(*x_tw) * (*(wr + 1)) + (*(x_tw + 1)) * (*wr)); |
1117 | 56.3M | *x_tw = tmp; |
1118 | | |
1119 | 56.3M | wr += 2; |
1120 | 56.3M | x_tw += 2; |
1121 | | |
1122 | 56.3M | tmp = ((*x_tw) * (*wr) + (*(x_tw + 1)) * (*(wr + 1))); |
1123 | 56.3M | *(x_tw + 1) = (-(*x_tw) * (*(wr + 1)) + (*(x_tw + 1)) * (*wr)); |
1124 | 56.3M | *x_tw = tmp; |
1125 | | |
1126 | 56.3M | wr += 2; |
1127 | 56.3M | x_tw += 4; |
1128 | 56.3M | } |
1129 | 7.04M | } |
1130 | | |
1131 | 63.4M | for (i = 0; i < (npoints / 3); i++) { |
1132 | 56.3M | ixheaac_aac_ld_dec_fft_3_float(x_p3, y_p3); |
1133 | | |
1134 | 56.3M | x_p3 = x_p3 + 6; |
1135 | 56.3M | y_p3 = y_p3 + 6; |
1136 | 56.3M | } |
1137 | | |
1138 | 63.4M | for (i = 0; i < 16; i += 2) { |
1139 | 56.3M | x_out[i] = *ptr_y++; |
1140 | 56.3M | x_out[i + 1] = *ptr_y++; |
1141 | 56.3M | x_out[16 + i] = *ptr_y++; |
1142 | 56.3M | x_out[16 + i + 1] = *ptr_y++; |
1143 | 56.3M | x_out[32 + i] = *ptr_y++; |
1144 | 56.3M | x_out[32 + i + 1] = *ptr_y++; |
1145 | 56.3M | } |
1146 | 7.04M | } |
1147 | | |
1148 | 3.14M | void ixheaac_cmplx_anal_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) { |
1149 | 3.14M | WORD32 i, j; |
1150 | 3.14M | FLOAT32 x_3[32]; |
1151 | 3.14M | FLOAT32 y_3[32]; |
1152 | 3.14M | FLOAT32 y[96]; |
1153 | 3.14M | FLOAT32 *ptr_x = x_in; |
1154 | 3.14M | FLOAT32 *ptr_y = y; |
1155 | 3.14M | FLOAT32 *y_p3 = y; |
1156 | | |
1157 | 12.5M | for (i = 0; i < 6; i += 2) { |
1158 | 160M | for (j = 0; j < 32; j += 2) { |
1159 | 151M | x_3[j] = x_in[3 * j + i]; |
1160 | 151M | x_3[j + 1] = x_in[3 * j + i + 1]; |
1161 | 151M | } |
1162 | | |
1163 | 9.44M | ixheaac_cmplx_anal_fft_p2(x_3, y_3, 16); |
1164 | | |
1165 | 160M | for (j = 0; j < 32; j += 2) { |
1166 | 151M | x_in[3 * j + i] = y_3[j]; |
1167 | 151M | x_in[3 * j + i + 1] = y_3[j + 1]; |
1168 | 151M | } |
1169 | 9.44M | } |
1170 | | |
1171 | 3.14M | { |
1172 | 3.14M | FLOAT32 *wr; |
1173 | 3.14M | FLOAT32 tmp; |
1174 | 3.14M | wr = (FLOAT32 *)ixheaac_twidle_tbl_48; |
1175 | 3.14M | x_in += 2; |
1176 | | |
1177 | 53.4M | for (i = 0; i < (npoints / 3); i++) { |
1178 | 50.3M | tmp = ((*x_in) * (*wr) + (*(x_in + 1)) * (*(wr + 1))); |
1179 | 50.3M | *(x_in + 1) = (-(*x_in) * (*(wr + 1)) + (*(x_in + 1)) * (*wr)); |
1180 | 50.3M | *x_in = tmp; |
1181 | | |
1182 | 50.3M | wr += 2; |
1183 | 50.3M | x_in += 2; |
1184 | | |
1185 | 50.3M | tmp = ((*x_in) * (*wr) + (*(x_in + 1)) * (*(wr + 1))); |
1186 | 50.3M | *(x_in + 1) = (-(*x_in) * (*(wr + 1)) + (*(x_in + 1)) * (*wr)); |
1187 | 50.3M | *x_in = tmp; |
1188 | | |
1189 | 50.3M | wr += 2; |
1190 | 50.3M | x_in += 4; |
1191 | 50.3M | } |
1192 | 3.14M | } |
1193 | | |
1194 | 53.4M | for (i = 0; i < (npoints / 3); i++) { |
1195 | 50.3M | ixheaac_aac_ld_dec_fft_3_float(ptr_x, ptr_y); |
1196 | | |
1197 | 50.3M | ptr_x = ptr_x + 6; |
1198 | 50.3M | ptr_y = ptr_y + 6; |
1199 | 50.3M | } |
1200 | | |
1201 | 53.4M | for (i = 0; i < 32; i += 2) { |
1202 | 50.3M | x_out[i] = *y_p3++; |
1203 | 50.3M | x_out[i + 1] = *y_p3++; |
1204 | 50.3M | x_out[32 + i] = *y_p3++; |
1205 | 50.3M | x_out[32 + i + 1] = *y_p3++; |
1206 | 50.3M | x_out[64 + i] = *y_p3++; |
1207 | 50.3M | x_out[64 + i + 1] = *y_p3++; |
1208 | 50.3M | } |
1209 | 3.14M | } |