/src/libxaac/decoder/ixheaacd_fft_ifft_32x32.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | #include <stdlib.h> |
21 | | #include <math.h> |
22 | | #include "ixheaac_type_def.h" |
23 | | #include "ixheaac_constants.h" |
24 | | #include "ixheaac_basic_ops32.h" |
25 | | #include "ixheaac_fft_ifft_rom.h" |
26 | | #include "ixheaacd_dsp_fft32x32s.h" |
27 | | |
/*
 * DIG_REV(i, m, j)
 *
 * Takes the unsigned value of i, swaps neighbouring 2-bit groups, then
 * neighbouring 4-bit groups, then neighbouring bytes. The net effect is that
 * the eight 2-bit digits inside each 16-bit half appear in reverse order.
 * The result is shifted right by m and assigned to j.
 *
 * i and m are each evaluated once; j must be an assignable lvalue.
 */
#define DIG_REV(i, m, j)                                                    \
  do {                                                                      \
    unsigned dig_rev_acc_ = (i);                                            \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x33333333u) << 2) |                    \
                   ((dig_rev_acc_ & ~0x33333333u) >> 2);                    \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x0F0F0F0Fu) << 4) |                    \
                   ((dig_rev_acc_ & ~0x0F0F0F0Fu) >> 4);                    \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x00FF00FFu) << 8) |                    \
                   ((dig_rev_acc_ & ~0x00FF00FFu) >> 8);                    \
    (j) = dig_rev_acc_ >> (m);                                              \
  } while (0)
36 | | |
37 | 0 | FLOAT64 ixheaacd_mult32X32float(FLOAT64 a, FLOAT64 b) { |
38 | 0 | FLOAT64 result; |
39 | |
|
40 | 0 | result = a * b; |
41 | |
|
42 | 0 | return result; |
43 | 0 | } |
44 | | |
45 | 0 | FLOAT64 ixheaacd_mac32X32float(FLOAT64 a, FLOAT64 b, FLOAT64 c) { |
46 | 0 | FLOAT64 result; |
47 | |
|
48 | 0 | result = a + b * c; |
49 | |
|
50 | 0 | return result; |
51 | 0 | } |
52 | | |
53 | 0 | VOID ixheaacd_hbe_apply_ifft_7(FLOAT32 *inp, FLOAT32 *op) { |
54 | 0 | FLOAT32 x0r, x1r, x2r, x3r, x4r, x5r, x6r, x7r, x8r; |
55 | 0 | FLOAT32 x0i, x1i, x2i, x3i, x4i, x5i, x6i, x7i, x8i; |
56 | 0 | FLOAT32 y0r, y1r, y2r, y3r, y4r, y5r, y6r, y7r, y8r; |
57 | 0 | FLOAT32 y0i, y1i, y2i, y3i, y4i, y5i, y6i, y7i, y8i; |
58 | |
|
59 | 0 | x0r = inp[0]; |
60 | 0 | x0i = inp[1]; |
61 | 0 | x1r = inp[2] + inp[12]; |
62 | 0 | x1i = inp[3] + inp[13]; |
63 | 0 | x2r = inp[2] - inp[12]; |
64 | 0 | x2i = inp[3] - inp[13]; |
65 | 0 | x3r = inp[4] + inp[10]; |
66 | 0 | x3i = inp[5] + inp[11]; |
67 | 0 | x4r = inp[4] - inp[10]; |
68 | 0 | x4i = inp[5] - inp[11]; |
69 | 0 | x5r = inp[8] + inp[6]; |
70 | 0 | x5i = inp[9] + inp[7]; |
71 | 0 | x6r = inp[8] - inp[6]; |
72 | 0 | x6i = inp[9] - inp[7]; |
73 | |
|
74 | 0 | y0r = x0r; |
75 | 0 | y0i = x0i; |
76 | 0 | y1r = x1r + x3r + x5r; |
77 | 0 | y1i = x1i + x3i + x5i; |
78 | 0 | y2r = x1r - x3r; |
79 | 0 | y2i = x1i - x3i; |
80 | 0 | y3r = x5r - x1r; |
81 | 0 | y3i = x5i - x1i; |
82 | 0 | y4r = x3r - x5r; |
83 | 0 | y4i = x3i - x5i; |
84 | 0 | y5r = x2r + x4r + x6r; |
85 | 0 | y5i = x2i + x4i + x6i; |
86 | 0 | y6r = x2r - x4r; |
87 | 0 | y6i = x2i - x4i; |
88 | 0 | y7r = x6r - x2r; |
89 | 0 | y7i = x6i - x2i; |
90 | 0 | y8r = x4r - x6r; |
91 | 0 | y8i = x4i - x6i; |
92 | |
|
93 | 0 | x0r = y0r + y1r; |
94 | 0 | x0i = y0i + y1i; |
95 | 0 | x1r = y0r + C70 * y1r; |
96 | 0 | x1i = y0i + C70 * y1i; |
97 | 0 | x2r = C71 * y2r; |
98 | 0 | x2i = C71 * y2i; |
99 | 0 | x3r = C72 * y3r; |
100 | 0 | x3i = C72 * y3i; |
101 | 0 | x4r = C73 * y4r; |
102 | 0 | x4i = C73 * y4i; |
103 | 0 | x5r = C74 * y5i; |
104 | 0 | x5i = -C74 * y5r; |
105 | 0 | x6r = C75 * y6i; |
106 | 0 | x6i = -C75 * y6r; |
107 | 0 | x7r = C76 * y7i; |
108 | 0 | x7i = -C76 * y7r; |
109 | 0 | x8r = C77 * y8i; |
110 | 0 | x8i = -C77 * y8r; |
111 | |
|
112 | 0 | y0r = x0r; |
113 | 0 | y0i = x0i; |
114 | 0 | y1r = x1r + x2r + x4r; |
115 | 0 | y1i = x1i + x2i + x4i; |
116 | 0 | y2r = x1r - x2r - x3r; |
117 | 0 | y2i = x1i - x2i - x3i; |
118 | 0 | y3r = x1r + x3r - x4r; |
119 | 0 | y3i = x1i + x3i - x4i; |
120 | 0 | y4r = x5r + x6r + x8r; |
121 | 0 | y4i = x5i + x6i + x8i; |
122 | 0 | y5r = x5r - x6r - x7r; |
123 | 0 | y5i = x5i - x6i - x7i; |
124 | 0 | y6r = x5r + x7r - x8r; |
125 | 0 | y6i = x5i + x7i - x8i; |
126 | |
|
127 | 0 | x0r = y0r; |
128 | 0 | x0i = y0i; |
129 | 0 | x1r = y1r + y4r; |
130 | 0 | x1i = y1i + y4i; |
131 | 0 | x2r = y3r + y6r; |
132 | 0 | x2i = y3i + y6i; |
133 | 0 | x3r = y2r - y5r; |
134 | 0 | x3i = y2i - y5i; |
135 | 0 | x4r = y2r + y5r; |
136 | 0 | x4i = y2i + y5i; |
137 | 0 | x5r = y3r - y6r; |
138 | 0 | x5i = y3i - y6i; |
139 | 0 | x6r = y1r - y4r; |
140 | 0 | x6i = y1i - y4i; |
141 | |
|
142 | 0 | op[0] = x0r; |
143 | 0 | op[1] = x0i; |
144 | 0 | op[2] = x1r; |
145 | 0 | op[3] = x1i; |
146 | 0 | op[4] = x2r; |
147 | 0 | op[5] = x2i; |
148 | 0 | op[6] = x3r; |
149 | 0 | op[7] = x3i; |
150 | 0 | op[8] = x4r; |
151 | 0 | op[9] = x4i; |
152 | 0 | op[10] = x5r; |
153 | 0 | op[11] = x5i; |
154 | 0 | op[12] = x6r; |
155 | 0 | op[13] = x6i; |
156 | |
|
157 | 0 | return; |
158 | 0 | } |
159 | | |
160 | 0 | VOID ixheaacd_hbe_apply_fft_3(FLOAT32 *inp, FLOAT32 *op, WORD32 i_sign) { |
161 | 0 | FLOAT32 add_r, sub_r; |
162 | 0 | FLOAT32 add_i, sub_i; |
163 | 0 | FLOAT32 X01r, X01i, temp; |
164 | |
|
165 | 0 | FLOAT32 p1, p2, p3, p4; |
166 | |
|
167 | 0 | FLOAT64 sinmu; |
168 | 0 | sinmu = -0.866025403784439 * (FLOAT64)i_sign; |
169 | |
|
170 | 0 | X01r = inp[0] + inp[2]; |
171 | 0 | X01i = inp[1] + inp[3]; |
172 | |
|
173 | 0 | add_r = inp[2] + inp[4]; |
174 | 0 | add_i = inp[3] + inp[5]; |
175 | |
|
176 | 0 | sub_r = inp[2] - inp[4]; |
177 | 0 | sub_i = inp[3] - inp[5]; |
178 | |
|
179 | 0 | p1 = add_r / (FLOAT32)2.0; |
180 | 0 | p4 = add_i / (FLOAT32)2.0; |
181 | 0 | p2 = (FLOAT32)((FLOAT64)sub_i * sinmu); |
182 | 0 | p3 = (FLOAT32)((FLOAT64)sub_r * sinmu); |
183 | |
|
184 | 0 | temp = inp[0] - p1; |
185 | |
|
186 | 0 | op[0] = X01r + inp[4]; |
187 | 0 | op[1] = X01i + inp[5]; |
188 | 0 | op[2] = temp + p2; |
189 | 0 | op[3] = (inp[1] - p3) - p4; |
190 | 0 | op[4] = temp - p2; |
191 | 0 | op[5] = (inp[1] + p3) - p4; |
192 | |
|
193 | 0 | return; |
194 | 0 | } |
195 | | |
196 | | VOID ixheaacd_hbe_apply_tw_mult_ifft(FLOAT32 *inp, FLOAT32 *op, WORD32 dim1, WORD32 dim2, |
197 | 0 | const FLOAT32 *tw) { |
198 | 0 | FLOAT32 accu1, accu2; |
199 | 0 | WORD32 i, j; |
200 | 0 | WORD32 step_val = (dim2 - 1) << 1; |
201 | 0 | for (i = 0; i < (dim2); i++) { |
202 | 0 | op[0] = inp[0]; |
203 | 0 | op[1] = inp[1]; |
204 | 0 | op += 2; |
205 | 0 | inp += 2; |
206 | 0 | } |
207 | |
|
208 | 0 | for (j = 0; j < (dim1 - 1); j++) { |
209 | 0 | op[0] = inp[0]; |
210 | 0 | op[1] = inp[1]; |
211 | 0 | inp += 2; |
212 | 0 | op += 2; |
213 | 0 | for (i = 0; i < (dim2 - 1); i++) { |
214 | 0 | CPLX_MPY_IFFT(accu1, accu2, inp[2 * i + 0], inp[2 * i + 1], tw[2 * i + 1], tw[2 * i]); |
215 | 0 | op[2 * i + 0] = accu1; |
216 | 0 | op[2 * i + 1] = accu2; |
217 | 0 | } |
218 | 0 | inp += step_val; |
219 | 0 | op += step_val; |
220 | 0 | tw += (dim2 - 1) * 2; |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | VOID ixheaacd_hbe_apply_tw_mult_fft(FLOAT32 *inp, FLOAT32 *op, WORD32 dim1, WORD32 dim2, |
225 | 0 | const FLOAT32 *tw) { |
226 | 0 | FLOAT32 accu1, accu2; |
227 | 0 | WORD32 i, j; |
228 | 0 | WORD32 step_val = (dim2 - 1) << 1; |
229 | 0 | for (i = 0; i < (dim2); i++) { |
230 | 0 | op[0] = inp[0]; |
231 | 0 | op[1] = inp[1]; |
232 | 0 | op += 2; |
233 | 0 | inp += 2; |
234 | 0 | } |
235 | |
|
236 | 0 | for (j = 0; j < (dim1 - 1); j++) { |
237 | 0 | op[0] = inp[0]; |
238 | 0 | op[1] = inp[1]; |
239 | 0 | inp += 2; |
240 | 0 | op += 2; |
241 | 0 | for (i = 0; i < (dim2 - 1); i++) { |
242 | 0 | CPLX_MPY_FFT(accu1, accu2, inp[2 * i + 0], inp[2 * i + 1], tw[2 * i + 1], tw[2 * i]); |
243 | 0 | op[2 * i + 0] = accu1; |
244 | 0 | op[2 * i + 1] = accu2; |
245 | 0 | } |
246 | 0 | inp += step_val; |
247 | 0 | op += step_val; |
248 | 0 | tw += (dim2 - 1) * 2; |
249 | 0 | } |
250 | 0 | } |
251 | | |
252 | 0 | VOID ixheaacd_hbe_apply_cfftn(FLOAT32 re[], FLOAT32 *scratch, WORD32 n_pass, WORD32 i_sign) { |
253 | 0 | WORD32 i, j, k, n_stages, h2; |
254 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
255 | 0 | WORD32 del, nodespacing, in_loop_cnt; |
256 | 0 | WORD32 not_power_4; |
257 | 0 | WORD32 dig_rev_shift; |
258 | 0 | WORD32 mpass = n_pass; |
259 | 0 | WORD32 npoints = n_pass; |
260 | 0 | const FLOAT64 *ptr_w; |
261 | 0 | FLOAT32 *ptr_x = scratch; |
262 | 0 | FLOAT32 *y = scratch + (2 * n_pass); |
263 | 0 | FLOAT32 *ptr_y = y; |
264 | |
|
265 | 0 | dig_rev_shift = ixheaac_norm32(mpass) + 1 - 16; |
266 | 0 | n_stages = 30 - ixheaac_norm32(mpass); |
267 | 0 | not_power_4 = n_stages & 1; |
268 | |
|
269 | 0 | n_stages = n_stages >> 1; |
270 | |
|
271 | 0 | ptr_w = ixheaac_twid_tbl_fft_double; |
272 | 0 | ptr_x = re; |
273 | |
|
274 | 0 | if (i_sign == -1) { |
275 | 0 | for (i = 0; i < npoints; i += 4) { |
276 | 0 | FLOAT32 *inp = ptr_x; |
277 | 0 | FLOAT32 tmk; |
278 | |
|
279 | 0 | DIG_REV(i, dig_rev_shift, h2); |
280 | 0 | if (not_power_4) { |
281 | 0 | h2 += 1; |
282 | 0 | h2 &= ~1; |
283 | 0 | } |
284 | 0 | inp += (h2); |
285 | |
|
286 | 0 | x0r = *inp; |
287 | 0 | x0i = *(inp + 1); |
288 | 0 | inp += (npoints >> 1); |
289 | |
|
290 | 0 | x1r = *inp; |
291 | 0 | x1i = *(inp + 1); |
292 | 0 | inp += (npoints >> 1); |
293 | |
|
294 | 0 | x2r = *inp; |
295 | 0 | x2i = *(inp + 1); |
296 | 0 | inp += (npoints >> 1); |
297 | |
|
298 | 0 | x3r = *inp; |
299 | 0 | x3i = *(inp + 1); |
300 | |
|
301 | 0 | x0r = x0r + x2r; |
302 | 0 | x0i = x0i + x2i; |
303 | |
|
304 | 0 | tmk = x0r - x2r; |
305 | 0 | x2r = tmk - x2r; |
306 | 0 | tmk = x0i - x2i; |
307 | 0 | x2i = tmk - x2i; |
308 | |
|
309 | 0 | x1r = x1r + x3r; |
310 | 0 | x1i = x1i + x3i; |
311 | |
|
312 | 0 | tmk = x1r - x3r; |
313 | 0 | x3r = tmk - x3r; |
314 | 0 | tmk = x1i - x3i; |
315 | 0 | x3i = tmk - x3i; |
316 | |
|
317 | 0 | x0r = x0r + x1r; |
318 | 0 | x0i = x0i + x1i; |
319 | |
|
320 | 0 | tmk = x0r - x1r; |
321 | 0 | x1r = tmk - x1r; |
322 | 0 | tmk = x0i - x1i; |
323 | 0 | x1i = tmk - x1i; |
324 | |
|
325 | 0 | x2r = x2r + x3i; |
326 | 0 | x2i = x2i - x3r; |
327 | |
|
328 | 0 | tmk = x2r - x3i; |
329 | 0 | x3i = tmk - x3i; |
330 | 0 | tmk = x2i + x3r; |
331 | 0 | x3r = tmk + x3r; |
332 | |
|
333 | 0 | *ptr_y++ = x0r; |
334 | 0 | *ptr_y++ = x0i; |
335 | 0 | *ptr_y++ = x2r; |
336 | 0 | *ptr_y++ = x2i; |
337 | 0 | *ptr_y++ = x1r; |
338 | 0 | *ptr_y++ = x1i; |
339 | 0 | *ptr_y++ = x3i; |
340 | 0 | *ptr_y++ = x3r; |
341 | 0 | } |
342 | 0 | ptr_y -= 2 * npoints; |
343 | 0 | del = 4; |
344 | 0 | nodespacing = 64; |
345 | 0 | in_loop_cnt = npoints >> 4; |
346 | 0 | for (i = n_stages - 1; i > 0; i--) { |
347 | 0 | const FLOAT64 *twiddles = ptr_w; |
348 | 0 | FLOAT32 *data = ptr_y; |
349 | 0 | FLOAT64 W1, W2, W3, W4, W5, W6; |
350 | 0 | WORD32 sec_loop_cnt; |
351 | |
|
352 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
353 | 0 | x0r = (*data); |
354 | 0 | x0i = (*(data + 1)); |
355 | 0 | data += (del << 1); |
356 | |
|
357 | 0 | x1r = (*data); |
358 | 0 | x1i = (*(data + 1)); |
359 | 0 | data += (del << 1); |
360 | |
|
361 | 0 | x2r = (*data); |
362 | 0 | x2i = (*(data + 1)); |
363 | 0 | data += (del << 1); |
364 | |
|
365 | 0 | x3r = (*data); |
366 | 0 | x3i = (*(data + 1)); |
367 | 0 | data -= 3 * (del << 1); |
368 | |
|
369 | 0 | x0r = x0r + x2r; |
370 | 0 | x0i = x0i + x2i; |
371 | 0 | x2r = x0r - (x2r * 2); |
372 | 0 | x2i = x0i - (x2i * 2); |
373 | 0 | x1r = x1r + x3r; |
374 | 0 | x1i = x1i + x3i; |
375 | 0 | x3r = x1r - (x3r * 2); |
376 | 0 | x3i = x1i - (x3i * 2); |
377 | |
|
378 | 0 | x0r = x0r + x1r; |
379 | 0 | x0i = x0i + x1i; |
380 | 0 | x1r = x0r - (x1r * 2); |
381 | 0 | x1i = x0i - (x1i * 2); |
382 | 0 | x2r = x2r + x3i; |
383 | 0 | x2i = x2i - x3r; |
384 | 0 | x3i = x2r - (x3i * 2); |
385 | 0 | x3r = x2i + (x3r * 2); |
386 | |
|
387 | 0 | *data = x0r; |
388 | 0 | *(data + 1) = x0i; |
389 | 0 | data += (del << 1); |
390 | |
|
391 | 0 | *data = x2r; |
392 | 0 | *(data + 1) = x2i; |
393 | 0 | data += (del << 1); |
394 | |
|
395 | 0 | *data = x1r; |
396 | 0 | *(data + 1) = x1i; |
397 | 0 | data += (del << 1); |
398 | |
|
399 | 0 | *data = x3i; |
400 | 0 | *(data + 1) = x3r; |
401 | 0 | data += (del << 1); |
402 | 0 | } |
403 | 0 | data = ptr_y + 2; |
404 | |
|
405 | 0 | sec_loop_cnt = (nodespacing * del); |
406 | 0 | sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + |
407 | 0 | (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - |
408 | 0 | (sec_loop_cnt / 256); |
409 | 0 | j = nodespacing; |
410 | |
|
411 | 0 | for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { |
412 | 0 | W1 = *(twiddles + j); |
413 | 0 | W4 = *(twiddles + j + 257); |
414 | 0 | W2 = *(twiddles + (j << 1)); |
415 | 0 | W5 = *(twiddles + (j << 1) + 257); |
416 | 0 | W3 = *(twiddles + j + (j << 1)); |
417 | 0 | W6 = *(twiddles + j + (j << 1) + 257); |
418 | |
|
419 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
420 | 0 | FLOAT32 tmp; |
421 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
422 | |
|
423 | 0 | data += (del << 1); |
424 | |
|
425 | 0 | x1r = *data; |
426 | 0 | x1i = *(data + 1); |
427 | 0 | data += (del << 1); |
428 | |
|
429 | 0 | x2r = *data; |
430 | 0 | x2i = *(data + 1); |
431 | 0 | data += (del << 1); |
432 | |
|
433 | 0 | x3r = *data; |
434 | 0 | x3i = *(data + 1); |
435 | 0 | data -= 3 * (del << 1); |
436 | |
|
437 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) - |
438 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W4)); |
439 | 0 | x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4), |
440 | 0 | (FLOAT64)x1i, W1); |
441 | 0 | x1r = tmp; |
442 | |
|
443 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W2) - |
444 | 0 | ixheaacd_mult32X32float((FLOAT64)x2i, W5)); |
445 | 0 | x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x2r, W5), |
446 | 0 | (FLOAT64)x2i, W2); |
447 | 0 | x2r = tmp; |
448 | |
|
449 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W3) - |
450 | 0 | ixheaacd_mult32X32float((FLOAT64)x3i, W6)); |
451 | 0 | x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x3r, W6), |
452 | 0 | (FLOAT64)x3i, W3); |
453 | 0 | x3r = tmp; |
454 | |
|
455 | 0 | x0r = (*data); |
456 | 0 | x0i = (*(data + 1)); |
457 | |
|
458 | 0 | x0r = x0r + (x2r); |
459 | 0 | x0i = x0i + (x2i); |
460 | 0 | x2r = x0r - (x2r * 2); |
461 | 0 | x2i = x0i - (x2i * 2); |
462 | 0 | x1r = x1r + x3r; |
463 | 0 | x1i = x1i + x3i; |
464 | 0 | x3r = x1r - (x3r * 2); |
465 | 0 | x3i = x1i - (x3i * 2); |
466 | |
|
467 | 0 | x0r = x0r + (x1r); |
468 | 0 | x0i = x0i + (x1i); |
469 | 0 | x1r = x0r - (x1r * 2); |
470 | 0 | x1i = x0i - (x1i * 2); |
471 | 0 | x2r = x2r + (x3i); |
472 | 0 | x2i = x2i - (x3r); |
473 | 0 | x3i = x2r - (x3i * 2); |
474 | 0 | x3r = x2i + (x3r * 2); |
475 | |
|
476 | 0 | *data = x0r; |
477 | 0 | *(data + 1) = x0i; |
478 | 0 | data += (del << 1); |
479 | |
|
480 | 0 | *data = x2r; |
481 | 0 | *(data + 1) = x2i; |
482 | 0 | data += (del << 1); |
483 | |
|
484 | 0 | *data = x1r; |
485 | 0 | *(data + 1) = x1i; |
486 | 0 | data += (del << 1); |
487 | |
|
488 | 0 | *data = x3i; |
489 | 0 | *(data + 1) = x3r; |
490 | 0 | data += (del << 1); |
491 | 0 | } |
492 | 0 | data -= 2 * npoints; |
493 | 0 | data += 2; |
494 | 0 | } |
495 | 0 | for (; j <= (nodespacing * del) >> 1; j += nodespacing) { |
496 | 0 | W1 = *(twiddles + j); |
497 | 0 | W4 = *(twiddles + j + 257); |
498 | 0 | W2 = *(twiddles + (j << 1)); |
499 | 0 | W5 = *(twiddles + (j << 1) + 257); |
500 | 0 | W3 = *(twiddles + j + (j << 1) - 256); |
501 | 0 | W6 = *(twiddles + j + (j << 1) + 1); |
502 | |
|
503 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
504 | 0 | FLOAT32 tmp; |
505 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
506 | |
|
507 | 0 | data += (del << 1); |
508 | |
|
509 | 0 | x1r = *data; |
510 | 0 | x1i = *(data + 1); |
511 | 0 | data += (del << 1); |
512 | |
|
513 | 0 | x2r = *data; |
514 | 0 | x2i = *(data + 1); |
515 | 0 | data += (del << 1); |
516 | |
|
517 | 0 | x3r = *data; |
518 | 0 | x3i = *(data + 1); |
519 | 0 | data -= 3 * (del << 1); |
520 | |
|
521 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) - |
522 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W4)); |
523 | 0 | x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4), |
524 | 0 | (FLOAT64)x1i, W1); |
525 | 0 | x1r = tmp; |
526 | |
|
527 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W2) - |
528 | 0 | ixheaacd_mult32X32float((FLOAT64)x2i, W5)); |
529 | 0 | x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x2r, W5), |
530 | 0 | (FLOAT64)x2i, W2); |
531 | 0 | x2r = tmp; |
532 | |
|
533 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W6) + |
534 | 0 | ixheaacd_mult32X32float((FLOAT64)x3i, W3)); |
535 | 0 | x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) + |
536 | 0 | ixheaacd_mult32X32float((FLOAT64)x3i, W6)); |
537 | 0 | x3r = tmp; |
538 | |
|
539 | 0 | x0r = (*data); |
540 | 0 | x0i = (*(data + 1)); |
541 | |
|
542 | 0 | x0r = x0r + (x2r); |
543 | 0 | x0i = x0i + (x2i); |
544 | 0 | x2r = x0r - (x2r * 2); |
545 | 0 | x2i = x0i - (x2i * 2); |
546 | 0 | x1r = x1r + x3r; |
547 | 0 | x1i = x1i + x3i; |
548 | 0 | x3r = x1r - (x3r * 2); |
549 | 0 | x3i = x1i - (x3i * 2); |
550 | |
|
551 | 0 | x0r = x0r + (x1r); |
552 | 0 | x0i = x0i + (x1i); |
553 | 0 | x1r = x0r - (x1r * 2); |
554 | 0 | x1i = x0i - (x1i * 2); |
555 | 0 | x2r = x2r + (x3i); |
556 | 0 | x2i = x2i - (x3r); |
557 | 0 | x3i = x2r - (x3i * 2); |
558 | 0 | x3r = x2i + (x3r * 2); |
559 | |
|
560 | 0 | *data = x0r; |
561 | 0 | *(data + 1) = x0i; |
562 | 0 | data += (del << 1); |
563 | |
|
564 | 0 | *data = x2r; |
565 | 0 | *(data + 1) = x2i; |
566 | 0 | data += (del << 1); |
567 | |
|
568 | 0 | *data = x1r; |
569 | 0 | *(data + 1) = x1i; |
570 | 0 | data += (del << 1); |
571 | |
|
572 | 0 | *data = x3i; |
573 | 0 | *(data + 1) = x3r; |
574 | 0 | data += (del << 1); |
575 | 0 | } |
576 | 0 | data -= 2 * npoints; |
577 | 0 | data += 2; |
578 | 0 | } |
579 | 0 | for (; j <= sec_loop_cnt * 2; j += nodespacing) { |
580 | 0 | W1 = *(twiddles + j); |
581 | 0 | W4 = *(twiddles + j + 257); |
582 | 0 | W2 = *(twiddles + (j << 1) - 256); |
583 | 0 | W5 = *(twiddles + (j << 1) + 1); |
584 | 0 | W3 = *(twiddles + j + (j << 1) - 256); |
585 | 0 | W6 = *(twiddles + j + (j << 1) + 1); |
586 | |
|
587 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
588 | 0 | FLOAT32 tmp; |
589 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
590 | |
|
591 | 0 | data += (del << 1); |
592 | |
|
593 | 0 | x1r = *data; |
594 | 0 | x1i = *(data + 1); |
595 | 0 | data += (del << 1); |
596 | |
|
597 | 0 | x2r = *data; |
598 | 0 | x2i = *(data + 1); |
599 | 0 | data += (del << 1); |
600 | |
|
601 | 0 | x3r = *data; |
602 | 0 | x3i = *(data + 1); |
603 | 0 | data -= 3 * (del << 1); |
604 | |
|
605 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) - |
606 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W4)); |
607 | 0 | x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, W4), x1i, W1); |
608 | 0 | x1r = tmp; |
609 | |
|
610 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W5) + |
611 | 0 | ixheaacd_mult32X32float((FLOAT64)x2i, W2)); |
612 | 0 | x2i = (FLOAT32)(-ixheaacd_mult32X32float(x2r, W2) + ixheaacd_mult32X32float(x2i, W5)); |
613 | 0 | x2r = tmp; |
614 | |
|
615 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W6) + |
616 | 0 | ixheaacd_mult32X32float((FLOAT64)x3i, W3)); |
617 | 0 | x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) + |
618 | 0 | ixheaacd_mult32X32float((FLOAT64)x3i, W6)); |
619 | 0 | x3r = tmp; |
620 | |
|
621 | 0 | x0r = (*data); |
622 | 0 | x0i = (*(data + 1)); |
623 | |
|
624 | 0 | x0r = x0r + (x2r); |
625 | 0 | x0i = x0i + (x2i); |
626 | 0 | x2r = x0r - (x2r * 2); |
627 | 0 | x2i = x0i - (x2i * 2); |
628 | 0 | x1r = x1r + x3r; |
629 | 0 | x1i = x1i + x3i; |
630 | 0 | x3r = x1r - (x3r * 2); |
631 | 0 | x3i = x1i - (x3i * 2); |
632 | |
|
633 | 0 | x0r = x0r + (x1r); |
634 | 0 | x0i = x0i + (x1i); |
635 | 0 | x1r = x0r - (x1r * 2); |
636 | 0 | x1i = x0i - (x1i * 2); |
637 | 0 | x2r = x2r + (x3i); |
638 | 0 | x2i = x2i - (x3r); |
639 | 0 | x3i = x2r - (x3i * 2); |
640 | 0 | x3r = x2i + (x3r * 2); |
641 | |
|
642 | 0 | *data = x0r; |
643 | 0 | *(data + 1) = x0i; |
644 | 0 | data += (del << 1); |
645 | |
|
646 | 0 | *data = x2r; |
647 | 0 | *(data + 1) = x2i; |
648 | 0 | data += (del << 1); |
649 | |
|
650 | 0 | *data = x1r; |
651 | 0 | *(data + 1) = x1i; |
652 | 0 | data += (del << 1); |
653 | |
|
654 | 0 | *data = x3i; |
655 | 0 | *(data + 1) = x3r; |
656 | 0 | data += (del << 1); |
657 | 0 | } |
658 | 0 | data -= 2 * npoints; |
659 | 0 | data += 2; |
660 | 0 | } |
661 | 0 | for (; j < nodespacing * del; j += nodespacing) { |
662 | 0 | W1 = *(twiddles + j); |
663 | 0 | W4 = *(twiddles + j + 257); |
664 | 0 | W2 = *(twiddles + (j << 1) - 256); |
665 | 0 | W5 = *(twiddles + (j << 1) + 1); |
666 | 0 | W3 = *(twiddles + j + (j << 1) - 512); |
667 | 0 | W6 = *(twiddles + j + (j << 1) - 512 + 257); |
668 | |
|
669 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
670 | 0 | FLOAT32 tmp; |
671 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
672 | |
|
673 | 0 | data += (del << 1); |
674 | |
|
675 | 0 | x1r = *data; |
676 | 0 | x1i = *(data + 1); |
677 | 0 | data += (del << 1); |
678 | |
|
679 | 0 | x2r = *data; |
680 | 0 | x2i = *(data + 1); |
681 | 0 | data += (del << 1); |
682 | |
|
683 | 0 | x3r = *data; |
684 | 0 | x3i = *(data + 1); |
685 | 0 | data -= 3 * (del << 1); |
686 | |
|
687 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) - |
688 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W4)); |
689 | 0 | x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4), |
690 | 0 | (FLOAT64)x1i, W1); |
691 | 0 | x1r = tmp; |
692 | |
|
693 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W5) + |
694 | 0 | ixheaacd_mult32X32float((FLOAT64)x2i, W2)); |
695 | 0 | x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x2r, W2) + |
696 | 0 | ixheaacd_mult32X32float((FLOAT64)x2i, W5)); |
697 | 0 | x2r = tmp; |
698 | |
|
699 | 0 | tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) + |
700 | 0 | ixheaacd_mult32X32float((FLOAT64)x3i, W6)); |
701 | 0 | x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x3r, W6), |
702 | 0 | (FLOAT64)x3i, W3); |
703 | 0 | x3r = tmp; |
704 | |
|
705 | 0 | x0r = (*data); |
706 | 0 | x0i = (*(data + 1)); |
707 | |
|
708 | 0 | x0r = x0r + (x2r); |
709 | 0 | x0i = x0i + (x2i); |
710 | 0 | x2r = x0r - (x2r * 2); |
711 | 0 | x2i = x0i - (x2i * 2); |
712 | 0 | x1r = x1r + x3r; |
713 | 0 | x1i = x1i - x3i; |
714 | 0 | x3r = x1r - (x3r * 2); |
715 | 0 | x3i = x1i + (x3i * 2); |
716 | |
|
717 | 0 | x0r = x0r + (x1r); |
718 | 0 | x0i = x0i + (x1i); |
719 | 0 | x1r = x0r - (x1r * 2); |
720 | 0 | x1i = x0i - (x1i * 2); |
721 | 0 | x2r = x2r + (x3i); |
722 | 0 | x2i = x2i - (x3r); |
723 | 0 | x3i = x2r - (x3i * 2); |
724 | 0 | x3r = x2i + (x3r * 2); |
725 | |
|
726 | 0 | *data = x0r; |
727 | 0 | *(data + 1) = x0i; |
728 | 0 | data += (del << 1); |
729 | |
|
730 | 0 | *data = x2r; |
731 | 0 | *(data + 1) = x2i; |
732 | 0 | data += (del << 1); |
733 | |
|
734 | 0 | *data = x1r; |
735 | 0 | *(data + 1) = x1i; |
736 | 0 | data += (del << 1); |
737 | |
|
738 | 0 | *data = x3i; |
739 | 0 | *(data + 1) = x3r; |
740 | 0 | data += (del << 1); |
741 | 0 | } |
742 | 0 | data -= 2 * npoints; |
743 | 0 | data += 2; |
744 | 0 | } |
745 | 0 | nodespacing >>= 2; |
746 | 0 | del <<= 2; |
747 | 0 | in_loop_cnt >>= 2; |
748 | 0 | } |
749 | 0 | if (not_power_4) { |
750 | 0 | const FLOAT64 *twiddles = ptr_w; |
751 | 0 | nodespacing <<= 1; |
752 | |
|
753 | 0 | for (j = del / 2; j != 0; j--) { |
754 | 0 | FLOAT64 W1 = *twiddles; |
755 | 0 | FLOAT64 W4 = *(twiddles + 257); |
756 | 0 | FLOAT32 tmp; |
757 | 0 | twiddles += nodespacing; |
758 | |
|
759 | 0 | x0r = *ptr_y; |
760 | 0 | x0i = *(ptr_y + 1); |
761 | 0 | ptr_y += (del << 1); |
762 | |
|
763 | 0 | x1r = *ptr_y; |
764 | 0 | x1i = *(ptr_y + 1); |
765 | |
|
766 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) - |
767 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W4)); |
768 | 0 | x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4), |
769 | 0 | (FLOAT64)x1i, W1); |
770 | 0 | x1r = tmp; |
771 | |
|
772 | 0 | *ptr_y = (x0r) - (x1r); |
773 | 0 | *(ptr_y + 1) = (x0i) - (x1i); |
774 | 0 | ptr_y -= (del << 1); |
775 | |
|
776 | 0 | *ptr_y = (x0r) + (x1r); |
777 | 0 | *(ptr_y + 1) = (x0i) + (x1i); |
778 | 0 | ptr_y += 2; |
779 | 0 | } |
780 | 0 | twiddles = ptr_w; |
781 | 0 | for (j = del / 2; j != 0; j--) { |
782 | 0 | FLOAT64 W1 = *twiddles; |
783 | 0 | FLOAT64 W4 = *(twiddles + 257); |
784 | 0 | FLOAT32 tmp; |
785 | 0 | twiddles += nodespacing; |
786 | |
|
787 | 0 | x0r = *ptr_y; |
788 | 0 | x0i = *(ptr_y + 1); |
789 | 0 | ptr_y += (del << 1); |
790 | |
|
791 | 0 | x1r = *ptr_y; |
792 | 0 | x1i = *(ptr_y + 1); |
793 | |
|
794 | 0 | tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W4) + |
795 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W1)); |
796 | 0 | x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x1r, W1) + |
797 | 0 | ixheaacd_mult32X32float((FLOAT64)x1i, W4)); |
798 | 0 | x1r = tmp; |
799 | |
|
800 | 0 | *ptr_y = (x0r) - (x1r); |
801 | 0 | *(ptr_y + 1) = (x0i) - (x1i); |
802 | 0 | ptr_y -= (del << 1); |
803 | |
|
804 | 0 | *ptr_y = (x0r) + (x1r); |
805 | 0 | *(ptr_y + 1) = (x0i) + (x1i); |
806 | 0 | ptr_y += 2; |
807 | 0 | } |
808 | 0 | } |
809 | 0 | } else { |
810 | 0 | for (i = 0; i < npoints; i += 4) { |
811 | 0 | FLOAT32 *inp = ptr_x; |
812 | |
|
813 | 0 | DIG_REV(i, dig_rev_shift, h2); |
814 | 0 | if (not_power_4) { |
815 | 0 | h2 += 1; |
816 | 0 | h2 &= ~1; |
817 | 0 | } |
818 | 0 | inp += (h2); |
819 | |
|
820 | 0 | x0r = *inp; |
821 | 0 | x0i = *(inp + 1); |
822 | 0 | inp += (npoints >> 1); |
823 | |
|
824 | 0 | x1r = *inp; |
825 | 0 | x1i = *(inp + 1); |
826 | 0 | inp += (npoints >> 1); |
827 | |
|
828 | 0 | x2r = *inp; |
829 | 0 | x2i = *(inp + 1); |
830 | 0 | inp += (npoints >> 1); |
831 | |
|
832 | 0 | x3r = *inp; |
833 | 0 | x3i = *(inp + 1); |
834 | |
|
835 | 0 | x0r = x0r + x2r; |
836 | 0 | x0i = x0i + x2i; |
837 | 0 | x2r = x0r - (x2r * 2); |
838 | 0 | x2i = x0i - (x2i * 2); |
839 | 0 | x1r = x1r + x3r; |
840 | 0 | x1i = x1i + x3i; |
841 | 0 | x3r = x1r - (x3r * 2); |
842 | 0 | x3i = x1i - (x3i * 2); |
843 | |
|
844 | 0 | x0r = x0r + x1r; |
845 | 0 | x0i = x0i + x1i; |
846 | 0 | x1r = x0r - (x1r * 2); |
847 | 0 | x1i = x0i - (x1i * 2); |
848 | 0 | x2r = x2r - x3i; |
849 | 0 | x2i = x2i + x3r; |
850 | 0 | x3i = x2r + (x3i * 2); |
851 | 0 | x3r = x2i - (x3r * 2); |
852 | |
|
853 | 0 | *ptr_y++ = x0r; |
854 | 0 | *ptr_y++ = x0i; |
855 | 0 | *ptr_y++ = x2r; |
856 | 0 | *ptr_y++ = x2i; |
857 | 0 | *ptr_y++ = x1r; |
858 | 0 | *ptr_y++ = x1i; |
859 | 0 | *ptr_y++ = x3i; |
860 | 0 | *ptr_y++ = x3r; |
861 | 0 | } |
862 | 0 | ptr_y -= 2 * npoints; |
863 | 0 | del = 4; |
864 | 0 | nodespacing = 64; |
865 | 0 | in_loop_cnt = npoints >> 4; |
866 | 0 | for (i = n_stages - 1; i > 0; i--) { |
867 | 0 | const FLOAT64 *twiddles = ptr_w; |
868 | 0 | FLOAT32 *data = ptr_y; |
869 | 0 | FLOAT64 W1, W2, W3, W4, W5, W6; |
870 | 0 | WORD32 sec_loop_cnt; |
871 | |
|
872 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
873 | 0 | x0r = (*data); |
874 | 0 | x0i = (*(data + 1)); |
875 | 0 | data += (del << 1); |
876 | |
|
877 | 0 | x1r = (*data); |
878 | 0 | x1i = (*(data + 1)); |
879 | 0 | data += (del << 1); |
880 | |
|
881 | 0 | x2r = (*data); |
882 | 0 | x2i = (*(data + 1)); |
883 | 0 | data += (del << 1); |
884 | |
|
885 | 0 | x3r = (*data); |
886 | 0 | x3i = (*(data + 1)); |
887 | 0 | data -= 3 * (del << 1); |
888 | |
|
889 | 0 | x0r = x0r + x2r; |
890 | 0 | x0i = x0i + x2i; |
891 | 0 | x2r = x0r - (x2r * 2); |
892 | 0 | x2i = x0i - (x2i * 2); |
893 | 0 | x1r = x1r + x3r; |
894 | 0 | x1i = x1i + x3i; |
895 | 0 | x3r = x1r - (x3r * 2); |
896 | 0 | x3i = x1i - (x3i * 2); |
897 | |
|
898 | 0 | x0r = x0r + x1r; |
899 | 0 | x0i = x0i + x1i; |
900 | 0 | x1r = x0r - (x1r * 2); |
901 | 0 | x1i = x0i - (x1i * 2); |
902 | 0 | x2r = x2r - x3i; |
903 | 0 | x2i = x2i + x3r; |
904 | 0 | x3i = x2r + (x3i * 2); |
905 | 0 | x3r = x2i - (x3r * 2); |
906 | |
|
907 | 0 | *data = x0r; |
908 | 0 | *(data + 1) = x0i; |
909 | 0 | data += (del << 1); |
910 | |
|
911 | 0 | *data = x2r; |
912 | 0 | *(data + 1) = x2i; |
913 | 0 | data += (del << 1); |
914 | |
|
915 | 0 | *data = x1r; |
916 | 0 | *(data + 1) = x1i; |
917 | 0 | data += (del << 1); |
918 | |
|
919 | 0 | *data = x3i; |
920 | 0 | *(data + 1) = x3r; |
921 | 0 | data += (del << 1); |
922 | 0 | } |
923 | 0 | data = ptr_y + 2; |
924 | |
|
925 | 0 | sec_loop_cnt = (nodespacing * del); |
926 | 0 | sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + |
927 | 0 | (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - |
928 | 0 | (sec_loop_cnt / 256); |
929 | 0 | j = nodespacing; |
930 | |
|
931 | 0 | for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { |
932 | 0 | W1 = *(twiddles + j); |
933 | 0 | W4 = *(twiddles + j + 257); |
934 | 0 | W2 = *(twiddles + (j << 1)); |
935 | 0 | W5 = *(twiddles + (j << 1) + 257); |
936 | 0 | W3 = *(twiddles + j + (j << 1)); |
937 | 0 | W6 = *(twiddles + j + (j << 1) + 257); |
938 | |
|
939 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
940 | 0 | FLOAT32 tmp; |
941 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
942 | |
|
943 | 0 | data += (del << 1); |
944 | |
|
945 | 0 | x1r = *data; |
946 | 0 | x1i = *(data + 1); |
947 | 0 | data += (del << 1); |
948 | |
|
949 | 0 | x2r = *data; |
950 | 0 | x2i = *(data + 1); |
951 | 0 | data += (del << 1); |
952 | |
|
953 | 0 | x3r = *data; |
954 | 0 | x3i = *(data + 1); |
955 | 0 | data -= 3 * (del << 1); |
956 | |
|
957 | 0 | tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4)); |
958 | 0 | x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1); |
959 | 0 | x1r = tmp; |
960 | |
|
961 | 0 | tmp = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5)); |
962 | 0 | x2i = (FLOAT32)(-((FLOAT64)x2r * W5) + (FLOAT64)x2i * W2); |
963 | 0 | x2r = tmp; |
964 | |
|
965 | 0 | tmp = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6)); |
966 | 0 | x3i = (FLOAT32)(-((FLOAT64)x3r * W6) + (FLOAT64)x3i * W3); |
967 | 0 | x3r = tmp; |
968 | |
|
969 | 0 | x0r = (*data); |
970 | 0 | x0i = (*(data + 1)); |
971 | |
|
972 | 0 | x0r = x0r + (x2r); |
973 | 0 | x0i = x0i + (x2i); |
974 | 0 | x2r = x0r - (x2r * 2); |
975 | 0 | x2i = x0i - (x2i * 2); |
976 | 0 | x1r = x1r + x3r; |
977 | 0 | x1i = x1i + x3i; |
978 | 0 | x3r = x1r - (x3r * 2); |
979 | 0 | x3i = x1i - (x3i * 2); |
980 | |
|
981 | 0 | x0r = x0r + (x1r); |
982 | 0 | x0i = x0i + (x1i); |
983 | 0 | x1r = x0r - (x1r * 2); |
984 | 0 | x1i = x0i - (x1i * 2); |
985 | 0 | x2r = x2r - (x3i); |
986 | 0 | x2i = x2i + (x3r); |
987 | 0 | x3i = x2r + (x3i * 2); |
988 | 0 | x3r = x2i - (x3r * 2); |
989 | |
|
990 | 0 | *data = x0r; |
991 | 0 | *(data + 1) = x0i; |
992 | 0 | data += (del << 1); |
993 | |
|
994 | 0 | *data = x2r; |
995 | 0 | *(data + 1) = x2i; |
996 | 0 | data += (del << 1); |
997 | |
|
998 | 0 | *data = x1r; |
999 | 0 | *(data + 1) = x1i; |
1000 | 0 | data += (del << 1); |
1001 | |
|
1002 | 0 | *data = x3i; |
1003 | 0 | *(data + 1) = x3r; |
1004 | 0 | data += (del << 1); |
1005 | 0 | } |
1006 | 0 | data -= 2 * npoints; |
1007 | 0 | data += 2; |
1008 | 0 | } |
1009 | 0 | for (; j <= (nodespacing * del) >> 1; j += nodespacing) { |
1010 | 0 | W1 = *(twiddles + j); |
1011 | 0 | W4 = *(twiddles + j + 257); |
1012 | 0 | W2 = *(twiddles + (j << 1)); |
1013 | 0 | W5 = *(twiddles + (j << 1) + 257); |
1014 | 0 | W3 = *(twiddles + j + (j << 1) - 256); |
1015 | 0 | W6 = *(twiddles + j + (j << 1) + 1); |
1016 | |
|
1017 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
1018 | 0 | FLOAT32 tmp; |
1019 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
1020 | |
|
1021 | 0 | data += (del << 1); |
1022 | |
|
1023 | 0 | x1r = *data; |
1024 | 0 | x1i = *(data + 1); |
1025 | 0 | data += (del << 1); |
1026 | |
|
1027 | 0 | x2r = *data; |
1028 | 0 | x2i = *(data + 1); |
1029 | 0 | data += (del << 1); |
1030 | |
|
1031 | 0 | x3r = *data; |
1032 | 0 | x3i = *(data + 1); |
1033 | 0 | data -= 3 * (del << 1); |
1034 | |
|
1035 | 0 | tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4)); |
1036 | 0 | x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1); |
1037 | 0 | x1r = tmp; |
1038 | |
|
1039 | 0 | tmp = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5)); |
1040 | 0 | x2i = (FLOAT32)(-((FLOAT64)x2r * W5) + (FLOAT64)x2i * W2); |
1041 | 0 | x2r = tmp; |
1042 | |
|
1043 | 0 | tmp = (FLOAT32)(((FLOAT64)x3r * W6) - ((FLOAT64)x3i * W3)); |
1044 | 0 | x3i = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6)); |
1045 | 0 | x3r = tmp; |
1046 | |
|
1047 | 0 | x0r = (*data); |
1048 | 0 | x0i = (*(data + 1)); |
1049 | |
|
1050 | 0 | x0r = x0r + (x2r); |
1051 | 0 | x0i = x0i + (x2i); |
1052 | 0 | x2r = x0r - (x2r * 2); |
1053 | 0 | x2i = x0i - (x2i * 2); |
1054 | 0 | x1r = x1r + x3r; |
1055 | 0 | x1i = x1i + x3i; |
1056 | 0 | x3r = x1r - (x3r * 2); |
1057 | 0 | x3i = x1i - (x3i * 2); |
1058 | |
|
1059 | 0 | x0r = x0r + (x1r); |
1060 | 0 | x0i = x0i + (x1i); |
1061 | 0 | x1r = x0r - (x1r * 2); |
1062 | 0 | x1i = x0i - (x1i * 2); |
1063 | 0 | x2r = x2r - (x3i); |
1064 | 0 | x2i = x2i + (x3r); |
1065 | 0 | x3i = x2r + (x3i * 2); |
1066 | 0 | x3r = x2i - (x3r * 2); |
1067 | |
|
1068 | 0 | *data = x0r; |
1069 | 0 | *(data + 1) = x0i; |
1070 | 0 | data += (del << 1); |
1071 | |
|
1072 | 0 | *data = x2r; |
1073 | 0 | *(data + 1) = x2i; |
1074 | 0 | data += (del << 1); |
1075 | |
|
1076 | 0 | *data = x1r; |
1077 | 0 | *(data + 1) = x1i; |
1078 | 0 | data += (del << 1); |
1079 | |
|
1080 | 0 | *data = x3i; |
1081 | 0 | *(data + 1) = x3r; |
1082 | 0 | data += (del << 1); |
1083 | 0 | } |
1084 | 0 | data -= 2 * npoints; |
1085 | 0 | data += 2; |
1086 | 0 | } |
1087 | 0 | for (; j <= sec_loop_cnt * 2; j += nodespacing) { |
1088 | 0 | W1 = *(twiddles + j); |
1089 | 0 | W4 = *(twiddles + j + 257); |
1090 | 0 | W2 = *(twiddles + (j << 1) - 256); |
1091 | 0 | W5 = *(twiddles + (j << 1) + 1); |
1092 | 0 | W3 = *(twiddles + j + (j << 1) - 256); |
1093 | 0 | W6 = *(twiddles + j + (j << 1) + 1); |
1094 | |
|
1095 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
1096 | 0 | FLOAT32 tmp; |
1097 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
1098 | |
|
1099 | 0 | data += (del << 1); |
1100 | |
|
1101 | 0 | x1r = *data; |
1102 | 0 | x1i = *(data + 1); |
1103 | 0 | data += (del << 1); |
1104 | |
|
1105 | 0 | x2r = *data; |
1106 | 0 | x2i = *(data + 1); |
1107 | 0 | data += (del << 1); |
1108 | |
|
1109 | 0 | x3r = *data; |
1110 | 0 | x3i = *(data + 1); |
1111 | 0 | data -= 3 * (del << 1); |
1112 | |
|
1113 | 0 | tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4)); |
1114 | 0 | x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1); |
1115 | 0 | x1r = tmp; |
1116 | |
|
1117 | 0 | tmp = (FLOAT32)(((FLOAT64)x2r * W5) - ((FLOAT64)x2i * W2)); |
1118 | 0 | x2i = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5)); |
1119 | 0 | x2r = tmp; |
1120 | |
|
1121 | 0 | tmp = (FLOAT32)(((FLOAT64)x3r * W6) - ((FLOAT64)x3i * W3)); |
1122 | 0 | x3i = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6)); |
1123 | 0 | x3r = tmp; |
1124 | |
|
1125 | 0 | x0r = (*data); |
1126 | 0 | x0i = (*(data + 1)); |
1127 | |
|
1128 | 0 | x0r = x0r + (x2r); |
1129 | 0 | x0i = x0i + (x2i); |
1130 | 0 | x2r = x0r - (x2r * 2); |
1131 | 0 | x2i = x0i - (x2i * 2); |
1132 | 0 | x1r = x1r + x3r; |
1133 | 0 | x1i = x1i + x3i; |
1134 | 0 | x3r = x1r - (x3r * 2); |
1135 | 0 | x3i = x1i - (x3i * 2); |
1136 | |
|
1137 | 0 | x0r = x0r + (x1r); |
1138 | 0 | x0i = x0i + (x1i); |
1139 | 0 | x1r = x0r - (x1r * 2); |
1140 | 0 | x1i = x0i - (x1i * 2); |
1141 | 0 | x2r = x2r - (x3i); |
1142 | 0 | x2i = x2i + (x3r); |
1143 | 0 | x3i = x2r + (x3i * 2); |
1144 | 0 | x3r = x2i - (x3r * 2); |
1145 | |
|
1146 | 0 | *data = x0r; |
1147 | 0 | *(data + 1) = x0i; |
1148 | 0 | data += (del << 1); |
1149 | |
|
1150 | 0 | *data = x2r; |
1151 | 0 | *(data + 1) = x2i; |
1152 | 0 | data += (del << 1); |
1153 | |
|
1154 | 0 | *data = x1r; |
1155 | 0 | *(data + 1) = x1i; |
1156 | 0 | data += (del << 1); |
1157 | |
|
1158 | 0 | *data = x3i; |
1159 | 0 | *(data + 1) = x3r; |
1160 | 0 | data += (del << 1); |
1161 | 0 | } |
1162 | 0 | data -= 2 * npoints; |
1163 | 0 | data += 2; |
1164 | 0 | } |
1165 | 0 | for (; j < nodespacing * del; j += nodespacing) { |
1166 | 0 | W1 = *(twiddles + j); |
1167 | 0 | W4 = *(twiddles + j + 257); |
1168 | 0 | W2 = *(twiddles + (j << 1) - 256); |
1169 | 0 | W5 = *(twiddles + (j << 1) + 1); |
1170 | 0 | W3 = *(twiddles + j + (j << 1) - 512); |
1171 | 0 | W6 = *(twiddles + j + (j << 1) - 512 + 257); |
1172 | |
|
1173 | 0 | for (k = in_loop_cnt; k != 0; k--) { |
1174 | 0 | FLOAT32 tmp; |
1175 | 0 | FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
1176 | |
|
1177 | 0 | data += (del << 1); |
1178 | |
|
1179 | 0 | x1r = *data; |
1180 | 0 | x1i = *(data + 1); |
1181 | 0 | data += (del << 1); |
1182 | |
|
1183 | 0 | x2r = *data; |
1184 | 0 | x2i = *(data + 1); |
1185 | 0 | data += (del << 1); |
1186 | |
|
1187 | 0 | x3r = *data; |
1188 | 0 | x3i = *(data + 1); |
1189 | 0 | data -= 3 * (del << 1); |
1190 | |
|
1191 | 0 | tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4)); |
1192 | 0 | x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1); |
1193 | 0 | x1r = tmp; |
1194 | |
|
1195 | 0 | tmp = (FLOAT32)(((FLOAT64)x2r * W5) - ((FLOAT64)x2i * W2)); |
1196 | 0 | x2i = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5)); |
1197 | 0 | x2r = tmp; |
1198 | |
|
1199 | 0 | tmp = (FLOAT32)(-((FLOAT64)x3r * W3) - ((FLOAT64)x3i * W6)); |
1200 | 0 | x3i = (FLOAT32)(-((FLOAT64)x3r * W6) + (FLOAT64)x3i * W3); |
1201 | 0 | x3r = tmp; |
1202 | |
|
1203 | 0 | x0r = (*data); |
1204 | 0 | x0i = (*(data + 1)); |
1205 | |
|
1206 | 0 | x0r = x0r + (x2r); |
1207 | 0 | x0i = x0i + (x2i); |
1208 | 0 | x2r = x0r - (x2r * 2); |
1209 | 0 | x2i = x0i - (x2i * 2); |
1210 | 0 | x1r = x1r + x3r; |
1211 | 0 | x1i = x1i - x3i; |
1212 | 0 | x3r = x1r - (x3r * 2); |
1213 | 0 | x3i = x1i + (x3i * 2); |
1214 | |
|
1215 | 0 | x0r = x0r + (x1r); |
1216 | 0 | x0i = x0i + (x1i); |
1217 | 0 | x1r = x0r - (x1r * 2); |
1218 | 0 | x1i = x0i - (x1i * 2); |
1219 | 0 | x2r = x2r - (x3i); |
1220 | 0 | x2i = x2i + (x3r); |
1221 | 0 | x3i = x2r + (x3i * 2); |
1222 | 0 | x3r = x2i - (x3r * 2); |
1223 | |
|
1224 | 0 | *data = x0r; |
1225 | 0 | *(data + 1) = x0i; |
1226 | 0 | data += (del << 1); |
1227 | |
|
1228 | 0 | *data = x2r; |
1229 | 0 | *(data + 1) = x2i; |
1230 | 0 | data += (del << 1); |
1231 | |
|
1232 | 0 | *data = x1r; |
1233 | 0 | *(data + 1) = x1i; |
1234 | 0 | data += (del << 1); |
1235 | |
|
1236 | 0 | *data = x3i; |
1237 | 0 | *(data + 1) = x3r; |
1238 | 0 | data += (del << 1); |
1239 | 0 | } |
1240 | 0 | data -= 2 * npoints; |
1241 | 0 | data += 2; |
1242 | 0 | } |
1243 | 0 | nodespacing >>= 2; |
1244 | 0 | del <<= 2; |
1245 | 0 | in_loop_cnt >>= 2; |
1246 | 0 | } |
1247 | |
|
1248 | 0 | if (not_power_4) { |
1249 | 0 | const FLOAT64 *twiddles = ptr_w; |
1250 | 0 | nodespacing <<= 1; |
1251 | |
|
1252 | 0 | for (j = del / 2; j != 0; j--) { |
1253 | 0 | FLOAT64 W1 = *twiddles; |
1254 | 0 | FLOAT64 W4 = *(twiddles + 257); |
1255 | 0 | FLOAT32 tmp; |
1256 | 0 | twiddles += nodespacing; |
1257 | |
|
1258 | 0 | x0r = *ptr_y; |
1259 | 0 | x0i = *(ptr_y + 1); |
1260 | 0 | ptr_y += (del << 1); |
1261 | |
|
1262 | 0 | x1r = *ptr_y; |
1263 | 0 | x1i = *(ptr_y + 1); |
1264 | |
|
1265 | 0 | tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4)); |
1266 | 0 | x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1); |
1267 | 0 | x1r = tmp; |
1268 | |
|
1269 | 0 | *ptr_y = (x0r) - (x1r); |
1270 | 0 | *(ptr_y + 1) = (x0i) - (x1i); |
1271 | 0 | ptr_y -= (del << 1); |
1272 | |
|
1273 | 0 | *ptr_y = (x0r) + (x1r); |
1274 | 0 | *(ptr_y + 1) = (x0i) + (x1i); |
1275 | 0 | ptr_y += 2; |
1276 | 0 | } |
1277 | 0 | twiddles = ptr_w; |
1278 | 0 | for (j = del / 2; j != 0; j--) { |
1279 | 0 | FLOAT64 W1 = *twiddles; |
1280 | 0 | FLOAT64 W4 = *(twiddles + 257); |
1281 | 0 | FLOAT32 tmp; |
1282 | 0 | twiddles += nodespacing; |
1283 | |
|
1284 | 0 | x0r = *ptr_y; |
1285 | 0 | x0i = *(ptr_y + 1); |
1286 | 0 | ptr_y += (del << 1); |
1287 | |
|
1288 | 0 | x1r = *ptr_y; |
1289 | 0 | x1i = *(ptr_y + 1); |
1290 | |
|
1291 | 0 | tmp = (FLOAT32)(((FLOAT64)x1r * W4) - ((FLOAT64)x1i * W1)); |
1292 | 0 | x1i = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4)); |
1293 | 0 | x1r = tmp; |
1294 | |
|
1295 | 0 | *ptr_y = (x0r) - (x1r); |
1296 | 0 | *(ptr_y + 1) = (x0i) - (x1i); |
1297 | 0 | ptr_y -= (del << 1); |
1298 | |
|
1299 | 0 | *ptr_y = (x0r) + (x1r); |
1300 | 0 | *(ptr_y + 1) = (x0i) + (x1i); |
1301 | 0 | ptr_y += 2; |
1302 | 0 | } |
1303 | 0 | } |
1304 | 0 | } |
1305 | |
|
1306 | 0 | for (i = 0; i < n_pass; i++) { |
1307 | 0 | re[2 * i + 0] = y[2 * i + 0]; |
1308 | 0 | re[2 * i + 1] = y[2 * i + 1]; |
1309 | 0 | } |
1310 | 0 | } |
1311 | | |
1312 | | VOID ixheaacd_hbe_apply_cfftn_gen(FLOAT32 re[], FLOAT32 *scratch, WORD32 n_pass, |
1313 | 0 | WORD32 i_sign) { |
1314 | 0 | WORD32 i, j; |
1315 | 0 | WORD32 m_points = n_pass; |
1316 | 0 | FLOAT32 *x, *y, *re3; |
1317 | 0 | FLOAT32 *ptr_x, *ptr_y; |
1318 | 0 | ptr_x = x = scratch; |
1319 | 0 | scratch += 2 * m_points; |
1320 | 0 | ptr_y = y = scratch; |
1321 | 0 | scratch += 4 * m_points; |
1322 | 0 | re3 = scratch; |
1323 | 0 | scratch += 2 * m_points; |
1324 | 0 | WORD32 cnfac; |
1325 | 0 | WORD32 mpass = n_pass; |
1326 | |
|
1327 | 0 | cnfac = 0; |
1328 | 0 | while (mpass % 3 == 0) { |
1329 | 0 | mpass /= 3; |
1330 | 0 | cnfac++; |
1331 | 0 | } |
1332 | |
|
1333 | 0 | for (i = 0; i < 3 * cnfac; i++) { |
1334 | 0 | for (j = 0; j < mpass; j++) { |
1335 | 0 | re3[2 * j + 0] = re[6 * j + 2 * i + 0]; |
1336 | 0 | re3[2 * j + 1] = re[6 * j + 2 * i + 1]; |
1337 | 0 | } |
1338 | |
|
1339 | 0 | ixheaacd_hbe_apply_cfftn(re3, scratch, mpass, i_sign); |
1340 | |
|
1341 | 0 | for (j = 0; j < mpass; j++) { |
1342 | 0 | re[6 * j + 2 * i + 0] = re3[2 * j + 0]; |
1343 | 0 | re[6 * j + 2 * i + 1] = re3[2 * j + 1]; |
1344 | 0 | } |
1345 | 0 | } |
1346 | |
|
1347 | 0 | { |
1348 | 0 | FLOAT64 *w1r, *w1i; |
1349 | 0 | FLOAT32 tmp; |
1350 | 0 | w1r = (FLOAT64 *)ixheaac_twid_tbl_fft_ntwt3r; |
1351 | 0 | w1i = (FLOAT64 *)ixheaac_twid_tbl_fft_ntwt3i; |
1352 | |
|
1353 | 0 | if (i_sign < 0) { |
1354 | |
|
1355 | 0 | for (i = 0; i < n_pass; i += 3) { |
1356 | 0 | tmp = (FLOAT32)((FLOAT64)re[2 * i + 0] * (*w1r) - (FLOAT64)re[2 * i + 1] * (*w1i)); |
1357 | 0 | re[2 * i + 1] = |
1358 | 0 | (FLOAT32)((FLOAT64)re[2 * i + 0] * (*w1i) + (FLOAT64)re[2 * i + 1] * (*w1r)); |
1359 | 0 | re[2 * i + 0] = tmp; |
1360 | |
|
1361 | 0 | w1r++; |
1362 | 0 | w1i++; |
1363 | |
|
1364 | 0 | tmp = (FLOAT32)((FLOAT64)re[2 * i + 2] * (*w1r) - (FLOAT64)re[2 * i + 3] * (*w1i)); |
1365 | 0 | re[2 * i + 3] = |
1366 | 0 | (FLOAT32)((FLOAT64)re[2 * i + 2] * (*w1i) + (FLOAT64)re[2 * i + 3] * (*w1r)); |
1367 | 0 | re[2 * i + 2] = tmp; |
1368 | |
|
1369 | 0 | w1r++; |
1370 | 0 | w1i++; |
1371 | |
|
1372 | 0 | tmp = (FLOAT32)((FLOAT64)re[2 * i + 4] * (*w1r) - (FLOAT64)re[2 * i + 5] * (*w1i)); |
1373 | 0 | re[2 * i + 5] = |
1374 | 0 | (FLOAT32)((FLOAT64)re[2 * i + 4] * (*w1i) + (FLOAT64)re[2 * i + 5] * (*w1r)); |
1375 | 0 | re[2 * i + 4] = tmp; |
1376 | |
|
1377 | 0 | w1r += 3 * (128 / mpass - 1) + 1; |
1378 | 0 | w1i += 3 * (128 / mpass - 1) + 1; |
1379 | 0 | } |
1380 | 0 | } else { |
1381 | 0 | for (i = 0; i < n_pass; i += 3) { |
1382 | 0 | tmp = (FLOAT32)((FLOAT64)re[2 * i + 0] * (*w1r) + (FLOAT64)re[2 * i + 1] * (*w1i)); |
1383 | 0 | re[2 * i + 1] = |
1384 | 0 | (FLOAT32)(-(FLOAT64)re[2 * i + 0] * (*w1i) + (FLOAT64)re[2 * i + 1] * (*w1r)); |
1385 | 0 | re[2 * i + 0] = tmp; |
1386 | |
|
1387 | 0 | w1r++; |
1388 | 0 | w1i++; |
1389 | |
|
1390 | 0 | tmp = (FLOAT32)((FLOAT64)re[2 * i + 2] * (*w1r) + (FLOAT64)re[2 * i + 3] * (*w1i)); |
1391 | 0 | re[2 * i + 3] = |
1392 | 0 | (FLOAT32)(-(FLOAT64)re[2 * i + 2] * (*w1i) + (FLOAT64)re[2 * i + 3] * (*w1r)); |
1393 | 0 | re[2 * i + 2] = tmp; |
1394 | |
|
1395 | 0 | w1r++; |
1396 | 0 | w1i++; |
1397 | |
|
1398 | 0 | tmp = (FLOAT32)((FLOAT64)re[2 * i + 4] * (*w1r) + (FLOAT64)re[2 * i + 5] * (*w1i)); |
1399 | 0 | re[2 * i + 5] = |
1400 | 0 | (FLOAT32)(-(FLOAT64)re[2 * i + 4] * (*w1i) + (FLOAT64)re[2 * i + 5] * (*w1r)); |
1401 | 0 | re[2 * i + 4] = tmp; |
1402 | |
|
1403 | 0 | w1r += 3 * (128 / mpass - 1) + 1; |
1404 | 0 | w1i += 3 * (128 / mpass - 1) + 1; |
1405 | 0 | } |
1406 | 0 | } |
1407 | 0 | } |
1408 | |
|
1409 | 0 | for (i = 0; i < n_pass; i++) { |
1410 | 0 | ptr_x[2 * i + 0] = re[2 * i + 0]; |
1411 | 0 | ptr_x[2 * i + 1] = re[2 * i + 1]; |
1412 | 0 | } |
1413 | 0 | for (i = 0; i < mpass; i++) { |
1414 | 0 | ixheaacd_hbe_apply_fft_3(ptr_x, ptr_y, i_sign); |
1415 | |
|
1416 | 0 | ptr_x = ptr_x + 6; |
1417 | 0 | ptr_y = ptr_y + 6; |
1418 | 0 | } |
1419 | |
|
1420 | 0 | for (i = 0; i < mpass; i++) { |
1421 | 0 | re[2 * i + 0] = y[6 * i + 0]; |
1422 | 0 | re[2 * i + 1] = y[6 * i + 1]; |
1423 | 0 | } |
1424 | |
|
1425 | 0 | for (i = 0; i < mpass; i++) { |
1426 | 0 | re[2 * mpass + 2 * i + 0] = y[6 * i + 2]; |
1427 | 0 | re[2 * mpass + 2 * i + 1] = y[6 * i + 3]; |
1428 | 0 | } |
1429 | |
|
1430 | 0 | for (i = 0; i < mpass; i++) { |
1431 | 0 | re[4 * mpass + 2 * i + 0] = y[6 * i + 4]; |
1432 | 0 | re[4 * mpass + 2 * i + 1] = y[6 * i + 5]; |
1433 | 0 | } |
1434 | 0 | } |
1435 | | |
1436 | 0 | VOID ixheaacd_hbe_apply_fft_288(FLOAT32 *inp, FLOAT32 *scratch, WORD32 len, WORD32 i_sign) { |
1437 | 0 | FLOAT32 *op = scratch; |
1438 | 0 | WORD32 mpoints = len / 96; |
1439 | 0 | WORD32 fpoints = len / 3; |
1440 | 0 | WORD32 ii, jj; |
1441 | 0 | scratch += 2 * len; |
1442 | |
|
1443 | 0 | for (ii = 0; ii < mpoints; ii++) { |
1444 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1445 | 0 | op[2 * jj + 0] = inp[2 * mpoints * jj + 2 * ii]; |
1446 | 0 | op[2 * jj + 1] = inp[2 * mpoints * jj + 2 * ii + 1]; |
1447 | 0 | } |
1448 | |
|
1449 | 0 | if (fpoints & (fpoints - 1)) |
1450 | 0 | ixheaacd_hbe_apply_cfftn_gen(op, scratch, fpoints, i_sign); |
1451 | 0 | else |
1452 | 0 | ixheaacd_hbe_apply_cfftn(op, scratch, fpoints, i_sign); |
1453 | |
|
1454 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1455 | 0 | inp[mpoints * 2 * jj + 2 * ii + 0] = op[2 * jj + 0]; |
1456 | 0 | inp[mpoints * 2 * jj + 2 * ii + 1] = op[2 * jj + 1]; |
1457 | 0 | } |
1458 | 0 | } |
1459 | |
|
1460 | 0 | ixheaacd_hbe_apply_tw_mult_fft(inp, op, fpoints, mpoints, ixheaac_twid_tbl_fft_288); |
1461 | |
|
1462 | 0 | for (ii = 0; ii < fpoints; ii++) { |
1463 | 0 | ixheaacd_hbe_apply_fft_3(op, scratch, i_sign); |
1464 | 0 | op = op + (mpoints * 2); |
1465 | 0 | scratch = scratch + (mpoints * 2); |
1466 | 0 | } |
1467 | |
|
1468 | 0 | scratch -= fpoints * mpoints * 2; |
1469 | |
|
1470 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1471 | 0 | inp[2 * jj + 0] = scratch[6 * jj]; |
1472 | 0 | inp[2 * jj + 1] = scratch[6 * jj + 1]; |
1473 | 0 | } |
1474 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1475 | 0 | inp[2 * fpoints + 2 * jj + 0] = scratch[6 * jj + 2]; |
1476 | 0 | inp[2 * fpoints + 2 * jj + 1] = scratch[6 * jj + 3]; |
1477 | 0 | } |
1478 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1479 | 0 | inp[4 * fpoints + 2 * jj + 0] = scratch[6 * jj + 4]; |
1480 | 0 | inp[4 * fpoints + 2 * jj + 1] = scratch[6 * jj + 5]; |
1481 | 0 | } |
1482 | 0 | } |
1483 | | |
1484 | 0 | VOID ixheaacd_hbe_apply_ifft_224(FLOAT32 *inp, FLOAT32 *scratch, WORD32 len, WORD32 i_sign) { |
1485 | 0 | WORD32 mpoints = len / 32; |
1486 | 0 | WORD32 fpoints = len / 7; |
1487 | 0 | WORD32 ii, jj; |
1488 | 0 | FLOAT32 *op = scratch; |
1489 | 0 | scratch += 2 * len; |
1490 | |
|
1491 | 0 | for (ii = 0; ii < mpoints; ii++) { |
1492 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1493 | 0 | op[2 * jj + 0] = inp[2 * mpoints * jj + 2 * ii]; |
1494 | 0 | op[2 * jj + 1] = inp[2 * mpoints * jj + 2 * ii + 1]; |
1495 | 0 | } |
1496 | |
|
1497 | 0 | if (fpoints & (fpoints - 1)) |
1498 | 0 | ixheaacd_hbe_apply_cfftn_gen(op, scratch, fpoints, i_sign); |
1499 | 0 | else |
1500 | 0 | ixheaacd_hbe_apply_cfftn(op, scratch, fpoints, i_sign); |
1501 | |
|
1502 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1503 | 0 | inp[mpoints * 2 * jj + 2 * ii + 0] = op[2 * jj + 0]; |
1504 | 0 | inp[mpoints * 2 * jj + 2 * ii + 1] = op[2 * jj + 1]; |
1505 | 0 | } |
1506 | 0 | } |
1507 | |
|
1508 | 0 | ixheaacd_hbe_apply_tw_mult_ifft(inp, op, fpoints, mpoints, ixheaac_twid_tbl_fft_224); |
1509 | |
|
1510 | 0 | for (ii = 0; ii < fpoints; ii++) { |
1511 | 0 | ixheaacd_hbe_apply_ifft_7(op, scratch); |
1512 | 0 | scratch += (mpoints * 2); |
1513 | 0 | op += (mpoints * 2); |
1514 | 0 | } |
1515 | |
|
1516 | 0 | scratch -= fpoints * mpoints * 2; |
1517 | |
|
1518 | 0 | for (jj = 0; jj < fpoints; jj++) { |
1519 | 0 | for (ii = 0; ii < mpoints; ii++) { |
1520 | 0 | inp[fpoints * ii * 2 + 2 * jj + 0] = scratch[mpoints * jj * 2 + 2 * ii + 0]; |
1521 | 0 | inp[fpoints * ii * 2 + 2 * jj + 1] = scratch[mpoints * jj * 2 + 2 * ii + 1]; |
1522 | 0 | } |
1523 | 0 | } |
1524 | 0 | } |
1525 | | |
1526 | | VOID ixheaacd_hbe_apply_ifft_336(FLOAT32 *inp, FLOAT32 *ptr_scratch, WORD32 len, |
1527 | 0 | WORD32 i_sign) { |
1528 | 0 | WORD32 i, j; |
1529 | 0 | WORD32 m_points = len / 7; |
1530 | 0 | WORD32 n_points = len / 48; |
1531 | 0 | FLOAT32 *ptr_real, *ptr_imag, *p_real_1, *p_scratch; |
1532 | 0 | ptr_real = ptr_scratch; |
1533 | 0 | ptr_scratch += 2 * len; |
1534 | 0 | ptr_imag = ptr_scratch; |
1535 | 0 | ptr_scratch += len; |
1536 | 0 | p_scratch = ptr_scratch; |
1537 | 0 | ptr_scratch += len; |
1538 | 0 | p_real_1 = ptr_scratch; |
1539 | 0 | ptr_scratch += len; |
1540 | |
|
1541 | 0 | for (i = 0; i < len; i++) { |
1542 | 0 | ptr_real[i] = inp[2 * i + 0]; |
1543 | 0 | ptr_imag[i] = inp[2 * i + 1]; |
1544 | 0 | } |
1545 | |
|
1546 | 0 | for (i = 0; i < m_points; i++) { |
1547 | 0 | for (j = 0; j < n_points; j++) { |
1548 | 0 | p_real_1[2 * j + 0] = inp[m_points * 2 * j + 2 * i + 0]; |
1549 | 0 | p_real_1[2 * j + 1] = inp[m_points * 2 * j + 2 * i + 1]; |
1550 | 0 | } |
1551 | |
|
1552 | 0 | ixheaacd_hbe_apply_ifft_7(p_real_1, ptr_scratch); |
1553 | |
|
1554 | 0 | for (j = 0; j < n_points; j++) { |
1555 | 0 | inp[m_points * 2 * j + 2 * i + 0] = ptr_scratch[2 * j + 0]; |
1556 | 0 | inp[m_points * 2 * j + 2 * i + 1] = ptr_scratch[2 * j + 1]; |
1557 | 0 | } |
1558 | 0 | } |
1559 | |
|
1560 | 0 | if (m_points == 48) |
1561 | 0 | ixheaacd_hbe_apply_tw_mult_ifft(inp, p_scratch, n_points, m_points, |
1562 | 0 | ixheaac_twid_tbl_fft_336); |
1563 | 0 | else |
1564 | 0 | ixheaacd_hbe_apply_tw_mult_ifft(inp, p_scratch, n_points, m_points, |
1565 | 0 | ixheaac_twid_tbl_fft_168); |
1566 | |
|
1567 | 0 | for (i = 0; i < len; i++) { |
1568 | 0 | ptr_real[2 * i + 0] = p_scratch[2 * i + 0]; |
1569 | 0 | ptr_real[2 * i + 1] = p_scratch[2 * i + 1]; |
1570 | 0 | } |
1571 | |
|
1572 | 0 | for (i = 0; i < n_points; i++) { |
1573 | 0 | ixheaacd_hbe_apply_cfftn_gen(ptr_real, ptr_scratch, m_points, i_sign); |
1574 | 0 | ptr_real += (2 * m_points); |
1575 | 0 | } |
1576 | |
|
1577 | 0 | ptr_real -= n_points * 2 * m_points; |
1578 | |
|
1579 | 0 | for (j = 0; j < n_points; j++) { |
1580 | 0 | for (i = 0; i < m_points; i++) { |
1581 | 0 | inp[n_points * 2 * i + 2 * j + 0] = ptr_real[2 * m_points * j + 2 * i + 0]; |
1582 | 0 | inp[n_points * 2 * i + 2 * j + 1] = ptr_real[2 * m_points * j + 2 * i + 1]; |
1583 | 0 | } |
1584 | 0 | } |
1585 | 0 | return; |
1586 | 0 | } |
1587 | | |