Coverage Report

Created: 2026-05-30 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/decoder/ixheaacd_fft.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
#include <stdlib.h>
21
#include <stdio.h>
22
23
#include "ixheaac_type_def.h"
24
#include "ixheaacd_interface.h"
25
#include "ixheaac_constants.h"
26
#include "ixheaac_basic_ops32.h"
27
#include "ixheaac_basic_ops40.h"
28
#include "ixheaacd_function_selector.h"
29
30
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
31
extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
32
extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
33
extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
34
extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
35
extern const WORD8 ixheaacd_mps_dig_rev[8];
36
37
#define PLATFORM_INLINE __inline
38
39
/*
 * DIG_REV(i, m, j): permute the bits of (i), taken as an unsigned value, and
 * store the result shifted right by (m) into (j).
 *
 * The permutation swaps adjacent 2-bit groups inside every nibble, then the
 * two nibbles inside every byte, then the two bytes inside every 16-bit half.
 * The two 16-bit halves themselves are NOT exchanged.  For values below
 * 2^16 this reverses the eight base-4 digits of the low 16 bits.
 *
 * (i) is evaluated exactly once; (j) must be an assignable lvalue.
 */
#define DIG_REV(i, m, j)                                              \
  do {                                                                \
    unsigned dig_rev_bits_ = (i);                                     \
    /* exchange neighbouring 2-bit digits */                          \
    dig_rev_bits_ = ((dig_rev_bits_ >> 2) & 0x33333333u) |            \
                    ((dig_rev_bits_ & 0x33333333u) << 2);             \
    /* exchange neighbouring nibbles */                               \
    dig_rev_bits_ = ((dig_rev_bits_ >> 4) & 0x0F0F0F0Fu) |            \
                    ((dig_rev_bits_ & 0x0F0F0F0Fu) << 4);             \
    /* exchange neighbouring bytes */                                 \
    dig_rev_bits_ = ((dig_rev_bits_ >> 8) & 0x00FF00FFu) |            \
                    ((dig_rev_bits_ & 0x00FF00FFu) << 8);             \
    (j) = dig_rev_bits_ >> (m);                                       \
  } while (0)
47
48
1.54G
/*
 * Fixed-point multiply: forms the full 64-bit product of a and b, shifts it
 * right by 31 bits and passes the result through ixheaac_sat64_32() to bring
 * it back to a WORD32.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
  const WORD64 wide_product = (WORD64)a * (WORD64)b;

  return ixheaac_sat64_32(wide_product >> 31);
}
57
58
251M
/*
 * Fixed-point multiply-accumulate: returns a + (b * c), where the product is
 * computed by ixheaacd_mult32_sat() and the addition by ixheaac_add32_sat().
 */
static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
  const WORD32 scaled_product = ixheaacd_mult32_sat(b, c);

  return ixheaac_add32_sat(a, scaled_product);
}
65
66
1.31G
/* Floating-point counterpart of the fixed-point multiply: returns a * b. */
static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) {
  return a * b;
}
73
74
243M
/*
 * Floating-point multiply-accumulate: returns a + b * c.
 * The sum and product are deliberately left in a single expression.
 */
static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) {
  return a + b * c;
}
81
82
VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
83
6.05M
                                 WORD32 npoints) {
84
6.05M
  WORD32 i, j, k;
85
6.05M
  FLOAT32 y[64], z[64];
86
6.05M
  FLOAT32 *ptr_y = y, *ptr_z = z;
87
6.05M
  const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt;
88
89
54.5M
  for (i = 0; i < npoints; i += 4) {
90
48.4M
    FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
91
48.4M
    FLOAT32 *inp = ptr_xr;
92
48.4M
    FLOAT32 tmk;
93
94
48.4M
    WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2];
95
96
48.4M
    inp += (h2);
97
98
48.4M
    x0r = *inp;
99
48.4M
    x0i = *(inp + 1);
100
48.4M
    inp += 16;
101
102
48.4M
    x1r = *inp;
103
48.4M
    x1i = *(inp + 1);
104
48.4M
    inp += 16;
105
106
48.4M
    x2r = *inp;
107
48.4M
    x2i = *(inp + 1);
108
48.4M
    inp += 16;
109
110
48.4M
    x3r = *inp;
111
48.4M
    x3i = *(inp + 1);
112
113
48.4M
    x0r = x0r + x2r;
114
48.4M
    x0i = x0i + x2i;
115
116
48.4M
    tmk = x0r - x2r;
117
48.4M
    x2r = tmk - x2r;
118
48.4M
    tmk = x0i - x2i;
119
48.4M
    x2i = tmk - x2i;
120
121
48.4M
    x1r = x1r + x3r;
122
48.4M
    x1i = x1i + x3i;
123
124
48.4M
    tmk = x1r - x3r;
125
48.4M
    x3r = tmk - x3r;
126
48.4M
    tmk = x1i - x3i;
127
48.4M
    x3i = tmk - x3i;
128
129
48.4M
    x0r = x0r + x1r;
130
48.4M
    x0i = x0i + x1i;
131
132
48.4M
    tmk = x0r - x1r;
133
48.4M
    x1r = tmk - x1r;
134
48.4M
    tmk = x0i - x1i;
135
48.4M
    x1i = tmk - x1i;
136
137
48.4M
    x2r = x2r + x3i;
138
48.4M
    x2i = x2i - x3r;
139
140
48.4M
    tmk = x2r - x3i;
141
48.4M
    x3i = tmk - x3i;
142
48.4M
    tmk = x2i + x3r;
143
48.4M
    x3r = tmk + x3r;
144
145
48.4M
    *ptr_y++ = x0r;
146
48.4M
    *ptr_y++ = x0i;
147
48.4M
    *ptr_y++ = x2r;
148
48.4M
    *ptr_y++ = x2i;
149
48.4M
    *ptr_y++ = x1r;
150
48.4M
    *ptr_y++ = x1i;
151
48.4M
    *ptr_y++ = x3i;
152
48.4M
    *ptr_y++ = x3r;
153
154
48.4M
    inp = ptr_xi;
155
156
48.4M
    inp += (h2);
157
158
48.4M
    x0r = *inp;
159
48.4M
    x0i = *(inp + 1);
160
48.4M
    inp += 16;
161
162
48.4M
    x1r = *inp;
163
48.4M
    x1i = *(inp + 1);
164
48.4M
    inp += 16;
165
166
48.4M
    x2r = *inp;
167
48.4M
    x2i = *(inp + 1);
168
48.4M
    inp += 16;
169
170
48.4M
    x3r = *inp;
171
48.4M
    x3i = *(inp + 1);
172
173
48.4M
    x0r = x0r + x2r;
174
48.4M
    x0i = x0i + x2i;
175
176
48.4M
    tmk = x0r - x2r;
177
48.4M
    x2r = tmk - x2r;
178
48.4M
    tmk = x0i - x2i;
179
48.4M
    x2i = tmk - x2i;
180
181
48.4M
    x1r = x1r + x3r;
182
48.4M
    x1i = x1i + x3i;
183
184
48.4M
    tmk = x1r - x3r;
185
48.4M
    x3r = tmk - x3r;
186
48.4M
    tmk = x1i - x3i;
187
48.4M
    x3i = tmk - x3i;
188
189
48.4M
    x0r = x0r + x1r;
190
48.4M
    x0i = x0i + x1i;
191
192
48.4M
    tmk = x0r - x1r;
193
48.4M
    x1r = tmk - x1r;
194
48.4M
    tmk = x0i - x1i;
195
48.4M
    x1i = tmk - x1i;
196
197
48.4M
    x2r = x2r + x3i;
198
48.4M
    x2i = x2i - x3r;
199
200
48.4M
    tmk = x2r - x3i;
201
48.4M
    x3i = tmk - x3i;
202
48.4M
    tmk = x2i + x3r;
203
48.4M
    x3r = tmk + x3r;
204
205
48.4M
    *ptr_z++ = x0r;
206
48.4M
    *ptr_z++ = x0i;
207
48.4M
    *ptr_z++ = x2r;
208
48.4M
    *ptr_z++ = x2i;
209
48.4M
    *ptr_z++ = x1r;
210
48.4M
    *ptr_z++ = x1i;
211
48.4M
    *ptr_z++ = x3i;
212
48.4M
    *ptr_z++ = x3r;
213
48.4M
  }
214
6.05M
  ptr_y -= 64;
215
6.05M
  ptr_z -= 64;
216
6.05M
  {
217
6.05M
    FLOAT32 *data_r = ptr_y;
218
6.05M
    FLOAT32 *data_i = ptr_z;
219
18.1M
    for (k = 2; k != 0; k--) {
220
12.1M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
221
222
12.1M
      x0r = (*data_r);
223
12.1M
      x0i = (*(data_r + 1));
224
12.1M
      data_r += 8;
225
226
12.1M
      x1r = (*data_r);
227
12.1M
      x1i = (*(data_r + 1));
228
12.1M
      data_r += 8;
229
230
12.1M
      x2r = (*data_r);
231
12.1M
      x2i = (*(data_r + 1));
232
12.1M
      data_r += 8;
233
234
12.1M
      x3r = (*data_r);
235
12.1M
      x3i = (*(data_r + 1));
236
12.1M
      data_r -= 24;
237
238
12.1M
      x0r = x0r + x2r;
239
12.1M
      x0i = x0i + x2i;
240
12.1M
      x2r = x0r - (x2r * 2);
241
12.1M
      x2i = x0i - (x2i * 2);
242
12.1M
      x1r = x1r + x3r;
243
12.1M
      x1i = x1i + x3i;
244
12.1M
      x3r = x1r - (x3r * 2);
245
12.1M
      x3i = x1i - (x3i * 2);
246
247
12.1M
      x0r = x0r + x1r;
248
12.1M
      x0i = x0i + x1i;
249
12.1M
      x1r = x0r - (x1r * 2);
250
12.1M
      x1i = x0i - (x1i * 2);
251
12.1M
      x2r = x2r + x3i;
252
12.1M
      x2i = x2i - x3r;
253
12.1M
      x3i = x2r - (x3i * 2);
254
12.1M
      x3r = x2i + (x3r * 2);
255
256
12.1M
      *data_r = x0r;
257
12.1M
      *(data_r + 1) = x0i;
258
12.1M
      data_r += 8;
259
260
12.1M
      *data_r = x2r;
261
12.1M
      *(data_r + 1) = x2i;
262
12.1M
      data_r += 8;
263
264
12.1M
      *data_r = x1r;
265
12.1M
      *(data_r + 1) = x1i;
266
12.1M
      data_r += 8;
267
268
12.1M
      *data_r = x3i;
269
12.1M
      *(data_r + 1) = x3r;
270
12.1M
      data_r += 8;
271
272
12.1M
      x0r = (*data_i);
273
12.1M
      x0i = (*(data_i + 1));
274
12.1M
      data_i += 8;
275
276
12.1M
      x1r = (*data_i);
277
12.1M
      x1i = (*(data_i + 1));
278
12.1M
      data_i += 8;
279
280
12.1M
      x2r = (*data_i);
281
12.1M
      x2i = (*(data_i + 1));
282
12.1M
      data_i += 8;
283
284
12.1M
      x3r = (*data_i);
285
12.1M
      x3i = (*(data_i + 1));
286
12.1M
      data_i -= 24;
287
288
12.1M
      x0r = x0r + x2r;
289
12.1M
      x0i = x0i + x2i;
290
12.1M
      x2r = x0r - (x2r * 2);
291
12.1M
      x2i = x0i - (x2i * 2);
292
12.1M
      x1r = x1r + x3r;
293
12.1M
      x1i = x1i + x3i;
294
12.1M
      x3r = x1r - (x3r * 2);
295
12.1M
      x3i = x1i - (x3i * 2);
296
297
12.1M
      x0r = x0r + x1r;
298
12.1M
      x0i = x0i + x1i;
299
12.1M
      x1r = x0r - (x1r * 2);
300
12.1M
      x1i = x0i - (x1i * 2);
301
12.1M
      x2r = x2r + x3i;
302
12.1M
      x2i = x2i - x3r;
303
12.1M
      x3i = x2r - (x3i * 2);
304
12.1M
      x3r = x2i + (x3r * 2);
305
306
12.1M
      *data_i = x0r;
307
12.1M
      *(data_i + 1) = x0i;
308
12.1M
      data_i += 8;
309
310
12.1M
      *data_i = x2r;
311
12.1M
      *(data_i + 1) = x2i;
312
12.1M
      data_i += 8;
313
314
12.1M
      *data_i = x1r;
315
12.1M
      *(data_i + 1) = x1i;
316
12.1M
      data_i += 8;
317
318
12.1M
      *data_i = x3i;
319
12.1M
      *(data_i + 1) = x3r;
320
12.1M
      data_i += 8;
321
12.1M
    }
322
6.05M
    data_r = ptr_y + 2;
323
6.05M
    data_i = ptr_z + 2;
324
325
18.1M
    for (k = 2; k != 0; k--) {
326
12.1M
      FLOAT32 tmp;
327
12.1M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
328
329
12.1M
      data_r += 8;
330
331
12.1M
      x1r = *data_r;
332
12.1M
      x1i = *(data_r + 1);
333
12.1M
      data_r += 8;
334
335
12.1M
      x2r = *data_r;
336
12.1M
      x2i = *(data_r + 1);
337
12.1M
      data_r += 8;
338
339
12.1M
      x3r = *data_r;
340
12.1M
      x3i = *(data_r + 1);
341
12.1M
      data_r -= 24;
342
343
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
344
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
345
12.1M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
346
12.1M
                                   (FLOAT32)x1i, 0.923880f);
347
12.1M
      x1r = tmp;
348
349
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
350
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
351
12.1M
      x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
352
12.1M
                                   (FLOAT32)x2i, 0.707107f);
353
12.1M
      x2r = tmp;
354
355
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
356
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
357
12.1M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
358
12.1M
                                   (FLOAT32)x3i, 0.382683f);
359
12.1M
      x3r = tmp;
360
361
12.1M
      x0r = (*data_r);
362
12.1M
      x0i = (*(data_r + 1));
363
364
12.1M
      x0r = x0r + (x2r);
365
12.1M
      x0i = x0i + (x2i);
366
12.1M
      x2r = x0r - (x2r * 2);
367
12.1M
      x2i = x0i - (x2i * 2);
368
12.1M
      x1r = x1r + x3r;
369
12.1M
      x1i = x1i + x3i;
370
12.1M
      x3r = x1r - (x3r * 2);
371
12.1M
      x3i = x1i - (x3i * 2);
372
373
12.1M
      x0r = x0r + (x1r);
374
12.1M
      x0i = x0i + (x1i);
375
12.1M
      x1r = x0r - (x1r * 2);
376
12.1M
      x1i = x0i - (x1i * 2);
377
12.1M
      x2r = x2r + (x3i);
378
12.1M
      x2i = x2i - (x3r);
379
12.1M
      x3i = x2r - (x3i * 2);
380
12.1M
      x3r = x2i + (x3r * 2);
381
382
12.1M
      *data_r = x0r;
383
12.1M
      *(data_r + 1) = x0i;
384
12.1M
      data_r += 8;
385
386
12.1M
      *data_r = x2r;
387
12.1M
      *(data_r + 1) = x2i;
388
12.1M
      data_r += 8;
389
390
12.1M
      *data_r = x1r;
391
12.1M
      *(data_r + 1) = x1i;
392
12.1M
      data_r += 8;
393
394
12.1M
      *data_r = x3i;
395
12.1M
      *(data_r + 1) = x3r;
396
12.1M
      data_r += 8;
397
12.1M
      data_i += 8;
398
399
12.1M
      x1r = *data_i;
400
12.1M
      x1i = *(data_i + 1);
401
12.1M
      data_i += 8;
402
403
12.1M
      x2r = *data_i;
404
12.1M
      x2i = *(data_i + 1);
405
12.1M
      data_i += 8;
406
407
12.1M
      x3r = *data_i;
408
12.1M
      x3i = *(data_i + 1);
409
12.1M
      data_i -= 24;
410
411
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
412
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
413
12.1M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
414
12.1M
                                   (FLOAT32)x1i, 0.923880f);
415
12.1M
      x1r = tmp;
416
417
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
418
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
419
12.1M
      x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
420
12.1M
                                   (FLOAT32)x2i, 0.707107f);
421
12.1M
      x2r = tmp;
422
423
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
424
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
425
12.1M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
426
12.1M
                                   (FLOAT32)x3i, 0.382683f);
427
12.1M
      x3r = tmp;
428
429
12.1M
      x0r = (*data_i);
430
12.1M
      x0i = (*(data_i + 1));
431
432
12.1M
      x0r = x0r + (x2r);
433
12.1M
      x0i = x0i + (x2i);
434
12.1M
      x2r = x0r - (x2r * 2);
435
12.1M
      x2i = x0i - (x2i * 2);
436
12.1M
      x1r = x1r + x3r;
437
12.1M
      x1i = x1i + x3i;
438
12.1M
      x3r = x1r - (x3r * 2);
439
12.1M
      x3i = x1i - (x3i * 2);
440
441
12.1M
      x0r = x0r + (x1r);
442
12.1M
      x0i = x0i + (x1i);
443
12.1M
      x1r = x0r - (x1r * 2);
444
12.1M
      x1i = x0i - (x1i * 2);
445
12.1M
      x2r = x2r + (x3i);
446
12.1M
      x2i = x2i - (x3r);
447
12.1M
      x3i = x2r - (x3i * 2);
448
12.1M
      x3r = x2i + (x3r * 2);
449
450
12.1M
      *data_i = x0r;
451
12.1M
      *(data_i + 1) = x0i;
452
12.1M
      data_i += 8;
453
454
12.1M
      *data_i = x2r;
455
12.1M
      *(data_i + 1) = x2i;
456
12.1M
      data_i += 8;
457
458
12.1M
      *data_i = x1r;
459
12.1M
      *(data_i + 1) = x1i;
460
12.1M
      data_i += 8;
461
462
12.1M
      *data_i = x3i;
463
12.1M
      *(data_i + 1) = x3r;
464
12.1M
      data_i += 8;
465
12.1M
    }
466
6.05M
    data_r -= 62;
467
6.05M
    data_i -= 62;
468
18.1M
    for (k = 2; k != 0; k--) {
469
12.1M
      FLOAT32 tmp;
470
12.1M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
471
472
12.1M
      data_r += 8;
473
474
12.1M
      x1r = *data_r;
475
12.1M
      x1i = *(data_r + 1);
476
12.1M
      data_r += 8;
477
478
12.1M
      x2r = *data_r;
479
12.1M
      x2i = *(data_r + 1);
480
12.1M
      data_r += 8;
481
482
12.1M
      x3r = *data_r;
483
12.1M
      x3i = *(data_r + 1);
484
12.1M
      data_r -= 24;
485
486
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
487
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
488
12.1M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
489
12.1M
                                   (FLOAT32)x1i, 0.707107f);
490
12.1M
      x1r = tmp;
491
492
12.1M
      tmp = x2i;
493
12.1M
      x2i = -x2r;
494
12.1M
      x2r = tmp;
495
496
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
497
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
498
12.1M
      x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
499
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
500
12.1M
      x3r = tmp;
501
502
12.1M
      x0r = (*data_r);
503
12.1M
      x0i = (*(data_r + 1));
504
505
12.1M
      x0r = x0r + (x2r);
506
12.1M
      x0i = x0i + (x2i);
507
12.1M
      x2r = x0r - (x2r * 2);
508
12.1M
      x2i = x0i - (x2i * 2);
509
12.1M
      x1r = x1r + x3r;
510
12.1M
      x1i = x1i + x3i;
511
12.1M
      x3r = x1r - (x3r * 2);
512
12.1M
      x3i = x1i - (x3i * 2);
513
514
12.1M
      x0r = x0r + (x1r);
515
12.1M
      x0i = x0i + (x1i);
516
12.1M
      x1r = x0r - (x1r * 2);
517
12.1M
      x1i = x0i - (x1i * 2);
518
12.1M
      x2r = x2r + (x3i);
519
12.1M
      x2i = x2i - (x3r);
520
12.1M
      x3i = x2r - (x3i * 2);
521
12.1M
      x3r = x2i + (x3r * 2);
522
523
12.1M
      *data_r = x0r;
524
12.1M
      *(data_r + 1) = x0i;
525
12.1M
      data_r += 8;
526
527
12.1M
      *data_r = x2r;
528
12.1M
      *(data_r + 1) = x2i;
529
12.1M
      data_r += 8;
530
531
12.1M
      *data_r = x1r;
532
12.1M
      *(data_r + 1) = x1i;
533
12.1M
      data_r += 8;
534
535
12.1M
      *data_r = x3i;
536
12.1M
      *(data_r + 1) = x3r;
537
12.1M
      data_r += 8;
538
12.1M
      data_i += 8;
539
540
12.1M
      x1r = *data_i;
541
12.1M
      x1i = *(data_i + 1);
542
12.1M
      data_i += 8;
543
544
12.1M
      x2r = *data_i;
545
12.1M
      x2i = *(data_i + 1);
546
12.1M
      data_i += 8;
547
548
12.1M
      x3r = *data_i;
549
12.1M
      x3i = *(data_i + 1);
550
12.1M
      data_i -= 24;
551
552
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
553
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
554
12.1M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
555
12.1M
                                   (FLOAT32)x1i, 0.707107f);
556
12.1M
      x1r = tmp;
557
558
12.1M
      tmp = x2i;
559
12.1M
      x2i = -x2r;
560
12.1M
      x2r = tmp;
561
562
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
563
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
564
12.1M
      x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
565
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
566
12.1M
      x3r = tmp;
567
568
12.1M
      x0r = (*data_i);
569
12.1M
      x0i = (*(data_i + 1));
570
571
12.1M
      x0r = x0r + (x2r);
572
12.1M
      x0i = x0i + (x2i);
573
12.1M
      x2r = x0r - (x2r * 2);
574
12.1M
      x2i = x0i - (x2i * 2);
575
12.1M
      x1r = x1r + x3r;
576
12.1M
      x1i = x1i + x3i;
577
12.1M
      x3r = x1r - (x3r * 2);
578
12.1M
      x3i = x1i - (x3i * 2);
579
580
12.1M
      x0r = x0r + (x1r);
581
12.1M
      x0i = x0i + (x1i);
582
12.1M
      x1r = x0r - (x1r * 2);
583
12.1M
      x1i = x0i - (x1i * 2);
584
12.1M
      x2r = x2r + (x3i);
585
12.1M
      x2i = x2i - (x3r);
586
12.1M
      x3i = x2r - (x3i * 2);
587
12.1M
      x3r = x2i + (x3r * 2);
588
589
12.1M
      *data_i = x0r;
590
12.1M
      *(data_i + 1) = x0i;
591
12.1M
      data_i += 8;
592
593
12.1M
      *data_i = x2r;
594
12.1M
      *(data_i + 1) = x2i;
595
12.1M
      data_i += 8;
596
597
12.1M
      *data_i = x1r;
598
12.1M
      *(data_i + 1) = x1i;
599
12.1M
      data_i += 8;
600
601
12.1M
      *data_i = x3i;
602
12.1M
      *(data_i + 1) = x3r;
603
12.1M
      data_i += 8;
604
12.1M
    }
605
6.05M
    data_r -= 62;
606
6.05M
    data_i -= 62;
607
18.1M
    for (k = 2; k != 0; k--) {
608
12.1M
      FLOAT32 tmp;
609
12.1M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
610
611
12.1M
      data_r += 8;
612
613
12.1M
      x1r = *data_r;
614
12.1M
      x1i = *(data_r + 1);
615
12.1M
      data_r += 8;
616
617
12.1M
      x2r = *data_r;
618
12.1M
      x2i = *(data_r + 1);
619
12.1M
      data_r += 8;
620
621
12.1M
      x3r = *data_r;
622
12.1M
      x3i = *(data_r + 1);
623
12.1M
      data_r -= 24;
624
625
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
626
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
627
12.1M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
628
12.1M
                                   (FLOAT32)x1i, 0.382683f);
629
12.1M
      x1r = tmp;
630
631
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
632
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
633
12.1M
      x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
634
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
635
12.1M
      x2r = tmp;
636
637
12.1M
      tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
638
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
639
12.1M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
640
12.1M
                                   (FLOAT32)x3i, 0.923880f);
641
12.1M
      x3r = tmp;
642
643
12.1M
      x0r = (*data_r);
644
12.1M
      x0i = (*(data_r + 1));
645
646
12.1M
      x0r = x0r + (x2r);
647
12.1M
      x0i = x0i + (x2i);
648
12.1M
      x2r = x0r - (x2r * 2);
649
12.1M
      x2i = x0i - (x2i * 2);
650
12.1M
      x1r = x1r + x3r;
651
12.1M
      x1i = x1i - x3i;
652
12.1M
      x3r = x1r - (x3r * 2);
653
12.1M
      x3i = x1i + (x3i * 2);
654
655
12.1M
      x0r = x0r + (x1r);
656
12.1M
      x0i = x0i + (x1i);
657
12.1M
      x1r = x0r - (x1r * 2);
658
12.1M
      x1i = x0i - (x1i * 2);
659
12.1M
      x2r = x2r + (x3i);
660
12.1M
      x2i = x2i - (x3r);
661
12.1M
      x3i = x2r - (x3i * 2);
662
12.1M
      x3r = x2i + (x3r * 2);
663
664
12.1M
      *data_r = x0r;
665
12.1M
      *(data_r + 1) = x0i;
666
12.1M
      data_r += 8;
667
668
12.1M
      *data_r = x2r;
669
12.1M
      *(data_r + 1) = x2i;
670
12.1M
      data_r += 8;
671
672
12.1M
      *data_r = x1r;
673
12.1M
      *(data_r + 1) = x1i;
674
12.1M
      data_r += 8;
675
676
12.1M
      *data_r = x3i;
677
12.1M
      *(data_r + 1) = x3r;
678
12.1M
      data_r += 8;
679
12.1M
      data_i += 8;
680
681
12.1M
      x1r = *data_i;
682
12.1M
      x1i = *(data_i + 1);
683
12.1M
      data_i += 8;
684
685
12.1M
      x2r = *data_i;
686
12.1M
      x2i = *(data_i + 1);
687
12.1M
      data_i += 8;
688
689
12.1M
      x3r = *data_i;
690
12.1M
      x3i = *(data_i + 1);
691
12.1M
      data_i -= 24;
692
693
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
694
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
695
12.1M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
696
12.1M
                                   (FLOAT32)x1i, 0.382683f);
697
12.1M
      x1r = tmp;
698
699
12.1M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
700
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
701
12.1M
      x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
702
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
703
12.1M
      x2r = tmp;
704
705
12.1M
      tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
706
12.1M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
707
12.1M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
708
12.1M
                                   (FLOAT32)x3i, 0.923880f);
709
12.1M
      x3r = tmp;
710
711
12.1M
      x0r = (*data_i);
712
12.1M
      x0i = (*(data_i + 1));
713
714
12.1M
      x0r = x0r + (x2r);
715
12.1M
      x0i = x0i + (x2i);
716
12.1M
      x2r = x0r - (x2r * 2);
717
12.1M
      x2i = x0i - (x2i * 2);
718
12.1M
      x1r = x1r + x3r;
719
12.1M
      x1i = x1i - x3i;
720
12.1M
      x3r = x1r - (x3r * 2);
721
12.1M
      x3i = x1i + (x3i * 2);
722
723
12.1M
      x0r = x0r + (x1r);
724
12.1M
      x0i = x0i + (x1i);
725
12.1M
      x1r = x0r - (x1r * 2);
726
12.1M
      x1i = x0i - (x1i * 2);
727
12.1M
      x2r = x2r + (x3i);
728
12.1M
      x2i = x2i - (x3r);
729
12.1M
      x3i = x2r - (x3i * 2);
730
12.1M
      x3r = x2i + (x3r * 2);
731
732
12.1M
      *data_i = x0r;
733
12.1M
      *(data_i + 1) = x0i;
734
12.1M
      data_i += 8;
735
736
12.1M
      *data_i = x2r;
737
12.1M
      *(data_i + 1) = x2i;
738
12.1M
      data_i += 8;
739
740
12.1M
      *data_i = x1r;
741
12.1M
      *(data_i + 1) = x1i;
742
12.1M
      data_i += 8;
743
744
12.1M
      *data_i = x3i;
745
12.1M
      *(data_i + 1) = x3r;
746
12.1M
      data_i += 8;
747
12.1M
    }
748
6.05M
    data_r -= 62;
749
6.05M
    data_i -= 62;
750
6.05M
  }
751
6.05M
  {
752
6.05M
    const FLOAT32 *twiddles = ptr_w;
753
6.05M
    FLOAT32 x0r, x0i, x1r, x1i;
754
54.5M
    for (j = 8; j != 0; j--) {
755
48.4M
      FLOAT32 W1 = *twiddles;
756
48.4M
      twiddles++;
757
48.4M
      FLOAT32 W4 = *twiddles;
758
48.4M
      twiddles++;
759
48.4M
      FLOAT32 tmp;
760
761
48.4M
      x0r = *ptr_y;
762
48.4M
      x0i = *(ptr_y + 1);
763
48.4M
      ptr_y += 32;
764
48.4M
      ptr_xr += 32;
765
766
48.4M
      x1r = *ptr_y;
767
48.4M
      x1i = *(ptr_y + 1);
768
769
48.4M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
770
48.4M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
771
48.4M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
772
48.4M
                                   (FLOAT32)x1i, W1);
773
48.4M
      x1r = tmp;
774
775
48.4M
      *ptr_xr = (x0r) - (x1r);
776
48.4M
      *(ptr_xr + 1) = (x0i) - (x1i);
777
48.4M
      ptr_y -= 32;
778
48.4M
      ptr_xr -= 32;
779
780
48.4M
      *ptr_xr = (x0r) + (x1r);
781
48.4M
      *(ptr_xr + 1) = (x0i) + (x1i);
782
48.4M
      ptr_y += 2;
783
48.4M
      ptr_xr += 2;
784
785
48.4M
      x0r = *ptr_z;
786
48.4M
      x0i = *(ptr_z + 1);
787
48.4M
      ptr_z += 32;
788
48.4M
      ptr_xi += 32;
789
790
48.4M
      x1r = *ptr_z;
791
48.4M
      x1i = *(ptr_z + 1);
792
793
48.4M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
794
48.4M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
795
48.4M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
796
48.4M
                                   (FLOAT32)x1i, W1);
797
48.4M
      x1r = tmp;
798
799
48.4M
      *ptr_xi = (x0r) - (x1r);
800
48.4M
      *(ptr_xi + 1) = (x0i) - (x1i);
801
48.4M
      ptr_z -= 32;
802
48.4M
      ptr_xi -= 32;
803
804
48.4M
      *ptr_xi = (x0r) + (x1r);
805
48.4M
      *(ptr_xi + 1) = (x0i) + (x1i);
806
48.4M
      ptr_z += 2;
807
48.4M
      ptr_xi += 2;
808
48.4M
    }
809
6.05M
    twiddles = ptr_w;
810
54.5M
    for (j = 8; j != 0; j--) {
811
48.4M
      FLOAT32 W1 = *twiddles;
812
48.4M
      twiddles++;
813
48.4M
      FLOAT32 W4 = *twiddles;
814
48.4M
      twiddles++;
815
48.4M
      FLOAT32 tmp;
816
817
48.4M
      x0r = *ptr_y;
818
48.4M
      x0i = *(ptr_y + 1);
819
48.4M
      ptr_y += 32;
820
48.4M
      ptr_xr += 32;
821
822
48.4M
      x1r = *ptr_y;
823
48.4M
      x1i = *(ptr_y + 1);
824
825
48.4M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
826
48.4M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W1));
827
48.4M
      x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
828
48.4M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
829
48.4M
      x1r = tmp;
830
831
48.4M
      *ptr_xr = (x0r) - (x1r);
832
48.4M
      *(ptr_xr + 1) = (x0i) - (x1i);
833
48.4M
      ptr_y -= 32;
834
48.4M
      ptr_xr -= 32;
835
836
48.4M
      *ptr_xr = (x0r) + (x1r);
837
48.4M
      *(ptr_xr + 1) = (x0i) + (x1i);
838
48.4M
      ptr_y += 2;
839
48.4M
      ptr_xr += 2;
840
841
48.4M
      x0r = *ptr_z;
842
48.4M
      x0i = *(ptr_z + 1);
843
48.4M
      ptr_z += 32;
844
48.4M
      ptr_xi += 32;
845
846
48.4M
      x1r = *ptr_z;
847
48.4M
      x1i = *(ptr_z + 1);
848
849
48.4M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
850
48.4M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W1));
851
48.4M
      x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
852
48.4M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
853
48.4M
      x1r = tmp;
854
855
48.4M
      *ptr_xi = (x0r) - (x1r);
856
48.4M
      *(ptr_xi + 1) = (x0i) - (x1i);
857
48.4M
      ptr_z -= 32;
858
48.4M
      ptr_xi -= 32;
859
860
48.4M
      *ptr_xi = (x0r) + (x1r);
861
48.4M
      *(ptr_xi + 1) = (x0i) + (x1i);
862
48.4M
      ptr_z += 2;
863
48.4M
      ptr_xi += 2;
864
48.4M
    }
865
6.05M
  }
866
6.05M
}
867
868
24.0k
VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
869
24.0k
  WORD32 i, j, k, n_stages, h2;
870
24.0k
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
871
24.0k
  WORD32 del, nodespacing, in_loop_cnt;
872
24.0k
  WORD32 dig_rev_shift;
873
24.0k
  WORD32 not_power_4;
874
24.0k
  FLOAT32 ptr_x[256];
875
24.0k
  FLOAT32 y[256];
876
24.0k
  WORD32 npoints = nlength;
877
24.0k
  FLOAT32 *ptr_y = y;
878
24.0k
  const FLOAT32 *ptr_w;
879
24.0k
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
880
24.0k
  n_stages = 30 - ixheaac_norm32(npoints);
881
24.0k
  not_power_4 = n_stages & 1;
882
883
24.0k
  n_stages = n_stages >> 1;
884
885
886
1.56M
  for (i = 0; i<nlength; i++)
887
1.54M
  {
888
1.54M
    ptr_x[2 * i] = xr[i];
889
1.54M
    ptr_x[2 * i + 1] = xi[i];
890
1.54M
  }
891
892
24.0k
  ptr_w = ixheaacd_twiddle_table_fft;
893
894
409k
  for (i = 0; i<npoints; i += 4)
895
385k
  {
896
385k
    FLOAT32 *inp = ptr_x;
897
898
385k
    DIG_REV(i, dig_rev_shift, h2);
899
385k
    if (not_power_4)
900
0
    {
901
0
      h2 += 1;
902
0
      h2 &= ~1;
903
0
    }
904
385k
    inp += (h2);
905
906
385k
    x0r = *inp;
907
385k
    x0i = *(inp + 1);
908
385k
    inp += (npoints >> 1);
909
910
385k
    x1r = *inp;
911
385k
    x1i = *(inp + 1);
912
385k
    inp += (npoints >> 1);
913
914
385k
    x2r = *inp;
915
385k
    x2i = *(inp + 1);
916
385k
    inp += (npoints >> 1);
917
918
385k
    x3r = *inp;
919
385k
    x3i = *(inp + 1);
920
921
385k
    x0r = x0r + x2r;
922
385k
    x0i = x0i + x2i;
923
385k
    x2r = x0r - (x2r * 2);
924
385k
    x2i = x0i - (x2i * 2);
925
385k
    x1r = x1r + x3r;
926
385k
    x1i = x1i + x3i;
927
385k
    x3r = x1r - (x3r * 2);
928
385k
    x3i = x1i - (x3i * 2);
929
930
385k
    x0r = x0r + x1r;
931
385k
    x0i = x0i + x1i;
932
385k
    x1r = x0r - (x1r * 2);
933
385k
    x1i = x0i - (x1i * 2);
934
385k
    x2r = x2r + x3i;
935
385k
    x2i = x2i - x3r;
936
385k
    x3i = x2r - (x3i * 2);
937
385k
    x3r = x2i + (x3r * 2);
938
939
385k
    *ptr_y++ = x0r;
940
385k
    *ptr_y++ = x0i;
941
385k
    *ptr_y++ = x2r;
942
385k
    *ptr_y++ = x2i;
943
385k
    *ptr_y++ = x1r;
944
385k
    *ptr_y++ = x1i;
945
385k
    *ptr_y++ = x3i;
946
385k
    *ptr_y++ = x3r;
947
385k
  }
948
24.0k
  ptr_y -= 2 * npoints;
949
24.0k
  del = 4;
950
24.0k
  nodespacing = 64;
951
24.0k
  in_loop_cnt = npoints >> 4;
952
72.2k
  for (i = n_stages - 1; i>0; i--)
953
48.1k
  {
954
48.1k
    const FLOAT32 *twiddles = ptr_w;
955
48.1k
    FLOAT32 *data = ptr_y;
956
48.1k
    FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
957
48.1k
    WORD32 sec_loop_cnt;
958
959
168k
    for (k = in_loop_cnt; k != 0; k--)
960
120k
    {
961
120k
      x0r = (*data);
962
120k
      x0i = (*(data + 1));
963
120k
      data += (del << 1);
964
965
120k
      x1r = (*data);
966
120k
      x1i = (*(data + 1));
967
120k
      data += (del << 1);
968
969
120k
      x2r = (*data);
970
120k
      x2i = (*(data + 1));
971
120k
      data += (del << 1);
972
973
120k
      x3r = (*data);
974
120k
      x3i = (*(data + 1));
975
120k
      data -= 3 * (del << 1);
976
977
120k
      x0r = x0r + x2r;
978
120k
      x0i = x0i + x2i;
979
120k
      x2r = x0r - (x2r * 2);
980
120k
      x2i = x0i - (x2i * 2);
981
120k
      x1r = x1r + x3r;
982
120k
      x1i = x1i + x3i;
983
120k
      x3r = x1r - (x3r * 2);
984
120k
      x3i = x1i - (x3i * 2);
985
986
120k
      x0r = x0r + x1r;
987
120k
      x0i = x0i + x1i;
988
120k
      x1r = x0r - (x1r * 2);
989
120k
      x1i = x0i - (x1i * 2);
990
120k
      x2r = x2r + x3i;
991
120k
      x2i = x2i - x3r;
992
120k
      x3i = x2r - (x3i * 2);
993
120k
      x3r = x2i + (x3r * 2);
994
995
120k
      *data = x0r;
996
120k
      *(data + 1) = x0i;
997
120k
      data += (del << 1);
998
999
120k
      *data = x2r;
1000
120k
      *(data + 1) = x2i;
1001
120k
      data += (del << 1);
1002
1003
120k
      *data = x1r;
1004
120k
      *(data + 1) = x1i;
1005
120k
      data += (del << 1);
1006
1007
120k
      *data = x3i;
1008
120k
      *(data + 1) = x3r;
1009
120k
      data += (del << 1);
1010
120k
    }
1011
48.1k
    data = ptr_y + 2;
1012
1013
48.1k
    sec_loop_cnt = (nodespacing * del);
1014
48.1k
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \
1015
48.1k
            + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \
1016
48.1k
            - (sec_loop_cnt / 256);
1017
48.1k
    j = nodespacing;
1018
1019
192k
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
1020
144k
    {
1021
144k
      w1h = *(twiddles + 2 * j);
1022
144k
      w1l = *(twiddles + 2 * j + 1);
1023
144k
      w2h = *(twiddles + 2 * (j << 1));
1024
144k
      w2l = *(twiddles + 2 * (j << 1) + 1);
1025
144k
      w3h = *(twiddles + 2 * j + 2 * (j << 1));
1026
144k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1027
1028
361k
      for (k = in_loop_cnt; k != 0; k--)
1029
216k
      {
1030
216k
        FLOAT32 tmp;
1031
216k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1032
1033
216k
        data += (del << 1);
1034
1035
216k
        x1r = *data;
1036
216k
        x1i = *(data + 1);
1037
216k
        data += (del << 1);
1038
1039
216k
        x2r = *data;
1040
216k
        x2i = *(data + 1);
1041
216k
        data += (del << 1);
1042
1043
216k
        x3r = *data;
1044
216k
        x3i = *(data + 1);
1045
216k
        data -= 3 * (del << 1);
1046
1047
216k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1048
216k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1049
216k
        x1r = tmp;
1050
1051
216k
        tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1052
216k
        x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1053
216k
        x2r = tmp;
1054
1055
216k
        tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
1056
216k
        x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1057
216k
        x3r = tmp;
1058
1059
216k
        x0r = (*data);
1060
216k
        x0i = (*(data + 1));
1061
1062
216k
        x0r = x0r + (x2r);
1063
216k
        x0i = x0i + (x2i);
1064
216k
        x2r = x0r - (x2r * 2);
1065
216k
        x2i = x0i - (x2i * 2);
1066
216k
        x1r = x1r + x3r;
1067
216k
        x1i = x1i + x3i;
1068
216k
        x3r = x1r - (x3r * 2);
1069
216k
        x3i = x1i - (x3i * 2);
1070
1071
216k
        x0r = x0r + (x1r);
1072
216k
        x0i = x0i + (x1i);
1073
216k
        x1r = x0r - (x1r * 2);
1074
216k
        x1i = x0i - (x1i * 2);
1075
216k
        x2r = x2r + (x3i);
1076
216k
        x2i = x2i - (x3r);
1077
216k
        x3i = x2r - (x3i * 2);
1078
216k
        x3r = x2i + (x3r * 2);
1079
1080
216k
        *data = x0r;
1081
216k
        *(data + 1) = x0i;
1082
216k
        data += (del << 1);
1083
1084
216k
        *data = x2r;
1085
216k
        *(data + 1) = x2i;
1086
216k
        data += (del << 1);
1087
1088
216k
        *data = x1r;
1089
216k
        *(data + 1) = x1i;
1090
216k
        data += (del << 1);
1091
1092
216k
        *data = x3i;
1093
216k
        *(data + 1) = x3r;
1094
216k
        data += (del << 1);
1095
216k
      }
1096
144k
      data -= 2 * npoints;
1097
144k
      data += 2;
1098
144k
    }
1099
144k
    for (; j <= (nodespacing * del) >> 1; j += nodespacing)
1100
96.2k
    {
1101
96.2k
      w1h = *(twiddles + 2 * j);
1102
96.2k
      w2h = *(twiddles + 2 * (j << 1));
1103
96.2k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1104
96.2k
      w1l = *(twiddles + 2 * j + 1);
1105
96.2k
      w2l = *(twiddles + 2 * (j << 1) + 1);
1106
96.2k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1107
1108
264k
      for (k = in_loop_cnt; k != 0; k--)
1109
168k
      {
1110
168k
        FLOAT32 tmp;
1111
168k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1112
1113
168k
        data += (del << 1);
1114
1115
168k
        x1r = *data;
1116
168k
        x1i = *(data + 1);
1117
168k
        data += (del << 1);
1118
1119
168k
        x2r = *data;
1120
168k
        x2i = *(data + 1);
1121
168k
        data += (del << 1);
1122
1123
168k
        x3r = *data;
1124
168k
        x3i = *(data + 1);
1125
168k
        data -= 3 * (del << 1);
1126
1127
168k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1128
168k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1129
168k
        x1r = tmp;
1130
1131
168k
        tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1132
168k
        x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1133
168k
        x2r = tmp;
1134
1135
168k
        tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1136
168k
        x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1137
168k
        x3r = tmp;
1138
1139
168k
        x0r = (*data);
1140
168k
        x0i = (*(data + 1));
1141
1142
168k
        x0r = x0r + (x2r);
1143
168k
        x0i = x0i + (x2i);
1144
168k
        x2r = x0r - (x2r * 2);
1145
168k
        x2i = x0i - (x2i * 2);
1146
168k
        x1r = x1r + x3r;
1147
168k
        x1i = x1i + x3i;
1148
168k
        x3r = x1r - (x3r * 2);
1149
168k
        x3i = x1i - (x3i * 2);
1150
1151
168k
        x0r = x0r + (x1r);
1152
168k
        x0i = x0i + (x1i);
1153
168k
        x1r = x0r - (x1r * 2);
1154
168k
        x1i = x0i - (x1i * 2);
1155
168k
        x2r = x2r + (x3i);
1156
168k
        x2i = x2i - (x3r);
1157
168k
        x3i = x2r - (x3i * 2);
1158
168k
        x3r = x2i + (x3r * 2);
1159
1160
168k
        *data = x0r;
1161
168k
        *(data + 1) = x0i;
1162
168k
        data += (del << 1);
1163
1164
168k
        *data = x2r;
1165
168k
        *(data + 1) = x2i;
1166
168k
        data += (del << 1);
1167
1168
168k
        *data = x1r;
1169
168k
        *(data + 1) = x1i;
1170
168k
        data += (del << 1);
1171
1172
168k
        *data = x3i;
1173
168k
        *(data + 1) = x3r;
1174
168k
        data += (del << 1);
1175
168k
      }
1176
96.2k
      data -= 2 * npoints;
1177
96.2k
      data += 2;
1178
96.2k
    }
1179
96.2k
    for (; j <= sec_loop_cnt * 2; j += nodespacing)
1180
48.1k
    {
1181
48.1k
      w1h = *(twiddles + 2 * j);
1182
48.1k
      w2h = *(twiddles + 2 * (j << 1) - 512);
1183
48.1k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1184
48.1k
      w1l = *(twiddles + 2 * j + 1);
1185
48.1k
      w2l = *(twiddles + 2 * (j << 1) - 511);
1186
48.1k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1187
1188
96.2k
      for (k = in_loop_cnt; k != 0; k--)
1189
48.1k
      {
1190
48.1k
        FLOAT32 tmp;
1191
48.1k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1192
1193
48.1k
        data += (del << 1);
1194
1195
48.1k
        x1r = *data;
1196
48.1k
        x1i = *(data + 1);
1197
48.1k
        data += (del << 1);
1198
1199
48.1k
        x2r = *data;
1200
48.1k
        x2i = *(data + 1);
1201
48.1k
        data += (del << 1);
1202
1203
48.1k
        x3r = *data;
1204
48.1k
        x3i = *(data + 1);
1205
48.1k
        data -= 3 * (del << 1);
1206
1207
48.1k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1208
48.1k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1209
48.1k
        x1r = tmp;
1210
1211
48.1k
        tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1212
48.1k
        x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1213
48.1k
        x2r = tmp;
1214
1215
48.1k
        tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1216
48.1k
        x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1217
48.1k
        x3r = tmp;
1218
1219
48.1k
        x0r = (*data);
1220
48.1k
        x0i = (*(data + 1));
1221
1222
48.1k
        x0r = x0r + (x2r);
1223
48.1k
        x0i = x0i + (x2i);
1224
48.1k
        x2r = x0r - (x2r * 2);
1225
48.1k
        x2i = x0i - (x2i * 2);
1226
48.1k
        x1r = x1r + x3r;
1227
48.1k
        x1i = x1i + x3i;
1228
48.1k
        x3r = x1r - (x3r * 2);
1229
48.1k
        x3i = x1i - (x3i * 2);
1230
1231
48.1k
        x0r = x0r + (x1r);
1232
48.1k
        x0i = x0i + (x1i);
1233
48.1k
        x1r = x0r - (x1r * 2);
1234
48.1k
        x1i = x0i - (x1i * 2);
1235
48.1k
        x2r = x2r + (x3i);
1236
48.1k
        x2i = x2i - (x3r);
1237
48.1k
        x3i = x2r - (x3i * 2);
1238
48.1k
        x3r = x2i + (x3r * 2);
1239
1240
48.1k
        *data = x0r;
1241
48.1k
        *(data + 1) = x0i;
1242
48.1k
        data += (del << 1);
1243
1244
48.1k
        *data = x2r;
1245
48.1k
        *(data + 1) = x2i;
1246
48.1k
        data += (del << 1);
1247
1248
48.1k
        *data = x1r;
1249
48.1k
        *(data + 1) = x1i;
1250
48.1k
        data += (del << 1);
1251
1252
48.1k
        *data = x3i;
1253
48.1k
        *(data + 1) = x3r;
1254
48.1k
        data += (del << 1);
1255
48.1k
      }
1256
48.1k
      data -= 2 * npoints;
1257
48.1k
      data += 2;
1258
48.1k
    }
1259
192k
    for (; j<nodespacing * del; j += nodespacing)
1260
144k
    {
1261
144k
      w1h = *(twiddles + 2 * j);
1262
144k
      w2h = *(twiddles + 2 * (j << 1) - 512);
1263
144k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1264
144k
      w1l = *(twiddles + 2 * j + 1);
1265
144k
      w2l = *(twiddles + 2 * (j << 1) - 511);
1266
144k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1267
1268
361k
      for (k = in_loop_cnt; k != 0; k--)
1269
216k
      {
1270
216k
        FLOAT32 tmp;
1271
216k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1272
1273
216k
        data += (del << 1);
1274
1275
216k
        x1r = *data;
1276
216k
        x1i = *(data + 1);
1277
216k
        data += (del << 1);
1278
1279
216k
        x2r = *data;
1280
216k
        x2i = *(data + 1);
1281
216k
        data += (del << 1);
1282
1283
216k
        x3r = *data;
1284
216k
        x3i = *(data + 1);
1285
216k
        data -= 3 * (del << 1);
1286
1287
216k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1288
216k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1289
216k
        x1r = tmp;
1290
1291
216k
        tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1292
216k
        x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1293
216k
        x2r = tmp;
1294
1295
216k
        tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
1296
216k
        x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1297
216k
        x3r = tmp;
1298
1299
216k
        x0r = (*data);
1300
216k
        x0i = (*(data + 1));
1301
1302
216k
        x0r = x0r + (x2r);
1303
216k
        x0i = x0i + (x2i);
1304
216k
        x2r = x0r - (x2r * 2);
1305
216k
        x2i = x0i - (x2i * 2);
1306
216k
        x1r = x1r + x3r;
1307
216k
        x1i = x1i - x3i;
1308
216k
        x3r = x1r - (x3r * 2);
1309
216k
        x3i = x1i + (x3i * 2);
1310
1311
216k
        x0r = x0r + (x1r);
1312
216k
        x0i = x0i + (x1i);
1313
216k
        x1r = x0r - (x1r * 2);
1314
216k
        x1i = x0i - (x1i * 2);
1315
216k
        x2r = x2r + (x3i);
1316
216k
        x2i = x2i - (x3r);
1317
216k
        x3i = x2r - (x3i * 2);
1318
216k
        x3r = x2i + (x3r * 2);
1319
1320
216k
        *data = x0r;
1321
216k
        *(data + 1) = x0i;
1322
216k
        data += (del << 1);
1323
1324
216k
        *data = x2r;
1325
216k
        *(data + 1) = x2i;
1326
216k
        data += (del << 1);
1327
1328
216k
        *data = x1r;
1329
216k
        *(data + 1) = x1i;
1330
216k
        data += (del << 1);
1331
1332
216k
        *data = x3i;
1333
216k
        *(data + 1) = x3r;
1334
216k
        data += (del << 1);
1335
216k
      }
1336
144k
      data -= 2 * npoints;
1337
144k
      data += 2;
1338
144k
    }
1339
48.1k
    nodespacing >>= 2;
1340
48.1k
    del <<= 2;
1341
48.1k
    in_loop_cnt >>= 2;
1342
48.1k
  }
1343
24.0k
  if (not_power_4)
1344
0
  {
1345
0
    const FLOAT32 *twiddles = ptr_w;
1346
0
    nodespacing <<= 1;
1347
1348
0
    for (j = del / 2; j != 0; j--)
1349
0
    {
1350
0
      FLOAT32 w1h = *twiddles;
1351
0
      FLOAT32 w1l = *(twiddles + 1);
1352
0
      FLOAT32 tmp;
1353
0
      twiddles += nodespacing * 2;
1354
1355
0
      x0r = *ptr_y;
1356
0
      x0i = *(ptr_y + 1);
1357
0
      ptr_y += (del << 1);
1358
1359
0
      x1r = *ptr_y;
1360
0
      x1i = *(ptr_y + 1);
1361
1362
0
      tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1363
0
      x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1364
0
      x1r = tmp;
1365
1366
0
      *ptr_y = (x0r) - (x1r);
1367
0
      *(ptr_y + 1) = (x0i) - (x1i);
1368
0
      ptr_y -= (del << 1);
1369
1370
0
      *ptr_y = (x0r) + (x1r);
1371
0
      *(ptr_y + 1) = (x0i) + (x1i);
1372
0
      ptr_y += 2;
1373
0
    }
1374
0
    twiddles = ptr_w;
1375
0
    for (j = del / 2; j != 0; j--)
1376
0
    {
1377
0
      FLOAT32 w1h = *twiddles;
1378
0
      FLOAT32 w1l = *(twiddles + 1);
1379
0
      FLOAT32 tmp;
1380
0
      twiddles += nodespacing * 2;
1381
1382
0
      x0r = *ptr_y;
1383
0
      x0i = *(ptr_y + 1);
1384
0
      ptr_y += (del << 1);
1385
1386
0
      x1r = *ptr_y;
1387
0
      x1i = *(ptr_y + 1);
1388
1389
0
      tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
1390
0
      x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
1391
0
      x1r = tmp;
1392
1393
0
      *ptr_y = (x0r) - (x1r);
1394
0
      *(ptr_y + 1) = (x0i) - (x1i);
1395
0
      ptr_y -= (del << 1);
1396
1397
0
      *ptr_y = (x0r) + (x1r);
1398
0
      *(ptr_y + 1) = (x0i) + (x1i);
1399
0
      ptr_y += 2;
1400
0
    }
1401
0
  }
1402
1403
1.56M
  for (i = 0; i<nlength; i++)
1404
1.54M
  {
1405
1.54M
    xr[i] = y[2 * i];
1406
1.54M
    xi[i] = y[2 * i + 1];
1407
1.54M
  }
1408
1409
24.0k
  return;
1410
24.0k
}
1411
1412
VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1413
9.38M
                                 WORD32 fft_mode, WORD32 *preshift) {
1414
9.38M
  WORD32 i, j, k, n_stages;
1415
9.38M
  WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1416
9.38M
  WORD32 del, nodespacing, in_loop_cnt;
1417
9.38M
  WORD32 not_power_4;
1418
9.38M
  WORD32 npts, shift;
1419
9.38M
  WORD32 dig_rev_shift;
1420
9.38M
  WORD32 ptr_x[1024];
1421
9.38M
  WORD32 y[1024];
1422
9.38M
  WORD32 npoints = nlength;
1423
9.38M
  WORD32 n = 0;
1424
9.38M
  WORD32 *ptr_y = y;
1425
9.38M
  const WORD32 *ptr_w;
1426
9.38M
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
1427
9.38M
  n_stages = 30 - ixheaac_norm32(npoints);
1428
9.38M
  not_power_4 = n_stages & 1;
1429
1430
9.38M
  n_stages = n_stages >> 1;
1431
1432
9.38M
  npts = npoints;
1433
39.2M
  while (npts >> 1) {
1434
29.8M
    n++;
1435
29.8M
    npts = npts >> 1;
1436
29.8M
  }
1437
1438
9.38M
  if (n % 2 == 0)
1439
4.99M
    shift = ((n + 4)) / 2;
1440
4.39M
  else
1441
4.39M
    shift = ((n + 3) / 2);
1442
1443
240M
  for (i = 0; i < nlength; i++) {
1444
230M
    ptr_x[2 * i] = (xr[i] / (1 << (shift)));
1445
230M
    ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
1446
230M
  }
1447
1448
9.38M
  if (fft_mode == -1) {
1449
8.39M
    ptr_w = ixheaacd_twiddle_table_fft_32x32;
1450
1451
33.4M
    for (i = 0; i < npoints; i += 4) {
1452
25.0M
      WORD32 *inp = ptr_x;
1453
1454
25.0M
      DIG_REV(i, dig_rev_shift, h2);
1455
25.0M
      if (not_power_4) {
1456
16.2M
        h2 += 1;
1457
16.2M
        h2 &= ~1;
1458
16.2M
      }
1459
25.0M
      inp += (h2);
1460
1461
25.0M
      x0r = *inp;
1462
25.0M
      x0i = *(inp + 1);
1463
25.0M
      inp += (npoints >> 1);
1464
1465
25.0M
      x1r = *inp;
1466
25.0M
      x1i = *(inp + 1);
1467
25.0M
      inp += (npoints >> 1);
1468
1469
25.0M
      x2r = *inp;
1470
25.0M
      x2i = *(inp + 1);
1471
25.0M
      inp += (npoints >> 1);
1472
1473
25.0M
      x3r = *inp;
1474
25.0M
      x3i = *(inp + 1);
1475
1476
25.0M
      x0r = ixheaac_add32_sat(x0r, x2r);
1477
25.0M
      x0i = ixheaac_add32_sat(x0i, x2i);
1478
25.0M
      x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1479
25.0M
      x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1480
25.0M
      x1r = ixheaac_add32_sat(x1r, x3r);
1481
25.0M
      x1i = ixheaac_add32_sat(x1i, x3i);
1482
25.0M
      x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1483
25.0M
      x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1484
1485
25.0M
      x0r = ixheaac_add32_sat(x0r, x1r);
1486
25.0M
      x0i = ixheaac_add32_sat(x0i, x1i);
1487
25.0M
      x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1488
25.0M
      x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1489
25.0M
      x2r = ixheaac_add32_sat(x2r, x3i);
1490
25.0M
      x2i = ixheaac_sub32_sat(x2i, x3r);
1491
25.0M
      x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1492
25.0M
      x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1493
1494
25.0M
      *ptr_y++ = x0r;
1495
25.0M
      *ptr_y++ = x0i;
1496
25.0M
      *ptr_y++ = x2r;
1497
25.0M
      *ptr_y++ = x2i;
1498
25.0M
      *ptr_y++ = x1r;
1499
25.0M
      *ptr_y++ = x1i;
1500
25.0M
      *ptr_y++ = x3i;
1501
25.0M
      *ptr_y++ = x3r;
1502
25.0M
    }
1503
8.39M
    ptr_y -= 2 * npoints;
1504
8.39M
    del = 4;
1505
8.39M
    nodespacing = 64;
1506
8.39M
    in_loop_cnt = npoints >> 4;
1507
9.69M
    for (i = n_stages - 1; i > 0; i--) {
1508
1.30M
      const WORD32 *twiddles = ptr_w;
1509
1.30M
      WORD32 *data = ptr_y;
1510
1.30M
      WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1511
1.30M
      WORD32 sec_loop_cnt;
1512
1513
5.45M
      for (k = in_loop_cnt; k != 0; k--) {
1514
4.15M
        x0r = (*data);
1515
4.15M
        x0i = (*(data + 1));
1516
4.15M
        data += (del << 1);
1517
1518
4.15M
        x1r = (*data);
1519
4.15M
        x1i = (*(data + 1));
1520
4.15M
        data += (del << 1);
1521
1522
4.15M
        x2r = (*data);
1523
4.15M
        x2i = (*(data + 1));
1524
4.15M
        data += (del << 1);
1525
1526
4.15M
        x3r = (*data);
1527
4.15M
        x3i = (*(data + 1));
1528
4.15M
        data -= 3 * (del << 1);
1529
1530
4.15M
        x0r = ixheaac_add32_sat(x0r, x2r);
1531
4.15M
        x0i = ixheaac_add32_sat(x0i, x2i);
1532
4.15M
        x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1533
4.15M
        x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1534
4.15M
        x1r = ixheaac_add32_sat(x1r, x3r);
1535
4.15M
        x1i = ixheaac_add32_sat(x1i, x3i);
1536
4.15M
        x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1537
4.15M
        x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1538
1539
4.15M
        x0r = ixheaac_add32_sat(x0r, x1r);
1540
4.15M
        x0i = ixheaac_add32_sat(x0i, x1i);
1541
4.15M
        x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1542
4.15M
        x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1543
4.15M
        x2r = ixheaac_add32_sat(x2r, x3i);
1544
4.15M
        x2i = ixheaac_sub32_sat(x2i, x3r);
1545
4.15M
        x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1546
4.15M
        x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1547
1548
4.15M
        *data = x0r;
1549
4.15M
        *(data + 1) = x0i;
1550
4.15M
        data += (del << 1);
1551
1552
4.15M
        *data = x2r;
1553
4.15M
        *(data + 1) = x2i;
1554
4.15M
        data += (del << 1);
1555
1556
4.15M
        *data = x1r;
1557
4.15M
        *(data + 1) = x1i;
1558
4.15M
        data += (del << 1);
1559
1560
4.15M
        *data = x3i;
1561
4.15M
        *(data + 1) = x3r;
1562
4.15M
        data += (del << 1);
1563
4.15M
      }
1564
1.30M
      data = ptr_y + 2;
1565
1566
1.30M
      sec_loop_cnt = (nodespacing * del);
1567
1.30M
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1568
1.30M
                     (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1569
1.30M
                     (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1570
1.30M
                     (sec_loop_cnt / 256);
1571
1.30M
      j = nodespacing;
1572
1573
4.66M
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1574
3.35M
        w1h = *(twiddles + 2 * j);
1575
3.35M
        w1l = *(twiddles + 2 * j + 1);
1576
3.35M
        w2h = *(twiddles + 2 * (j << 1));
1577
3.35M
        w2l = *(twiddles + 2 * (j << 1) + 1);
1578
3.35M
        w3h = *(twiddles + 2 * j + 2 * (j << 1));
1579
3.35M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1580
1581
10.5M
        for (k = in_loop_cnt; k != 0; k--) {
1582
7.23M
          WORD32 tmp;
1583
7.23M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1584
1585
7.23M
          data += (del << 1);
1586
1587
7.23M
          x1r = *data;
1588
7.23M
          x1i = *(data + 1);
1589
7.23M
          data += (del << 1);
1590
1591
7.23M
          x2r = *data;
1592
7.23M
          x2i = *(data + 1);
1593
7.23M
          data += (del << 1);
1594
1595
7.23M
          x3r = *data;
1596
7.23M
          x3i = *(data + 1);
1597
7.23M
          data -= 3 * (del << 1);
1598
1599
7.23M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1600
7.23M
                                   ixheaacd_mult32_sat(x1i, w1h));
1601
7.23M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1602
7.23M
          x1r = tmp;
1603
1604
7.23M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1605
7.23M
                                   ixheaacd_mult32_sat(x2i, w2h));
1606
7.23M
          x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1607
7.23M
          x2r = tmp;
1608
1609
7.23M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
1610
7.23M
                                   ixheaacd_mult32_sat(x3i, w3h));
1611
7.23M
          x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1612
7.23M
          x3r = tmp;
1613
1614
7.23M
          x0r = (*data);
1615
7.23M
          x0i = (*(data + 1));
1616
1617
7.23M
          x0r = ixheaac_add32_sat(x0r, x2r);
1618
7.23M
          x0i = ixheaac_add32_sat(x0i, x2i);
1619
7.23M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1620
7.23M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1621
7.23M
          x1r = ixheaac_add32_sat(x1r, x3r);
1622
7.23M
          x1i = ixheaac_add32_sat(x1i, x3i);
1623
7.23M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1624
7.23M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1625
1626
7.23M
          x0r = ixheaac_add32_sat(x0r, x1r);
1627
7.23M
          x0i = ixheaac_add32_sat(x0i, x1i);
1628
7.23M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1629
7.23M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1630
7.23M
          x2r = ixheaac_add32_sat(x2r, x3i);
1631
7.23M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1632
7.23M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1633
7.23M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1634
1635
7.23M
          *data = x0r;
1636
7.23M
          *(data + 1) = x0i;
1637
7.23M
          data += (del << 1);
1638
1639
7.23M
          *data = x2r;
1640
7.23M
          *(data + 1) = x2i;
1641
7.23M
          data += (del << 1);
1642
1643
7.23M
          *data = x1r;
1644
7.23M
          *(data + 1) = x1i;
1645
7.23M
          data += (del << 1);
1646
1647
7.23M
          *data = x3i;
1648
7.23M
          *(data + 1) = x3r;
1649
7.23M
          data += (del << 1);
1650
7.23M
        }
1651
3.35M
        data -= 2 * npoints;
1652
3.35M
        data += 2;
1653
3.35M
      }
1654
3.63M
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1655
2.33M
        w1h = *(twiddles + 2 * j);
1656
2.33M
        w2h = *(twiddles + 2 * (j << 1));
1657
2.33M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1658
2.33M
        w1l = *(twiddles + 2 * j + 1);
1659
2.33M
        w2l = *(twiddles + 2 * (j << 1) + 1);
1660
2.33M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1661
1662
8.02M
        for (k = in_loop_cnt; k != 0; k--) {
1663
5.69M
          WORD32 tmp;
1664
5.69M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1665
5.69M
          data += (del << 1);
1666
1667
5.69M
          x1r = *data;
1668
5.69M
          x1i = *(data + 1);
1669
5.69M
          data += (del << 1);
1670
1671
5.69M
          x2r = *data;
1672
5.69M
          x2i = *(data + 1);
1673
5.69M
          data += (del << 1);
1674
1675
5.69M
          x3r = *data;
1676
5.69M
          x3i = *(data + 1);
1677
5.69M
          data -= 3 * (del << 1);
1678
1679
5.69M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1680
5.69M
                                   ixheaacd_mult32_sat(x1i, w1h));
1681
5.69M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1682
5.69M
          x1r = tmp;
1683
1684
5.69M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1685
5.69M
                                   ixheaacd_mult32_sat(x2i, w2h));
1686
5.69M
          x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1687
5.69M
          x2r = tmp;
1688
1689
5.69M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1690
5.69M
                                   ixheaacd_mult32_sat(x3i, w3l));
1691
5.69M
          x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1692
5.69M
                                   ixheaacd_mult32_sat(x3r, w3l));
1693
5.69M
          x3r = tmp;
1694
1695
5.69M
          x0r = (*data);
1696
5.69M
          x0i = (*(data + 1));
1697
1698
5.69M
          x0r = ixheaac_add32_sat(x0r, x2r);
1699
5.69M
          x0i = ixheaac_add32_sat(x0i, x2i);
1700
5.69M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1701
5.69M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1702
5.69M
          x1r = ixheaac_add32_sat(x1r, x3r);
1703
5.69M
          x1i = ixheaac_add32_sat(x1i, x3i);
1704
5.69M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1705
5.69M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1706
1707
5.69M
          x0r = ixheaac_add32_sat(x0r, x1r);
1708
5.69M
          x0i = ixheaac_add32_sat(x0i, x1i);
1709
5.69M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1710
5.69M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1711
5.69M
          x2r = ixheaac_add32_sat(x2r, x3i);
1712
5.69M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1713
5.69M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1714
5.69M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1715
1716
5.69M
          *data = x0r;
1717
5.69M
          *(data + 1) = x0i;
1718
5.69M
          data += (del << 1);
1719
1720
5.69M
          *data = x2r;
1721
5.69M
          *(data + 1) = x2i;
1722
5.69M
          data += (del << 1);
1723
1724
5.69M
          *data = x1r;
1725
5.69M
          *(data + 1) = x1i;
1726
5.69M
          data += (del << 1);
1727
1728
5.69M
          *data = x3i;
1729
5.69M
          *(data + 1) = x3r;
1730
5.69M
          data += (del << 1);
1731
5.69M
        }
1732
2.33M
        data -= 2 * npoints;
1733
2.33M
        data += 2;
1734
2.33M
      }
1735
2.33M
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1736
1.02M
        w1h = *(twiddles + 2 * j);
1737
1.02M
        w2h = *(twiddles + 2 * (j << 1) - 512);
1738
1.02M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1739
1.02M
        w1l = *(twiddles + 2 * j + 1);
1740
1.02M
        w2l = *(twiddles + 2 * (j << 1) - 511);
1741
1.02M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1742
1743
2.56M
        for (k = in_loop_cnt; k != 0; k--) {
1744
1.53M
          WORD32 tmp;
1745
1.53M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1746
1747
1.53M
          data += (del << 1);
1748
1749
1.53M
          x1r = *data;
1750
1.53M
          x1i = *(data + 1);
1751
1.53M
          data += (del << 1);
1752
1753
1.53M
          x2r = *data;
1754
1.53M
          x2i = *(data + 1);
1755
1.53M
          data += (del << 1);
1756
1757
1.53M
          x3r = *data;
1758
1.53M
          x3i = *(data + 1);
1759
1.53M
          data -= 3 * (del << 1);
1760
1761
1.53M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1762
1.53M
                                   ixheaacd_mult32_sat(x1i, w1h));
1763
1.53M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1764
1.53M
          x1r = tmp;
1765
1766
1.53M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1767
1.53M
                                   ixheaacd_mult32_sat(x2i, w2l));
1768
1.53M
          x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1769
1.53M
                                   ixheaacd_mult32_sat(x2r, w2l));
1770
1.53M
          x2r = tmp;
1771
1772
1.53M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1773
1.53M
                                   ixheaacd_mult32_sat(x3i, w3l));
1774
1.53M
          x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1775
1.53M
                                   ixheaacd_mult32_sat(x3r, w3l));
1776
1.53M
          x3r = tmp;
1777
1778
1.53M
          x0r = (*data);
1779
1.53M
          x0i = (*(data + 1));
1780
1781
1.53M
          x0r = ixheaac_add32_sat(x0r, x2r);
1782
1.53M
          x0i = ixheaac_add32_sat(x0i, x2i);
1783
1.53M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1784
1.53M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1785
1.53M
          x1r = ixheaac_add32_sat(x1r, x3r);
1786
1.53M
          x1i = ixheaac_add32_sat(x1i, x3i);
1787
1.53M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1788
1.53M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1789
1790
1.53M
          x0r = ixheaac_add32_sat(x0r, x1r);
1791
1.53M
          x0i = ixheaac_add32_sat(x0i, x1i);
1792
1.53M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1793
1.53M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1794
1.53M
          x2r = ixheaac_add32_sat(x2r, x3i);
1795
1.53M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1796
1.53M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1797
1.53M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1798
1799
1.53M
          *data = x0r;
1800
1.53M
          *(data + 1) = x0i;
1801
1.53M
          data += (del << 1);
1802
1803
1.53M
          *data = x2r;
1804
1.53M
          *(data + 1) = x2i;
1805
1.53M
          data += (del << 1);
1806
1807
1.53M
          *data = x1r;
1808
1.53M
          *(data + 1) = x1i;
1809
1.53M
          data += (del << 1);
1810
1811
1.53M
          *data = x3i;
1812
1.53M
          *(data + 1) = x3r;
1813
1.53M
          data += (del << 1);
1814
1.53M
        }
1815
1.02M
        data -= 2 * npoints;
1816
1.02M
        data += 2;
1817
1.02M
      }
1818
4.66M
      for (; j < nodespacing * del; j += nodespacing) {
1819
3.35M
        w1h = *(twiddles + 2 * j);
1820
3.35M
        w2h = *(twiddles + 2 * (j << 1) - 512);
1821
3.35M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1822
3.35M
        w1l = *(twiddles + 2 * j + 1);
1823
3.35M
        w2l = *(twiddles + 2 * (j << 1) - 511);
1824
3.35M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1825
1826
10.5M
        for (k = in_loop_cnt; k != 0; k--) {
1827
7.23M
          WORD32 tmp;
1828
7.23M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1829
1830
7.23M
          data += (del << 1);
1831
1832
7.23M
          x1r = *data;
1833
7.23M
          x1i = *(data + 1);
1834
7.23M
          data += (del << 1);
1835
1836
7.23M
          x2r = *data;
1837
7.23M
          x2i = *(data + 1);
1838
7.23M
          data += (del << 1);
1839
1840
7.23M
          x3r = *data;
1841
7.23M
          x3i = *(data + 1);
1842
7.23M
          data -= 3 * (del << 1);
1843
1844
7.23M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1845
7.23M
                                   ixheaacd_mult32_sat(x1i, w1h));
1846
7.23M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1847
7.23M
          x1r = tmp;
1848
1849
7.23M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1850
7.23M
                                   ixheaacd_mult32_sat(x2i, w2l));
1851
7.23M
          x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1852
7.23M
                                   ixheaacd_mult32_sat(x2r, w2l));
1853
7.23M
          x2r = tmp;
1854
1855
7.23M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1856
7.23M
                                   ixheaacd_mult32_sat(x3r, w3l));
1857
7.23M
          x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1858
7.23M
          x3r = tmp;
1859
1860
7.23M
          x0r = (*data);
1861
7.23M
          x0i = (*(data + 1));
1862
1863
7.23M
          x0r = ixheaac_add32_sat(x0r, x2r);
1864
7.23M
          x0i = ixheaac_add32_sat(x0i, x2i);
1865
7.23M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1866
7.23M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1867
7.23M
          x1r = ixheaac_add32_sat(x1r, x3r);
1868
7.23M
          x1i = ixheaac_sub32_sat(x1i, x3i);
1869
7.23M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1870
7.23M
          x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1871
1872
7.23M
          x0r = ixheaac_add32_sat(x0r, x1r);
1873
7.23M
          x0i = ixheaac_add32_sat(x0i, x1i);
1874
7.23M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1875
7.23M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1876
7.23M
          x2r = ixheaac_add32_sat(x2r, x3i);
1877
7.23M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1878
7.23M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1879
7.23M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1880
1881
7.23M
          *data = x0r;
1882
7.23M
          *(data + 1) = x0i;
1883
7.23M
          data += (del << 1);
1884
1885
7.23M
          *data = x2r;
1886
7.23M
          *(data + 1) = x2i;
1887
7.23M
          data += (del << 1);
1888
1889
7.23M
          *data = x1r;
1890
7.23M
          *(data + 1) = x1i;
1891
7.23M
          data += (del << 1);
1892
1893
7.23M
          *data = x3i;
1894
7.23M
          *(data + 1) = x3r;
1895
7.23M
          data += (del << 1);
1896
7.23M
        }
1897
3.35M
        data -= 2 * npoints;
1898
3.35M
        data += 2;
1899
3.35M
      }
1900
1.30M
      nodespacing >>= 2;
1901
1.30M
      del <<= 2;
1902
1.30M
      in_loop_cnt >>= 2;
1903
1.30M
    }
1904
8.39M
    if (not_power_4) {
1905
4.20M
      const WORD32 *twiddles = ptr_w;
1906
4.20M
      nodespacing <<= 1;
1907
4.20M
      shift += 1;
1908
1909
20.4M
      for (j = del / 2; j != 0; j--) {
1910
16.2M
        WORD32 w1h = *twiddles;
1911
16.2M
        WORD32 w1l = *(twiddles + 1);
1912
16.2M
        WORD32 tmp;
1913
16.2M
        twiddles += nodespacing * 2;
1914
1915
16.2M
        x0r = *ptr_y;
1916
16.2M
        x0i = *(ptr_y + 1);
1917
16.2M
        ptr_y += (del << 1);
1918
1919
16.2M
        x1r = *ptr_y;
1920
16.2M
        x1i = *(ptr_y + 1);
1921
1922
16.2M
        tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1923
16.2M
                                 ixheaacd_mult32_sat(x1i, w1h));
1924
16.2M
        x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1925
16.2M
        x1r = tmp;
1926
1927
16.2M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
1928
16.2M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1929
16.2M
        ptr_y -= (del << 1);
1930
1931
16.2M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
1932
16.2M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1933
16.2M
        ptr_y += 2;
1934
16.2M
      }
1935
4.20M
      twiddles = ptr_w;
1936
20.4M
      for (j = del / 2; j != 0; j--) {
1937
16.2M
        WORD32 w1h = *twiddles;
1938
16.2M
        WORD32 w1l = *(twiddles + 1);
1939
16.2M
        WORD32 tmp;
1940
16.2M
        twiddles += nodespacing * 2;
1941
1942
16.2M
        x0r = *ptr_y;
1943
16.2M
        x0i = *(ptr_y + 1);
1944
16.2M
        ptr_y += (del << 1);
1945
1946
16.2M
        x1r = *ptr_y;
1947
16.2M
        x1i = *(ptr_y + 1);
1948
1949
16.2M
        tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
1950
16.2M
                                 ixheaacd_mult32_sat(x1i, w1l));
1951
16.2M
        x1i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
1952
16.2M
                                 ixheaacd_mult32_sat(x1r, w1l));
1953
16.2M
        x1r = tmp;
1954
1955
16.2M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
1956
16.2M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1957
16.2M
        ptr_y -= (del << 1);
1958
1959
16.2M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
1960
16.2M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1961
16.2M
        ptr_y += 2;
1962
16.2M
      }
1963
4.20M
    }
1964
8.39M
  }
1965
1966
995k
  else {
1967
995k
    ptr_w = ixheaacd_twiddle_table_fft_32x32;
1968
1969
33.6M
    for (i = 0; i < npoints; i += 4) {
1970
32.6M
      WORD32 *inp = ptr_x;
1971
1972
32.6M
      DIG_REV(i, dig_rev_shift, h2);
1973
32.6M
      if (not_power_4) {
1974
20.9M
        h2 += 1;
1975
20.9M
        h2 &= ~1;
1976
20.9M
      }
1977
32.6M
      inp += (h2);
1978
1979
32.6M
      x0r = *inp;
1980
32.6M
      x0i = *(inp + 1);
1981
32.6M
      inp += (npoints >> 1);
1982
1983
32.6M
      x1r = *inp;
1984
32.6M
      x1i = *(inp + 1);
1985
32.6M
      inp += (npoints >> 1);
1986
1987
32.6M
      x2r = *inp;
1988
32.6M
      x2i = *(inp + 1);
1989
32.6M
      inp += (npoints >> 1);
1990
1991
32.6M
      x3r = *inp;
1992
32.6M
      x3i = *(inp + 1);
1993
1994
32.6M
      x0r = ixheaac_add32_sat(x0r, x2r);
1995
32.6M
      x0i = ixheaac_add32_sat(x0i, x2i);
1996
32.6M
      x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1997
32.6M
      x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1998
32.6M
      x1r = ixheaac_add32_sat(x1r, x3r);
1999
32.6M
      x1i = ixheaac_add32_sat(x1i, x3i);
2000
32.6M
      x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2001
32.6M
      x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2002
2003
32.6M
      x0r = ixheaac_add32_sat(x0r, x1r);
2004
32.6M
      x0i = ixheaac_add32_sat(x0i, x1i);
2005
32.6M
      x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2006
32.6M
      x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2007
32.6M
      x2r = ixheaac_sub32_sat(x2r, x3i);
2008
32.6M
      x2i = ixheaac_add32_sat(x2i, x3r);
2009
32.6M
      x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2010
32.6M
      x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2011
2012
32.6M
      *ptr_y++ = x0r;
2013
32.6M
      *ptr_y++ = x0i;
2014
32.6M
      *ptr_y++ = x2r;
2015
32.6M
      *ptr_y++ = x2i;
2016
32.6M
      *ptr_y++ = x1r;
2017
32.6M
      *ptr_y++ = x1i;
2018
32.6M
      *ptr_y++ = x3i;
2019
32.6M
      *ptr_y++ = x3r;
2020
32.6M
    }
2021
995k
    ptr_y -= 2 * npoints;
2022
995k
    del = 4;
2023
995k
    nodespacing = 64;
2024
995k
    in_loop_cnt = npoints >> 4;
2025
3.04M
    for (i = n_stages - 1; i > 0; i--) {
2026
2.04M
      const WORD32 *twiddles = ptr_w;
2027
2.04M
      WORD32 *data = ptr_y;
2028
2.04M
      WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
2029
2.04M
      WORD32 sec_loop_cnt;
2030
2031
12.5M
      for (k = in_loop_cnt; k != 0; k--) {
2032
10.4M
        x0r = (*data);
2033
10.4M
        x0i = (*(data + 1));
2034
10.4M
        data += (del << 1);
2035
2036
10.4M
        x1r = (*data);
2037
10.4M
        x1i = (*(data + 1));
2038
10.4M
        data += (del << 1);
2039
2040
10.4M
        x2r = (*data);
2041
10.4M
        x2i = (*(data + 1));
2042
10.4M
        data += (del << 1);
2043
2044
10.4M
        x3r = (*data);
2045
10.4M
        x3i = (*(data + 1));
2046
10.4M
        data -= 3 * (del << 1);
2047
2048
10.4M
        x0r = ixheaac_add32_sat(x0r, x2r);
2049
10.4M
        x0i = ixheaac_add32_sat(x0i, x2i);
2050
10.4M
        x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2051
10.4M
        x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2052
10.4M
        x1r = ixheaac_add32_sat(x1r, x3r);
2053
10.4M
        x1i = ixheaac_add32_sat(x1i, x3i);
2054
10.4M
        x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2055
10.4M
        x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2056
2057
10.4M
        x0r = ixheaac_add32_sat(x0r, x1r);
2058
10.4M
        x0i = ixheaac_add32_sat(x0i, x1i);
2059
10.4M
        x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2060
10.4M
        x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2061
10.4M
        x2r = ixheaac_sub32_sat(x2r, x3i);
2062
10.4M
        x2i = ixheaac_add32_sat(x2i, x3r);
2063
10.4M
        x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2064
10.4M
        x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2065
2066
10.4M
        *data = x0r;
2067
10.4M
        *(data + 1) = x0i;
2068
10.4M
        data += (del << 1);
2069
2070
10.4M
        *data = x2r;
2071
10.4M
        *(data + 1) = x2i;
2072
10.4M
        data += (del << 1);
2073
2074
10.4M
        *data = x1r;
2075
10.4M
        *(data + 1) = x1i;
2076
10.4M
        data += (del << 1);
2077
2078
10.4M
        *data = x3i;
2079
10.4M
        *(data + 1) = x3r;
2080
10.4M
        data += (del << 1);
2081
10.4M
      }
2082
2.04M
      data = ptr_y + 2;
2083
2084
2.04M
      sec_loop_cnt = (nodespacing * del);
2085
2.04M
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
2086
2.04M
                     (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
2087
2.04M
                     (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
2088
2.04M
                     (sec_loop_cnt / 256);
2089
2.04M
      j = nodespacing;
2090
2091
10.7M
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
2092
8.70M
        w1h = *(twiddles + 2 * j);
2093
8.70M
        w2h = *(twiddles + 2 * (j << 1));
2094
8.70M
        w3h = *(twiddles + 2 * j + 2 * (j << 1));
2095
8.70M
        w1l = *(twiddles + 2 * j + 1);
2096
8.70M
        w2l = *(twiddles + 2 * (j << 1) + 1);
2097
8.70M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
2098
2099
33.3M
        for (k = in_loop_cnt; k != 0; k--) {
2100
24.6M
          WORD32 tmp;
2101
24.6M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2102
2103
24.6M
          data += (del << 1);
2104
2105
24.6M
          x1r = *data;
2106
24.6M
          x1i = *(data + 1);
2107
24.6M
          data += (del << 1);
2108
2109
24.6M
          x2r = *data;
2110
24.6M
          x2i = *(data + 1);
2111
24.6M
          data += (del << 1);
2112
2113
24.6M
          x3r = *data;
2114
24.6M
          x3i = *(data + 1);
2115
24.6M
          data -= 3 * (del << 1);
2116
2117
24.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2118
24.6M
                                   ixheaacd_mult32_sat(x1i, w1h));
2119
24.6M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2120
24.6M
          x1r = tmp;
2121
2122
24.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2123
24.6M
                                   ixheaacd_mult32_sat(x2i, w2h));
2124
24.6M
          x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2125
24.6M
          x2r = tmp;
2126
2127
24.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2128
24.6M
                                   ixheaacd_mult32_sat(x3i, w3h));
2129
24.6M
          x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2130
24.6M
          x3r = tmp;
2131
2132
24.6M
          x0r = (*data);
2133
24.6M
          x0i = (*(data + 1));
2134
2135
24.6M
          x0r = ixheaac_add32_sat(x0r, x2r);
2136
24.6M
          x0i = ixheaac_add32_sat(x0i, x2i);
2137
24.6M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2138
24.6M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2139
24.6M
          x1r = ixheaac_add32_sat(x1r, x3r);
2140
24.6M
          x1i = ixheaac_add32_sat(x1i, x3i);
2141
24.6M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2142
24.6M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2143
2144
24.6M
          x0r = ixheaac_add32_sat(x0r, x1r);
2145
24.6M
          x0i = ixheaac_add32_sat(x0i, x1i);
2146
24.6M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2147
24.6M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2148
24.6M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2149
24.6M
          x2i = ixheaac_add32_sat(x2i, x3r);
2150
24.6M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2151
24.6M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2152
2153
24.6M
          *data = x0r;
2154
24.6M
          *(data + 1) = x0i;
2155
24.6M
          data += (del << 1);
2156
2157
24.6M
          *data = x2r;
2158
24.6M
          *(data + 1) = x2i;
2159
24.6M
          data += (del << 1);
2160
2161
24.6M
          *data = x1r;
2162
24.6M
          *(data + 1) = x1i;
2163
24.6M
          data += (del << 1);
2164
2165
24.6M
          *data = x3i;
2166
24.6M
          *(data + 1) = x3r;
2167
24.6M
          data += (del << 1);
2168
24.6M
        }
2169
8.70M
        data -= 2 * npoints;
2170
8.70M
        data += 2;
2171
8.70M
      }
2172
7.42M
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
2173
5.37M
        w1h = *(twiddles + 2 * j);
2174
5.37M
        w2h = *(twiddles + 2 * (j << 1));
2175
5.37M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2176
5.37M
        w1l = *(twiddles + 2 * j + 1);
2177
5.37M
        w2l = *(twiddles + 2 * (j << 1) + 1);
2178
5.37M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2179
2180
22.9M
        for (k = in_loop_cnt; k != 0; k--) {
2181
17.5M
          WORD32 tmp;
2182
17.5M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2183
2184
17.5M
          data += (del << 1);
2185
2186
17.5M
          x1r = *data;
2187
17.5M
          x1i = *(data + 1);
2188
17.5M
          data += (del << 1);
2189
2190
17.5M
          x2r = *data;
2191
17.5M
          x2i = *(data + 1);
2192
17.5M
          data += (del << 1);
2193
2194
17.5M
          x3r = *data;
2195
17.5M
          x3i = *(data + 1);
2196
17.5M
          data -= 3 * (del << 1);
2197
2198
17.5M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2199
17.5M
                                   ixheaacd_mult32_sat(x1i, w1h));
2200
17.5M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2201
17.5M
          x1r = tmp;
2202
2203
17.5M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2204
17.5M
                                   ixheaacd_mult32_sat(x2i, w2h));
2205
17.5M
          x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2206
17.5M
          x2r = tmp;
2207
2208
17.5M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2209
17.5M
                                   ixheaacd_mult32_sat(x3i, w3l));
2210
17.5M
          x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2211
17.5M
                                   ixheaacd_mult32_sat(x3i, w3h));
2212
17.5M
          x3r = tmp;
2213
2214
17.5M
          x0r = (*data);
2215
17.5M
          x0i = (*(data + 1));
2216
2217
17.5M
          x0r = ixheaac_add32_sat(x0r, x2r);
2218
17.5M
          x0i = ixheaac_add32_sat(x0i, x2i);
2219
17.5M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2220
17.5M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2221
17.5M
          x1r = ixheaac_add32_sat(x1r, x3r);
2222
17.5M
          x1i = ixheaac_add32_sat(x1i, x3i);
2223
17.5M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2224
17.5M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2225
2226
17.5M
          x0r = ixheaac_add32_sat(x0r, x1r);
2227
17.5M
          x0i = ixheaac_add32_sat(x0i, x1i);
2228
17.5M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2229
17.5M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2230
17.5M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2231
17.5M
          x2i = ixheaac_add32_sat(x2i, x3r);
2232
17.5M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2233
17.5M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2234
2235
17.5M
          *data = x0r;
2236
17.5M
          *(data + 1) = x0i;
2237
17.5M
          data += (del << 1);
2238
2239
17.5M
          *data = x2r;
2240
17.5M
          *(data + 1) = x2i;
2241
17.5M
          data += (del << 1);
2242
2243
17.5M
          *data = x1r;
2244
17.5M
          *(data + 1) = x1i;
2245
17.5M
          data += (del << 1);
2246
2247
17.5M
          *data = x3i;
2248
17.5M
          *(data + 1) = x3r;
2249
17.5M
          data += (del << 1);
2250
17.5M
        }
2251
5.37M
        data -= 2 * npoints;
2252
5.37M
        data += 2;
2253
5.37M
      }
2254
5.37M
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
2255
3.33M
        w1h = *(twiddles + 2 * j);
2256
3.33M
        w2h = *(twiddles + 2 * (j << 1) - 512);
2257
3.33M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2258
3.33M
        w1l = *(twiddles + 2 * j + 1);
2259
3.33M
        w2l = *(twiddles + 2 * (j << 1) - 511);
2260
3.33M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2261
2262
10.4M
        for (k = in_loop_cnt; k != 0; k--) {
2263
7.09M
          WORD32 tmp;
2264
7.09M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2265
2266
7.09M
          data += (del << 1);
2267
2268
7.09M
          x1r = *data;
2269
7.09M
          x1i = *(data + 1);
2270
7.09M
          data += (del << 1);
2271
2272
7.09M
          x2r = *data;
2273
7.09M
          x2i = *(data + 1);
2274
7.09M
          data += (del << 1);
2275
2276
7.09M
          x3r = *data;
2277
7.09M
          x3i = *(data + 1);
2278
7.09M
          data -= 3 * (del << 1);
2279
2280
7.09M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2281
7.09M
                                   ixheaacd_mult32_sat(x1i, w1h));
2282
7.09M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2283
7.09M
          x1r = tmp;
2284
2285
7.09M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2286
7.09M
                                   ixheaacd_mult32_sat(x2i, w2l));
2287
7.09M
          x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2288
7.09M
                                   ixheaacd_mult32_sat(x2i, w2h));
2289
7.09M
          x2r = tmp;
2290
2291
7.09M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2292
7.09M
                                   ixheaacd_mult32_sat(x3i, w3l));
2293
7.09M
          x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2294
7.09M
                                   ixheaacd_mult32_sat(x3i, w3h));
2295
7.09M
          x3r = tmp;
2296
2297
7.09M
          x0r = (*data);
2298
7.09M
          x0i = (*(data + 1));
2299
2300
7.09M
          x0r = ixheaac_add32_sat(x0r, x2r);
2301
7.09M
          x0i = ixheaac_add32_sat(x0i, x2i);
2302
7.09M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2303
7.09M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2304
7.09M
          x1r = ixheaac_add32_sat(x1r, x3r);
2305
7.09M
          x1i = ixheaac_add32_sat(x1i, x3i);
2306
7.09M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2307
7.09M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2308
2309
7.09M
          x0r = ixheaac_add32_sat(x0r, x1r);
2310
7.09M
          x0i = ixheaac_add32_sat(x0i, x1i);
2311
7.09M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2312
7.09M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2313
7.09M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2314
7.09M
          x2i = ixheaac_add32_sat(x2i, x3r);
2315
7.09M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2316
7.09M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2317
2318
7.09M
          *data = x0r;
2319
7.09M
          *(data + 1) = x0i;
2320
7.09M
          data += (del << 1);
2321
2322
7.09M
          *data = x2r;
2323
7.09M
          *(data + 1) = x2i;
2324
7.09M
          data += (del << 1);
2325
2326
7.09M
          *data = x1r;
2327
7.09M
          *(data + 1) = x1i;
2328
7.09M
          data += (del << 1);
2329
2330
7.09M
          *data = x3i;
2331
7.09M
          *(data + 1) = x3r;
2332
7.09M
          data += (del << 1);
2333
7.09M
        }
2334
3.33M
        data -= 2 * npoints;
2335
3.33M
        data += 2;
2336
3.33M
      }
2337
10.7M
      for (; j < nodespacing * del; j += nodespacing) {
2338
8.70M
        w1h = *(twiddles + 2 * j);
2339
8.70M
        w2h = *(twiddles + 2 * (j << 1) - 512);
2340
8.70M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
2341
8.70M
        w1l = *(twiddles + 2 * j + 1);
2342
8.70M
        w2l = *(twiddles + 2 * (j << 1) - 511);
2343
8.70M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
2344
2345
33.3M
        for (k = in_loop_cnt; k != 0; k--) {
2346
24.6M
          WORD32 tmp;
2347
24.6M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2348
2349
24.6M
          data += (del << 1);
2350
2351
24.6M
          x1r = *data;
2352
24.6M
          x1i = *(data + 1);
2353
24.6M
          data += (del << 1);
2354
2355
24.6M
          x2r = *data;
2356
24.6M
          x2i = *(data + 1);
2357
24.6M
          data += (del << 1);
2358
2359
24.6M
          x3r = *data;
2360
24.6M
          x3i = *(data + 1);
2361
24.6M
          data -= 3 * (del << 1);
2362
2363
24.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2364
24.6M
                                   ixheaacd_mult32_sat(x1i, w1h));
2365
24.6M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2366
24.6M
          x1r = tmp;
2367
2368
24.6M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2369
24.6M
                                   ixheaacd_mult32_sat(x2i, w2l));
2370
24.6M
          x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2371
24.6M
                                   ixheaacd_mult32_sat(x2i, w2h));
2372
24.6M
          x2r = tmp;
2373
2374
24.6M
          tmp = -ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2375
24.6M
                                    ixheaacd_mult32_sat(x3i, w3h));
2376
24.6M
          x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2377
24.6M
          x3r = tmp;
2378
2379
24.6M
          x0r = (*data);
2380
24.6M
          x0i = (*(data + 1));
2381
2382
24.6M
          x0r = ixheaac_add32_sat(x0r, x2r);
2383
24.6M
          x0i = ixheaac_add32_sat(x0i, x2i);
2384
24.6M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2385
24.6M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2386
24.6M
          x1r = ixheaac_add32_sat(x1r, x3r);
2387
24.6M
          x1i = ixheaac_sub32_sat(x1i, x3i);
2388
24.6M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2389
24.6M
          x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2390
2391
24.6M
          x0r = ixheaac_add32_sat(x0r, x1r);
2392
24.6M
          x0i = ixheaac_add32_sat(x0i, x1i);
2393
24.6M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2394
24.6M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2395
24.6M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2396
24.6M
          x2i = ixheaac_add32_sat(x2i, x3r);
2397
24.6M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2398
24.6M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2399
2400
24.6M
          *data = x0r;
2401
24.6M
          *(data + 1) = x0i;
2402
24.6M
          data += (del << 1);
2403
2404
24.6M
          *data = x2r;
2405
24.6M
          *(data + 1) = x2i;
2406
24.6M
          data += (del << 1);
2407
2408
24.6M
          *data = x1r;
2409
24.6M
          *(data + 1) = x1i;
2410
24.6M
          data += (del << 1);
2411
2412
24.6M
          *data = x3i;
2413
24.6M
          *(data + 1) = x3r;
2414
24.6M
          data += (del << 1);
2415
24.6M
        }
2416
8.70M
        data -= 2 * npoints;
2417
8.70M
        data += 2;
2418
8.70M
      }
2419
2.04M
      nodespacing >>= 2;
2420
2.04M
      del <<= 2;
2421
2.04M
      in_loop_cnt >>= 2;
2422
2.04M
    }
2423
995k
    if (not_power_4) {
2424
195k
      const WORD32 *twiddles = ptr_w;
2425
195k
      nodespacing <<= 1;
2426
195k
      shift += 1;
2427
21.1M
      for (j = del / 2; j != 0; j--) {
2428
20.9M
        WORD32 w1h = *twiddles;
2429
20.9M
        WORD32 w1l = *(twiddles + 1);
2430
2431
20.9M
        WORD32 tmp;
2432
20.9M
        twiddles += nodespacing * 2;
2433
2434
20.9M
        x0r = *ptr_y;
2435
20.9M
        x0i = *(ptr_y + 1);
2436
20.9M
        ptr_y += (del << 1);
2437
2438
20.9M
        x1r = *ptr_y;
2439
20.9M
        x1i = *(ptr_y + 1);
2440
2441
20.9M
        tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2442
20.9M
                                 ixheaacd_mult32_sat(x1i, w1h));
2443
20.9M
        x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2444
20.9M
        x1r = tmp;
2445
2446
20.9M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
2447
20.9M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2448
20.9M
        ptr_y -= (del << 1);
2449
2450
20.9M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
2451
20.9M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2452
20.9M
        ptr_y += 2;
2453
20.9M
      }
2454
195k
      twiddles = ptr_w;
2455
21.1M
      for (j = del / 2; j != 0; j--) {
2456
20.9M
        WORD32 w1h = *twiddles;
2457
20.9M
        WORD32 w1l = *(twiddles + 1);
2458
20.9M
        WORD32 tmp;
2459
20.9M
        twiddles += nodespacing * 2;
2460
2461
20.9M
        x0r = *ptr_y;
2462
20.9M
        x0i = *(ptr_y + 1);
2463
20.9M
        ptr_y += (del << 1);
2464
2465
20.9M
        x1r = *ptr_y;
2466
20.9M
        x1i = *(ptr_y + 1);
2467
2468
20.9M
        tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
2469
20.9M
                                 ixheaacd_mult32_sat(x1i, w1l));
2470
20.9M
        x1i = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2471
20.9M
                                 ixheaacd_mult32_sat(x1i, w1h));
2472
20.9M
        x1r = tmp;
2473
2474
20.9M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
2475
20.9M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2476
20.9M
        ptr_y -= (del << 1);
2477
2478
20.9M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
2479
20.9M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2480
20.9M
        ptr_y += 2;
2481
20.9M
      }
2482
195k
    }
2483
995k
  }
2484
2485
240M
  for (i = 0; i < nlength; i++) {
2486
230M
    xr[i] = y[2 * i];
2487
230M
    xi[i] = y[2 * i + 1];
2488
230M
  }
2489
2490
9.38M
  *preshift = shift - *preshift;
2491
9.38M
  return;
2492
9.38M
}
2493
2494
/**
 * Three-point complex butterfly on interleaved fixed-point data.
 *
 * inp       Six input words laid out as re0, im0, re1, im1, re2, im2.
 * op        Six output words in the same interleaved layout.
 * sign_dir  Scales the rotation constant below. The caller in this file
 *           forwards its fft_mode argument here; the value is assumed to be
 *           +1 or -1 (TODO confirm) - a larger magnitude would overflow the
 *           32-bit product that forms sinmu.
 *
 * Every addition and subtraction saturates, so full-scale inputs clip
 * instead of triggering signed-overflow undefined behaviour.
 */
static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
                                                        WORD32 sign_dir) {
  WORD32 add_r, sub_r;
  WORD32 add_i, sub_i;
  WORD32 temp_real, temp_imag, temp;

  WORD32 p1, p2, p3, p4;

  /* 1859775393 is approximately sin(pi/3) * 2^31; the sign follows sign_dir. */
  WORD32 sinmu = -1859775393 * sign_dir;

  /* Partial sums for the first output: input 0 + input 1. */
  temp_real = ixheaac_add32_sat(inp[0], inp[2]);
  temp_imag = ixheaac_add32_sat(inp[1], inp[3]);

  /* Sum and difference of inputs 1 and 2, per component. */
  add_r = ixheaac_add32_sat(inp[2], inp[4]);
  add_i = ixheaac_add32_sat(inp[3], inp[5]);

  sub_r = ixheaac_sub32_sat(inp[2], inp[4]);
  sub_i = ixheaac_sub32_sat(inp[3], inp[5]);

  /* Half of the sums, and the differences scaled by sinmu. */
  p1 = add_r >> 1;
  p4 = add_i >> 1;
  p2 = ixheaac_mult32_shl(sub_i, sinmu);
  p3 = ixheaac_mult32_shl(sub_r, sinmu);

  /* Saturating like the rest of the butterfly: a plain subtraction here was
   * the only operation that could overflow for full-scale operands. */
  temp = ixheaac_sub32_sat(inp[0], p1);

  op[0] = ixheaac_add32_sat(temp_real, inp[4]);
  op[1] = ixheaac_add32_sat(temp_imag, inp[5]);
  op[2] = ixheaac_add32_sat(temp, p2);
  op[3] = ixheaac_sub32_sat(ixheaac_sub32_sat(inp[1], p3), p4);
  op[4] = ixheaac_sub32_sat(temp, p2);
  op[5] = ixheaac_sub32_sat(ixheaac_add32_sat(inp[1], p3), p4);

  return;
}
2530
2531
/**
 * In-place complex FFT for lengths that contain a factor of three.
 *
 * Steps, in order:
 *   1. Strip the factors of three from nlength (cnfac of them), leaving mpass.
 *   2. For each of the 3 * cnfac column offsets, gather every third sample,
 *      run the function-pointer transform ixheaacd_complex_fft_p2 on the
 *      mpass gathered points, and write them back to the same positions.
 *   3. Copy the data, halved, into an interleaved (re, im) scratch buffer.
 *   4. Rotate the 2nd and 3rd sample of each consecutive triple by twiddles
 *      read from ixheaacd_twiddle_table_3pr / ixheaacd_twiddle_table_3pi.
 *   5. Apply a 3-point butterfly to each of the first mpass triples and
 *      scatter the results into three consecutive mpass-long segments of
 *      xr / xi.
 *
 * xr, xi    Real and imaginary parts, nlength words each; overwritten.
 * nlength   Transform length. Must be positive, a multiple of three, and at
 *           most 512 (the interleaved scratch buffers hold 1024 words).
 * fft_mode  Negative selects one twiddle rotation sense, anything else the
 *           opposite one; also forwarded to the sub-transform and butterfly.
 * preshift  In/out: replaced by shift - *preshift + 1, where shift is
 *           (n + 4) / 2 for even n and (n + 5) / 2 for odd n, with
 *           n = floor(log2(mpass)).
 *
 * If nlength is outside the range above, the function returns without
 * touching xr, xi or *preshift. Previously such values either never
 * terminated (nlength <= 0), overran the stack scratch buffers
 * (nlength > 512), or read uninitialised scratch and wrote past
 * xr[nlength - 1] (nlength not divisible by three).
 *
 * NOTE(review): only the single-factor case (cnfac == 1) was exercised in the
 * coverage data this listing comes from; lengths with several factors of
 * three are processed exactly as before and should be confirmed separately.
 */
VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
                             WORD32 fft_mode, WORD32 *preshift) {
  WORD32 i, j;
  WORD32 shift = 0;
  WORD32 xr_3[384];
  WORD32 xi_3[384];
  WORD32 x[1024];
  WORD32 y[1024];
  WORD32 cnfac, npts;
  WORD32 mpass = nlength;
  WORD32 n = 0;
  WORD32 tw_skip;
  WORD32 *ptr_x = x;
  WORD32 *ptr_y = y;
  const WORD32 *w1r = ixheaacd_twiddle_table_3pr;
  const WORD32 *w1i = ixheaacd_twiddle_table_3pi;

  /* Reject lengths the loops below cannot handle safely. */
  if (nlength <= 0 || nlength % 3 != 0 ||
      nlength > (WORD32)(sizeof(x) / sizeof(x[0])) / 2) {
    return;
  }

  /* Peel off the factors of three; mpass keeps the remaining length. */
  cnfac = 0;
  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }
  npts = mpass;

  /* Stride-3 sub-transforms. The same shift variable is handed to every
   * call, exactly as before; its final value is discarded further down. */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      xr_3[j] = xr[3 * j + i];
      xi_3[j] = xi[3 * j + i];
    }

    (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);

    for (j = 0; j < mpass; j++) {
      xr[3 * j + i] = xr_3[j];
      xi[3 * j + i] = xi_3[j];
    }
  }

  /* n = floor(log2(mpass)). */
  while (npts >> 1) {
    n++;
    npts = npts >> 1;
  }

  if (n % 2 == 0)
    shift = ((n + 4)) / 2;
  else
    shift = ((n + 5) / 2);

  *preshift = shift - *preshift + 1;

  /* Interleave into scratch, dropping one bit of each sample. */
  for (i = 0; i < nlength; i++) {
    ptr_x[2 * i] = (xr[i] >> 1);
    ptr_x[2 * i + 1] = (xi[i] >> 1);
  }

  /* Extra table entries skipped after each triple; loop-invariant. */
  tw_skip = 3 * (128 / mpass - 1) + 1;

  for (i = 0; i < nlength; i += 3) {
    /* The first sample of a triple is left as is; samples 1 and 2 are
     * rotated. The table pointers are advanced before each read, so entry 0
     * of either table is never used. */
    for (j = 1; j <= 2; j++) {
      WORD32 *cplx = &ptr_x[2 * (i + j)];
      const WORD32 re = cplx[0];
      const WORD32 im = cplx[1];

      w1r++;
      w1i++;

      if (fft_mode < 0) {
        cplx[0] = ixheaac_sub32_sat(ixheaacd_mult32_sat(re, (*w1r)),
                                    ixheaacd_mult32_sat(im, (*w1i)));
        cplx[1] = ixheaac_add32_sat(ixheaacd_mult32_sat(re, (*w1i)),
                                    ixheaacd_mult32_sat(im, (*w1r)));
      } else {
        cplx[0] = ixheaac_add32_sat(ixheaacd_mult32_sat(re, (*w1r)),
                                    ixheaacd_mult32_sat(im, (*w1i)));
        cplx[1] = ixheaac_sub32_sat(ixheaacd_mult32_sat(im, (*w1r)),
                                    ixheaacd_mult32_sat(re, (*w1i)));
      }
    }

    w1r += tw_skip;
    w1i += tw_skip;
  }

  /* One 3-point butterfly per triple, scratch x -> scratch y. */
  for (i = 0; i < mpass; i++) {
    ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);

    ptr_x = ptr_x + 6;
    ptr_y = ptr_y + 6;
  }

  /* Scatter the three outputs of butterfly i to i, mpass + i, 2 * mpass + i. */
  ptr_y = y;
  for (i = 0; i < mpass; i++) {
    xr[i] = *ptr_y++;
    xi[i] = *ptr_y++;
    xr[mpass + i] = *ptr_y++;
    xi[mpass + i] = *ptr_y++;
    xr[2 * mpass + i] = *ptr_y++;
    xi[2 * mpass + i] = *ptr_y++;
  }

  return;
}
2663
2664
/**
 * Length-based dispatcher for the fixed-point complex FFT.
 *
 * When nlength has at most one bit set, the call goes through the
 * ixheaacd_complex_fft_p2 function pointer; every other length is handed to
 * ixheaacd_complex_fft_p3. All arguments are forwarded unchanged.
 *
 * data_r, data_i  Real and imaginary sample arrays, transformed in place by
 *                 the selected routine.
 * nlength         Transform length.
 * fft_mode        Passed through to the selected routine.
 * preshift        In/out scaling value, passed through to the selected
 *                 routine.
 */
VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
                          WORD32 *preshift) {
  /* Clearing the lowest set bit leaves zero only if no other bit was set. */
  const WORD32 remaining_bits = nlength & (nlength - 1);

  if (remaining_bits == 0) {
    (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
    return;
  }

  ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
}