Coverage Report

Created: 2025-08-29 06:09

/src/libxaac/decoder/ixheaacd_fft.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
#include <stdlib.h>
21
#include <stdio.h>
22
23
#include "ixheaac_type_def.h"
24
#include "ixheaacd_interface.h"
25
#include "ixheaac_constants.h"
26
#include "ixheaac_basic_ops32.h"
27
#include "ixheaac_basic_ops40.h"
28
#include "ixheaacd_function_selector.h"
29
30
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
31
extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
32
extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
33
extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
34
extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
35
extern const WORD8 ixheaacd_mps_dig_rev[8];
36
37
#define PLATFORM_INLINE __inline
38
39
/*
 * DIG_REV(i, m, j)
 *
 * Takes the value of (i) as an unsigned word, swaps adjacent 2-bit pairs
 * inside every nibble, then adjacent nibbles inside every byte, then adjacent
 * bytes inside every 16-bit half, and finally stores the result shifted right
 * by (m) into (j).  The two 16-bit halves are never exchanged with each other.
 *
 * (i) and (m) are each evaluated exactly once; (j) must be an lvalue.
 */
#define DIG_REV(i, m, j)                                           \
  do {                                                             \
    unsigned dig_rev_bits_ = (i);                                  \
    /* exchange neighbouring 2-bit groups */                       \
    dig_rev_bits_ = ((dig_rev_bits_ & 0x33333333u) << 2) |         \
                    ((dig_rev_bits_ & 0xCCCCCCCCu) >> 2);          \
    /* exchange neighbouring nibbles */                            \
    dig_rev_bits_ = ((dig_rev_bits_ & 0x0F0F0F0Fu) << 4) |         \
                    ((dig_rev_bits_ & 0xF0F0F0F0u) >> 4);          \
    /* exchange neighbouring bytes */                              \
    dig_rev_bits_ = ((dig_rev_bits_ & 0x00FF00FFu) << 8) |         \
                    ((dig_rev_bits_ & 0xFF00FF00u) >> 8);          \
    (j) = dig_rev_bits_ >> (m);                                    \
  } while (0)
47
48
2.05G
/*
 * Multiplies a by b in 64-bit precision, shifts the product right by 31 bits
 * and narrows it to 32 bits through ixheaac_sat64_32().
 *
 * NOTE(review): ixheaac_sat64_32() is defined outside this file; by its name
 * it presumably clamps to the WORD32 range - confirm against its definition.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
  const WORD64 wide_product = (WORD64)a * (WORD64)b;

  return ixheaac_sat64_32(wide_product >> 31);
}
57
58
330M
/*
 * Multiply-accumulate: forms ixheaacd_mult32_sat(b, c) and adds it to a with
 * ixheaac_add32_sat().
 *
 * NOTE(review): ixheaac_add32_sat() is defined outside this file; by its name
 * it presumably performs a saturating 32-bit add - confirm.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
  const WORD32 scaled_product = ixheaacd_mult32_sat(b, c);

  return ixheaac_add32_sat(a, scaled_product);
}
65
66
1.72G
/*
 * Returns the product a * b.  The product is stored in a FLOAT32 local before
 * being returned, exactly as the arithmetic was staged originally.
 */
static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) {
  const FLOAT32 product = a * b;

  return product;
}
73
74
319M
/*
 * Multiply-accumulate: returns a + b * c.  The sum is stored in a FLOAT32
 * local before being returned.
 */
static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) {
  const FLOAT32 accumulated = a + b * c;

  return accumulated;
}
81
82
VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
83
7.96M
                                 WORD32 npoints) {
84
7.96M
  WORD32 i, j, k;
85
7.96M
  FLOAT32 y[64], z[64];
86
7.96M
  FLOAT32 *ptr_y = y, *ptr_z = z;
87
7.96M
  const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt;
88
89
71.6M
  for (i = 0; i < npoints; i += 4) {
90
63.7M
    FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
91
63.7M
    FLOAT32 *inp = ptr_xr;
92
63.7M
    FLOAT32 tmk;
93
94
63.7M
    WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2];
95
96
63.7M
    inp += (h2);
97
98
63.7M
    x0r = *inp;
99
63.7M
    x0i = *(inp + 1);
100
63.7M
    inp += 16;
101
102
63.7M
    x1r = *inp;
103
63.7M
    x1i = *(inp + 1);
104
63.7M
    inp += 16;
105
106
63.7M
    x2r = *inp;
107
63.7M
    x2i = *(inp + 1);
108
63.7M
    inp += 16;
109
110
63.7M
    x3r = *inp;
111
63.7M
    x3i = *(inp + 1);
112
113
63.7M
    x0r = x0r + x2r;
114
63.7M
    x0i = x0i + x2i;
115
116
63.7M
    tmk = x0r - x2r;
117
63.7M
    x2r = tmk - x2r;
118
63.7M
    tmk = x0i - x2i;
119
63.7M
    x2i = tmk - x2i;
120
121
63.7M
    x1r = x1r + x3r;
122
63.7M
    x1i = x1i + x3i;
123
124
63.7M
    tmk = x1r - x3r;
125
63.7M
    x3r = tmk - x3r;
126
63.7M
    tmk = x1i - x3i;
127
63.7M
    x3i = tmk - x3i;
128
129
63.7M
    x0r = x0r + x1r;
130
63.7M
    x0i = x0i + x1i;
131
132
63.7M
    tmk = x0r - x1r;
133
63.7M
    x1r = tmk - x1r;
134
63.7M
    tmk = x0i - x1i;
135
63.7M
    x1i = tmk - x1i;
136
137
63.7M
    x2r = x2r + x3i;
138
63.7M
    x2i = x2i - x3r;
139
140
63.7M
    tmk = x2r - x3i;
141
63.7M
    x3i = tmk - x3i;
142
63.7M
    tmk = x2i + x3r;
143
63.7M
    x3r = tmk + x3r;
144
145
63.7M
    *ptr_y++ = x0r;
146
63.7M
    *ptr_y++ = x0i;
147
63.7M
    *ptr_y++ = x2r;
148
63.7M
    *ptr_y++ = x2i;
149
63.7M
    *ptr_y++ = x1r;
150
63.7M
    *ptr_y++ = x1i;
151
63.7M
    *ptr_y++ = x3i;
152
63.7M
    *ptr_y++ = x3r;
153
154
63.7M
    inp = ptr_xi;
155
156
63.7M
    inp += (h2);
157
158
63.7M
    x0r = *inp;
159
63.7M
    x0i = *(inp + 1);
160
63.7M
    inp += 16;
161
162
63.7M
    x1r = *inp;
163
63.7M
    x1i = *(inp + 1);
164
63.7M
    inp += 16;
165
166
63.7M
    x2r = *inp;
167
63.7M
    x2i = *(inp + 1);
168
63.7M
    inp += 16;
169
170
63.7M
    x3r = *inp;
171
63.7M
    x3i = *(inp + 1);
172
173
63.7M
    x0r = x0r + x2r;
174
63.7M
    x0i = x0i + x2i;
175
176
63.7M
    tmk = x0r - x2r;
177
63.7M
    x2r = tmk - x2r;
178
63.7M
    tmk = x0i - x2i;
179
63.7M
    x2i = tmk - x2i;
180
181
63.7M
    x1r = x1r + x3r;
182
63.7M
    x1i = x1i + x3i;
183
184
63.7M
    tmk = x1r - x3r;
185
63.7M
    x3r = tmk - x3r;
186
63.7M
    tmk = x1i - x3i;
187
63.7M
    x3i = tmk - x3i;
188
189
63.7M
    x0r = x0r + x1r;
190
63.7M
    x0i = x0i + x1i;
191
192
63.7M
    tmk = x0r - x1r;
193
63.7M
    x1r = tmk - x1r;
194
63.7M
    tmk = x0i - x1i;
195
63.7M
    x1i = tmk - x1i;
196
197
63.7M
    x2r = x2r + x3i;
198
63.7M
    x2i = x2i - x3r;
199
200
63.7M
    tmk = x2r - x3i;
201
63.7M
    x3i = tmk - x3i;
202
63.7M
    tmk = x2i + x3r;
203
63.7M
    x3r = tmk + x3r;
204
205
63.7M
    *ptr_z++ = x0r;
206
63.7M
    *ptr_z++ = x0i;
207
63.7M
    *ptr_z++ = x2r;
208
63.7M
    *ptr_z++ = x2i;
209
63.7M
    *ptr_z++ = x1r;
210
63.7M
    *ptr_z++ = x1i;
211
63.7M
    *ptr_z++ = x3i;
212
63.7M
    *ptr_z++ = x3r;
213
63.7M
  }
214
7.96M
  ptr_y -= 64;
215
7.96M
  ptr_z -= 64;
216
7.96M
  {
217
7.96M
    FLOAT32 *data_r = ptr_y;
218
7.96M
    FLOAT32 *data_i = ptr_z;
219
23.8M
    for (k = 2; k != 0; k--) {
220
15.9M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
221
222
15.9M
      x0r = (*data_r);
223
15.9M
      x0i = (*(data_r + 1));
224
15.9M
      data_r += 8;
225
226
15.9M
      x1r = (*data_r);
227
15.9M
      x1i = (*(data_r + 1));
228
15.9M
      data_r += 8;
229
230
15.9M
      x2r = (*data_r);
231
15.9M
      x2i = (*(data_r + 1));
232
15.9M
      data_r += 8;
233
234
15.9M
      x3r = (*data_r);
235
15.9M
      x3i = (*(data_r + 1));
236
15.9M
      data_r -= 24;
237
238
15.9M
      x0r = x0r + x2r;
239
15.9M
      x0i = x0i + x2i;
240
15.9M
      x2r = x0r - (x2r * 2);
241
15.9M
      x2i = x0i - (x2i * 2);
242
15.9M
      x1r = x1r + x3r;
243
15.9M
      x1i = x1i + x3i;
244
15.9M
      x3r = x1r - (x3r * 2);
245
15.9M
      x3i = x1i - (x3i * 2);
246
247
15.9M
      x0r = x0r + x1r;
248
15.9M
      x0i = x0i + x1i;
249
15.9M
      x1r = x0r - (x1r * 2);
250
15.9M
      x1i = x0i - (x1i * 2);
251
15.9M
      x2r = x2r + x3i;
252
15.9M
      x2i = x2i - x3r;
253
15.9M
      x3i = x2r - (x3i * 2);
254
15.9M
      x3r = x2i + (x3r * 2);
255
256
15.9M
      *data_r = x0r;
257
15.9M
      *(data_r + 1) = x0i;
258
15.9M
      data_r += 8;
259
260
15.9M
      *data_r = x2r;
261
15.9M
      *(data_r + 1) = x2i;
262
15.9M
      data_r += 8;
263
264
15.9M
      *data_r = x1r;
265
15.9M
      *(data_r + 1) = x1i;
266
15.9M
      data_r += 8;
267
268
15.9M
      *data_r = x3i;
269
15.9M
      *(data_r + 1) = x3r;
270
15.9M
      data_r += 8;
271
272
15.9M
      x0r = (*data_i);
273
15.9M
      x0i = (*(data_i + 1));
274
15.9M
      data_i += 8;
275
276
15.9M
      x1r = (*data_i);
277
15.9M
      x1i = (*(data_i + 1));
278
15.9M
      data_i += 8;
279
280
15.9M
      x2r = (*data_i);
281
15.9M
      x2i = (*(data_i + 1));
282
15.9M
      data_i += 8;
283
284
15.9M
      x3r = (*data_i);
285
15.9M
      x3i = (*(data_i + 1));
286
15.9M
      data_i -= 24;
287
288
15.9M
      x0r = x0r + x2r;
289
15.9M
      x0i = x0i + x2i;
290
15.9M
      x2r = x0r - (x2r * 2);
291
15.9M
      x2i = x0i - (x2i * 2);
292
15.9M
      x1r = x1r + x3r;
293
15.9M
      x1i = x1i + x3i;
294
15.9M
      x3r = x1r - (x3r * 2);
295
15.9M
      x3i = x1i - (x3i * 2);
296
297
15.9M
      x0r = x0r + x1r;
298
15.9M
      x0i = x0i + x1i;
299
15.9M
      x1r = x0r - (x1r * 2);
300
15.9M
      x1i = x0i - (x1i * 2);
301
15.9M
      x2r = x2r + x3i;
302
15.9M
      x2i = x2i - x3r;
303
15.9M
      x3i = x2r - (x3i * 2);
304
15.9M
      x3r = x2i + (x3r * 2);
305
306
15.9M
      *data_i = x0r;
307
15.9M
      *(data_i + 1) = x0i;
308
15.9M
      data_i += 8;
309
310
15.9M
      *data_i = x2r;
311
15.9M
      *(data_i + 1) = x2i;
312
15.9M
      data_i += 8;
313
314
15.9M
      *data_i = x1r;
315
15.9M
      *(data_i + 1) = x1i;
316
15.9M
      data_i += 8;
317
318
15.9M
      *data_i = x3i;
319
15.9M
      *(data_i + 1) = x3r;
320
15.9M
      data_i += 8;
321
15.9M
    }
322
7.96M
    data_r = ptr_y + 2;
323
7.96M
    data_i = ptr_z + 2;
324
325
23.8M
    for (k = 2; k != 0; k--) {
326
15.9M
      FLOAT32 tmp;
327
15.9M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
328
329
15.9M
      data_r += 8;
330
331
15.9M
      x1r = *data_r;
332
15.9M
      x1i = *(data_r + 1);
333
15.9M
      data_r += 8;
334
335
15.9M
      x2r = *data_r;
336
15.9M
      x2i = *(data_r + 1);
337
15.9M
      data_r += 8;
338
339
15.9M
      x3r = *data_r;
340
15.9M
      x3i = *(data_r + 1);
341
15.9M
      data_r -= 24;
342
343
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
344
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
345
15.9M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
346
15.9M
                                   (FLOAT32)x1i, 0.923880f);
347
15.9M
      x1r = tmp;
348
349
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
350
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
351
15.9M
      x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
352
15.9M
                                   (FLOAT32)x2i, 0.707107f);
353
15.9M
      x2r = tmp;
354
355
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
356
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
357
15.9M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
358
15.9M
                                   (FLOAT32)x3i, 0.382683f);
359
15.9M
      x3r = tmp;
360
361
15.9M
      x0r = (*data_r);
362
15.9M
      x0i = (*(data_r + 1));
363
364
15.9M
      x0r = x0r + (x2r);
365
15.9M
      x0i = x0i + (x2i);
366
15.9M
      x2r = x0r - (x2r * 2);
367
15.9M
      x2i = x0i - (x2i * 2);
368
15.9M
      x1r = x1r + x3r;
369
15.9M
      x1i = x1i + x3i;
370
15.9M
      x3r = x1r - (x3r * 2);
371
15.9M
      x3i = x1i - (x3i * 2);
372
373
15.9M
      x0r = x0r + (x1r);
374
15.9M
      x0i = x0i + (x1i);
375
15.9M
      x1r = x0r - (x1r * 2);
376
15.9M
      x1i = x0i - (x1i * 2);
377
15.9M
      x2r = x2r + (x3i);
378
15.9M
      x2i = x2i - (x3r);
379
15.9M
      x3i = x2r - (x3i * 2);
380
15.9M
      x3r = x2i + (x3r * 2);
381
382
15.9M
      *data_r = x0r;
383
15.9M
      *(data_r + 1) = x0i;
384
15.9M
      data_r += 8;
385
386
15.9M
      *data_r = x2r;
387
15.9M
      *(data_r + 1) = x2i;
388
15.9M
      data_r += 8;
389
390
15.9M
      *data_r = x1r;
391
15.9M
      *(data_r + 1) = x1i;
392
15.9M
      data_r += 8;
393
394
15.9M
      *data_r = x3i;
395
15.9M
      *(data_r + 1) = x3r;
396
15.9M
      data_r += 8;
397
15.9M
      data_i += 8;
398
399
15.9M
      x1r = *data_i;
400
15.9M
      x1i = *(data_i + 1);
401
15.9M
      data_i += 8;
402
403
15.9M
      x2r = *data_i;
404
15.9M
      x2i = *(data_i + 1);
405
15.9M
      data_i += 8;
406
407
15.9M
      x3r = *data_i;
408
15.9M
      x3i = *(data_i + 1);
409
15.9M
      data_i -= 24;
410
411
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
412
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
413
15.9M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
414
15.9M
                                   (FLOAT32)x1i, 0.923880f);
415
15.9M
      x1r = tmp;
416
417
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
418
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
419
15.9M
      x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
420
15.9M
                                   (FLOAT32)x2i, 0.707107f);
421
15.9M
      x2r = tmp;
422
423
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
424
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
425
15.9M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
426
15.9M
                                   (FLOAT32)x3i, 0.382683f);
427
15.9M
      x3r = tmp;
428
429
15.9M
      x0r = (*data_i);
430
15.9M
      x0i = (*(data_i + 1));
431
432
15.9M
      x0r = x0r + (x2r);
433
15.9M
      x0i = x0i + (x2i);
434
15.9M
      x2r = x0r - (x2r * 2);
435
15.9M
      x2i = x0i - (x2i * 2);
436
15.9M
      x1r = x1r + x3r;
437
15.9M
      x1i = x1i + x3i;
438
15.9M
      x3r = x1r - (x3r * 2);
439
15.9M
      x3i = x1i - (x3i * 2);
440
441
15.9M
      x0r = x0r + (x1r);
442
15.9M
      x0i = x0i + (x1i);
443
15.9M
      x1r = x0r - (x1r * 2);
444
15.9M
      x1i = x0i - (x1i * 2);
445
15.9M
      x2r = x2r + (x3i);
446
15.9M
      x2i = x2i - (x3r);
447
15.9M
      x3i = x2r - (x3i * 2);
448
15.9M
      x3r = x2i + (x3r * 2);
449
450
15.9M
      *data_i = x0r;
451
15.9M
      *(data_i + 1) = x0i;
452
15.9M
      data_i += 8;
453
454
15.9M
      *data_i = x2r;
455
15.9M
      *(data_i + 1) = x2i;
456
15.9M
      data_i += 8;
457
458
15.9M
      *data_i = x1r;
459
15.9M
      *(data_i + 1) = x1i;
460
15.9M
      data_i += 8;
461
462
15.9M
      *data_i = x3i;
463
15.9M
      *(data_i + 1) = x3r;
464
15.9M
      data_i += 8;
465
15.9M
    }
466
7.96M
    data_r -= 62;
467
7.96M
    data_i -= 62;
468
23.8M
    for (k = 2; k != 0; k--) {
469
15.9M
      FLOAT32 tmp;
470
15.9M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
471
472
15.9M
      data_r += 8;
473
474
15.9M
      x1r = *data_r;
475
15.9M
      x1i = *(data_r + 1);
476
15.9M
      data_r += 8;
477
478
15.9M
      x2r = *data_r;
479
15.9M
      x2i = *(data_r + 1);
480
15.9M
      data_r += 8;
481
482
15.9M
      x3r = *data_r;
483
15.9M
      x3i = *(data_r + 1);
484
15.9M
      data_r -= 24;
485
486
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
487
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
488
15.9M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
489
15.9M
                                   (FLOAT32)x1i, 0.707107f);
490
15.9M
      x1r = tmp;
491
492
15.9M
      tmp = x2i;
493
15.9M
      x2i = -x2r;
494
15.9M
      x2r = tmp;
495
496
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
497
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
498
15.9M
      x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
499
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
500
15.9M
      x3r = tmp;
501
502
15.9M
      x0r = (*data_r);
503
15.9M
      x0i = (*(data_r + 1));
504
505
15.9M
      x0r = x0r + (x2r);
506
15.9M
      x0i = x0i + (x2i);
507
15.9M
      x2r = x0r - (x2r * 2);
508
15.9M
      x2i = x0i - (x2i * 2);
509
15.9M
      x1r = x1r + x3r;
510
15.9M
      x1i = x1i + x3i;
511
15.9M
      x3r = x1r - (x3r * 2);
512
15.9M
      x3i = x1i - (x3i * 2);
513
514
15.9M
      x0r = x0r + (x1r);
515
15.9M
      x0i = x0i + (x1i);
516
15.9M
      x1r = x0r - (x1r * 2);
517
15.9M
      x1i = x0i - (x1i * 2);
518
15.9M
      x2r = x2r + (x3i);
519
15.9M
      x2i = x2i - (x3r);
520
15.9M
      x3i = x2r - (x3i * 2);
521
15.9M
      x3r = x2i + (x3r * 2);
522
523
15.9M
      *data_r = x0r;
524
15.9M
      *(data_r + 1) = x0i;
525
15.9M
      data_r += 8;
526
527
15.9M
      *data_r = x2r;
528
15.9M
      *(data_r + 1) = x2i;
529
15.9M
      data_r += 8;
530
531
15.9M
      *data_r = x1r;
532
15.9M
      *(data_r + 1) = x1i;
533
15.9M
      data_r += 8;
534
535
15.9M
      *data_r = x3i;
536
15.9M
      *(data_r + 1) = x3r;
537
15.9M
      data_r += 8;
538
15.9M
      data_i += 8;
539
540
15.9M
      x1r = *data_i;
541
15.9M
      x1i = *(data_i + 1);
542
15.9M
      data_i += 8;
543
544
15.9M
      x2r = *data_i;
545
15.9M
      x2i = *(data_i + 1);
546
15.9M
      data_i += 8;
547
548
15.9M
      x3r = *data_i;
549
15.9M
      x3i = *(data_i + 1);
550
15.9M
      data_i -= 24;
551
552
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
553
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
554
15.9M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
555
15.9M
                                   (FLOAT32)x1i, 0.707107f);
556
15.9M
      x1r = tmp;
557
558
15.9M
      tmp = x2i;
559
15.9M
      x2i = -x2r;
560
15.9M
      x2r = tmp;
561
562
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
563
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
564
15.9M
      x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
565
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
566
15.9M
      x3r = tmp;
567
568
15.9M
      x0r = (*data_i);
569
15.9M
      x0i = (*(data_i + 1));
570
571
15.9M
      x0r = x0r + (x2r);
572
15.9M
      x0i = x0i + (x2i);
573
15.9M
      x2r = x0r - (x2r * 2);
574
15.9M
      x2i = x0i - (x2i * 2);
575
15.9M
      x1r = x1r + x3r;
576
15.9M
      x1i = x1i + x3i;
577
15.9M
      x3r = x1r - (x3r * 2);
578
15.9M
      x3i = x1i - (x3i * 2);
579
580
15.9M
      x0r = x0r + (x1r);
581
15.9M
      x0i = x0i + (x1i);
582
15.9M
      x1r = x0r - (x1r * 2);
583
15.9M
      x1i = x0i - (x1i * 2);
584
15.9M
      x2r = x2r + (x3i);
585
15.9M
      x2i = x2i - (x3r);
586
15.9M
      x3i = x2r - (x3i * 2);
587
15.9M
      x3r = x2i + (x3r * 2);
588
589
15.9M
      *data_i = x0r;
590
15.9M
      *(data_i + 1) = x0i;
591
15.9M
      data_i += 8;
592
593
15.9M
      *data_i = x2r;
594
15.9M
      *(data_i + 1) = x2i;
595
15.9M
      data_i += 8;
596
597
15.9M
      *data_i = x1r;
598
15.9M
      *(data_i + 1) = x1i;
599
15.9M
      data_i += 8;
600
601
15.9M
      *data_i = x3i;
602
15.9M
      *(data_i + 1) = x3r;
603
15.9M
      data_i += 8;
604
15.9M
    }
605
7.96M
    data_r -= 62;
606
7.96M
    data_i -= 62;
607
23.8M
    for (k = 2; k != 0; k--) {
608
15.9M
      FLOAT32 tmp;
609
15.9M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
610
611
15.9M
      data_r += 8;
612
613
15.9M
      x1r = *data_r;
614
15.9M
      x1i = *(data_r + 1);
615
15.9M
      data_r += 8;
616
617
15.9M
      x2r = *data_r;
618
15.9M
      x2i = *(data_r + 1);
619
15.9M
      data_r += 8;
620
621
15.9M
      x3r = *data_r;
622
15.9M
      x3i = *(data_r + 1);
623
15.9M
      data_r -= 24;
624
625
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
626
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
627
15.9M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
628
15.9M
                                   (FLOAT32)x1i, 0.382683f);
629
15.9M
      x1r = tmp;
630
631
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
632
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
633
15.9M
      x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
634
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
635
15.9M
      x2r = tmp;
636
637
15.9M
      tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
638
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
639
15.9M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
640
15.9M
                                   (FLOAT32)x3i, 0.923880f);
641
15.9M
      x3r = tmp;
642
643
15.9M
      x0r = (*data_r);
644
15.9M
      x0i = (*(data_r + 1));
645
646
15.9M
      x0r = x0r + (x2r);
647
15.9M
      x0i = x0i + (x2i);
648
15.9M
      x2r = x0r - (x2r * 2);
649
15.9M
      x2i = x0i - (x2i * 2);
650
15.9M
      x1r = x1r + x3r;
651
15.9M
      x1i = x1i - x3i;
652
15.9M
      x3r = x1r - (x3r * 2);
653
15.9M
      x3i = x1i + (x3i * 2);
654
655
15.9M
      x0r = x0r + (x1r);
656
15.9M
      x0i = x0i + (x1i);
657
15.9M
      x1r = x0r - (x1r * 2);
658
15.9M
      x1i = x0i - (x1i * 2);
659
15.9M
      x2r = x2r + (x3i);
660
15.9M
      x2i = x2i - (x3r);
661
15.9M
      x3i = x2r - (x3i * 2);
662
15.9M
      x3r = x2i + (x3r * 2);
663
664
15.9M
      *data_r = x0r;
665
15.9M
      *(data_r + 1) = x0i;
666
15.9M
      data_r += 8;
667
668
15.9M
      *data_r = x2r;
669
15.9M
      *(data_r + 1) = x2i;
670
15.9M
      data_r += 8;
671
672
15.9M
      *data_r = x1r;
673
15.9M
      *(data_r + 1) = x1i;
674
15.9M
      data_r += 8;
675
676
15.9M
      *data_r = x3i;
677
15.9M
      *(data_r + 1) = x3r;
678
15.9M
      data_r += 8;
679
15.9M
      data_i += 8;
680
681
15.9M
      x1r = *data_i;
682
15.9M
      x1i = *(data_i + 1);
683
15.9M
      data_i += 8;
684
685
15.9M
      x2r = *data_i;
686
15.9M
      x2i = *(data_i + 1);
687
15.9M
      data_i += 8;
688
689
15.9M
      x3r = *data_i;
690
15.9M
      x3i = *(data_i + 1);
691
15.9M
      data_i -= 24;
692
693
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
694
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
695
15.9M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
696
15.9M
                                   (FLOAT32)x1i, 0.382683f);
697
15.9M
      x1r = tmp;
698
699
15.9M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
700
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
701
15.9M
      x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
702
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
703
15.9M
      x2r = tmp;
704
705
15.9M
      tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
706
15.9M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
707
15.9M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
708
15.9M
                                   (FLOAT32)x3i, 0.923880f);
709
15.9M
      x3r = tmp;
710
711
15.9M
      x0r = (*data_i);
712
15.9M
      x0i = (*(data_i + 1));
713
714
15.9M
      x0r = x0r + (x2r);
715
15.9M
      x0i = x0i + (x2i);
716
15.9M
      x2r = x0r - (x2r * 2);
717
15.9M
      x2i = x0i - (x2i * 2);
718
15.9M
      x1r = x1r + x3r;
719
15.9M
      x1i = x1i - x3i;
720
15.9M
      x3r = x1r - (x3r * 2);
721
15.9M
      x3i = x1i + (x3i * 2);
722
723
15.9M
      x0r = x0r + (x1r);
724
15.9M
      x0i = x0i + (x1i);
725
15.9M
      x1r = x0r - (x1r * 2);
726
15.9M
      x1i = x0i - (x1i * 2);
727
15.9M
      x2r = x2r + (x3i);
728
15.9M
      x2i = x2i - (x3r);
729
15.9M
      x3i = x2r - (x3i * 2);
730
15.9M
      x3r = x2i + (x3r * 2);
731
732
15.9M
      *data_i = x0r;
733
15.9M
      *(data_i + 1) = x0i;
734
15.9M
      data_i += 8;
735
736
15.9M
      *data_i = x2r;
737
15.9M
      *(data_i + 1) = x2i;
738
15.9M
      data_i += 8;
739
740
15.9M
      *data_i = x1r;
741
15.9M
      *(data_i + 1) = x1i;
742
15.9M
      data_i += 8;
743
744
15.9M
      *data_i = x3i;
745
15.9M
      *(data_i + 1) = x3r;
746
15.9M
      data_i += 8;
747
15.9M
    }
748
7.96M
    data_r -= 62;
749
7.96M
    data_i -= 62;
750
7.96M
  }
751
7.96M
  {
752
7.96M
    const FLOAT32 *twiddles = ptr_w;
753
7.96M
    FLOAT32 x0r, x0i, x1r, x1i;
754
71.6M
    for (j = 8; j != 0; j--) {
755
63.7M
      FLOAT32 W1 = *twiddles;
756
63.7M
      twiddles++;
757
63.7M
      FLOAT32 W4 = *twiddles;
758
63.7M
      twiddles++;
759
63.7M
      FLOAT32 tmp;
760
761
63.7M
      x0r = *ptr_y;
762
63.7M
      x0i = *(ptr_y + 1);
763
63.7M
      ptr_y += 32;
764
63.7M
      ptr_xr += 32;
765
766
63.7M
      x1r = *ptr_y;
767
63.7M
      x1i = *(ptr_y + 1);
768
769
63.7M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
770
63.7M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
771
63.7M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
772
63.7M
                                   (FLOAT32)x1i, W1);
773
63.7M
      x1r = tmp;
774
775
63.7M
      *ptr_xr = (x0r) - (x1r);
776
63.7M
      *(ptr_xr + 1) = (x0i) - (x1i);
777
63.7M
      ptr_y -= 32;
778
63.7M
      ptr_xr -= 32;
779
780
63.7M
      *ptr_xr = (x0r) + (x1r);
781
63.7M
      *(ptr_xr + 1) = (x0i) + (x1i);
782
63.7M
      ptr_y += 2;
783
63.7M
      ptr_xr += 2;
784
785
63.7M
      x0r = *ptr_z;
786
63.7M
      x0i = *(ptr_z + 1);
787
63.7M
      ptr_z += 32;
788
63.7M
      ptr_xi += 32;
789
790
63.7M
      x1r = *ptr_z;
791
63.7M
      x1i = *(ptr_z + 1);
792
793
63.7M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
794
63.7M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
795
63.7M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
796
63.7M
                                   (FLOAT32)x1i, W1);
797
63.7M
      x1r = tmp;
798
799
63.7M
      *ptr_xi = (x0r) - (x1r);
800
63.7M
      *(ptr_xi + 1) = (x0i) - (x1i);
801
63.7M
      ptr_z -= 32;
802
63.7M
      ptr_xi -= 32;
803
804
63.7M
      *ptr_xi = (x0r) + (x1r);
805
63.7M
      *(ptr_xi + 1) = (x0i) + (x1i);
806
63.7M
      ptr_z += 2;
807
63.7M
      ptr_xi += 2;
808
63.7M
    }
809
7.96M
    twiddles = ptr_w;
810
71.6M
    for (j = 8; j != 0; j--) {
811
63.7M
      FLOAT32 W1 = *twiddles;
812
63.7M
      twiddles++;
813
63.7M
      FLOAT32 W4 = *twiddles;
814
63.7M
      twiddles++;
815
63.7M
      FLOAT32 tmp;
816
817
63.7M
      x0r = *ptr_y;
818
63.7M
      x0i = *(ptr_y + 1);
819
63.7M
      ptr_y += 32;
820
63.7M
      ptr_xr += 32;
821
822
63.7M
      x1r = *ptr_y;
823
63.7M
      x1i = *(ptr_y + 1);
824
825
63.7M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
826
63.7M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W1));
827
63.7M
      x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
828
63.7M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
829
63.7M
      x1r = tmp;
830
831
63.7M
      *ptr_xr = (x0r) - (x1r);
832
63.7M
      *(ptr_xr + 1) = (x0i) - (x1i);
833
63.7M
      ptr_y -= 32;
834
63.7M
      ptr_xr -= 32;
835
836
63.7M
      *ptr_xr = (x0r) + (x1r);
837
63.7M
      *(ptr_xr + 1) = (x0i) + (x1i);
838
63.7M
      ptr_y += 2;
839
63.7M
      ptr_xr += 2;
840
841
63.7M
      x0r = *ptr_z;
842
63.7M
      x0i = *(ptr_z + 1);
843
63.7M
      ptr_z += 32;
844
63.7M
      ptr_xi += 32;
845
846
63.7M
      x1r = *ptr_z;
847
63.7M
      x1i = *(ptr_z + 1);
848
849
63.7M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
850
63.7M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W1));
851
63.7M
      x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
852
63.7M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
853
63.7M
      x1r = tmp;
854
855
63.7M
      *ptr_xi = (x0r) - (x1r);
856
63.7M
      *(ptr_xi + 1) = (x0i) - (x1i);
857
63.7M
      ptr_z -= 32;
858
63.7M
      ptr_xi -= 32;
859
860
63.7M
      *ptr_xi = (x0r) + (x1r);
861
63.7M
      *(ptr_xi + 1) = (x0i) + (x1i);
862
63.7M
      ptr_z += 2;
863
63.7M
      ptr_xi += 2;
864
63.7M
    }
865
7.96M
  }
866
7.96M
}
867
868
17.9k
VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
869
17.9k
  WORD32 i, j, k, n_stages, h2;
870
17.9k
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
871
17.9k
  WORD32 del, nodespacing, in_loop_cnt;
872
17.9k
  WORD32 dig_rev_shift;
873
17.9k
  WORD32 not_power_4;
874
17.9k
  FLOAT32 ptr_x[256];
875
17.9k
  FLOAT32 y[256];
876
17.9k
  WORD32 npoints = nlength;
877
17.9k
  FLOAT32 *ptr_y = y;
878
17.9k
  const FLOAT32 *ptr_w;
879
17.9k
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
880
17.9k
  n_stages = 30 - ixheaac_norm32(npoints);
881
17.9k
  not_power_4 = n_stages & 1;
882
883
17.9k
  n_stages = n_stages >> 1;
884
885
886
1.16M
  for (i = 0; i<nlength; i++)
887
1.14M
  {
888
1.14M
    ptr_x[2 * i] = xr[i];
889
1.14M
    ptr_x[2 * i + 1] = xi[i];
890
1.14M
  }
891
892
17.9k
  ptr_w = ixheaacd_twiddle_table_fft;
893
894
304k
  for (i = 0; i<npoints; i += 4)
895
286k
  {
896
286k
    FLOAT32 *inp = ptr_x;
897
898
286k
    DIG_REV(i, dig_rev_shift, h2);
899
286k
    if (not_power_4)
900
0
    {
901
0
      h2 += 1;
902
0
      h2 &= ~1;
903
0
    }
904
286k
    inp += (h2);
905
906
286k
    x0r = *inp;
907
286k
    x0i = *(inp + 1);
908
286k
    inp += (npoints >> 1);
909
910
286k
    x1r = *inp;
911
286k
    x1i = *(inp + 1);
912
286k
    inp += (npoints >> 1);
913
914
286k
    x2r = *inp;
915
286k
    x2i = *(inp + 1);
916
286k
    inp += (npoints >> 1);
917
918
286k
    x3r = *inp;
919
286k
    x3i = *(inp + 1);
920
921
286k
    x0r = x0r + x2r;
922
286k
    x0i = x0i + x2i;
923
286k
    x2r = x0r - (x2r * 2);
924
286k
    x2i = x0i - (x2i * 2);
925
286k
    x1r = x1r + x3r;
926
286k
    x1i = x1i + x3i;
927
286k
    x3r = x1r - (x3r * 2);
928
286k
    x3i = x1i - (x3i * 2);
929
930
286k
    x0r = x0r + x1r;
931
286k
    x0i = x0i + x1i;
932
286k
    x1r = x0r - (x1r * 2);
933
286k
    x1i = x0i - (x1i * 2);
934
286k
    x2r = x2r + x3i;
935
286k
    x2i = x2i - x3r;
936
286k
    x3i = x2r - (x3i * 2);
937
286k
    x3r = x2i + (x3r * 2);
938
939
286k
    *ptr_y++ = x0r;
940
286k
    *ptr_y++ = x0i;
941
286k
    *ptr_y++ = x2r;
942
286k
    *ptr_y++ = x2i;
943
286k
    *ptr_y++ = x1r;
944
286k
    *ptr_y++ = x1i;
945
286k
    *ptr_y++ = x3i;
946
286k
    *ptr_y++ = x3r;
947
286k
  }
948
17.9k
  ptr_y -= 2 * npoints;
949
17.9k
  del = 4;
950
17.9k
  nodespacing = 64;
951
17.9k
  in_loop_cnt = npoints >> 4;
952
53.7k
  for (i = n_stages - 1; i>0; i--)
953
35.8k
  {
954
35.8k
    const FLOAT32 *twiddles = ptr_w;
955
35.8k
    FLOAT32 *data = ptr_y;
956
35.8k
    FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
957
35.8k
    WORD32 sec_loop_cnt;
958
959
125k
    for (k = in_loop_cnt; k != 0; k--)
960
89.5k
    {
961
89.5k
      x0r = (*data);
962
89.5k
      x0i = (*(data + 1));
963
89.5k
      data += (del << 1);
964
965
89.5k
      x1r = (*data);
966
89.5k
      x1i = (*(data + 1));
967
89.5k
      data += (del << 1);
968
969
89.5k
      x2r = (*data);
970
89.5k
      x2i = (*(data + 1));
971
89.5k
      data += (del << 1);
972
973
89.5k
      x3r = (*data);
974
89.5k
      x3i = (*(data + 1));
975
89.5k
      data -= 3 * (del << 1);
976
977
89.5k
      x0r = x0r + x2r;
978
89.5k
      x0i = x0i + x2i;
979
89.5k
      x2r = x0r - (x2r * 2);
980
89.5k
      x2i = x0i - (x2i * 2);
981
89.5k
      x1r = x1r + x3r;
982
89.5k
      x1i = x1i + x3i;
983
89.5k
      x3r = x1r - (x3r * 2);
984
89.5k
      x3i = x1i - (x3i * 2);
985
986
89.5k
      x0r = x0r + x1r;
987
89.5k
      x0i = x0i + x1i;
988
89.5k
      x1r = x0r - (x1r * 2);
989
89.5k
      x1i = x0i - (x1i * 2);
990
89.5k
      x2r = x2r + x3i;
991
89.5k
      x2i = x2i - x3r;
992
89.5k
      x3i = x2r - (x3i * 2);
993
89.5k
      x3r = x2i + (x3r * 2);
994
995
89.5k
      *data = x0r;
996
89.5k
      *(data + 1) = x0i;
997
89.5k
      data += (del << 1);
998
999
89.5k
      *data = x2r;
1000
89.5k
      *(data + 1) = x2i;
1001
89.5k
      data += (del << 1);
1002
1003
89.5k
      *data = x1r;
1004
89.5k
      *(data + 1) = x1i;
1005
89.5k
      data += (del << 1);
1006
1007
89.5k
      *data = x3i;
1008
89.5k
      *(data + 1) = x3r;
1009
89.5k
      data += (del << 1);
1010
89.5k
    }
1011
35.8k
    data = ptr_y + 2;
1012
1013
35.8k
    sec_loop_cnt = (nodespacing * del);
1014
35.8k
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \
1015
35.8k
            + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \
1016
35.8k
            - (sec_loop_cnt / 256);
1017
35.8k
    j = nodespacing;
1018
1019
143k
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
1020
107k
    {
1021
107k
      w1h = *(twiddles + 2 * j);
1022
107k
      w1l = *(twiddles + 2 * j + 1);
1023
107k
      w2h = *(twiddles + 2 * (j << 1));
1024
107k
      w2l = *(twiddles + 2 * (j << 1) + 1);
1025
107k
      w3h = *(twiddles + 2 * j + 2 * (j << 1));
1026
107k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1027
1028
268k
      for (k = in_loop_cnt; k != 0; k--)
1029
161k
      {
1030
161k
        FLOAT32 tmp;
1031
161k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1032
1033
161k
        data += (del << 1);
1034
1035
161k
        x1r = *data;
1036
161k
        x1i = *(data + 1);
1037
161k
        data += (del << 1);
1038
1039
161k
        x2r = *data;
1040
161k
        x2i = *(data + 1);
1041
161k
        data += (del << 1);
1042
1043
161k
        x3r = *data;
1044
161k
        x3i = *(data + 1);
1045
161k
        data -= 3 * (del << 1);
1046
1047
161k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1048
161k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1049
161k
        x1r = tmp;
1050
1051
161k
        tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1052
161k
        x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1053
161k
        x2r = tmp;
1054
1055
161k
        tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
1056
161k
        x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1057
161k
        x3r = tmp;
1058
1059
161k
        x0r = (*data);
1060
161k
        x0i = (*(data + 1));
1061
1062
161k
        x0r = x0r + (x2r);
1063
161k
        x0i = x0i + (x2i);
1064
161k
        x2r = x0r - (x2r * 2);
1065
161k
        x2i = x0i - (x2i * 2);
1066
161k
        x1r = x1r + x3r;
1067
161k
        x1i = x1i + x3i;
1068
161k
        x3r = x1r - (x3r * 2);
1069
161k
        x3i = x1i - (x3i * 2);
1070
1071
161k
        x0r = x0r + (x1r);
1072
161k
        x0i = x0i + (x1i);
1073
161k
        x1r = x0r - (x1r * 2);
1074
161k
        x1i = x0i - (x1i * 2);
1075
161k
        x2r = x2r + (x3i);
1076
161k
        x2i = x2i - (x3r);
1077
161k
        x3i = x2r - (x3i * 2);
1078
161k
        x3r = x2i + (x3r * 2);
1079
1080
161k
        *data = x0r;
1081
161k
        *(data + 1) = x0i;
1082
161k
        data += (del << 1);
1083
1084
161k
        *data = x2r;
1085
161k
        *(data + 1) = x2i;
1086
161k
        data += (del << 1);
1087
1088
161k
        *data = x1r;
1089
161k
        *(data + 1) = x1i;
1090
161k
        data += (del << 1);
1091
1092
161k
        *data = x3i;
1093
161k
        *(data + 1) = x3r;
1094
161k
        data += (del << 1);
1095
161k
      }
1096
107k
      data -= 2 * npoints;
1097
107k
      data += 2;
1098
107k
    }
1099
107k
    for (; j <= (nodespacing * del) >> 1; j += nodespacing)
1100
71.6k
    {
1101
71.6k
      w1h = *(twiddles + 2 * j);
1102
71.6k
      w2h = *(twiddles + 2 * (j << 1));
1103
71.6k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1104
71.6k
      w1l = *(twiddles + 2 * j + 1);
1105
71.6k
      w2l = *(twiddles + 2 * (j << 1) + 1);
1106
71.6k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1107
1108
197k
      for (k = in_loop_cnt; k != 0; k--)
1109
125k
      {
1110
125k
        FLOAT32 tmp;
1111
125k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1112
1113
125k
        data += (del << 1);
1114
1115
125k
        x1r = *data;
1116
125k
        x1i = *(data + 1);
1117
125k
        data += (del << 1);
1118
1119
125k
        x2r = *data;
1120
125k
        x2i = *(data + 1);
1121
125k
        data += (del << 1);
1122
1123
125k
        x3r = *data;
1124
125k
        x3i = *(data + 1);
1125
125k
        data -= 3 * (del << 1);
1126
1127
125k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1128
125k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1129
125k
        x1r = tmp;
1130
1131
125k
        tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1132
125k
        x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1133
125k
        x2r = tmp;
1134
1135
125k
        tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1136
125k
        x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1137
125k
        x3r = tmp;
1138
1139
125k
        x0r = (*data);
1140
125k
        x0i = (*(data + 1));
1141
1142
125k
        x0r = x0r + (x2r);
1143
125k
        x0i = x0i + (x2i);
1144
125k
        x2r = x0r - (x2r * 2);
1145
125k
        x2i = x0i - (x2i * 2);
1146
125k
        x1r = x1r + x3r;
1147
125k
        x1i = x1i + x3i;
1148
125k
        x3r = x1r - (x3r * 2);
1149
125k
        x3i = x1i - (x3i * 2);
1150
1151
125k
        x0r = x0r + (x1r);
1152
125k
        x0i = x0i + (x1i);
1153
125k
        x1r = x0r - (x1r * 2);
1154
125k
        x1i = x0i - (x1i * 2);
1155
125k
        x2r = x2r + (x3i);
1156
125k
        x2i = x2i - (x3r);
1157
125k
        x3i = x2r - (x3i * 2);
1158
125k
        x3r = x2i + (x3r * 2);
1159
1160
125k
        *data = x0r;
1161
125k
        *(data + 1) = x0i;
1162
125k
        data += (del << 1);
1163
1164
125k
        *data = x2r;
1165
125k
        *(data + 1) = x2i;
1166
125k
        data += (del << 1);
1167
1168
125k
        *data = x1r;
1169
125k
        *(data + 1) = x1i;
1170
125k
        data += (del << 1);
1171
1172
125k
        *data = x3i;
1173
125k
        *(data + 1) = x3r;
1174
125k
        data += (del << 1);
1175
125k
      }
1176
71.6k
      data -= 2 * npoints;
1177
71.6k
      data += 2;
1178
71.6k
    }
1179
71.6k
    for (; j <= sec_loop_cnt * 2; j += nodespacing)
1180
35.8k
    {
1181
35.8k
      w1h = *(twiddles + 2 * j);
1182
35.8k
      w2h = *(twiddles + 2 * (j << 1) - 512);
1183
35.8k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1184
35.8k
      w1l = *(twiddles + 2 * j + 1);
1185
35.8k
      w2l = *(twiddles + 2 * (j << 1) - 511);
1186
35.8k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1187
1188
71.6k
      for (k = in_loop_cnt; k != 0; k--)
1189
35.8k
      {
1190
35.8k
        FLOAT32 tmp;
1191
35.8k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1192
1193
35.8k
        data += (del << 1);
1194
1195
35.8k
        x1r = *data;
1196
35.8k
        x1i = *(data + 1);
1197
35.8k
        data += (del << 1);
1198
1199
35.8k
        x2r = *data;
1200
35.8k
        x2i = *(data + 1);
1201
35.8k
        data += (del << 1);
1202
1203
35.8k
        x3r = *data;
1204
35.8k
        x3i = *(data + 1);
1205
35.8k
        data -= 3 * (del << 1);
1206
1207
35.8k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1208
35.8k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1209
35.8k
        x1r = tmp;
1210
1211
35.8k
        tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1212
35.8k
        x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1213
35.8k
        x2r = tmp;
1214
1215
35.8k
        tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1216
35.8k
        x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1217
35.8k
        x3r = tmp;
1218
1219
35.8k
        x0r = (*data);
1220
35.8k
        x0i = (*(data + 1));
1221
1222
35.8k
        x0r = x0r + (x2r);
1223
35.8k
        x0i = x0i + (x2i);
1224
35.8k
        x2r = x0r - (x2r * 2);
1225
35.8k
        x2i = x0i - (x2i * 2);
1226
35.8k
        x1r = x1r + x3r;
1227
35.8k
        x1i = x1i + x3i;
1228
35.8k
        x3r = x1r - (x3r * 2);
1229
35.8k
        x3i = x1i - (x3i * 2);
1230
1231
35.8k
        x0r = x0r + (x1r);
1232
35.8k
        x0i = x0i + (x1i);
1233
35.8k
        x1r = x0r - (x1r * 2);
1234
35.8k
        x1i = x0i - (x1i * 2);
1235
35.8k
        x2r = x2r + (x3i);
1236
35.8k
        x2i = x2i - (x3r);
1237
35.8k
        x3i = x2r - (x3i * 2);
1238
35.8k
        x3r = x2i + (x3r * 2);
1239
1240
35.8k
        *data = x0r;
1241
35.8k
        *(data + 1) = x0i;
1242
35.8k
        data += (del << 1);
1243
1244
35.8k
        *data = x2r;
1245
35.8k
        *(data + 1) = x2i;
1246
35.8k
        data += (del << 1);
1247
1248
35.8k
        *data = x1r;
1249
35.8k
        *(data + 1) = x1i;
1250
35.8k
        data += (del << 1);
1251
1252
35.8k
        *data = x3i;
1253
35.8k
        *(data + 1) = x3r;
1254
35.8k
        data += (del << 1);
1255
35.8k
      }
1256
35.8k
      data -= 2 * npoints;
1257
35.8k
      data += 2;
1258
35.8k
    }
1259
143k
    for (; j<nodespacing * del; j += nodespacing)
1260
107k
    {
1261
107k
      w1h = *(twiddles + 2 * j);
1262
107k
      w2h = *(twiddles + 2 * (j << 1) - 512);
1263
107k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1264
107k
      w1l = *(twiddles + 2 * j + 1);
1265
107k
      w2l = *(twiddles + 2 * (j << 1) - 511);
1266
107k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1267
1268
268k
      for (k = in_loop_cnt; k != 0; k--)
1269
161k
      {
1270
161k
        FLOAT32 tmp;
1271
161k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1272
1273
161k
        data += (del << 1);
1274
1275
161k
        x1r = *data;
1276
161k
        x1i = *(data + 1);
1277
161k
        data += (del << 1);
1278
1279
161k
        x2r = *data;
1280
161k
        x2i = *(data + 1);
1281
161k
        data += (del << 1);
1282
1283
161k
        x3r = *data;
1284
161k
        x3i = *(data + 1);
1285
161k
        data -= 3 * (del << 1);
1286
1287
161k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1288
161k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1289
161k
        x1r = tmp;
1290
1291
161k
        tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1292
161k
        x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1293
161k
        x2r = tmp;
1294
1295
161k
        tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
1296
161k
        x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1297
161k
        x3r = tmp;
1298
1299
161k
        x0r = (*data);
1300
161k
        x0i = (*(data + 1));
1301
1302
161k
        x0r = x0r + (x2r);
1303
161k
        x0i = x0i + (x2i);
1304
161k
        x2r = x0r - (x2r * 2);
1305
161k
        x2i = x0i - (x2i * 2);
1306
161k
        x1r = x1r + x3r;
1307
161k
        x1i = x1i - x3i;
1308
161k
        x3r = x1r - (x3r * 2);
1309
161k
        x3i = x1i + (x3i * 2);
1310
1311
161k
        x0r = x0r + (x1r);
1312
161k
        x0i = x0i + (x1i);
1313
161k
        x1r = x0r - (x1r * 2);
1314
161k
        x1i = x0i - (x1i * 2);
1315
161k
        x2r = x2r + (x3i);
1316
161k
        x2i = x2i - (x3r);
1317
161k
        x3i = x2r - (x3i * 2);
1318
161k
        x3r = x2i + (x3r * 2);
1319
1320
161k
        *data = x0r;
1321
161k
        *(data + 1) = x0i;
1322
161k
        data += (del << 1);
1323
1324
161k
        *data = x2r;
1325
161k
        *(data + 1) = x2i;
1326
161k
        data += (del << 1);
1327
1328
161k
        *data = x1r;
1329
161k
        *(data + 1) = x1i;
1330
161k
        data += (del << 1);
1331
1332
161k
        *data = x3i;
1333
161k
        *(data + 1) = x3r;
1334
161k
        data += (del << 1);
1335
161k
      }
1336
107k
      data -= 2 * npoints;
1337
107k
      data += 2;
1338
107k
    }
1339
35.8k
    nodespacing >>= 2;
1340
35.8k
    del <<= 2;
1341
35.8k
    in_loop_cnt >>= 2;
1342
35.8k
  }
1343
17.9k
  if (not_power_4)
1344
0
  {
1345
0
    const FLOAT32 *twiddles = ptr_w;
1346
0
    nodespacing <<= 1;
1347
1348
0
    for (j = del / 2; j != 0; j--)
1349
0
    {
1350
0
      FLOAT32 w1h = *twiddles;
1351
0
      FLOAT32 w1l = *(twiddles + 1);
1352
0
      FLOAT32 tmp;
1353
0
      twiddles += nodespacing * 2;
1354
1355
0
      x0r = *ptr_y;
1356
0
      x0i = *(ptr_y + 1);
1357
0
      ptr_y += (del << 1);
1358
1359
0
      x1r = *ptr_y;
1360
0
      x1i = *(ptr_y + 1);
1361
1362
0
      tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1363
0
      x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1364
0
      x1r = tmp;
1365
1366
0
      *ptr_y = (x0r) - (x1r);
1367
0
      *(ptr_y + 1) = (x0i) - (x1i);
1368
0
      ptr_y -= (del << 1);
1369
1370
0
      *ptr_y = (x0r) + (x1r);
1371
0
      *(ptr_y + 1) = (x0i) + (x1i);
1372
0
      ptr_y += 2;
1373
0
    }
1374
0
    twiddles = ptr_w;
1375
0
    for (j = del / 2; j != 0; j--)
1376
0
    {
1377
0
      FLOAT32 w1h = *twiddles;
1378
0
      FLOAT32 w1l = *(twiddles + 1);
1379
0
      FLOAT32 tmp;
1380
0
      twiddles += nodespacing * 2;
1381
1382
0
      x0r = *ptr_y;
1383
0
      x0i = *(ptr_y + 1);
1384
0
      ptr_y += (del << 1);
1385
1386
0
      x1r = *ptr_y;
1387
0
      x1i = *(ptr_y + 1);
1388
1389
0
      tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
1390
0
      x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
1391
0
      x1r = tmp;
1392
1393
0
      *ptr_y = (x0r) - (x1r);
1394
0
      *(ptr_y + 1) = (x0i) - (x1i);
1395
0
      ptr_y -= (del << 1);
1396
1397
0
      *ptr_y = (x0r) + (x1r);
1398
0
      *(ptr_y + 1) = (x0i) + (x1i);
1399
0
      ptr_y += 2;
1400
0
    }
1401
0
  }
1402
1403
1.16M
  for (i = 0; i<nlength; i++)
1404
1.14M
  {
1405
1.14M
    xr[i] = y[2 * i];
1406
1.14M
    xi[i] = y[2 * i + 1];
1407
1.14M
  }
1408
1409
17.9k
  return;
1410
17.9k
}
1411
1412
VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1413
13.4M
                                 WORD32 fft_mode, WORD32 *preshift) {
1414
13.4M
  WORD32 i, j, k, n_stages;
1415
13.4M
  WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1416
13.4M
  WORD32 del, nodespacing, in_loop_cnt;
1417
13.4M
  WORD32 not_power_4;
1418
13.4M
  WORD32 npts, shift;
1419
13.4M
  WORD32 dig_rev_shift;
1420
13.4M
  WORD32 ptr_x[1024];
1421
13.4M
  WORD32 y[1024];
1422
13.4M
  WORD32 npoints = nlength;
1423
13.4M
  WORD32 n = 0;
1424
13.4M
  WORD32 *ptr_y = y;
1425
13.4M
  const WORD32 *ptr_w;
1426
13.4M
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
1427
13.4M
  n_stages = 30 - ixheaac_norm32(npoints);
1428
13.4M
  not_power_4 = n_stages & 1;
1429
1430
13.4M
  n_stages = n_stages >> 1;
1431
1432
13.4M
  npts = npoints;
1433
54.1M
  while (npts >> 1) {
1434
40.6M
    n++;
1435
40.6M
    npts = npts >> 1;
1436
40.6M
  }
1437
1438
13.4M
  if (n % 2 == 0)
1439
7.63M
    shift = ((n + 4)) / 2;
1440
5.83M
  else
1441
5.83M
    shift = ((n + 3) / 2);
1442
1443
313M
  for (i = 0; i < nlength; i++) {
1444
300M
    ptr_x[2 * i] = (xr[i] / (1 << (shift)));
1445
300M
    ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
1446
300M
  }
1447
1448
13.4M
  if (fft_mode == -1) {
1449
12.2M
    ptr_w = ixheaacd_twiddle_table_fft_32x32;
1450
1451
42.5M
    for (i = 0; i < npoints; i += 4) {
1452
30.2M
      WORD32 *inp = ptr_x;
1453
1454
30.2M
      DIG_REV(i, dig_rev_shift, h2);
1455
30.2M
      if (not_power_4) {
1456
19.0M
        h2 += 1;
1457
19.0M
        h2 &= ~1;
1458
19.0M
      }
1459
30.2M
      inp += (h2);
1460
1461
30.2M
      x0r = *inp;
1462
30.2M
      x0i = *(inp + 1);
1463
30.2M
      inp += (npoints >> 1);
1464
1465
30.2M
      x1r = *inp;
1466
30.2M
      x1i = *(inp + 1);
1467
30.2M
      inp += (npoints >> 1);
1468
1469
30.2M
      x2r = *inp;
1470
30.2M
      x2i = *(inp + 1);
1471
30.2M
      inp += (npoints >> 1);
1472
1473
30.2M
      x3r = *inp;
1474
30.2M
      x3i = *(inp + 1);
1475
1476
30.2M
      x0r = ixheaac_add32_sat(x0r, x2r);
1477
30.2M
      x0i = ixheaac_add32_sat(x0i, x2i);
1478
30.2M
      x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1479
30.2M
      x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1480
30.2M
      x1r = ixheaac_add32_sat(x1r, x3r);
1481
30.2M
      x1i = ixheaac_add32_sat(x1i, x3i);
1482
30.2M
      x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1483
30.2M
      x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1484
1485
30.2M
      x0r = ixheaac_add32_sat(x0r, x1r);
1486
30.2M
      x0i = ixheaac_add32_sat(x0i, x1i);
1487
30.2M
      x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1488
30.2M
      x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1489
30.2M
      x2r = ixheaac_add32_sat(x2r, x3i);
1490
30.2M
      x2i = ixheaac_sub32_sat(x2i, x3r);
1491
30.2M
      x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1492
30.2M
      x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1493
1494
30.2M
      *ptr_y++ = x0r;
1495
30.2M
      *ptr_y++ = x0i;
1496
30.2M
      *ptr_y++ = x2r;
1497
30.2M
      *ptr_y++ = x2i;
1498
30.2M
      *ptr_y++ = x1r;
1499
30.2M
      *ptr_y++ = x1i;
1500
30.2M
      *ptr_y++ = x3i;
1501
30.2M
      *ptr_y++ = x3r;
1502
30.2M
    }
1503
12.2M
    ptr_y -= 2 * npoints;
1504
12.2M
    del = 4;
1505
12.2M
    nodespacing = 64;
1506
12.2M
    in_loop_cnt = npoints >> 4;
1507
13.6M
    for (i = n_stages - 1; i > 0; i--) {
1508
1.41M
      const WORD32 *twiddles = ptr_w;
1509
1.41M
      WORD32 *data = ptr_y;
1510
1.41M
      WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1511
1.41M
      WORD32 sec_loop_cnt;
1512
1513
5.58M
      for (k = in_loop_cnt; k != 0; k--) {
1514
4.16M
        x0r = (*data);
1515
4.16M
        x0i = (*(data + 1));
1516
4.16M
        data += (del << 1);
1517
1518
4.16M
        x1r = (*data);
1519
4.16M
        x1i = (*(data + 1));
1520
4.16M
        data += (del << 1);
1521
1522
4.16M
        x2r = (*data);
1523
4.16M
        x2i = (*(data + 1));
1524
4.16M
        data += (del << 1);
1525
1526
4.16M
        x3r = (*data);
1527
4.16M
        x3i = (*(data + 1));
1528
4.16M
        data -= 3 * (del << 1);
1529
1530
4.16M
        x0r = ixheaac_add32_sat(x0r, x2r);
1531
4.16M
        x0i = ixheaac_add32_sat(x0i, x2i);
1532
4.16M
        x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1533
4.16M
        x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1534
4.16M
        x1r = ixheaac_add32_sat(x1r, x3r);
1535
4.16M
        x1i = ixheaac_add32_sat(x1i, x3i);
1536
4.16M
        x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1537
4.16M
        x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1538
1539
4.16M
        x0r = ixheaac_add32_sat(x0r, x1r);
1540
4.16M
        x0i = ixheaac_add32_sat(x0i, x1i);
1541
4.16M
        x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1542
4.16M
        x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1543
4.16M
        x2r = ixheaac_add32_sat(x2r, x3i);
1544
4.16M
        x2i = ixheaac_sub32_sat(x2i, x3r);
1545
4.16M
        x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1546
4.16M
        x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1547
1548
4.16M
        *data = x0r;
1549
4.16M
        *(data + 1) = x0i;
1550
4.16M
        data += (del << 1);
1551
1552
4.16M
        *data = x2r;
1553
4.16M
        *(data + 1) = x2i;
1554
4.16M
        data += (del << 1);
1555
1556
4.16M
        *data = x1r;
1557
4.16M
        *(data + 1) = x1i;
1558
4.16M
        data += (del << 1);
1559
1560
4.16M
        *data = x3i;
1561
4.16M
        *(data + 1) = x3r;
1562
4.16M
        data += (del << 1);
1563
4.16M
      }
1564
1.41M
      data = ptr_y + 2;
1565
1566
1.41M
      sec_loop_cnt = (nodespacing * del);
1567
1.41M
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1568
1.41M
                     (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1569
1.41M
                     (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1570
1.41M
                     (sec_loop_cnt / 256);
1571
1.41M
      j = nodespacing;
1572
1573
4.72M
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1574
3.30M
        w1h = *(twiddles + 2 * j);
1575
3.30M
        w1l = *(twiddles + 2 * j + 1);
1576
3.30M
        w2h = *(twiddles + 2 * (j << 1));
1577
3.30M
        w2l = *(twiddles + 2 * (j << 1) + 1);
1578
3.30M
        w3h = *(twiddles + 2 * j + 2 * (j << 1));
1579
3.30M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1580
1581
10.2M
        for (k = in_loop_cnt; k != 0; k--) {
1582
6.94M
          WORD32 tmp;
1583
6.94M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1584
1585
6.94M
          data += (del << 1);
1586
1587
6.94M
          x1r = *data;
1588
6.94M
          x1i = *(data + 1);
1589
6.94M
          data += (del << 1);
1590
1591
6.94M
          x2r = *data;
1592
6.94M
          x2i = *(data + 1);
1593
6.94M
          data += (del << 1);
1594
1595
6.94M
          x3r = *data;
1596
6.94M
          x3i = *(data + 1);
1597
6.94M
          data -= 3 * (del << 1);
1598
1599
6.94M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1600
6.94M
                                   ixheaacd_mult32_sat(x1i, w1h));
1601
6.94M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1602
6.94M
          x1r = tmp;
1603
1604
6.94M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1605
6.94M
                                   ixheaacd_mult32_sat(x2i, w2h));
1606
6.94M
          x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1607
6.94M
          x2r = tmp;
1608
1609
6.94M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
1610
6.94M
                                   ixheaacd_mult32_sat(x3i, w3h));
1611
6.94M
          x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1612
6.94M
          x3r = tmp;
1613
1614
6.94M
          x0r = (*data);
1615
6.94M
          x0i = (*(data + 1));
1616
1617
6.94M
          x0r = ixheaac_add32_sat(x0r, x2r);
1618
6.94M
          x0i = ixheaac_add32_sat(x0i, x2i);
1619
6.94M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1620
6.94M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1621
6.94M
          x1r = ixheaac_add32_sat(x1r, x3r);
1622
6.94M
          x1i = ixheaac_add32_sat(x1i, x3i);
1623
6.94M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1624
6.94M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1625
1626
6.94M
          x0r = ixheaac_add32_sat(x0r, x1r);
1627
6.94M
          x0i = ixheaac_add32_sat(x0i, x1i);
1628
6.94M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1629
6.94M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1630
6.94M
          x2r = ixheaac_add32_sat(x2r, x3i);
1631
6.94M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1632
6.94M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1633
6.94M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1634
1635
6.94M
          *data = x0r;
1636
6.94M
          *(data + 1) = x0i;
1637
6.94M
          data += (del << 1);
1638
1639
6.94M
          *data = x2r;
1640
6.94M
          *(data + 1) = x2i;
1641
6.94M
          data += (del << 1);
1642
1643
6.94M
          *data = x1r;
1644
6.94M
          *(data + 1) = x1i;
1645
6.94M
          data += (del << 1);
1646
1647
6.94M
          *data = x3i;
1648
6.94M
          *(data + 1) = x3r;
1649
6.94M
          data += (del << 1);
1650
6.94M
        }
1651
3.30M
        data -= 2 * npoints;
1652
3.30M
        data += 2;
1653
3.30M
      }
1654
3.77M
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1655
2.36M
        w1h = *(twiddles + 2 * j);
1656
2.36M
        w2h = *(twiddles + 2 * (j << 1));
1657
2.36M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1658
2.36M
        w1l = *(twiddles + 2 * j + 1);
1659
2.36M
        w2l = *(twiddles + 2 * (j << 1) + 1);
1660
2.36M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1661
1662
7.91M
        for (k = in_loop_cnt; k != 0; k--) {
1663
5.55M
          WORD32 tmp;
1664
5.55M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1665
5.55M
          data += (del << 1);
1666
1667
5.55M
          x1r = *data;
1668
5.55M
          x1i = *(data + 1);
1669
5.55M
          data += (del << 1);
1670
1671
5.55M
          x2r = *data;
1672
5.55M
          x2i = *(data + 1);
1673
5.55M
          data += (del << 1);
1674
1675
5.55M
          x3r = *data;
1676
5.55M
          x3i = *(data + 1);
1677
5.55M
          data -= 3 * (del << 1);
1678
1679
5.55M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1680
5.55M
                                   ixheaacd_mult32_sat(x1i, w1h));
1681
5.55M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1682
5.55M
          x1r = tmp;
1683
1684
5.55M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1685
5.55M
                                   ixheaacd_mult32_sat(x2i, w2h));
1686
5.55M
          x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1687
5.55M
          x2r = tmp;
1688
1689
5.55M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1690
5.55M
                                   ixheaacd_mult32_sat(x3i, w3l));
1691
5.55M
          x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1692
5.55M
                                   ixheaacd_mult32_sat(x3r, w3l));
1693
5.55M
          x3r = tmp;
1694
1695
5.55M
          x0r = (*data);
1696
5.55M
          x0i = (*(data + 1));
1697
1698
5.55M
          x0r = ixheaac_add32_sat(x0r, x2r);
1699
5.55M
          x0i = ixheaac_add32_sat(x0i, x2i);
1700
5.55M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1701
5.55M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1702
5.55M
          x1r = ixheaac_add32_sat(x1r, x3r);
1703
5.55M
          x1i = ixheaac_add32_sat(x1i, x3i);
1704
5.55M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1705
5.55M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1706
1707
5.55M
          x0r = ixheaac_add32_sat(x0r, x1r);
1708
5.55M
          x0i = ixheaac_add32_sat(x0i, x1i);
1709
5.55M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1710
5.55M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1711
5.55M
          x2r = ixheaac_add32_sat(x2r, x3i);
1712
5.55M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1713
5.55M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1714
5.55M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1715
1716
5.55M
          *data = x0r;
1717
5.55M
          *(data + 1) = x0i;
1718
5.55M
          data += (del << 1);
1719
1720
5.55M
          *data = x2r;
1721
5.55M
          *(data + 1) = x2i;
1722
5.55M
          data += (del << 1);
1723
1724
5.55M
          *data = x1r;
1725
5.55M
          *(data + 1) = x1i;
1726
5.55M
          data += (del << 1);
1727
1728
5.55M
          *data = x3i;
1729
5.55M
          *(data + 1) = x3r;
1730
5.55M
          data += (del << 1);
1731
5.55M
        }
1732
2.36M
        data -= 2 * npoints;
1733
2.36M
        data += 2;
1734
2.36M
      }
1735
2.36M
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1736
942k
        w1h = *(twiddles + 2 * j);
1737
942k
        w2h = *(twiddles + 2 * (j << 1) - 512);
1738
942k
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1739
942k
        w1l = *(twiddles + 2 * j + 1);
1740
942k
        w2l = *(twiddles + 2 * (j << 1) - 511);
1741
942k
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1742
1743
2.33M
        for (k = in_loop_cnt; k != 0; k--) {
1744
1.38M
          WORD32 tmp;
1745
1.38M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1746
1747
1.38M
          data += (del << 1);
1748
1749
1.38M
          x1r = *data;
1750
1.38M
          x1i = *(data + 1);
1751
1.38M
          data += (del << 1);
1752
1753
1.38M
          x2r = *data;
1754
1.38M
          x2i = *(data + 1);
1755
1.38M
          data += (del << 1);
1756
1757
1.38M
          x3r = *data;
1758
1.38M
          x3i = *(data + 1);
1759
1.38M
          data -= 3 * (del << 1);
1760
1761
1.38M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1762
1.38M
                                   ixheaacd_mult32_sat(x1i, w1h));
1763
1.38M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1764
1.38M
          x1r = tmp;
1765
1766
1.38M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1767
1.38M
                                   ixheaacd_mult32_sat(x2i, w2l));
1768
1.38M
          x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1769
1.38M
                                   ixheaacd_mult32_sat(x2r, w2l));
1770
1.38M
          x2r = tmp;
1771
1772
1.38M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1773
1.38M
                                   ixheaacd_mult32_sat(x3i, w3l));
1774
1.38M
          x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1775
1.38M
                                   ixheaacd_mult32_sat(x3r, w3l));
1776
1.38M
          x3r = tmp;
1777
1778
1.38M
          x0r = (*data);
1779
1.38M
          x0i = (*(data + 1));
1780
1781
1.38M
          x0r = ixheaac_add32_sat(x0r, x2r);
1782
1.38M
          x0i = ixheaac_add32_sat(x0i, x2i);
1783
1.38M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1784
1.38M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1785
1.38M
          x1r = ixheaac_add32_sat(x1r, x3r);
1786
1.38M
          x1i = ixheaac_add32_sat(x1i, x3i);
1787
1.38M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1788
1.38M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1789
1790
1.38M
          x0r = ixheaac_add32_sat(x0r, x1r);
1791
1.38M
          x0i = ixheaac_add32_sat(x0i, x1i);
1792
1.38M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1793
1.38M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1794
1.38M
          x2r = ixheaac_add32_sat(x2r, x3i);
1795
1.38M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1796
1.38M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1797
1.38M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1798
1799
1.38M
          *data = x0r;
1800
1.38M
          *(data + 1) = x0i;
1801
1.38M
          data += (del << 1);
1802
1803
1.38M
          *data = x2r;
1804
1.38M
          *(data + 1) = x2i;
1805
1.38M
          data += (del << 1);
1806
1807
1.38M
          *data = x1r;
1808
1.38M
          *(data + 1) = x1i;
1809
1.38M
          data += (del << 1);
1810
1811
1.38M
          *data = x3i;
1812
1.38M
          *(data + 1) = x3r;
1813
1.38M
          data += (del << 1);
1814
1.38M
        }
1815
942k
        data -= 2 * npoints;
1816
942k
        data += 2;
1817
942k
      }
1818
4.72M
      for (; j < nodespacing * del; j += nodespacing) {
1819
3.30M
        w1h = *(twiddles + 2 * j);
1820
3.30M
        w2h = *(twiddles + 2 * (j << 1) - 512);
1821
3.30M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1822
3.30M
        w1l = *(twiddles + 2 * j + 1);
1823
3.30M
        w2l = *(twiddles + 2 * (j << 1) - 511);
1824
3.30M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1825
1826
10.2M
        for (k = in_loop_cnt; k != 0; k--) {
1827
6.94M
          WORD32 tmp;
1828
6.94M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1829
1830
6.94M
          data += (del << 1);
1831
1832
6.94M
          x1r = *data;
1833
6.94M
          x1i = *(data + 1);
1834
6.94M
          data += (del << 1);
1835
1836
6.94M
          x2r = *data;
1837
6.94M
          x2i = *(data + 1);
1838
6.94M
          data += (del << 1);
1839
1840
6.94M
          x3r = *data;
1841
6.94M
          x3i = *(data + 1);
1842
6.94M
          data -= 3 * (del << 1);
1843
1844
6.94M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1845
6.94M
                                   ixheaacd_mult32_sat(x1i, w1h));
1846
6.94M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1847
6.94M
          x1r = tmp;
1848
1849
6.94M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1850
6.94M
                                   ixheaacd_mult32_sat(x2i, w2l));
1851
6.94M
          x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1852
6.94M
                                   ixheaacd_mult32_sat(x2r, w2l));
1853
6.94M
          x2r = tmp;
1854
1855
6.94M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1856
6.94M
                                   ixheaacd_mult32_sat(x3r, w3l));
1857
6.94M
          x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1858
6.94M
          x3r = tmp;
1859
1860
6.94M
          x0r = (*data);
1861
6.94M
          x0i = (*(data + 1));
1862
1863
6.94M
          x0r = ixheaac_add32_sat(x0r, x2r);
1864
6.94M
          x0i = ixheaac_add32_sat(x0i, x2i);
1865
6.94M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1866
6.94M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1867
6.94M
          x1r = ixheaac_add32_sat(x1r, x3r);
1868
6.94M
          x1i = ixheaac_sub32_sat(x1i, x3i);
1869
6.94M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1870
6.94M
          x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1871
1872
6.94M
          x0r = ixheaac_add32_sat(x0r, x1r);
1873
6.94M
          x0i = ixheaac_add32_sat(x0i, x1i);
1874
6.94M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1875
6.94M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1876
6.94M
          x2r = ixheaac_add32_sat(x2r, x3i);
1877
6.94M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1878
6.94M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1879
6.94M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1880
1881
6.94M
          *data = x0r;
1882
6.94M
          *(data + 1) = x0i;
1883
6.94M
          data += (del << 1);
1884
1885
6.94M
          *data = x2r;
1886
6.94M
          *(data + 1) = x2i;
1887
6.94M
          data += (del << 1);
1888
1889
6.94M
          *data = x1r;
1890
6.94M
          *(data + 1) = x1i;
1891
6.94M
          data += (del << 1);
1892
1893
6.94M
          *data = x3i;
1894
6.94M
          *(data + 1) = x3r;
1895
6.94M
          data += (del << 1);
1896
6.94M
        }
1897
3.30M
        data -= 2 * npoints;
1898
3.30M
        data += 2;
1899
3.30M
      }
1900
1.41M
      nodespacing >>= 2;
1901
1.41M
      del <<= 2;
1902
1.41M
      in_loop_cnt >>= 2;
1903
1.41M
    }
1904
12.2M
    if (not_power_4) {
1905
5.53M
      const WORD32 *twiddles = ptr_w;
1906
5.53M
      nodespacing <<= 1;
1907
5.53M
      shift += 1;
1908
1909
24.5M
      for (j = del / 2; j != 0; j--) {
1910
19.0M
        WORD32 w1h = *twiddles;
1911
19.0M
        WORD32 w1l = *(twiddles + 1);
1912
19.0M
        WORD32 tmp;
1913
19.0M
        twiddles += nodespacing * 2;
1914
1915
19.0M
        x0r = *ptr_y;
1916
19.0M
        x0i = *(ptr_y + 1);
1917
19.0M
        ptr_y += (del << 1);
1918
1919
19.0M
        x1r = *ptr_y;
1920
19.0M
        x1i = *(ptr_y + 1);
1921
1922
19.0M
        tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1923
19.0M
                                 ixheaacd_mult32_sat(x1i, w1h));
1924
19.0M
        x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1925
19.0M
        x1r = tmp;
1926
1927
19.0M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
1928
19.0M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1929
19.0M
        ptr_y -= (del << 1);
1930
1931
19.0M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
1932
19.0M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1933
19.0M
        ptr_y += 2;
1934
19.0M
      }
1935
5.53M
      twiddles = ptr_w;
1936
24.5M
      for (j = del / 2; j != 0; j--) {
1937
19.0M
        WORD32 w1h = *twiddles;
1938
19.0M
        WORD32 w1l = *(twiddles + 1);
1939
19.0M
        WORD32 tmp;
1940
19.0M
        twiddles += nodespacing * 2;
1941
1942
19.0M
        x0r = *ptr_y;
1943
19.0M
        x0i = *(ptr_y + 1);
1944
19.0M
        ptr_y += (del << 1);
1945
1946
19.0M
        x1r = *ptr_y;
1947
19.0M
        x1i = *(ptr_y + 1);
1948
1949
19.0M
        tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
1950
19.0M
                                 ixheaacd_mult32_sat(x1i, w1l));
1951
19.0M
        x1i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
1952
19.0M
                                 ixheaacd_mult32_sat(x1r, w1l));
1953
19.0M
        x1r = tmp;
1954
1955
19.0M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
1956
19.0M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1957
19.0M
        ptr_y -= (del << 1);
1958
1959
19.0M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
1960
19.0M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1961
19.0M
        ptr_y += 2;
1962
19.0M
      }
1963
5.53M
    }
1964
12.2M
  }
1965
1966
1.23M
  else {
1967
1.23M
    ptr_w = ixheaacd_twiddle_table_fft_32x32;
1968
1969
45.9M
    for (i = 0; i < npoints; i += 4) {
1970
44.7M
      WORD32 *inp = ptr_x;
1971
1972
44.7M
      DIG_REV(i, dig_rev_shift, h2);
1973
44.7M
      if (not_power_4) {
1974
31.7M
        h2 += 1;
1975
31.7M
        h2 &= ~1;
1976
31.7M
      }
1977
44.7M
      inp += (h2);
1978
1979
44.7M
      x0r = *inp;
1980
44.7M
      x0i = *(inp + 1);
1981
44.7M
      inp += (npoints >> 1);
1982
1983
44.7M
      x1r = *inp;
1984
44.7M
      x1i = *(inp + 1);
1985
44.7M
      inp += (npoints >> 1);
1986
1987
44.7M
      x2r = *inp;
1988
44.7M
      x2i = *(inp + 1);
1989
44.7M
      inp += (npoints >> 1);
1990
1991
44.7M
      x3r = *inp;
1992
44.7M
      x3i = *(inp + 1);
1993
1994
44.7M
      x0r = ixheaac_add32_sat(x0r, x2r);
1995
44.7M
      x0i = ixheaac_add32_sat(x0i, x2i);
1996
44.7M
      x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1997
44.7M
      x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1998
44.7M
      x1r = ixheaac_add32_sat(x1r, x3r);
1999
44.7M
      x1i = ixheaac_add32_sat(x1i, x3i);
2000
44.7M
      x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2001
44.7M
      x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2002
2003
44.7M
      x0r = ixheaac_add32_sat(x0r, x1r);
2004
44.7M
      x0i = ixheaac_add32_sat(x0i, x1i);
2005
44.7M
      x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2006
44.7M
      x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2007
44.7M
      x2r = ixheaac_sub32_sat(x2r, x3i);
2008
44.7M
      x2i = ixheaac_add32_sat(x2i, x3r);
2009
44.7M
      x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2010
44.7M
      x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2011
2012
44.7M
      *ptr_y++ = x0r;
2013
44.7M
      *ptr_y++ = x0i;
2014
44.7M
      *ptr_y++ = x2r;
2015
44.7M
      *ptr_y++ = x2i;
2016
44.7M
      *ptr_y++ = x1r;
2017
44.7M
      *ptr_y++ = x1i;
2018
44.7M
      *ptr_y++ = x3i;
2019
44.7M
      *ptr_y++ = x3r;
2020
44.7M
    }
2021
1.23M
    ptr_y -= 2 * npoints;
2022
1.23M
    del = 4;
2023
1.23M
    nodespacing = 64;
2024
1.23M
    in_loop_cnt = npoints >> 4;
2025
3.77M
    for (i = n_stages - 1; i > 0; i--) {
2026
2.53M
      const WORD32 *twiddles = ptr_w;
2027
2.53M
      WORD32 *data = ptr_y;
2028
2.53M
      WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
2029
2.53M
      WORD32 sec_loop_cnt;
2030
2031
16.9M
      for (k = in_loop_cnt; k != 0; k--) {
2032
14.4M
        x0r = (*data);
2033
14.4M
        x0i = (*(data + 1));
2034
14.4M
        data += (del << 1);
2035
2036
14.4M
        x1r = (*data);
2037
14.4M
        x1i = (*(data + 1));
2038
14.4M
        data += (del << 1);
2039
2040
14.4M
        x2r = (*data);
2041
14.4M
        x2i = (*(data + 1));
2042
14.4M
        data += (del << 1);
2043
2044
14.4M
        x3r = (*data);
2045
14.4M
        x3i = (*(data + 1));
2046
14.4M
        data -= 3 * (del << 1);
2047
2048
14.4M
        x0r = ixheaac_add32_sat(x0r, x2r);
2049
14.4M
        x0i = ixheaac_add32_sat(x0i, x2i);
2050
14.4M
        x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2051
14.4M
        x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2052
14.4M
        x1r = ixheaac_add32_sat(x1r, x3r);
2053
14.4M
        x1i = ixheaac_add32_sat(x1i, x3i);
2054
14.4M
        x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2055
14.4M
        x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2056
2057
14.4M
        x0r = ixheaac_add32_sat(x0r, x1r);
2058
14.4M
        x0i = ixheaac_add32_sat(x0i, x1i);
2059
14.4M
        x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2060
14.4M
        x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2061
14.4M
        x2r = ixheaac_sub32_sat(x2r, x3i);
2062
14.4M
        x2i = ixheaac_add32_sat(x2i, x3r);
2063
14.4M
        x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2064
14.4M
        x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2065
2066
14.4M
        *data = x0r;
2067
14.4M
        *(data + 1) = x0i;
2068
14.4M
        data += (del << 1);
2069
2070
14.4M
        *data = x2r;
2071
14.4M
        *(data + 1) = x2i;
2072
14.4M
        data += (del << 1);
2073
2074
14.4M
        *data = x1r;
2075
14.4M
        *(data + 1) = x1i;
2076
14.4M
        data += (del << 1);
2077
2078
14.4M
        *data = x3i;
2079
14.4M
        *(data + 1) = x3r;
2080
14.4M
        data += (del << 1);
2081
14.4M
      }
2082
2.53M
      data = ptr_y + 2;
2083
2084
2.53M
      sec_loop_cnt = (nodespacing * del);
2085
2.53M
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
2086
2.53M
                     (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
2087
2.53M
                     (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
2088
2.53M
                     (sec_loop_cnt / 256);
2089
2.53M
      j = nodespacing;
2090
2091
13.9M
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
2092
11.4M
        w1h = *(twiddles + 2 * j);
2093
11.4M
        w2h = *(twiddles + 2 * (j << 1));
2094
11.4M
        w3h = *(twiddles + 2 * j + 2 * (j << 1));
2095
11.4M
        w1l = *(twiddles + 2 * j + 1);
2096
11.4M
        w2l = *(twiddles + 2 * (j << 1) + 1);
2097
11.4M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
2098
2099
46.1M
        for (k = in_loop_cnt; k != 0; k--) {
2100
34.6M
          WORD32 tmp;
2101
34.6M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2102
2103
34.6M
          data += (del << 1);
2104
2105
34.6M
          x1r = *data;
2106
34.6M
          x1i = *(data + 1);
2107
34.6M
          data += (del << 1);
2108
2109
34.6M
          x2r = *data;
2110
34.6M
          x2i = *(data + 1);
2111
34.6M
          data += (del << 1);
2112
2113
34.6M
          x3r = *data;
2114
34.6M
          x3i = *(data + 1);
2115
34.6M
          data -= 3 * (del << 1);
2116
2117
34.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2118
34.6M
                                   ixheaacd_mult32_sat(x1i, w1h));
2119
34.6M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2120
34.6M
          x1r = tmp;
2121
2122
34.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2123
34.6M
                                   ixheaacd_mult32_sat(x2i, w2h));
2124
34.6M
          x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2125
34.6M
          x2r = tmp;
2126
2127
34.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2128
34.6M
                                   ixheaacd_mult32_sat(x3i, w3h));
2129
34.6M
          x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2130
34.6M
          x3r = tmp;
2131
2132
34.6M
          x0r = (*data);
2133
34.6M
          x0i = (*(data + 1));
2134
2135
34.6M
          x0r = ixheaac_add32_sat(x0r, x2r);
2136
34.6M
          x0i = ixheaac_add32_sat(x0i, x2i);
2137
34.6M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2138
34.6M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2139
34.6M
          x1r = ixheaac_add32_sat(x1r, x3r);
2140
34.6M
          x1i = ixheaac_add32_sat(x1i, x3i);
2141
34.6M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2142
34.6M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2143
2144
34.6M
          x0r = ixheaac_add32_sat(x0r, x1r);
2145
34.6M
          x0i = ixheaac_add32_sat(x0i, x1i);
2146
34.6M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2147
34.6M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2148
34.6M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2149
34.6M
          x2i = ixheaac_add32_sat(x2i, x3r);
2150
34.6M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2151
34.6M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2152
2153
34.6M
          *data = x0r;
2154
34.6M
          *(data + 1) = x0i;
2155
34.6M
          data += (del << 1);
2156
2157
34.6M
          *data = x2r;
2158
34.6M
          *(data + 1) = x2i;
2159
34.6M
          data += (del << 1);
2160
2161
34.6M
          *data = x1r;
2162
34.6M
          *(data + 1) = x1i;
2163
34.6M
          data += (del << 1);
2164
2165
34.6M
          *data = x3i;
2166
34.6M
          *(data + 1) = x3r;
2167
34.6M
          data += (del << 1);
2168
34.6M
        }
2169
11.4M
        data -= 2 * npoints;
2170
11.4M
        data += 2;
2171
11.4M
      }
2172
9.52M
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
2173
6.99M
        w1h = *(twiddles + 2 * j);
2174
6.99M
        w2h = *(twiddles + 2 * (j << 1));
2175
6.99M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2176
6.99M
        w1l = *(twiddles + 2 * j + 1);
2177
6.99M
        w2l = *(twiddles + 2 * (j << 1) + 1);
2178
6.99M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2179
2180
31.5M
        for (k = in_loop_cnt; k != 0; k--) {
2181
24.5M
          WORD32 tmp;
2182
24.5M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2183
2184
24.5M
          data += (del << 1);
2185
2186
24.5M
          x1r = *data;
2187
24.5M
          x1i = *(data + 1);
2188
24.5M
          data += (del << 1);
2189
2190
24.5M
          x2r = *data;
2191
24.5M
          x2i = *(data + 1);
2192
24.5M
          data += (del << 1);
2193
2194
24.5M
          x3r = *data;
2195
24.5M
          x3i = *(data + 1);
2196
24.5M
          data -= 3 * (del << 1);
2197
2198
24.5M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2199
24.5M
                                   ixheaacd_mult32_sat(x1i, w1h));
2200
24.5M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2201
24.5M
          x1r = tmp;
2202
2203
24.5M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2204
24.5M
                                   ixheaacd_mult32_sat(x2i, w2h));
2205
24.5M
          x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2206
24.5M
          x2r = tmp;
2207
2208
24.5M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2209
24.5M
                                   ixheaacd_mult32_sat(x3i, w3l));
2210
24.5M
          x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2211
24.5M
                                   ixheaacd_mult32_sat(x3i, w3h));
2212
24.5M
          x3r = tmp;
2213
2214
24.5M
          x0r = (*data);
2215
24.5M
          x0i = (*(data + 1));
2216
2217
24.5M
          x0r = ixheaac_add32_sat(x0r, x2r);
2218
24.5M
          x0i = ixheaac_add32_sat(x0i, x2i);
2219
24.5M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2220
24.5M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2221
24.5M
          x1r = ixheaac_add32_sat(x1r, x3r);
2222
24.5M
          x1i = ixheaac_add32_sat(x1i, x3i);
2223
24.5M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2224
24.5M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2225
2226
24.5M
          x0r = ixheaac_add32_sat(x0r, x1r);
2227
24.5M
          x0i = ixheaac_add32_sat(x0i, x1i);
2228
24.5M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2229
24.5M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2230
24.5M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2231
24.5M
          x2i = ixheaac_add32_sat(x2i, x3r);
2232
24.5M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2233
24.5M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2234
2235
24.5M
          *data = x0r;
2236
24.5M
          *(data + 1) = x0i;
2237
24.5M
          data += (del << 1);
2238
2239
24.5M
          *data = x2r;
2240
24.5M
          *(data + 1) = x2i;
2241
24.5M
          data += (del << 1);
2242
2243
24.5M
          *data = x1r;
2244
24.5M
          *(data + 1) = x1i;
2245
24.5M
          data += (del << 1);
2246
2247
24.5M
          *data = x3i;
2248
24.5M
          *(data + 1) = x3r;
2249
24.5M
          data += (del << 1);
2250
24.5M
        }
2251
6.99M
        data -= 2 * npoints;
2252
6.99M
        data += 2;
2253
6.99M
      }
2254
6.99M
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
2255
4.45M
        w1h = *(twiddles + 2 * j);
2256
4.45M
        w2h = *(twiddles + 2 * (j << 1) - 512);
2257
4.45M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2258
4.45M
        w1l = *(twiddles + 2 * j + 1);
2259
4.45M
        w2l = *(twiddles + 2 * (j << 1) - 511);
2260
4.45M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2261
2262
14.5M
        for (k = in_loop_cnt; k != 0; k--) {
2263
10.1M
          WORD32 tmp;
2264
10.1M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2265
2266
10.1M
          data += (del << 1);
2267
2268
10.1M
          x1r = *data;
2269
10.1M
          x1i = *(data + 1);
2270
10.1M
          data += (del << 1);
2271
2272
10.1M
          x2r = *data;
2273
10.1M
          x2i = *(data + 1);
2274
10.1M
          data += (del << 1);
2275
2276
10.1M
          x3r = *data;
2277
10.1M
          x3i = *(data + 1);
2278
10.1M
          data -= 3 * (del << 1);
2279
2280
10.1M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2281
10.1M
                                   ixheaacd_mult32_sat(x1i, w1h));
2282
10.1M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2283
10.1M
          x1r = tmp;
2284
2285
10.1M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2286
10.1M
                                   ixheaacd_mult32_sat(x2i, w2l));
2287
10.1M
          x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2288
10.1M
                                   ixheaacd_mult32_sat(x2i, w2h));
2289
10.1M
          x2r = tmp;
2290
2291
10.1M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2292
10.1M
                                   ixheaacd_mult32_sat(x3i, w3l));
2293
10.1M
          x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2294
10.1M
                                   ixheaacd_mult32_sat(x3i, w3h));
2295
10.1M
          x3r = tmp;
2296
2297
10.1M
          x0r = (*data);
2298
10.1M
          x0i = (*(data + 1));
2299
2300
10.1M
          x0r = ixheaac_add32_sat(x0r, x2r);
2301
10.1M
          x0i = ixheaac_add32_sat(x0i, x2i);
2302
10.1M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2303
10.1M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2304
10.1M
          x1r = ixheaac_add32_sat(x1r, x3r);
2305
10.1M
          x1i = ixheaac_add32_sat(x1i, x3i);
2306
10.1M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2307
10.1M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2308
2309
10.1M
          x0r = ixheaac_add32_sat(x0r, x1r);
2310
10.1M
          x0i = ixheaac_add32_sat(x0i, x1i);
2311
10.1M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2312
10.1M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2313
10.1M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2314
10.1M
          x2i = ixheaac_add32_sat(x2i, x3r);
2315
10.1M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2316
10.1M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2317
2318
10.1M
          *data = x0r;
2319
10.1M
          *(data + 1) = x0i;
2320
10.1M
          data += (del << 1);
2321
2322
10.1M
          *data = x2r;
2323
10.1M
          *(data + 1) = x2i;
2324
10.1M
          data += (del << 1);
2325
2326
10.1M
          *data = x1r;
2327
10.1M
          *(data + 1) = x1i;
2328
10.1M
          data += (del << 1);
2329
2330
10.1M
          *data = x3i;
2331
10.1M
          *(data + 1) = x3r;
2332
10.1M
          data += (del << 1);
2333
10.1M
        }
2334
4.45M
        data -= 2 * npoints;
2335
4.45M
        data += 2;
2336
4.45M
      }
2337
13.9M
      for (; j < nodespacing * del; j += nodespacing) {
2338
11.4M
        w1h = *(twiddles + 2 * j);
2339
11.4M
        w2h = *(twiddles + 2 * (j << 1) - 512);
2340
11.4M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
2341
11.4M
        w1l = *(twiddles + 2 * j + 1);
2342
11.4M
        w2l = *(twiddles + 2 * (j << 1) - 511);
2343
11.4M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
2344
2345
46.1M
        for (k = in_loop_cnt; k != 0; k--) {
2346
34.6M
          WORD32 tmp;
2347
34.6M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2348
2349
34.6M
          data += (del << 1);
2350
2351
34.6M
          x1r = *data;
2352
34.6M
          x1i = *(data + 1);
2353
34.6M
          data += (del << 1);
2354
2355
34.6M
          x2r = *data;
2356
34.6M
          x2i = *(data + 1);
2357
34.6M
          data += (del << 1);
2358
2359
34.6M
          x3r = *data;
2360
34.6M
          x3i = *(data + 1);
2361
34.6M
          data -= 3 * (del << 1);
2362
2363
34.6M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2364
34.6M
                                   ixheaacd_mult32_sat(x1i, w1h));
2365
34.6M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2366
34.6M
          x1r = tmp;
2367
2368
34.6M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2369
34.6M
                                   ixheaacd_mult32_sat(x2i, w2l));
2370
34.6M
          x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2371
34.6M
                                   ixheaacd_mult32_sat(x2i, w2h));
2372
34.6M
          x2r = tmp;
2373
2374
34.6M
          tmp = -ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2375
34.6M
                                    ixheaacd_mult32_sat(x3i, w3h));
2376
34.6M
          x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2377
34.6M
          x3r = tmp;
2378
2379
34.6M
          x0r = (*data);
2380
34.6M
          x0i = (*(data + 1));
2381
2382
34.6M
          x0r = ixheaac_add32_sat(x0r, x2r);
2383
34.6M
          x0i = ixheaac_add32_sat(x0i, x2i);
2384
34.6M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2385
34.6M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2386
34.6M
          x1r = ixheaac_add32_sat(x1r, x3r);
2387
34.6M
          x1i = ixheaac_sub32_sat(x1i, x3i);
2388
34.6M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2389
34.6M
          x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2390
2391
34.6M
          x0r = ixheaac_add32_sat(x0r, x1r);
2392
34.6M
          x0i = ixheaac_add32_sat(x0i, x1i);
2393
34.6M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2394
34.6M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2395
34.6M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2396
34.6M
          x2i = ixheaac_add32_sat(x2i, x3r);
2397
34.6M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2398
34.6M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2399
2400
34.6M
          *data = x0r;
2401
34.6M
          *(data + 1) = x0i;
2402
34.6M
          data += (del << 1);
2403
2404
34.6M
          *data = x2r;
2405
34.6M
          *(data + 1) = x2i;
2406
34.6M
          data += (del << 1);
2407
2408
34.6M
          *data = x1r;
2409
34.6M
          *(data + 1) = x1i;
2410
34.6M
          data += (del << 1);
2411
2412
34.6M
          *data = x3i;
2413
34.6M
          *(data + 1) = x3r;
2414
34.6M
          data += (del << 1);
2415
34.6M
        }
2416
11.4M
        data -= 2 * npoints;
2417
11.4M
        data += 2;
2418
11.4M
      }
2419
2.53M
      nodespacing >>= 2;
2420
2.53M
      del <<= 2;
2421
2.53M
      in_loop_cnt >>= 2;
2422
2.53M
    }
2423
1.23M
    if (not_power_4) {
2424
298k
      const WORD32 *twiddles = ptr_w;
2425
298k
      nodespacing <<= 1;
2426
298k
      shift += 1;
2427
32.0M
      for (j = del / 2; j != 0; j--) {
2428
31.7M
        WORD32 w1h = *twiddles;
2429
31.7M
        WORD32 w1l = *(twiddles + 1);
2430
2431
31.7M
        WORD32 tmp;
2432
31.7M
        twiddles += nodespacing * 2;
2433
2434
31.7M
        x0r = *ptr_y;
2435
31.7M
        x0i = *(ptr_y + 1);
2436
31.7M
        ptr_y += (del << 1);
2437
2438
31.7M
        x1r = *ptr_y;
2439
31.7M
        x1i = *(ptr_y + 1);
2440
2441
31.7M
        tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2442
31.7M
                                 ixheaacd_mult32_sat(x1i, w1h));
2443
31.7M
        x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2444
31.7M
        x1r = tmp;
2445
2446
31.7M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
2447
31.7M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2448
31.7M
        ptr_y -= (del << 1);
2449
2450
31.7M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
2451
31.7M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2452
31.7M
        ptr_y += 2;
2453
31.7M
      }
2454
298k
      twiddles = ptr_w;
2455
32.0M
      for (j = del / 2; j != 0; j--) {
2456
31.7M
        WORD32 w1h = *twiddles;
2457
31.7M
        WORD32 w1l = *(twiddles + 1);
2458
31.7M
        WORD32 tmp;
2459
31.7M
        twiddles += nodespacing * 2;
2460
2461
31.7M
        x0r = *ptr_y;
2462
31.7M
        x0i = *(ptr_y + 1);
2463
31.7M
        ptr_y += (del << 1);
2464
2465
31.7M
        x1r = *ptr_y;
2466
31.7M
        x1i = *(ptr_y + 1);
2467
2468
31.7M
        tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
2469
31.7M
                                 ixheaacd_mult32_sat(x1i, w1l));
2470
31.7M
        x1i = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2471
31.7M
                                 ixheaacd_mult32_sat(x1i, w1h));
2472
31.7M
        x1r = tmp;
2473
2474
31.7M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
2475
31.7M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2476
31.7M
        ptr_y -= (del << 1);
2477
2478
31.7M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
2479
31.7M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2480
31.7M
        ptr_y += 2;
2481
31.7M
      }
2482
298k
    }
2483
1.23M
  }
2484
2485
313M
  for (i = 0; i < nlength; i++) {
2486
300M
    xr[i] = y[2 * i];
2487
300M
    xi[i] = y[2 * i + 1];
2488
300M
  }
2489
2490
13.4M
  *preshift = shift - *preshift;
2491
13.4M
  return;
2492
13.4M
}
2493
2494
/*
 * 3-point complex butterfly on fixed-point data.
 *
 * inp      : three complex input samples stored interleaved as
 *            re0, im0, re1, im1, re2, im2 (inp[0] .. inp[5]).
 * op       : receives the three complex results in the same interleaved
 *            layout (op[0] .. op[5]).
 * sign_dir : multiplier applied to the rotation constant; the caller in this
 *            file forwards its fft_mode argument here.
 *
 * All sums and differences except base_re use the saturating helpers.
 */
static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
                                                        WORD32 sign_dir) {
  /* NOTE(review): 1859775393 ~= 0.8660254 * 2^31, presumably sin(pi/3) in
   * Q31 - confirm against the twiddle table generator. */
  const WORD32 sin_term = -1859775393 * sign_dir;

  /* Sample 0 + sample 1; sample 2 is added when op[0]/op[1] are stored. */
  const WORD32 sum01_re = ixheaac_add32_sat(inp[0], inp[2]);
  const WORD32 sum01_im = ixheaac_add32_sat(inp[1], inp[3]);

  /* Half of (sample 1 + sample 2), per component. */
  const WORD32 half_re = ixheaac_add32_sat(inp[2], inp[4]) >> 1;
  const WORD32 half_im = ixheaac_add32_sat(inp[3], inp[5]) >> 1;

  /* (sample 1 - sample 2) scaled by the rotation constant. The imaginary
   * difference feeds the real outputs and vice versa. */
  const WORD32 cross_im =
      ixheaac_mult32_shl(ixheaac_sub32_sat(inp[3], inp[5]), sin_term);
  const WORD32 cross_re =
      ixheaac_mult32_shl(ixheaac_sub32_sat(inp[2], inp[4]), sin_term);

  /* Real part shared by outputs 1 and 2 (plain, non-saturating subtract). */
  const WORD32 base_re = ixheaac_sub32(inp[0], half_re);

  /* Output 0: sum of all three inputs. */
  op[0] = ixheaac_add32_sat(sum01_re, inp[4]);
  op[1] = ixheaac_add32_sat(sum01_im, inp[5]);

  /* Output 1. */
  op[2] = ixheaac_add32_sat(base_re, cross_im);
  op[3] = ixheaac_sub32_sat(ixheaac_sub32_sat(inp[1], cross_re), half_im);

  /* Output 2: same as output 1 with the cross terms negated. */
  op[4] = ixheaac_sub32_sat(base_re, cross_im);
  op[5] = ixheaac_sub32_sat(ixheaac_add32_sat(inp[1], cross_re), half_im);
}
2530
2531
/*
 * In-place fixed-point complex FFT for lengths that contain a factor of 3.
 *
 * xr, xi   : real and imaginary parts, nlength entries each; overwritten with
 *            the result.
 * nlength  : number of complex points. Factors of 3 are stripped to obtain
 *            the length (mpass) handed to the radix-2 routine.
 * fft_mode : a negative value selects one twiddle rotation, any other value
 *            the conjugate rotation; it is also forwarded to the radix-2
 *            routine and to the 3-point butterfly.
 * preshift : in/out; on return holds (shift - old value + 1), where shift is
 *            derived from floor(log2(mpass)).
 *
 * The routine works in fixed-size stack buffers. A non-positive nlength, or
 * one whose interleaved data would not fit in x[]/y[], is rejected up front
 * and leaves xr, xi and *preshift untouched.
 *
 * NOTE(review): the column gather below indexes xr[3 * j + i] for
 * i < 3 * cnfac, which lines up with a single factor of 3 (cnfac == 1).
 * Confirm that lengths divisible by 9 are never passed in.
 */
VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
                             WORD32 fft_mode, WORD32 *preshift) {
  WORD32 i, j;
  WORD32 shift = 0;
  WORD32 xr_3[384];
  WORD32 xi_3[384];
  WORD32 x[1024];
  WORD32 y[1024];
  WORD32 cnfac, npts;
  WORD32 mpass = nlength;
  WORD32 n = 0;
  WORD32 tw_step;
  WORD32 tmp;
  const WORD32 *w1r = ixheaacd_twiddle_table_3pr;
  const WORD32 *w1i = ixheaacd_twiddle_table_3pi;
  WORD32 *ptr_x = x;
  WORD32 *ptr_y = y;

  /* Reject lengths the scratch buffers cannot serve:
   *  - nlength == 0 would never leave the factor-of-3 loop (0 % 3 == 0),
   *  - a negative nlength can spin forever in the log2 loop,
   *  - more than sizeof(x)/2 points would overrun x[] during interleaving. */
  if (nlength <= 0 || nlength > (WORD32)(sizeof(x) / sizeof(x[0]) / 2)) {
    return;
  }

  /* Strip factors of 3; mpass is what remains, cnfac counts the factors. */
  cnfac = 0;
  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }
  npts = mpass;

  /* Gather every third sample into a contiguous column, transform it with
   * the radix-2 routine and scatter it back. The shift value that routine
   * writes is not used; shift is recomputed below. */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      xr_3[j] = xr[3 * j + i];
      xi_3[j] = xi[3 * j + i];
    }

    (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);

    for (j = 0; j < mpass; j++) {
      xr[3 * j + i] = xr_3[j];
      xi[3 * j + i] = xi_3[j];
    }
  }

  /* n = floor(log2(mpass)). */
  while (npts >> 1) {
    n++;
    npts = npts >> 1;
  }

  /* Same value as the former even/odd split: for even n, (n + 5) / 2
   * truncates to (n + 4) / 2. */
  shift = (n + 5) / 2;

  *preshift = shift - *preshift + 1;

  /* Interleave real/imag into x[], halving each value on the way in. */
  for (i = 0; i < nlength; i++) {
    ptr_x[2 * i] = (xr[i] >> 1);
    ptr_x[2 * i + 1] = (xi[i] >> 1);
  }

  /* After the two twiddles used by a group of three points, advance the
   * table pointers by this amount to reach the next group. Loop-invariant;
   * mpass >= 1 here because nlength >= 1. */
  tw_step = 3 * (128 / mpass - 1) + 1;

  /* Rotate the 2nd and 3rd point of every group by its twiddle factor; the
   * 1st point of each group is left as is. */
  if (fft_mode < 0) {
    for (i = 0; i < nlength; i += 3) {
      w1r++;
      w1i++;

      tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
                               ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
      ptr_x[2 * i + 3] =
          ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)),
                             ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)));
      ptr_x[2 * i + 2] = tmp;

      w1r++;
      w1i++;

      tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
                               ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
      ptr_x[2 * i + 5] =
          ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)),
                             ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)));
      ptr_x[2 * i + 4] = tmp;

      w1r += tw_step;
      w1i += tw_step;
    }
  } else {
    for (i = 0; i < nlength; i += 3) {
      w1r++;
      w1i++;

      tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
                               ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
      ptr_x[2 * i + 3] =
          ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)),
                             ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)));
      ptr_x[2 * i + 2] = tmp;

      w1r++;
      w1i++;

      tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
                               ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
      ptr_x[2 * i + 5] =
          ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)),
                             ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)));
      ptr_x[2 * i + 4] = tmp;

      w1r += tw_step;
      w1i += tw_step;
    }
  }

  /* One 3-point butterfly per group: 6 words in from x[], 6 words out to y[]. */
  for (i = 0; i < mpass; i++) {
    ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);

    ptr_x = ptr_x + 6;
    ptr_y = ptr_y + 6;
  }

  /* De-interleave: the k-th output of group i lands at index k * mpass + i. */
  ptr_y = y;
  for (i = 0; i < mpass; i++) {
    xr[i] = *ptr_y++;
    xi[i] = *ptr_y++;
    xr[mpass + i] = *ptr_y++;
    xi[mpass + i] = *ptr_y++;
    xr[2 * mpass + i] = *ptr_y++;
    xi[2 * mpass + i] = *ptr_y++;
  }
}
2663
2664
/*
 * Complex FFT entry point: picks the transform routine from the length.
 *
 * data_r, data_i : real and imaginary arrays, passed through unchanged to the
 *                  selected routine.
 * nlength        : transform length. When (nlength & (nlength - 1)) is zero
 *                  the radix-2 routine is called through
 *                  ixheaacd_complex_fft_p2; otherwise
 *                  ixheaacd_complex_fft_p3 handles it.
 * fft_mode       : forwarded as is.
 * preshift       : forwarded as is; the selected routine updates it.
 */
VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
                          WORD32 *preshift) {
  /* Zero when at most one bit of nlength is set. */
  const WORD32 extra_bits = nlength & (nlength - 1);

  if (extra_bits == 0) {
    (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
    return;
  }

  ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
}