Coverage Report

Created: 2025-09-17 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/decoder/ixheaacd_fft.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
#include <stdlib.h>
21
#include <stdio.h>
22
23
#include "ixheaac_type_def.h"
24
#include "ixheaacd_interface.h"
25
#include "ixheaac_constants.h"
26
#include "ixheaac_basic_ops32.h"
27
#include "ixheaac_basic_ops40.h"
28
#include "ixheaacd_function_selector.h"
29
30
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
31
extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
32
extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
33
extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
34
extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
35
extern const WORD8 ixheaacd_mps_dig_rev[8];
36
37
#define PLATFORM_INLINE __inline
38
39
/*
 * DIG_REV(i, m, j): base-4 digit reversal helper.
 *
 * Takes the value of (i) as an unsigned word, swaps neighbouring 2-bit groups,
 * then neighbouring nibbles, then neighbouring bytes (which reverses the order
 * of the eight 2-bit digits inside each 16-bit half), and finally stores the
 * word shifted right by (m) into (j).  (i) and (m) are evaluated once each.
 */
#define DIG_REV(i, m, j)                                             \
  do {                                                               \
    unsigned dig_rev_word_ = (i);                                    \
    dig_rev_word_ = ((dig_rev_word_ & 0x33333333u) << 2) |           \
                    ((dig_rev_word_ >> 2) & 0x33333333u);            \
    dig_rev_word_ = ((dig_rev_word_ & 0x0F0F0F0Fu) << 4) |           \
                    ((dig_rev_word_ >> 4) & 0x0F0F0F0Fu);            \
    dig_rev_word_ = ((dig_rev_word_ & 0x00FF00FFu) << 8) |           \
                    ((dig_rev_word_ >> 8) & 0x00FF00FFu);            \
    (j) = dig_rev_word_ >> (m);                                      \
  } while (0)
47
48
2.09G
/*
 * Multiplies a and b in 64-bit precision, shifts the product right by 31 bits
 * and passes it through ixheaac_sat64_32() to obtain the 32-bit return value.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
  const WORD64 wide_product = (WORD64)a * (WORD64)b;
  const WORD32 narrowed = ixheaac_sat64_32(wide_product >> 31);

  return narrowed;
}
57
58
335M
/*
 * Multiply-accumulate: returns ixheaac_add32_sat(a, ixheaacd_mult32_sat(b, c)),
 * i.e. a plus the shifted product of b and c, combined via the add32_sat
 * helper.
 */
static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
  const WORD32 scaled_product = ixheaacd_mult32_sat(b, c);

  return ixheaac_add32_sat(a, scaled_product);
}
65
66
1.78G
/* Returns the FLOAT32 product a * b. */
static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) {
  const FLOAT32 product = a * b;
  return product;
}
73
74
331M
/* Floating-point multiply-accumulate: returns a + b * c as a FLOAT32. */
static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) {
  const FLOAT32 accumulated = a + b * c;
  return accumulated;
}
81
82
VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
83
8.25M
                                 WORD32 npoints) {
84
8.25M
  WORD32 i, j, k;
85
8.25M
  FLOAT32 y[64], z[64];
86
8.25M
  FLOAT32 *ptr_y = y, *ptr_z = z;
87
8.25M
  const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt;
88
89
74.2M
  for (i = 0; i < npoints; i += 4) {
90
66.0M
    FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
91
66.0M
    FLOAT32 *inp = ptr_xr;
92
66.0M
    FLOAT32 tmk;
93
94
66.0M
    WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2];
95
96
66.0M
    inp += (h2);
97
98
66.0M
    x0r = *inp;
99
66.0M
    x0i = *(inp + 1);
100
66.0M
    inp += 16;
101
102
66.0M
    x1r = *inp;
103
66.0M
    x1i = *(inp + 1);
104
66.0M
    inp += 16;
105
106
66.0M
    x2r = *inp;
107
66.0M
    x2i = *(inp + 1);
108
66.0M
    inp += 16;
109
110
66.0M
    x3r = *inp;
111
66.0M
    x3i = *(inp + 1);
112
113
66.0M
    x0r = x0r + x2r;
114
66.0M
    x0i = x0i + x2i;
115
116
66.0M
    tmk = x0r - x2r;
117
66.0M
    x2r = tmk - x2r;
118
66.0M
    tmk = x0i - x2i;
119
66.0M
    x2i = tmk - x2i;
120
121
66.0M
    x1r = x1r + x3r;
122
66.0M
    x1i = x1i + x3i;
123
124
66.0M
    tmk = x1r - x3r;
125
66.0M
    x3r = tmk - x3r;
126
66.0M
    tmk = x1i - x3i;
127
66.0M
    x3i = tmk - x3i;
128
129
66.0M
    x0r = x0r + x1r;
130
66.0M
    x0i = x0i + x1i;
131
132
66.0M
    tmk = x0r - x1r;
133
66.0M
    x1r = tmk - x1r;
134
66.0M
    tmk = x0i - x1i;
135
66.0M
    x1i = tmk - x1i;
136
137
66.0M
    x2r = x2r + x3i;
138
66.0M
    x2i = x2i - x3r;
139
140
66.0M
    tmk = x2r - x3i;
141
66.0M
    x3i = tmk - x3i;
142
66.0M
    tmk = x2i + x3r;
143
66.0M
    x3r = tmk + x3r;
144
145
66.0M
    *ptr_y++ = x0r;
146
66.0M
    *ptr_y++ = x0i;
147
66.0M
    *ptr_y++ = x2r;
148
66.0M
    *ptr_y++ = x2i;
149
66.0M
    *ptr_y++ = x1r;
150
66.0M
    *ptr_y++ = x1i;
151
66.0M
    *ptr_y++ = x3i;
152
66.0M
    *ptr_y++ = x3r;
153
154
66.0M
    inp = ptr_xi;
155
156
66.0M
    inp += (h2);
157
158
66.0M
    x0r = *inp;
159
66.0M
    x0i = *(inp + 1);
160
66.0M
    inp += 16;
161
162
66.0M
    x1r = *inp;
163
66.0M
    x1i = *(inp + 1);
164
66.0M
    inp += 16;
165
166
66.0M
    x2r = *inp;
167
66.0M
    x2i = *(inp + 1);
168
66.0M
    inp += 16;
169
170
66.0M
    x3r = *inp;
171
66.0M
    x3i = *(inp + 1);
172
173
66.0M
    x0r = x0r + x2r;
174
66.0M
    x0i = x0i + x2i;
175
176
66.0M
    tmk = x0r - x2r;
177
66.0M
    x2r = tmk - x2r;
178
66.0M
    tmk = x0i - x2i;
179
66.0M
    x2i = tmk - x2i;
180
181
66.0M
    x1r = x1r + x3r;
182
66.0M
    x1i = x1i + x3i;
183
184
66.0M
    tmk = x1r - x3r;
185
66.0M
    x3r = tmk - x3r;
186
66.0M
    tmk = x1i - x3i;
187
66.0M
    x3i = tmk - x3i;
188
189
66.0M
    x0r = x0r + x1r;
190
66.0M
    x0i = x0i + x1i;
191
192
66.0M
    tmk = x0r - x1r;
193
66.0M
    x1r = tmk - x1r;
194
66.0M
    tmk = x0i - x1i;
195
66.0M
    x1i = tmk - x1i;
196
197
66.0M
    x2r = x2r + x3i;
198
66.0M
    x2i = x2i - x3r;
199
200
66.0M
    tmk = x2r - x3i;
201
66.0M
    x3i = tmk - x3i;
202
66.0M
    tmk = x2i + x3r;
203
66.0M
    x3r = tmk + x3r;
204
205
66.0M
    *ptr_z++ = x0r;
206
66.0M
    *ptr_z++ = x0i;
207
66.0M
    *ptr_z++ = x2r;
208
66.0M
    *ptr_z++ = x2i;
209
66.0M
    *ptr_z++ = x1r;
210
66.0M
    *ptr_z++ = x1i;
211
66.0M
    *ptr_z++ = x3i;
212
66.0M
    *ptr_z++ = x3r;
213
66.0M
  }
214
8.25M
  ptr_y -= 64;
215
8.25M
  ptr_z -= 64;
216
8.25M
  {
217
8.25M
    FLOAT32 *data_r = ptr_y;
218
8.25M
    FLOAT32 *data_i = ptr_z;
219
24.7M
    for (k = 2; k != 0; k--) {
220
16.5M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
221
222
16.5M
      x0r = (*data_r);
223
16.5M
      x0i = (*(data_r + 1));
224
16.5M
      data_r += 8;
225
226
16.5M
      x1r = (*data_r);
227
16.5M
      x1i = (*(data_r + 1));
228
16.5M
      data_r += 8;
229
230
16.5M
      x2r = (*data_r);
231
16.5M
      x2i = (*(data_r + 1));
232
16.5M
      data_r += 8;
233
234
16.5M
      x3r = (*data_r);
235
16.5M
      x3i = (*(data_r + 1));
236
16.5M
      data_r -= 24;
237
238
16.5M
      x0r = x0r + x2r;
239
16.5M
      x0i = x0i + x2i;
240
16.5M
      x2r = x0r - (x2r * 2);
241
16.5M
      x2i = x0i - (x2i * 2);
242
16.5M
      x1r = x1r + x3r;
243
16.5M
      x1i = x1i + x3i;
244
16.5M
      x3r = x1r - (x3r * 2);
245
16.5M
      x3i = x1i - (x3i * 2);
246
247
16.5M
      x0r = x0r + x1r;
248
16.5M
      x0i = x0i + x1i;
249
16.5M
      x1r = x0r - (x1r * 2);
250
16.5M
      x1i = x0i - (x1i * 2);
251
16.5M
      x2r = x2r + x3i;
252
16.5M
      x2i = x2i - x3r;
253
16.5M
      x3i = x2r - (x3i * 2);
254
16.5M
      x3r = x2i + (x3r * 2);
255
256
16.5M
      *data_r = x0r;
257
16.5M
      *(data_r + 1) = x0i;
258
16.5M
      data_r += 8;
259
260
16.5M
      *data_r = x2r;
261
16.5M
      *(data_r + 1) = x2i;
262
16.5M
      data_r += 8;
263
264
16.5M
      *data_r = x1r;
265
16.5M
      *(data_r + 1) = x1i;
266
16.5M
      data_r += 8;
267
268
16.5M
      *data_r = x3i;
269
16.5M
      *(data_r + 1) = x3r;
270
16.5M
      data_r += 8;
271
272
16.5M
      x0r = (*data_i);
273
16.5M
      x0i = (*(data_i + 1));
274
16.5M
      data_i += 8;
275
276
16.5M
      x1r = (*data_i);
277
16.5M
      x1i = (*(data_i + 1));
278
16.5M
      data_i += 8;
279
280
16.5M
      x2r = (*data_i);
281
16.5M
      x2i = (*(data_i + 1));
282
16.5M
      data_i += 8;
283
284
16.5M
      x3r = (*data_i);
285
16.5M
      x3i = (*(data_i + 1));
286
16.5M
      data_i -= 24;
287
288
16.5M
      x0r = x0r + x2r;
289
16.5M
      x0i = x0i + x2i;
290
16.5M
      x2r = x0r - (x2r * 2);
291
16.5M
      x2i = x0i - (x2i * 2);
292
16.5M
      x1r = x1r + x3r;
293
16.5M
      x1i = x1i + x3i;
294
16.5M
      x3r = x1r - (x3r * 2);
295
16.5M
      x3i = x1i - (x3i * 2);
296
297
16.5M
      x0r = x0r + x1r;
298
16.5M
      x0i = x0i + x1i;
299
16.5M
      x1r = x0r - (x1r * 2);
300
16.5M
      x1i = x0i - (x1i * 2);
301
16.5M
      x2r = x2r + x3i;
302
16.5M
      x2i = x2i - x3r;
303
16.5M
      x3i = x2r - (x3i * 2);
304
16.5M
      x3r = x2i + (x3r * 2);
305
306
16.5M
      *data_i = x0r;
307
16.5M
      *(data_i + 1) = x0i;
308
16.5M
      data_i += 8;
309
310
16.5M
      *data_i = x2r;
311
16.5M
      *(data_i + 1) = x2i;
312
16.5M
      data_i += 8;
313
314
16.5M
      *data_i = x1r;
315
16.5M
      *(data_i + 1) = x1i;
316
16.5M
      data_i += 8;
317
318
16.5M
      *data_i = x3i;
319
16.5M
      *(data_i + 1) = x3r;
320
16.5M
      data_i += 8;
321
16.5M
    }
322
8.25M
    data_r = ptr_y + 2;
323
8.25M
    data_i = ptr_z + 2;
324
325
24.7M
    for (k = 2; k != 0; k--) {
326
16.5M
      FLOAT32 tmp;
327
16.5M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
328
329
16.5M
      data_r += 8;
330
331
16.5M
      x1r = *data_r;
332
16.5M
      x1i = *(data_r + 1);
333
16.5M
      data_r += 8;
334
335
16.5M
      x2r = *data_r;
336
16.5M
      x2i = *(data_r + 1);
337
16.5M
      data_r += 8;
338
339
16.5M
      x3r = *data_r;
340
16.5M
      x3i = *(data_r + 1);
341
16.5M
      data_r -= 24;
342
343
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
344
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
345
16.5M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
346
16.5M
                                   (FLOAT32)x1i, 0.923880f);
347
16.5M
      x1r = tmp;
348
349
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
350
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
351
16.5M
      x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
352
16.5M
                                   (FLOAT32)x2i, 0.707107f);
353
16.5M
      x2r = tmp;
354
355
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
356
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
357
16.5M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
358
16.5M
                                   (FLOAT32)x3i, 0.382683f);
359
16.5M
      x3r = tmp;
360
361
16.5M
      x0r = (*data_r);
362
16.5M
      x0i = (*(data_r + 1));
363
364
16.5M
      x0r = x0r + (x2r);
365
16.5M
      x0i = x0i + (x2i);
366
16.5M
      x2r = x0r - (x2r * 2);
367
16.5M
      x2i = x0i - (x2i * 2);
368
16.5M
      x1r = x1r + x3r;
369
16.5M
      x1i = x1i + x3i;
370
16.5M
      x3r = x1r - (x3r * 2);
371
16.5M
      x3i = x1i - (x3i * 2);
372
373
16.5M
      x0r = x0r + (x1r);
374
16.5M
      x0i = x0i + (x1i);
375
16.5M
      x1r = x0r - (x1r * 2);
376
16.5M
      x1i = x0i - (x1i * 2);
377
16.5M
      x2r = x2r + (x3i);
378
16.5M
      x2i = x2i - (x3r);
379
16.5M
      x3i = x2r - (x3i * 2);
380
16.5M
      x3r = x2i + (x3r * 2);
381
382
16.5M
      *data_r = x0r;
383
16.5M
      *(data_r + 1) = x0i;
384
16.5M
      data_r += 8;
385
386
16.5M
      *data_r = x2r;
387
16.5M
      *(data_r + 1) = x2i;
388
16.5M
      data_r += 8;
389
390
16.5M
      *data_r = x1r;
391
16.5M
      *(data_r + 1) = x1i;
392
16.5M
      data_r += 8;
393
394
16.5M
      *data_r = x3i;
395
16.5M
      *(data_r + 1) = x3r;
396
16.5M
      data_r += 8;
397
16.5M
      data_i += 8;
398
399
16.5M
      x1r = *data_i;
400
16.5M
      x1i = *(data_i + 1);
401
16.5M
      data_i += 8;
402
403
16.5M
      x2r = *data_i;
404
16.5M
      x2i = *(data_i + 1);
405
16.5M
      data_i += 8;
406
407
16.5M
      x3r = *data_i;
408
16.5M
      x3i = *(data_i + 1);
409
16.5M
      data_i -= 24;
410
411
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
412
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
413
16.5M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
414
16.5M
                                   (FLOAT32)x1i, 0.923880f);
415
16.5M
      x1r = tmp;
416
417
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
418
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
419
16.5M
      x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
420
16.5M
                                   (FLOAT32)x2i, 0.707107f);
421
16.5M
      x2r = tmp;
422
423
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
424
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
425
16.5M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
426
16.5M
                                   (FLOAT32)x3i, 0.382683f);
427
16.5M
      x3r = tmp;
428
429
16.5M
      x0r = (*data_i);
430
16.5M
      x0i = (*(data_i + 1));
431
432
16.5M
      x0r = x0r + (x2r);
433
16.5M
      x0i = x0i + (x2i);
434
16.5M
      x2r = x0r - (x2r * 2);
435
16.5M
      x2i = x0i - (x2i * 2);
436
16.5M
      x1r = x1r + x3r;
437
16.5M
      x1i = x1i + x3i;
438
16.5M
      x3r = x1r - (x3r * 2);
439
16.5M
      x3i = x1i - (x3i * 2);
440
441
16.5M
      x0r = x0r + (x1r);
442
16.5M
      x0i = x0i + (x1i);
443
16.5M
      x1r = x0r - (x1r * 2);
444
16.5M
      x1i = x0i - (x1i * 2);
445
16.5M
      x2r = x2r + (x3i);
446
16.5M
      x2i = x2i - (x3r);
447
16.5M
      x3i = x2r - (x3i * 2);
448
16.5M
      x3r = x2i + (x3r * 2);
449
450
16.5M
      *data_i = x0r;
451
16.5M
      *(data_i + 1) = x0i;
452
16.5M
      data_i += 8;
453
454
16.5M
      *data_i = x2r;
455
16.5M
      *(data_i + 1) = x2i;
456
16.5M
      data_i += 8;
457
458
16.5M
      *data_i = x1r;
459
16.5M
      *(data_i + 1) = x1i;
460
16.5M
      data_i += 8;
461
462
16.5M
      *data_i = x3i;
463
16.5M
      *(data_i + 1) = x3r;
464
16.5M
      data_i += 8;
465
16.5M
    }
466
8.25M
    data_r -= 62;
467
8.25M
    data_i -= 62;
468
24.7M
    for (k = 2; k != 0; k--) {
469
16.5M
      FLOAT32 tmp;
470
16.5M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
471
472
16.5M
      data_r += 8;
473
474
16.5M
      x1r = *data_r;
475
16.5M
      x1i = *(data_r + 1);
476
16.5M
      data_r += 8;
477
478
16.5M
      x2r = *data_r;
479
16.5M
      x2i = *(data_r + 1);
480
16.5M
      data_r += 8;
481
482
16.5M
      x3r = *data_r;
483
16.5M
      x3i = *(data_r + 1);
484
16.5M
      data_r -= 24;
485
486
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
487
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
488
16.5M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
489
16.5M
                                   (FLOAT32)x1i, 0.707107f);
490
16.5M
      x1r = tmp;
491
492
16.5M
      tmp = x2i;
493
16.5M
      x2i = -x2r;
494
16.5M
      x2r = tmp;
495
496
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
497
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
498
16.5M
      x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
499
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
500
16.5M
      x3r = tmp;
501
502
16.5M
      x0r = (*data_r);
503
16.5M
      x0i = (*(data_r + 1));
504
505
16.5M
      x0r = x0r + (x2r);
506
16.5M
      x0i = x0i + (x2i);
507
16.5M
      x2r = x0r - (x2r * 2);
508
16.5M
      x2i = x0i - (x2i * 2);
509
16.5M
      x1r = x1r + x3r;
510
16.5M
      x1i = x1i + x3i;
511
16.5M
      x3r = x1r - (x3r * 2);
512
16.5M
      x3i = x1i - (x3i * 2);
513
514
16.5M
      x0r = x0r + (x1r);
515
16.5M
      x0i = x0i + (x1i);
516
16.5M
      x1r = x0r - (x1r * 2);
517
16.5M
      x1i = x0i - (x1i * 2);
518
16.5M
      x2r = x2r + (x3i);
519
16.5M
      x2i = x2i - (x3r);
520
16.5M
      x3i = x2r - (x3i * 2);
521
16.5M
      x3r = x2i + (x3r * 2);
522
523
16.5M
      *data_r = x0r;
524
16.5M
      *(data_r + 1) = x0i;
525
16.5M
      data_r += 8;
526
527
16.5M
      *data_r = x2r;
528
16.5M
      *(data_r + 1) = x2i;
529
16.5M
      data_r += 8;
530
531
16.5M
      *data_r = x1r;
532
16.5M
      *(data_r + 1) = x1i;
533
16.5M
      data_r += 8;
534
535
16.5M
      *data_r = x3i;
536
16.5M
      *(data_r + 1) = x3r;
537
16.5M
      data_r += 8;
538
16.5M
      data_i += 8;
539
540
16.5M
      x1r = *data_i;
541
16.5M
      x1i = *(data_i + 1);
542
16.5M
      data_i += 8;
543
544
16.5M
      x2r = *data_i;
545
16.5M
      x2i = *(data_i + 1);
546
16.5M
      data_i += 8;
547
548
16.5M
      x3r = *data_i;
549
16.5M
      x3i = *(data_i + 1);
550
16.5M
      data_i -= 24;
551
552
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
553
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
554
16.5M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
555
16.5M
                                   (FLOAT32)x1i, 0.707107f);
556
16.5M
      x1r = tmp;
557
558
16.5M
      tmp = x2i;
559
16.5M
      x2i = -x2r;
560
16.5M
      x2r = tmp;
561
562
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
563
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
564
16.5M
      x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
565
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
566
16.5M
      x3r = tmp;
567
568
16.5M
      x0r = (*data_i);
569
16.5M
      x0i = (*(data_i + 1));
570
571
16.5M
      x0r = x0r + (x2r);
572
16.5M
      x0i = x0i + (x2i);
573
16.5M
      x2r = x0r - (x2r * 2);
574
16.5M
      x2i = x0i - (x2i * 2);
575
16.5M
      x1r = x1r + x3r;
576
16.5M
      x1i = x1i + x3i;
577
16.5M
      x3r = x1r - (x3r * 2);
578
16.5M
      x3i = x1i - (x3i * 2);
579
580
16.5M
      x0r = x0r + (x1r);
581
16.5M
      x0i = x0i + (x1i);
582
16.5M
      x1r = x0r - (x1r * 2);
583
16.5M
      x1i = x0i - (x1i * 2);
584
16.5M
      x2r = x2r + (x3i);
585
16.5M
      x2i = x2i - (x3r);
586
16.5M
      x3i = x2r - (x3i * 2);
587
16.5M
      x3r = x2i + (x3r * 2);
588
589
16.5M
      *data_i = x0r;
590
16.5M
      *(data_i + 1) = x0i;
591
16.5M
      data_i += 8;
592
593
16.5M
      *data_i = x2r;
594
16.5M
      *(data_i + 1) = x2i;
595
16.5M
      data_i += 8;
596
597
16.5M
      *data_i = x1r;
598
16.5M
      *(data_i + 1) = x1i;
599
16.5M
      data_i += 8;
600
601
16.5M
      *data_i = x3i;
602
16.5M
      *(data_i + 1) = x3r;
603
16.5M
      data_i += 8;
604
16.5M
    }
605
8.25M
    data_r -= 62;
606
8.25M
    data_i -= 62;
607
24.7M
    for (k = 2; k != 0; k--) {
608
16.5M
      FLOAT32 tmp;
609
16.5M
      FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
610
611
16.5M
      data_r += 8;
612
613
16.5M
      x1r = *data_r;
614
16.5M
      x1i = *(data_r + 1);
615
16.5M
      data_r += 8;
616
617
16.5M
      x2r = *data_r;
618
16.5M
      x2i = *(data_r + 1);
619
16.5M
      data_r += 8;
620
621
16.5M
      x3r = *data_r;
622
16.5M
      x3i = *(data_r + 1);
623
16.5M
      data_r -= 24;
624
625
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
626
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
627
16.5M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
628
16.5M
                                   (FLOAT32)x1i, 0.382683f);
629
16.5M
      x1r = tmp;
630
631
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
632
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
633
16.5M
      x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
634
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
635
16.5M
      x2r = tmp;
636
637
16.5M
      tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
638
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
639
16.5M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
640
16.5M
                                   (FLOAT32)x3i, 0.923880f);
641
16.5M
      x3r = tmp;
642
643
16.5M
      x0r = (*data_r);
644
16.5M
      x0i = (*(data_r + 1));
645
646
16.5M
      x0r = x0r + (x2r);
647
16.5M
      x0i = x0i + (x2i);
648
16.5M
      x2r = x0r - (x2r * 2);
649
16.5M
      x2i = x0i - (x2i * 2);
650
16.5M
      x1r = x1r + x3r;
651
16.5M
      x1i = x1i - x3i;
652
16.5M
      x3r = x1r - (x3r * 2);
653
16.5M
      x3i = x1i + (x3i * 2);
654
655
16.5M
      x0r = x0r + (x1r);
656
16.5M
      x0i = x0i + (x1i);
657
16.5M
      x1r = x0r - (x1r * 2);
658
16.5M
      x1i = x0i - (x1i * 2);
659
16.5M
      x2r = x2r + (x3i);
660
16.5M
      x2i = x2i - (x3r);
661
16.5M
      x3i = x2r - (x3i * 2);
662
16.5M
      x3r = x2i + (x3r * 2);
663
664
16.5M
      *data_r = x0r;
665
16.5M
      *(data_r + 1) = x0i;
666
16.5M
      data_r += 8;
667
668
16.5M
      *data_r = x2r;
669
16.5M
      *(data_r + 1) = x2i;
670
16.5M
      data_r += 8;
671
672
16.5M
      *data_r = x1r;
673
16.5M
      *(data_r + 1) = x1i;
674
16.5M
      data_r += 8;
675
676
16.5M
      *data_r = x3i;
677
16.5M
      *(data_r + 1) = x3r;
678
16.5M
      data_r += 8;
679
16.5M
      data_i += 8;
680
681
16.5M
      x1r = *data_i;
682
16.5M
      x1i = *(data_i + 1);
683
16.5M
      data_i += 8;
684
685
16.5M
      x2r = *data_i;
686
16.5M
      x2i = *(data_i + 1);
687
16.5M
      data_i += 8;
688
689
16.5M
      x3r = *data_i;
690
16.5M
      x3i = *(data_i + 1);
691
16.5M
      data_i -= 24;
692
693
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
694
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
695
16.5M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
696
16.5M
                                   (FLOAT32)x1i, 0.382683f);
697
16.5M
      x1r = tmp;
698
699
16.5M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
700
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
701
16.5M
      x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
702
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
703
16.5M
      x2r = tmp;
704
705
16.5M
      tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
706
16.5M
                      ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
707
16.5M
      x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
708
16.5M
                                   (FLOAT32)x3i, 0.923880f);
709
16.5M
      x3r = tmp;
710
711
16.5M
      x0r = (*data_i);
712
16.5M
      x0i = (*(data_i + 1));
713
714
16.5M
      x0r = x0r + (x2r);
715
16.5M
      x0i = x0i + (x2i);
716
16.5M
      x2r = x0r - (x2r * 2);
717
16.5M
      x2i = x0i - (x2i * 2);
718
16.5M
      x1r = x1r + x3r;
719
16.5M
      x1i = x1i - x3i;
720
16.5M
      x3r = x1r - (x3r * 2);
721
16.5M
      x3i = x1i + (x3i * 2);
722
723
16.5M
      x0r = x0r + (x1r);
724
16.5M
      x0i = x0i + (x1i);
725
16.5M
      x1r = x0r - (x1r * 2);
726
16.5M
      x1i = x0i - (x1i * 2);
727
16.5M
      x2r = x2r + (x3i);
728
16.5M
      x2i = x2i - (x3r);
729
16.5M
      x3i = x2r - (x3i * 2);
730
16.5M
      x3r = x2i + (x3r * 2);
731
732
16.5M
      *data_i = x0r;
733
16.5M
      *(data_i + 1) = x0i;
734
16.5M
      data_i += 8;
735
736
16.5M
      *data_i = x2r;
737
16.5M
      *(data_i + 1) = x2i;
738
16.5M
      data_i += 8;
739
740
16.5M
      *data_i = x1r;
741
16.5M
      *(data_i + 1) = x1i;
742
16.5M
      data_i += 8;
743
744
16.5M
      *data_i = x3i;
745
16.5M
      *(data_i + 1) = x3r;
746
16.5M
      data_i += 8;
747
16.5M
    }
748
8.25M
    data_r -= 62;
749
8.25M
    data_i -= 62;
750
8.25M
  }
751
8.25M
  {
752
8.25M
    const FLOAT32 *twiddles = ptr_w;
753
8.25M
    FLOAT32 x0r, x0i, x1r, x1i;
754
74.2M
    for (j = 8; j != 0; j--) {
755
66.0M
      FLOAT32 W1 = *twiddles;
756
66.0M
      twiddles++;
757
66.0M
      FLOAT32 W4 = *twiddles;
758
66.0M
      twiddles++;
759
66.0M
      FLOAT32 tmp;
760
761
66.0M
      x0r = *ptr_y;
762
66.0M
      x0i = *(ptr_y + 1);
763
66.0M
      ptr_y += 32;
764
66.0M
      ptr_xr += 32;
765
766
66.0M
      x1r = *ptr_y;
767
66.0M
      x1i = *(ptr_y + 1);
768
769
66.0M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
770
66.0M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
771
66.0M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
772
66.0M
                                   (FLOAT32)x1i, W1);
773
66.0M
      x1r = tmp;
774
775
66.0M
      *ptr_xr = (x0r) - (x1r);
776
66.0M
      *(ptr_xr + 1) = (x0i) - (x1i);
777
66.0M
      ptr_y -= 32;
778
66.0M
      ptr_xr -= 32;
779
780
66.0M
      *ptr_xr = (x0r) + (x1r);
781
66.0M
      *(ptr_xr + 1) = (x0i) + (x1i);
782
66.0M
      ptr_y += 2;
783
66.0M
      ptr_xr += 2;
784
785
66.0M
      x0r = *ptr_z;
786
66.0M
      x0i = *(ptr_z + 1);
787
66.0M
      ptr_z += 32;
788
66.0M
      ptr_xi += 32;
789
790
66.0M
      x1r = *ptr_z;
791
66.0M
      x1i = *(ptr_z + 1);
792
793
66.0M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
794
66.0M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
795
66.0M
      x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
796
66.0M
                                   (FLOAT32)x1i, W1);
797
66.0M
      x1r = tmp;
798
799
66.0M
      *ptr_xi = (x0r) - (x1r);
800
66.0M
      *(ptr_xi + 1) = (x0i) - (x1i);
801
66.0M
      ptr_z -= 32;
802
66.0M
      ptr_xi -= 32;
803
804
66.0M
      *ptr_xi = (x0r) + (x1r);
805
66.0M
      *(ptr_xi + 1) = (x0i) + (x1i);
806
66.0M
      ptr_z += 2;
807
66.0M
      ptr_xi += 2;
808
66.0M
    }
809
8.25M
    twiddles = ptr_w;
810
74.2M
    for (j = 8; j != 0; j--) {
811
66.0M
      FLOAT32 W1 = *twiddles;
812
66.0M
      twiddles++;
813
66.0M
      FLOAT32 W4 = *twiddles;
814
66.0M
      twiddles++;
815
66.0M
      FLOAT32 tmp;
816
817
66.0M
      x0r = *ptr_y;
818
66.0M
      x0i = *(ptr_y + 1);
819
66.0M
      ptr_y += 32;
820
66.0M
      ptr_xr += 32;
821
822
66.0M
      x1r = *ptr_y;
823
66.0M
      x1i = *(ptr_y + 1);
824
825
66.0M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
826
66.0M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W1));
827
66.0M
      x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
828
66.0M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
829
66.0M
      x1r = tmp;
830
831
66.0M
      *ptr_xr = (x0r) - (x1r);
832
66.0M
      *(ptr_xr + 1) = (x0i) - (x1i);
833
66.0M
      ptr_y -= 32;
834
66.0M
      ptr_xr -= 32;
835
836
66.0M
      *ptr_xr = (x0r) + (x1r);
837
66.0M
      *(ptr_xr + 1) = (x0i) + (x1i);
838
66.0M
      ptr_y += 2;
839
66.0M
      ptr_xr += 2;
840
841
66.0M
      x0r = *ptr_z;
842
66.0M
      x0i = *(ptr_z + 1);
843
66.0M
      ptr_z += 32;
844
66.0M
      ptr_xi += 32;
845
846
66.0M
      x1r = *ptr_z;
847
66.0M
      x1i = *(ptr_z + 1);
848
849
66.0M
      tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
850
66.0M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W1));
851
66.0M
      x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
852
66.0M
                      ixheaacd_mult32X32float((FLOAT32)x1i, W4));
853
66.0M
      x1r = tmp;
854
855
66.0M
      *ptr_xi = (x0r) - (x1r);
856
66.0M
      *(ptr_xi + 1) = (x0i) - (x1i);
857
66.0M
      ptr_z -= 32;
858
66.0M
      ptr_xi -= 32;
859
860
66.0M
      *ptr_xi = (x0r) + (x1r);
861
66.0M
      *(ptr_xi + 1) = (x0i) + (x1i);
862
66.0M
      ptr_z += 2;
863
66.0M
      ptr_xi += 2;
864
66.0M
    }
865
8.25M
  }
866
8.25M
}
867
868
21.3k
VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
869
21.3k
  WORD32 i, j, k, n_stages, h2;
870
21.3k
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
871
21.3k
  WORD32 del, nodespacing, in_loop_cnt;
872
21.3k
  WORD32 dig_rev_shift;
873
21.3k
  WORD32 not_power_4;
874
21.3k
  FLOAT32 ptr_x[256];
875
21.3k
  FLOAT32 y[256];
876
21.3k
  WORD32 npoints = nlength;
877
21.3k
  FLOAT32 *ptr_y = y;
878
21.3k
  const FLOAT32 *ptr_w;
879
21.3k
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
880
21.3k
  n_stages = 30 - ixheaac_norm32(npoints);
881
21.3k
  not_power_4 = n_stages & 1;
882
883
21.3k
  n_stages = n_stages >> 1;
884
885
886
1.38M
  for (i = 0; i<nlength; i++)
887
1.36M
  {
888
1.36M
    ptr_x[2 * i] = xr[i];
889
1.36M
    ptr_x[2 * i + 1] = xi[i];
890
1.36M
  }
891
892
21.3k
  ptr_w = ixheaacd_twiddle_table_fft;
893
894
363k
  for (i = 0; i<npoints; i += 4)
895
342k
  {
896
342k
    FLOAT32 *inp = ptr_x;
897
898
342k
    DIG_REV(i, dig_rev_shift, h2);
899
342k
    if (not_power_4)
900
0
    {
901
0
      h2 += 1;
902
0
      h2 &= ~1;
903
0
    }
904
342k
    inp += (h2);
905
906
342k
    x0r = *inp;
907
342k
    x0i = *(inp + 1);
908
342k
    inp += (npoints >> 1);
909
910
342k
    x1r = *inp;
911
342k
    x1i = *(inp + 1);
912
342k
    inp += (npoints >> 1);
913
914
342k
    x2r = *inp;
915
342k
    x2i = *(inp + 1);
916
342k
    inp += (npoints >> 1);
917
918
342k
    x3r = *inp;
919
342k
    x3i = *(inp + 1);
920
921
342k
    x0r = x0r + x2r;
922
342k
    x0i = x0i + x2i;
923
342k
    x2r = x0r - (x2r * 2);
924
342k
    x2i = x0i - (x2i * 2);
925
342k
    x1r = x1r + x3r;
926
342k
    x1i = x1i + x3i;
927
342k
    x3r = x1r - (x3r * 2);
928
342k
    x3i = x1i - (x3i * 2);
929
930
342k
    x0r = x0r + x1r;
931
342k
    x0i = x0i + x1i;
932
342k
    x1r = x0r - (x1r * 2);
933
342k
    x1i = x0i - (x1i * 2);
934
342k
    x2r = x2r + x3i;
935
342k
    x2i = x2i - x3r;
936
342k
    x3i = x2r - (x3i * 2);
937
342k
    x3r = x2i + (x3r * 2);
938
939
342k
    *ptr_y++ = x0r;
940
342k
    *ptr_y++ = x0i;
941
342k
    *ptr_y++ = x2r;
942
342k
    *ptr_y++ = x2i;
943
342k
    *ptr_y++ = x1r;
944
342k
    *ptr_y++ = x1i;
945
342k
    *ptr_y++ = x3i;
946
342k
    *ptr_y++ = x3r;
947
342k
  }
948
21.3k
  ptr_y -= 2 * npoints;
949
21.3k
  del = 4;
950
21.3k
  nodespacing = 64;
951
21.3k
  in_loop_cnt = npoints >> 4;
952
64.1k
  for (i = n_stages - 1; i>0; i--)
953
42.7k
  {
954
42.7k
    const FLOAT32 *twiddles = ptr_w;
955
42.7k
    FLOAT32 *data = ptr_y;
956
42.7k
    FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
957
42.7k
    WORD32 sec_loop_cnt;
958
959
149k
    for (k = in_loop_cnt; k != 0; k--)
960
106k
    {
961
106k
      x0r = (*data);
962
106k
      x0i = (*(data + 1));
963
106k
      data += (del << 1);
964
965
106k
      x1r = (*data);
966
106k
      x1i = (*(data + 1));
967
106k
      data += (del << 1);
968
969
106k
      x2r = (*data);
970
106k
      x2i = (*(data + 1));
971
106k
      data += (del << 1);
972
973
106k
      x3r = (*data);
974
106k
      x3i = (*(data + 1));
975
106k
      data -= 3 * (del << 1);
976
977
106k
      x0r = x0r + x2r;
978
106k
      x0i = x0i + x2i;
979
106k
      x2r = x0r - (x2r * 2);
980
106k
      x2i = x0i - (x2i * 2);
981
106k
      x1r = x1r + x3r;
982
106k
      x1i = x1i + x3i;
983
106k
      x3r = x1r - (x3r * 2);
984
106k
      x3i = x1i - (x3i * 2);
985
986
106k
      x0r = x0r + x1r;
987
106k
      x0i = x0i + x1i;
988
106k
      x1r = x0r - (x1r * 2);
989
106k
      x1i = x0i - (x1i * 2);
990
106k
      x2r = x2r + x3i;
991
106k
      x2i = x2i - x3r;
992
106k
      x3i = x2r - (x3i * 2);
993
106k
      x3r = x2i + (x3r * 2);
994
995
106k
      *data = x0r;
996
106k
      *(data + 1) = x0i;
997
106k
      data += (del << 1);
998
999
106k
      *data = x2r;
1000
106k
      *(data + 1) = x2i;
1001
106k
      data += (del << 1);
1002
1003
106k
      *data = x1r;
1004
106k
      *(data + 1) = x1i;
1005
106k
      data += (del << 1);
1006
1007
106k
      *data = x3i;
1008
106k
      *(data + 1) = x3r;
1009
106k
      data += (del << 1);
1010
106k
    }
1011
42.7k
    data = ptr_y + 2;
1012
1013
42.7k
    sec_loop_cnt = (nodespacing * del);
1014
42.7k
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \
1015
42.7k
            + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \
1016
42.7k
            - (sec_loop_cnt / 256);
1017
42.7k
    j = nodespacing;
1018
1019
171k
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
1020
128k
    {
1021
128k
      w1h = *(twiddles + 2 * j);
1022
128k
      w1l = *(twiddles + 2 * j + 1);
1023
128k
      w2h = *(twiddles + 2 * (j << 1));
1024
128k
      w2l = *(twiddles + 2 * (j << 1) + 1);
1025
128k
      w3h = *(twiddles + 2 * j + 2 * (j << 1));
1026
128k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1027
1028
320k
      for (k = in_loop_cnt; k != 0; k--)
1029
192k
      {
1030
192k
        FLOAT32 tmp;
1031
192k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1032
1033
192k
        data += (del << 1);
1034
1035
192k
        x1r = *data;
1036
192k
        x1i = *(data + 1);
1037
192k
        data += (del << 1);
1038
1039
192k
        x2r = *data;
1040
192k
        x2i = *(data + 1);
1041
192k
        data += (del << 1);
1042
1043
192k
        x3r = *data;
1044
192k
        x3i = *(data + 1);
1045
192k
        data -= 3 * (del << 1);
1046
1047
192k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1048
192k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1049
192k
        x1r = tmp;
1050
1051
192k
        tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1052
192k
        x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1053
192k
        x2r = tmp;
1054
1055
192k
        tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
1056
192k
        x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1057
192k
        x3r = tmp;
1058
1059
192k
        x0r = (*data);
1060
192k
        x0i = (*(data + 1));
1061
1062
192k
        x0r = x0r + (x2r);
1063
192k
        x0i = x0i + (x2i);
1064
192k
        x2r = x0r - (x2r * 2);
1065
192k
        x2i = x0i - (x2i * 2);
1066
192k
        x1r = x1r + x3r;
1067
192k
        x1i = x1i + x3i;
1068
192k
        x3r = x1r - (x3r * 2);
1069
192k
        x3i = x1i - (x3i * 2);
1070
1071
192k
        x0r = x0r + (x1r);
1072
192k
        x0i = x0i + (x1i);
1073
192k
        x1r = x0r - (x1r * 2);
1074
192k
        x1i = x0i - (x1i * 2);
1075
192k
        x2r = x2r + (x3i);
1076
192k
        x2i = x2i - (x3r);
1077
192k
        x3i = x2r - (x3i * 2);
1078
192k
        x3r = x2i + (x3r * 2);
1079
1080
192k
        *data = x0r;
1081
192k
        *(data + 1) = x0i;
1082
192k
        data += (del << 1);
1083
1084
192k
        *data = x2r;
1085
192k
        *(data + 1) = x2i;
1086
192k
        data += (del << 1);
1087
1088
192k
        *data = x1r;
1089
192k
        *(data + 1) = x1i;
1090
192k
        data += (del << 1);
1091
1092
192k
        *data = x3i;
1093
192k
        *(data + 1) = x3r;
1094
192k
        data += (del << 1);
1095
192k
      }
1096
128k
      data -= 2 * npoints;
1097
128k
      data += 2;
1098
128k
    }
1099
128k
    for (; j <= (nodespacing * del) >> 1; j += nodespacing)
1100
85.5k
    {
1101
85.5k
      w1h = *(twiddles + 2 * j);
1102
85.5k
      w2h = *(twiddles + 2 * (j << 1));
1103
85.5k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1104
85.5k
      w1l = *(twiddles + 2 * j + 1);
1105
85.5k
      w2l = *(twiddles + 2 * (j << 1) + 1);
1106
85.5k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1107
1108
235k
      for (k = in_loop_cnt; k != 0; k--)
1109
149k
      {
1110
149k
        FLOAT32 tmp;
1111
149k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1112
1113
149k
        data += (del << 1);
1114
1115
149k
        x1r = *data;
1116
149k
        x1i = *(data + 1);
1117
149k
        data += (del << 1);
1118
1119
149k
        x2r = *data;
1120
149k
        x2i = *(data + 1);
1121
149k
        data += (del << 1);
1122
1123
149k
        x3r = *data;
1124
149k
        x3i = *(data + 1);
1125
149k
        data -= 3 * (del << 1);
1126
1127
149k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1128
149k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1129
149k
        x1r = tmp;
1130
1131
149k
        tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1132
149k
        x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1133
149k
        x2r = tmp;
1134
1135
149k
        tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1136
149k
        x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1137
149k
        x3r = tmp;
1138
1139
149k
        x0r = (*data);
1140
149k
        x0i = (*(data + 1));
1141
1142
149k
        x0r = x0r + (x2r);
1143
149k
        x0i = x0i + (x2i);
1144
149k
        x2r = x0r - (x2r * 2);
1145
149k
        x2i = x0i - (x2i * 2);
1146
149k
        x1r = x1r + x3r;
1147
149k
        x1i = x1i + x3i;
1148
149k
        x3r = x1r - (x3r * 2);
1149
149k
        x3i = x1i - (x3i * 2);
1150
1151
149k
        x0r = x0r + (x1r);
1152
149k
        x0i = x0i + (x1i);
1153
149k
        x1r = x0r - (x1r * 2);
1154
149k
        x1i = x0i - (x1i * 2);
1155
149k
        x2r = x2r + (x3i);
1156
149k
        x2i = x2i - (x3r);
1157
149k
        x3i = x2r - (x3i * 2);
1158
149k
        x3r = x2i + (x3r * 2);
1159
1160
149k
        *data = x0r;
1161
149k
        *(data + 1) = x0i;
1162
149k
        data += (del << 1);
1163
1164
149k
        *data = x2r;
1165
149k
        *(data + 1) = x2i;
1166
149k
        data += (del << 1);
1167
1168
149k
        *data = x1r;
1169
149k
        *(data + 1) = x1i;
1170
149k
        data += (del << 1);
1171
1172
149k
        *data = x3i;
1173
149k
        *(data + 1) = x3r;
1174
149k
        data += (del << 1);
1175
149k
      }
1176
85.5k
      data -= 2 * npoints;
1177
85.5k
      data += 2;
1178
85.5k
    }
1179
85.5k
    for (; j <= sec_loop_cnt * 2; j += nodespacing)
1180
42.7k
    {
1181
42.7k
      w1h = *(twiddles + 2 * j);
1182
42.7k
      w2h = *(twiddles + 2 * (j << 1) - 512);
1183
42.7k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1184
42.7k
      w1l = *(twiddles + 2 * j + 1);
1185
42.7k
      w2l = *(twiddles + 2 * (j << 1) - 511);
1186
42.7k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1187
1188
85.5k
      for (k = in_loop_cnt; k != 0; k--)
1189
42.7k
      {
1190
42.7k
        FLOAT32 tmp;
1191
42.7k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1192
1193
42.7k
        data += (del << 1);
1194
1195
42.7k
        x1r = *data;
1196
42.7k
        x1i = *(data + 1);
1197
42.7k
        data += (del << 1);
1198
1199
42.7k
        x2r = *data;
1200
42.7k
        x2i = *(data + 1);
1201
42.7k
        data += (del << 1);
1202
1203
42.7k
        x3r = *data;
1204
42.7k
        x3i = *(data + 1);
1205
42.7k
        data -= 3 * (del << 1);
1206
1207
42.7k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1208
42.7k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1209
42.7k
        x1r = tmp;
1210
1211
42.7k
        tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1212
42.7k
        x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1213
42.7k
        x2r = tmp;
1214
1215
42.7k
        tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1216
42.7k
        x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1217
42.7k
        x3r = tmp;
1218
1219
42.7k
        x0r = (*data);
1220
42.7k
        x0i = (*(data + 1));
1221
1222
42.7k
        x0r = x0r + (x2r);
1223
42.7k
        x0i = x0i + (x2i);
1224
42.7k
        x2r = x0r - (x2r * 2);
1225
42.7k
        x2i = x0i - (x2i * 2);
1226
42.7k
        x1r = x1r + x3r;
1227
42.7k
        x1i = x1i + x3i;
1228
42.7k
        x3r = x1r - (x3r * 2);
1229
42.7k
        x3i = x1i - (x3i * 2);
1230
1231
42.7k
        x0r = x0r + (x1r);
1232
42.7k
        x0i = x0i + (x1i);
1233
42.7k
        x1r = x0r - (x1r * 2);
1234
42.7k
        x1i = x0i - (x1i * 2);
1235
42.7k
        x2r = x2r + (x3i);
1236
42.7k
        x2i = x2i - (x3r);
1237
42.7k
        x3i = x2r - (x3i * 2);
1238
42.7k
        x3r = x2i + (x3r * 2);
1239
1240
42.7k
        *data = x0r;
1241
42.7k
        *(data + 1) = x0i;
1242
42.7k
        data += (del << 1);
1243
1244
42.7k
        *data = x2r;
1245
42.7k
        *(data + 1) = x2i;
1246
42.7k
        data += (del << 1);
1247
1248
42.7k
        *data = x1r;
1249
42.7k
        *(data + 1) = x1i;
1250
42.7k
        data += (del << 1);
1251
1252
42.7k
        *data = x3i;
1253
42.7k
        *(data + 1) = x3r;
1254
42.7k
        data += (del << 1);
1255
42.7k
      }
1256
42.7k
      data -= 2 * npoints;
1257
42.7k
      data += 2;
1258
42.7k
    }
1259
171k
    for (; j<nodespacing * del; j += nodespacing)
1260
128k
    {
1261
128k
      w1h = *(twiddles + 2 * j);
1262
128k
      w2h = *(twiddles + 2 * (j << 1) - 512);
1263
128k
      w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1264
128k
      w1l = *(twiddles + 2 * j + 1);
1265
128k
      w2l = *(twiddles + 2 * (j << 1) - 511);
1266
128k
      w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1267
1268
320k
      for (k = in_loop_cnt; k != 0; k--)
1269
192k
      {
1270
192k
        FLOAT32 tmp;
1271
192k
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1272
1273
192k
        data += (del << 1);
1274
1275
192k
        x1r = *data;
1276
192k
        x1i = *(data + 1);
1277
192k
        data += (del << 1);
1278
1279
192k
        x2r = *data;
1280
192k
        x2i = *(data + 1);
1281
192k
        data += (del << 1);
1282
1283
192k
        x3r = *data;
1284
192k
        x3i = *(data + 1);
1285
192k
        data -= 3 * (del << 1);
1286
1287
192k
        tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1288
192k
        x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1289
192k
        x1r = tmp;
1290
1291
192k
        tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1292
192k
        x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1293
192k
        x2r = tmp;
1294
1295
192k
        tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
1296
192k
        x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1297
192k
        x3r = tmp;
1298
1299
192k
        x0r = (*data);
1300
192k
        x0i = (*(data + 1));
1301
1302
192k
        x0r = x0r + (x2r);
1303
192k
        x0i = x0i + (x2i);
1304
192k
        x2r = x0r - (x2r * 2);
1305
192k
        x2i = x0i - (x2i * 2);
1306
192k
        x1r = x1r + x3r;
1307
192k
        x1i = x1i - x3i;
1308
192k
        x3r = x1r - (x3r * 2);
1309
192k
        x3i = x1i + (x3i * 2);
1310
1311
192k
        x0r = x0r + (x1r);
1312
192k
        x0i = x0i + (x1i);
1313
192k
        x1r = x0r - (x1r * 2);
1314
192k
        x1i = x0i - (x1i * 2);
1315
192k
        x2r = x2r + (x3i);
1316
192k
        x2i = x2i - (x3r);
1317
192k
        x3i = x2r - (x3i * 2);
1318
192k
        x3r = x2i + (x3r * 2);
1319
1320
192k
        *data = x0r;
1321
192k
        *(data + 1) = x0i;
1322
192k
        data += (del << 1);
1323
1324
192k
        *data = x2r;
1325
192k
        *(data + 1) = x2i;
1326
192k
        data += (del << 1);
1327
1328
192k
        *data = x1r;
1329
192k
        *(data + 1) = x1i;
1330
192k
        data += (del << 1);
1331
1332
192k
        *data = x3i;
1333
192k
        *(data + 1) = x3r;
1334
192k
        data += (del << 1);
1335
192k
      }
1336
128k
      data -= 2 * npoints;
1337
128k
      data += 2;
1338
128k
    }
1339
42.7k
    nodespacing >>= 2;
1340
42.7k
    del <<= 2;
1341
42.7k
    in_loop_cnt >>= 2;
1342
42.7k
  }
1343
21.3k
  if (not_power_4)
1344
0
  {
1345
0
    const FLOAT32 *twiddles = ptr_w;
1346
0
    nodespacing <<= 1;
1347
1348
0
    for (j = del / 2; j != 0; j--)
1349
0
    {
1350
0
      FLOAT32 w1h = *twiddles;
1351
0
      FLOAT32 w1l = *(twiddles + 1);
1352
0
      FLOAT32 tmp;
1353
0
      twiddles += nodespacing * 2;
1354
1355
0
      x0r = *ptr_y;
1356
0
      x0i = *(ptr_y + 1);
1357
0
      ptr_y += (del << 1);
1358
1359
0
      x1r = *ptr_y;
1360
0
      x1i = *(ptr_y + 1);
1361
1362
0
      tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1363
0
      x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1364
0
      x1r = tmp;
1365
1366
0
      *ptr_y = (x0r) - (x1r);
1367
0
      *(ptr_y + 1) = (x0i) - (x1i);
1368
0
      ptr_y -= (del << 1);
1369
1370
0
      *ptr_y = (x0r) + (x1r);
1371
0
      *(ptr_y + 1) = (x0i) + (x1i);
1372
0
      ptr_y += 2;
1373
0
    }
1374
0
    twiddles = ptr_w;
1375
0
    for (j = del / 2; j != 0; j--)
1376
0
    {
1377
0
      FLOAT32 w1h = *twiddles;
1378
0
      FLOAT32 w1l = *(twiddles + 1);
1379
0
      FLOAT32 tmp;
1380
0
      twiddles += nodespacing * 2;
1381
1382
0
      x0r = *ptr_y;
1383
0
      x0i = *(ptr_y + 1);
1384
0
      ptr_y += (del << 1);
1385
1386
0
      x1r = *ptr_y;
1387
0
      x1i = *(ptr_y + 1);
1388
1389
0
      tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
1390
0
      x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
1391
0
      x1r = tmp;
1392
1393
0
      *ptr_y = (x0r) - (x1r);
1394
0
      *(ptr_y + 1) = (x0i) - (x1i);
1395
0
      ptr_y -= (del << 1);
1396
1397
0
      *ptr_y = (x0r) + (x1r);
1398
0
      *(ptr_y + 1) = (x0i) + (x1i);
1399
0
      ptr_y += 2;
1400
0
    }
1401
0
  }
1402
1403
1.38M
  for (i = 0; i<nlength; i++)
1404
1.36M
  {
1405
1.36M
    xr[i] = y[2 * i];
1406
1.36M
    xi[i] = y[2 * i + 1];
1407
1.36M
  }
1408
1409
21.3k
  return;
1410
21.3k
}
1411
1412
VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1413
13.2M
                                 WORD32 fft_mode, WORD32 *preshift) {
1414
13.2M
  WORD32 i, j, k, n_stages;
1415
13.2M
  WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1416
13.2M
  WORD32 del, nodespacing, in_loop_cnt;
1417
13.2M
  WORD32 not_power_4;
1418
13.2M
  WORD32 npts, shift;
1419
13.2M
  WORD32 dig_rev_shift;
1420
13.2M
  WORD32 ptr_x[1024];
1421
13.2M
  WORD32 y[1024];
1422
13.2M
  WORD32 npoints = nlength;
1423
13.2M
  WORD32 n = 0;
1424
13.2M
  WORD32 *ptr_y = y;
1425
13.2M
  const WORD32 *ptr_w;
1426
13.2M
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
1427
13.2M
  n_stages = 30 - ixheaac_norm32(npoints);
1428
13.2M
  not_power_4 = n_stages & 1;
1429
1430
13.2M
  n_stages = n_stages >> 1;
1431
1432
13.2M
  npts = npoints;
1433
52.9M
  while (npts >> 1) {
1434
39.7M
    n++;
1435
39.7M
    npts = npts >> 1;
1436
39.7M
  }
1437
1438
13.2M
  if (n % 2 == 0)
1439
8.39M
    shift = ((n + 4)) / 2;
1440
4.85M
  else
1441
4.85M
    shift = ((n + 3) / 2);
1442
1443
315M
  for (i = 0; i < nlength; i++) {
1444
302M
    ptr_x[2 * i] = (xr[i] / (1 << (shift)));
1445
302M
    ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
1446
302M
  }
1447
1448
13.2M
  if (fft_mode == -1) {
1449
12.0M
    ptr_w = ixheaacd_twiddle_table_fft_32x32;
1450
1451
43.0M
    for (i = 0; i < npoints; i += 4) {
1452
31.0M
      WORD32 *inp = ptr_x;
1453
1454
31.0M
      DIG_REV(i, dig_rev_shift, h2);
1455
31.0M
      if (not_power_4) {
1456
18.2M
        h2 += 1;
1457
18.2M
        h2 &= ~1;
1458
18.2M
      }
1459
31.0M
      inp += (h2);
1460
1461
31.0M
      x0r = *inp;
1462
31.0M
      x0i = *(inp + 1);
1463
31.0M
      inp += (npoints >> 1);
1464
1465
31.0M
      x1r = *inp;
1466
31.0M
      x1i = *(inp + 1);
1467
31.0M
      inp += (npoints >> 1);
1468
1469
31.0M
      x2r = *inp;
1470
31.0M
      x2i = *(inp + 1);
1471
31.0M
      inp += (npoints >> 1);
1472
1473
31.0M
      x3r = *inp;
1474
31.0M
      x3i = *(inp + 1);
1475
1476
31.0M
      x0r = ixheaac_add32_sat(x0r, x2r);
1477
31.0M
      x0i = ixheaac_add32_sat(x0i, x2i);
1478
31.0M
      x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1479
31.0M
      x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1480
31.0M
      x1r = ixheaac_add32_sat(x1r, x3r);
1481
31.0M
      x1i = ixheaac_add32_sat(x1i, x3i);
1482
31.0M
      x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1483
31.0M
      x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1484
1485
31.0M
      x0r = ixheaac_add32_sat(x0r, x1r);
1486
31.0M
      x0i = ixheaac_add32_sat(x0i, x1i);
1487
31.0M
      x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1488
31.0M
      x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1489
31.0M
      x2r = ixheaac_add32_sat(x2r, x3i);
1490
31.0M
      x2i = ixheaac_sub32_sat(x2i, x3r);
1491
31.0M
      x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1492
31.0M
      x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1493
1494
31.0M
      *ptr_y++ = x0r;
1495
31.0M
      *ptr_y++ = x0i;
1496
31.0M
      *ptr_y++ = x2r;
1497
31.0M
      *ptr_y++ = x2i;
1498
31.0M
      *ptr_y++ = x1r;
1499
31.0M
      *ptr_y++ = x1i;
1500
31.0M
      *ptr_y++ = x3i;
1501
31.0M
      *ptr_y++ = x3r;
1502
31.0M
    }
1503
12.0M
    ptr_y -= 2 * npoints;
1504
12.0M
    del = 4;
1505
12.0M
    nodespacing = 64;
1506
12.0M
    in_loop_cnt = npoints >> 4;
1507
13.6M
    for (i = n_stages - 1; i > 0; i--) {
1508
1.64M
      const WORD32 *twiddles = ptr_w;
1509
1.64M
      WORD32 *data = ptr_y;
1510
1.64M
      WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1511
1.64M
      WORD32 sec_loop_cnt;
1512
1513
6.45M
      for (k = in_loop_cnt; k != 0; k--) {
1514
4.81M
        x0r = (*data);
1515
4.81M
        x0i = (*(data + 1));
1516
4.81M
        data += (del << 1);
1517
1518
4.81M
        x1r = (*data);
1519
4.81M
        x1i = (*(data + 1));
1520
4.81M
        data += (del << 1);
1521
1522
4.81M
        x2r = (*data);
1523
4.81M
        x2i = (*(data + 1));
1524
4.81M
        data += (del << 1);
1525
1526
4.81M
        x3r = (*data);
1527
4.81M
        x3i = (*(data + 1));
1528
4.81M
        data -= 3 * (del << 1);
1529
1530
4.81M
        x0r = ixheaac_add32_sat(x0r, x2r);
1531
4.81M
        x0i = ixheaac_add32_sat(x0i, x2i);
1532
4.81M
        x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1533
4.81M
        x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1534
4.81M
        x1r = ixheaac_add32_sat(x1r, x3r);
1535
4.81M
        x1i = ixheaac_add32_sat(x1i, x3i);
1536
4.81M
        x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1537
4.81M
        x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1538
1539
4.81M
        x0r = ixheaac_add32_sat(x0r, x1r);
1540
4.81M
        x0i = ixheaac_add32_sat(x0i, x1i);
1541
4.81M
        x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1542
4.81M
        x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1543
4.81M
        x2r = ixheaac_add32_sat(x2r, x3i);
1544
4.81M
        x2i = ixheaac_sub32_sat(x2i, x3r);
1545
4.81M
        x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1546
4.81M
        x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1547
1548
4.81M
        *data = x0r;
1549
4.81M
        *(data + 1) = x0i;
1550
4.81M
        data += (del << 1);
1551
1552
4.81M
        *data = x2r;
1553
4.81M
        *(data + 1) = x2i;
1554
4.81M
        data += (del << 1);
1555
1556
4.81M
        *data = x1r;
1557
4.81M
        *(data + 1) = x1i;
1558
4.81M
        data += (del << 1);
1559
1560
4.81M
        *data = x3i;
1561
4.81M
        *(data + 1) = x3r;
1562
4.81M
        data += (del << 1);
1563
4.81M
      }
1564
1.64M
      data = ptr_y + 2;
1565
1566
1.64M
      sec_loop_cnt = (nodespacing * del);
1567
1.64M
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1568
1.64M
                     (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1569
1.64M
                     (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1570
1.64M
                     (sec_loop_cnt / 256);
1571
1.64M
      j = nodespacing;
1572
1573
5.47M
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1574
3.83M
        w1h = *(twiddles + 2 * j);
1575
3.83M
        w1l = *(twiddles + 2 * j + 1);
1576
3.83M
        w2h = *(twiddles + 2 * (j << 1));
1577
3.83M
        w2l = *(twiddles + 2 * (j << 1) + 1);
1578
3.83M
        w3h = *(twiddles + 2 * j + 2 * (j << 1));
1579
3.83M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1580
1581
11.8M
        for (k = in_loop_cnt; k != 0; k--) {
1582
8.03M
          WORD32 tmp;
1583
8.03M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1584
1585
8.03M
          data += (del << 1);
1586
1587
8.03M
          x1r = *data;
1588
8.03M
          x1i = *(data + 1);
1589
8.03M
          data += (del << 1);
1590
1591
8.03M
          x2r = *data;
1592
8.03M
          x2i = *(data + 1);
1593
8.03M
          data += (del << 1);
1594
1595
8.03M
          x3r = *data;
1596
8.03M
          x3i = *(data + 1);
1597
8.03M
          data -= 3 * (del << 1);
1598
1599
8.03M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1600
8.03M
                                   ixheaacd_mult32_sat(x1i, w1h));
1601
8.03M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1602
8.03M
          x1r = tmp;
1603
1604
8.03M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1605
8.03M
                                   ixheaacd_mult32_sat(x2i, w2h));
1606
8.03M
          x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1607
8.03M
          x2r = tmp;
1608
1609
8.03M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
1610
8.03M
                                   ixheaacd_mult32_sat(x3i, w3h));
1611
8.03M
          x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1612
8.03M
          x3r = tmp;
1613
1614
8.03M
          x0r = (*data);
1615
8.03M
          x0i = (*(data + 1));
1616
1617
8.03M
          x0r = ixheaac_add32_sat(x0r, x2r);
1618
8.03M
          x0i = ixheaac_add32_sat(x0i, x2i);
1619
8.03M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1620
8.03M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1621
8.03M
          x1r = ixheaac_add32_sat(x1r, x3r);
1622
8.03M
          x1i = ixheaac_add32_sat(x1i, x3i);
1623
8.03M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1624
8.03M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1625
1626
8.03M
          x0r = ixheaac_add32_sat(x0r, x1r);
1627
8.03M
          x0i = ixheaac_add32_sat(x0i, x1i);
1628
8.03M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1629
8.03M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1630
8.03M
          x2r = ixheaac_add32_sat(x2r, x3i);
1631
8.03M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1632
8.03M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1633
8.03M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1634
1635
8.03M
          *data = x0r;
1636
8.03M
          *(data + 1) = x0i;
1637
8.03M
          data += (del << 1);
1638
1639
8.03M
          *data = x2r;
1640
8.03M
          *(data + 1) = x2i;
1641
8.03M
          data += (del << 1);
1642
1643
8.03M
          *data = x1r;
1644
8.03M
          *(data + 1) = x1i;
1645
8.03M
          data += (del << 1);
1646
1647
8.03M
          *data = x3i;
1648
8.03M
          *(data + 1) = x3r;
1649
8.03M
          data += (del << 1);
1650
8.03M
        }
1651
3.83M
        data -= 2 * npoints;
1652
3.83M
        data += 2;
1653
3.83M
      }
1654
4.38M
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1655
2.73M
        w1h = *(twiddles + 2 * j);
1656
2.73M
        w2h = *(twiddles + 2 * (j << 1));
1657
2.73M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1658
2.73M
        w1l = *(twiddles + 2 * j + 1);
1659
2.73M
        w2l = *(twiddles + 2 * (j << 1) + 1);
1660
2.73M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1661
1662
9.16M
        for (k = in_loop_cnt; k != 0; k--) {
1663
6.42M
          WORD32 tmp;
1664
6.42M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1665
6.42M
          data += (del << 1);
1666
1667
6.42M
          x1r = *data;
1668
6.42M
          x1i = *(data + 1);
1669
6.42M
          data += (del << 1);
1670
1671
6.42M
          x2r = *data;
1672
6.42M
          x2i = *(data + 1);
1673
6.42M
          data += (del << 1);
1674
1675
6.42M
          x3r = *data;
1676
6.42M
          x3i = *(data + 1);
1677
6.42M
          data -= 3 * (del << 1);
1678
1679
6.42M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1680
6.42M
                                   ixheaacd_mult32_sat(x1i, w1h));
1681
6.42M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1682
6.42M
          x1r = tmp;
1683
1684
6.42M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1685
6.42M
                                   ixheaacd_mult32_sat(x2i, w2h));
1686
6.42M
          x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1687
6.42M
          x2r = tmp;
1688
1689
6.42M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1690
6.42M
                                   ixheaacd_mult32_sat(x3i, w3l));
1691
6.42M
          x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1692
6.42M
                                   ixheaacd_mult32_sat(x3r, w3l));
1693
6.42M
          x3r = tmp;
1694
1695
6.42M
          x0r = (*data);
1696
6.42M
          x0i = (*(data + 1));
1697
1698
6.42M
          x0r = ixheaac_add32_sat(x0r, x2r);
1699
6.42M
          x0i = ixheaac_add32_sat(x0i, x2i);
1700
6.42M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1701
6.42M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1702
6.42M
          x1r = ixheaac_add32_sat(x1r, x3r);
1703
6.42M
          x1i = ixheaac_add32_sat(x1i, x3i);
1704
6.42M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1705
6.42M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1706
1707
6.42M
          x0r = ixheaac_add32_sat(x0r, x1r);
1708
6.42M
          x0i = ixheaac_add32_sat(x0i, x1i);
1709
6.42M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1710
6.42M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1711
6.42M
          x2r = ixheaac_add32_sat(x2r, x3i);
1712
6.42M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1713
6.42M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1714
6.42M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1715
1716
6.42M
          *data = x0r;
1717
6.42M
          *(data + 1) = x0i;
1718
6.42M
          data += (del << 1);
1719
1720
6.42M
          *data = x2r;
1721
6.42M
          *(data + 1) = x2i;
1722
6.42M
          data += (del << 1);
1723
1724
6.42M
          *data = x1r;
1725
6.42M
          *(data + 1) = x1i;
1726
6.42M
          data += (del << 1);
1727
1728
6.42M
          *data = x3i;
1729
6.42M
          *(data + 1) = x3r;
1730
6.42M
          data += (del << 1);
1731
6.42M
        }
1732
2.73M
        data -= 2 * npoints;
1733
2.73M
        data += 2;
1734
2.73M
      }
1735
2.73M
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1736
1.09M
        w1h = *(twiddles + 2 * j);
1737
1.09M
        w2h = *(twiddles + 2 * (j << 1) - 512);
1738
1.09M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1739
1.09M
        w1l = *(twiddles + 2 * j + 1);
1740
1.09M
        w2l = *(twiddles + 2 * (j << 1) - 511);
1741
1.09M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1742
1743
2.70M
        for (k = in_loop_cnt; k != 0; k--) {
1744
1.61M
          WORD32 tmp;
1745
1.61M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1746
1747
1.61M
          data += (del << 1);
1748
1749
1.61M
          x1r = *data;
1750
1.61M
          x1i = *(data + 1);
1751
1.61M
          data += (del << 1);
1752
1753
1.61M
          x2r = *data;
1754
1.61M
          x2i = *(data + 1);
1755
1.61M
          data += (del << 1);
1756
1757
1.61M
          x3r = *data;
1758
1.61M
          x3i = *(data + 1);
1759
1.61M
          data -= 3 * (del << 1);
1760
1761
1.61M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1762
1.61M
                                   ixheaacd_mult32_sat(x1i, w1h));
1763
1.61M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1764
1.61M
          x1r = tmp;
1765
1766
1.61M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1767
1.61M
                                   ixheaacd_mult32_sat(x2i, w2l));
1768
1.61M
          x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1769
1.61M
                                   ixheaacd_mult32_sat(x2r, w2l));
1770
1.61M
          x2r = tmp;
1771
1772
1.61M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1773
1.61M
                                   ixheaacd_mult32_sat(x3i, w3l));
1774
1.61M
          x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1775
1.61M
                                   ixheaacd_mult32_sat(x3r, w3l));
1776
1.61M
          x3r = tmp;
1777
1778
1.61M
          x0r = (*data);
1779
1.61M
          x0i = (*(data + 1));
1780
1781
1.61M
          x0r = ixheaac_add32_sat(x0r, x2r);
1782
1.61M
          x0i = ixheaac_add32_sat(x0i, x2i);
1783
1.61M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1784
1.61M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1785
1.61M
          x1r = ixheaac_add32_sat(x1r, x3r);
1786
1.61M
          x1i = ixheaac_add32_sat(x1i, x3i);
1787
1.61M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1788
1.61M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1789
1790
1.61M
          x0r = ixheaac_add32_sat(x0r, x1r);
1791
1.61M
          x0i = ixheaac_add32_sat(x0i, x1i);
1792
1.61M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1793
1.61M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1794
1.61M
          x2r = ixheaac_add32_sat(x2r, x3i);
1795
1.61M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1796
1.61M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1797
1.61M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1798
1799
1.61M
          *data = x0r;
1800
1.61M
          *(data + 1) = x0i;
1801
1.61M
          data += (del << 1);
1802
1803
1.61M
          *data = x2r;
1804
1.61M
          *(data + 1) = x2i;
1805
1.61M
          data += (del << 1);
1806
1807
1.61M
          *data = x1r;
1808
1.61M
          *(data + 1) = x1i;
1809
1.61M
          data += (del << 1);
1810
1811
1.61M
          *data = x3i;
1812
1.61M
          *(data + 1) = x3r;
1813
1.61M
          data += (del << 1);
1814
1.61M
        }
1815
1.09M
        data -= 2 * npoints;
1816
1.09M
        data += 2;
1817
1.09M
      }
1818
5.47M
      for (; j < nodespacing * del; j += nodespacing) {
1819
3.83M
        w1h = *(twiddles + 2 * j);
1820
3.83M
        w2h = *(twiddles + 2 * (j << 1) - 512);
1821
3.83M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1822
3.83M
        w1l = *(twiddles + 2 * j + 1);
1823
3.83M
        w2l = *(twiddles + 2 * (j << 1) - 511);
1824
3.83M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1825
1826
11.8M
        for (k = in_loop_cnt; k != 0; k--) {
1827
8.03M
          WORD32 tmp;
1828
8.03M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1829
1830
8.03M
          data += (del << 1);
1831
1832
8.03M
          x1r = *data;
1833
8.03M
          x1i = *(data + 1);
1834
8.03M
          data += (del << 1);
1835
1836
8.03M
          x2r = *data;
1837
8.03M
          x2i = *(data + 1);
1838
8.03M
          data += (del << 1);
1839
1840
8.03M
          x3r = *data;
1841
8.03M
          x3i = *(data + 1);
1842
8.03M
          data -= 3 * (del << 1);
1843
1844
8.03M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1845
8.03M
                                   ixheaacd_mult32_sat(x1i, w1h));
1846
8.03M
          x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1847
8.03M
          x1r = tmp;
1848
1849
8.03M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1850
8.03M
                                   ixheaacd_mult32_sat(x2i, w2l));
1851
8.03M
          x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1852
8.03M
                                   ixheaacd_mult32_sat(x2r, w2l));
1853
8.03M
          x2r = tmp;
1854
1855
8.03M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1856
8.03M
                                   ixheaacd_mult32_sat(x3r, w3l));
1857
8.03M
          x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1858
8.03M
          x3r = tmp;
1859
1860
8.03M
          x0r = (*data);
1861
8.03M
          x0i = (*(data + 1));
1862
1863
8.03M
          x0r = ixheaac_add32_sat(x0r, x2r);
1864
8.03M
          x0i = ixheaac_add32_sat(x0i, x2i);
1865
8.03M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1866
8.03M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1867
8.03M
          x1r = ixheaac_add32_sat(x1r, x3r);
1868
8.03M
          x1i = ixheaac_sub32_sat(x1i, x3i);
1869
8.03M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
1870
8.03M
          x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
1871
1872
8.03M
          x0r = ixheaac_add32_sat(x0r, x1r);
1873
8.03M
          x0i = ixheaac_add32_sat(x0i, x1i);
1874
8.03M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
1875
8.03M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
1876
8.03M
          x2r = ixheaac_add32_sat(x2r, x3i);
1877
8.03M
          x2i = ixheaac_sub32_sat(x2i, x3r);
1878
8.03M
          x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
1879
8.03M
          x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
1880
1881
8.03M
          *data = x0r;
1882
8.03M
          *(data + 1) = x0i;
1883
8.03M
          data += (del << 1);
1884
1885
8.03M
          *data = x2r;
1886
8.03M
          *(data + 1) = x2i;
1887
8.03M
          data += (del << 1);
1888
1889
8.03M
          *data = x1r;
1890
8.03M
          *(data + 1) = x1i;
1891
8.03M
          data += (del << 1);
1892
1893
8.03M
          *data = x3i;
1894
8.03M
          *(data + 1) = x3r;
1895
8.03M
          data += (del << 1);
1896
8.03M
        }
1897
3.83M
        data -= 2 * npoints;
1898
3.83M
        data += 2;
1899
3.83M
      }
1900
1.64M
      nodespacing >>= 2;
1901
1.64M
      del <<= 2;
1902
1.64M
      in_loop_cnt >>= 2;
1903
1.64M
    }
1904
12.0M
    if (not_power_4) {
1905
4.54M
      const WORD32 *twiddles = ptr_w;
1906
4.54M
      nodespacing <<= 1;
1907
4.54M
      shift += 1;
1908
1909
22.7M
      for (j = del / 2; j != 0; j--) {
1910
18.2M
        WORD32 w1h = *twiddles;
1911
18.2M
        WORD32 w1l = *(twiddles + 1);
1912
18.2M
        WORD32 tmp;
1913
18.2M
        twiddles += nodespacing * 2;
1914
1915
18.2M
        x0r = *ptr_y;
1916
18.2M
        x0i = *(ptr_y + 1);
1917
18.2M
        ptr_y += (del << 1);
1918
1919
18.2M
        x1r = *ptr_y;
1920
18.2M
        x1i = *(ptr_y + 1);
1921
1922
18.2M
        tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1923
18.2M
                                 ixheaacd_mult32_sat(x1i, w1h));
1924
18.2M
        x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1925
18.2M
        x1r = tmp;
1926
1927
18.2M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
1928
18.2M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1929
18.2M
        ptr_y -= (del << 1);
1930
1931
18.2M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
1932
18.2M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1933
18.2M
        ptr_y += 2;
1934
18.2M
      }
1935
4.54M
      twiddles = ptr_w;
1936
22.7M
      for (j = del / 2; j != 0; j--) {
1937
18.2M
        WORD32 w1h = *twiddles;
1938
18.2M
        WORD32 w1l = *(twiddles + 1);
1939
18.2M
        WORD32 tmp;
1940
18.2M
        twiddles += nodespacing * 2;
1941
1942
18.2M
        x0r = *ptr_y;
1943
18.2M
        x0i = *(ptr_y + 1);
1944
18.2M
        ptr_y += (del << 1);
1945
1946
18.2M
        x1r = *ptr_y;
1947
18.2M
        x1i = *(ptr_y + 1);
1948
1949
18.2M
        tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
1950
18.2M
                                 ixheaacd_mult32_sat(x1i, w1l));
1951
18.2M
        x1i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
1952
18.2M
                                 ixheaacd_mult32_sat(x1r, w1l));
1953
18.2M
        x1r = tmp;
1954
1955
18.2M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
1956
18.2M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1957
18.2M
        ptr_y -= (del << 1);
1958
1959
18.2M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
1960
18.2M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1961
18.2M
        ptr_y += 2;
1962
18.2M
      }
1963
4.54M
    }
1964
12.0M
  }
1965
1966
1.22M
  else {
1967
1.22M
    ptr_w = ixheaacd_twiddle_table_fft_32x32;
1968
1969
45.8M
    for (i = 0; i < npoints; i += 4) {
1970
44.6M
      WORD32 *inp = ptr_x;
1971
1972
44.6M
      DIG_REV(i, dig_rev_shift, h2);
1973
44.6M
      if (not_power_4) {
1974
31.7M
        h2 += 1;
1975
31.7M
        h2 &= ~1;
1976
31.7M
      }
1977
44.6M
      inp += (h2);
1978
1979
44.6M
      x0r = *inp;
1980
44.6M
      x0i = *(inp + 1);
1981
44.6M
      inp += (npoints >> 1);
1982
1983
44.6M
      x1r = *inp;
1984
44.6M
      x1i = *(inp + 1);
1985
44.6M
      inp += (npoints >> 1);
1986
1987
44.6M
      x2r = *inp;
1988
44.6M
      x2i = *(inp + 1);
1989
44.6M
      inp += (npoints >> 1);
1990
1991
44.6M
      x3r = *inp;
1992
44.6M
      x3i = *(inp + 1);
1993
1994
44.6M
      x0r = ixheaac_add32_sat(x0r, x2r);
1995
44.6M
      x0i = ixheaac_add32_sat(x0i, x2i);
1996
44.6M
      x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
1997
44.6M
      x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
1998
44.6M
      x1r = ixheaac_add32_sat(x1r, x3r);
1999
44.6M
      x1i = ixheaac_add32_sat(x1i, x3i);
2000
44.6M
      x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2001
44.6M
      x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2002
2003
44.6M
      x0r = ixheaac_add32_sat(x0r, x1r);
2004
44.6M
      x0i = ixheaac_add32_sat(x0i, x1i);
2005
44.6M
      x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2006
44.6M
      x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2007
44.6M
      x2r = ixheaac_sub32_sat(x2r, x3i);
2008
44.6M
      x2i = ixheaac_add32_sat(x2i, x3r);
2009
44.6M
      x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2010
44.6M
      x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2011
2012
44.6M
      *ptr_y++ = x0r;
2013
44.6M
      *ptr_y++ = x0i;
2014
44.6M
      *ptr_y++ = x2r;
2015
44.6M
      *ptr_y++ = x2i;
2016
44.6M
      *ptr_y++ = x1r;
2017
44.6M
      *ptr_y++ = x1i;
2018
44.6M
      *ptr_y++ = x3i;
2019
44.6M
      *ptr_y++ = x3r;
2020
44.6M
    }
2021
1.22M
    ptr_y -= 2 * npoints;
2022
1.22M
    del = 4;
2023
1.22M
    nodespacing = 64;
2024
1.22M
    in_loop_cnt = npoints >> 4;
2025
3.75M
    for (i = n_stages - 1; i > 0; i--) {
2026
2.52M
      const WORD32 *twiddles = ptr_w;
2027
2.52M
      WORD32 *data = ptr_y;
2028
2.52M
      WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
2029
2.52M
      WORD32 sec_loop_cnt;
2030
2031
16.8M
      for (k = in_loop_cnt; k != 0; k--) {
2032
14.3M
        x0r = (*data);
2033
14.3M
        x0i = (*(data + 1));
2034
14.3M
        data += (del << 1);
2035
2036
14.3M
        x1r = (*data);
2037
14.3M
        x1i = (*(data + 1));
2038
14.3M
        data += (del << 1);
2039
2040
14.3M
        x2r = (*data);
2041
14.3M
        x2i = (*(data + 1));
2042
14.3M
        data += (del << 1);
2043
2044
14.3M
        x3r = (*data);
2045
14.3M
        x3i = (*(data + 1));
2046
14.3M
        data -= 3 * (del << 1);
2047
2048
14.3M
        x0r = ixheaac_add32_sat(x0r, x2r);
2049
14.3M
        x0i = ixheaac_add32_sat(x0i, x2i);
2050
14.3M
        x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2051
14.3M
        x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2052
14.3M
        x1r = ixheaac_add32_sat(x1r, x3r);
2053
14.3M
        x1i = ixheaac_add32_sat(x1i, x3i);
2054
14.3M
        x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2055
14.3M
        x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2056
2057
14.3M
        x0r = ixheaac_add32_sat(x0r, x1r);
2058
14.3M
        x0i = ixheaac_add32_sat(x0i, x1i);
2059
14.3M
        x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2060
14.3M
        x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2061
14.3M
        x2r = ixheaac_sub32_sat(x2r, x3i);
2062
14.3M
        x2i = ixheaac_add32_sat(x2i, x3r);
2063
14.3M
        x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2064
14.3M
        x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2065
2066
14.3M
        *data = x0r;
2067
14.3M
        *(data + 1) = x0i;
2068
14.3M
        data += (del << 1);
2069
2070
14.3M
        *data = x2r;
2071
14.3M
        *(data + 1) = x2i;
2072
14.3M
        data += (del << 1);
2073
2074
14.3M
        *data = x1r;
2075
14.3M
        *(data + 1) = x1i;
2076
14.3M
        data += (del << 1);
2077
2078
14.3M
        *data = x3i;
2079
14.3M
        *(data + 1) = x3r;
2080
14.3M
        data += (del << 1);
2081
14.3M
      }
2082
2.52M
      data = ptr_y + 2;
2083
2084
2.52M
      sec_loop_cnt = (nodespacing * del);
2085
2.52M
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
2086
2.52M
                     (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
2087
2.52M
                     (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
2088
2.52M
                     (sec_loop_cnt / 256);
2089
2.52M
      j = nodespacing;
2090
2091
13.9M
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
2092
11.4M
        w1h = *(twiddles + 2 * j);
2093
11.4M
        w2h = *(twiddles + 2 * (j << 1));
2094
11.4M
        w3h = *(twiddles + 2 * j + 2 * (j << 1));
2095
11.4M
        w1l = *(twiddles + 2 * j + 1);
2096
11.4M
        w2l = *(twiddles + 2 * (j << 1) + 1);
2097
11.4M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
2098
2099
45.8M
        for (k = in_loop_cnt; k != 0; k--) {
2100
34.4M
          WORD32 tmp;
2101
34.4M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2102
2103
34.4M
          data += (del << 1);
2104
2105
34.4M
          x1r = *data;
2106
34.4M
          x1i = *(data + 1);
2107
34.4M
          data += (del << 1);
2108
2109
34.4M
          x2r = *data;
2110
34.4M
          x2i = *(data + 1);
2111
34.4M
          data += (del << 1);
2112
2113
34.4M
          x3r = *data;
2114
34.4M
          x3i = *(data + 1);
2115
34.4M
          data -= 3 * (del << 1);
2116
2117
34.4M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2118
34.4M
                                   ixheaacd_mult32_sat(x1i, w1h));
2119
34.4M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2120
34.4M
          x1r = tmp;
2121
2122
34.4M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2123
34.4M
                                   ixheaacd_mult32_sat(x2i, w2h));
2124
34.4M
          x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2125
34.4M
          x2r = tmp;
2126
2127
34.4M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2128
34.4M
                                   ixheaacd_mult32_sat(x3i, w3h));
2129
34.4M
          x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2130
34.4M
          x3r = tmp;
2131
2132
34.4M
          x0r = (*data);
2133
34.4M
          x0i = (*(data + 1));
2134
2135
34.4M
          x0r = ixheaac_add32_sat(x0r, x2r);
2136
34.4M
          x0i = ixheaac_add32_sat(x0i, x2i);
2137
34.4M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2138
34.4M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2139
34.4M
          x1r = ixheaac_add32_sat(x1r, x3r);
2140
34.4M
          x1i = ixheaac_add32_sat(x1i, x3i);
2141
34.4M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2142
34.4M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2143
2144
34.4M
          x0r = ixheaac_add32_sat(x0r, x1r);
2145
34.4M
          x0i = ixheaac_add32_sat(x0i, x1i);
2146
34.4M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2147
34.4M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2148
34.4M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2149
34.4M
          x2i = ixheaac_add32_sat(x2i, x3r);
2150
34.4M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2151
34.4M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2152
2153
34.4M
          *data = x0r;
2154
34.4M
          *(data + 1) = x0i;
2155
34.4M
          data += (del << 1);
2156
2157
34.4M
          *data = x2r;
2158
34.4M
          *(data + 1) = x2i;
2159
34.4M
          data += (del << 1);
2160
2161
34.4M
          *data = x1r;
2162
34.4M
          *(data + 1) = x1i;
2163
34.4M
          data += (del << 1);
2164
2165
34.4M
          *data = x3i;
2166
34.4M
          *(data + 1) = x3r;
2167
34.4M
          data += (del << 1);
2168
34.4M
        }
2169
11.4M
        data -= 2 * npoints;
2170
11.4M
        data += 2;
2171
11.4M
      }
2172
9.49M
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
2173
6.96M
        w1h = *(twiddles + 2 * j);
2174
6.96M
        w2h = *(twiddles + 2 * (j << 1));
2175
6.96M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2176
6.96M
        w1l = *(twiddles + 2 * j + 1);
2177
6.96M
        w2l = *(twiddles + 2 * (j << 1) + 1);
2178
6.96M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2179
2180
31.3M
        for (k = in_loop_cnt; k != 0; k--) {
2181
24.4M
          WORD32 tmp;
2182
24.4M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2183
2184
24.4M
          data += (del << 1);
2185
2186
24.4M
          x1r = *data;
2187
24.4M
          x1i = *(data + 1);
2188
24.4M
          data += (del << 1);
2189
2190
24.4M
          x2r = *data;
2191
24.4M
          x2i = *(data + 1);
2192
24.4M
          data += (del << 1);
2193
2194
24.4M
          x3r = *data;
2195
24.4M
          x3i = *(data + 1);
2196
24.4M
          data -= 3 * (del << 1);
2197
2198
24.4M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2199
24.4M
                                   ixheaacd_mult32_sat(x1i, w1h));
2200
24.4M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2201
24.4M
          x1r = tmp;
2202
2203
24.4M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2204
24.4M
                                   ixheaacd_mult32_sat(x2i, w2h));
2205
24.4M
          x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2206
24.4M
          x2r = tmp;
2207
2208
24.4M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2209
24.4M
                                   ixheaacd_mult32_sat(x3i, w3l));
2210
24.4M
          x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2211
24.4M
                                   ixheaacd_mult32_sat(x3i, w3h));
2212
24.4M
          x3r = tmp;
2213
2214
24.4M
          x0r = (*data);
2215
24.4M
          x0i = (*(data + 1));
2216
2217
24.4M
          x0r = ixheaac_add32_sat(x0r, x2r);
2218
24.4M
          x0i = ixheaac_add32_sat(x0i, x2i);
2219
24.4M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2220
24.4M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2221
24.4M
          x1r = ixheaac_add32_sat(x1r, x3r);
2222
24.4M
          x1i = ixheaac_add32_sat(x1i, x3i);
2223
24.4M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2224
24.4M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2225
2226
24.4M
          x0r = ixheaac_add32_sat(x0r, x1r);
2227
24.4M
          x0i = ixheaac_add32_sat(x0i, x1i);
2228
24.4M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2229
24.4M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2230
24.4M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2231
24.4M
          x2i = ixheaac_add32_sat(x2i, x3r);
2232
24.4M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2233
24.4M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2234
2235
24.4M
          *data = x0r;
2236
24.4M
          *(data + 1) = x0i;
2237
24.4M
          data += (del << 1);
2238
2239
24.4M
          *data = x2r;
2240
24.4M
          *(data + 1) = x2i;
2241
24.4M
          data += (del << 1);
2242
2243
24.4M
          *data = x1r;
2244
24.4M
          *(data + 1) = x1i;
2245
24.4M
          data += (del << 1);
2246
2247
24.4M
          *data = x3i;
2248
24.4M
          *(data + 1) = x3r;
2249
24.4M
          data += (del << 1);
2250
24.4M
        }
2251
6.96M
        data -= 2 * npoints;
2252
6.96M
        data += 2;
2253
6.96M
      }
2254
6.96M
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
2255
4.43M
        w1h = *(twiddles + 2 * j);
2256
4.43M
        w2h = *(twiddles + 2 * (j << 1) - 512);
2257
4.43M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2258
4.43M
        w1l = *(twiddles + 2 * j + 1);
2259
4.43M
        w2l = *(twiddles + 2 * (j << 1) - 511);
2260
4.43M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2261
2262
14.4M
        for (k = in_loop_cnt; k != 0; k--) {
2263
10.0M
          WORD32 tmp;
2264
10.0M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2265
2266
10.0M
          data += (del << 1);
2267
2268
10.0M
          x1r = *data;
2269
10.0M
          x1i = *(data + 1);
2270
10.0M
          data += (del << 1);
2271
2272
10.0M
          x2r = *data;
2273
10.0M
          x2i = *(data + 1);
2274
10.0M
          data += (del << 1);
2275
2276
10.0M
          x3r = *data;
2277
10.0M
          x3i = *(data + 1);
2278
10.0M
          data -= 3 * (del << 1);
2279
2280
10.0M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2281
10.0M
                                   ixheaacd_mult32_sat(x1i, w1h));
2282
10.0M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2283
10.0M
          x1r = tmp;
2284
2285
10.0M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2286
10.0M
                                   ixheaacd_mult32_sat(x2i, w2l));
2287
10.0M
          x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2288
10.0M
                                   ixheaacd_mult32_sat(x2i, w2h));
2289
10.0M
          x2r = tmp;
2290
2291
10.0M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2292
10.0M
                                   ixheaacd_mult32_sat(x3i, w3l));
2293
10.0M
          x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2294
10.0M
                                   ixheaacd_mult32_sat(x3i, w3h));
2295
10.0M
          x3r = tmp;
2296
2297
10.0M
          x0r = (*data);
2298
10.0M
          x0i = (*(data + 1));
2299
2300
10.0M
          x0r = ixheaac_add32_sat(x0r, x2r);
2301
10.0M
          x0i = ixheaac_add32_sat(x0i, x2i);
2302
10.0M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2303
10.0M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2304
10.0M
          x1r = ixheaac_add32_sat(x1r, x3r);
2305
10.0M
          x1i = ixheaac_add32_sat(x1i, x3i);
2306
10.0M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2307
10.0M
          x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2308
2309
10.0M
          x0r = ixheaac_add32_sat(x0r, x1r);
2310
10.0M
          x0i = ixheaac_add32_sat(x0i, x1i);
2311
10.0M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2312
10.0M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2313
10.0M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2314
10.0M
          x2i = ixheaac_add32_sat(x2i, x3r);
2315
10.0M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2316
10.0M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2317
2318
10.0M
          *data = x0r;
2319
10.0M
          *(data + 1) = x0i;
2320
10.0M
          data += (del << 1);
2321
2322
10.0M
          *data = x2r;
2323
10.0M
          *(data + 1) = x2i;
2324
10.0M
          data += (del << 1);
2325
2326
10.0M
          *data = x1r;
2327
10.0M
          *(data + 1) = x1i;
2328
10.0M
          data += (del << 1);
2329
2330
10.0M
          *data = x3i;
2331
10.0M
          *(data + 1) = x3r;
2332
10.0M
          data += (del << 1);
2333
10.0M
        }
2334
4.43M
        data -= 2 * npoints;
2335
4.43M
        data += 2;
2336
4.43M
      }
2337
13.9M
      for (; j < nodespacing * del; j += nodespacing) {
2338
11.4M
        w1h = *(twiddles + 2 * j);
2339
11.4M
        w2h = *(twiddles + 2 * (j << 1) - 512);
2340
11.4M
        w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
2341
11.4M
        w1l = *(twiddles + 2 * j + 1);
2342
11.4M
        w2l = *(twiddles + 2 * (j << 1) - 511);
2343
11.4M
        w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
2344
2345
45.8M
        for (k = in_loop_cnt; k != 0; k--) {
2346
34.4M
          WORD32 tmp;
2347
34.4M
          WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2348
2349
34.4M
          data += (del << 1);
2350
2351
34.4M
          x1r = *data;
2352
34.4M
          x1i = *(data + 1);
2353
34.4M
          data += (del << 1);
2354
2355
34.4M
          x2r = *data;
2356
34.4M
          x2i = *(data + 1);
2357
34.4M
          data += (del << 1);
2358
2359
34.4M
          x3r = *data;
2360
34.4M
          x3i = *(data + 1);
2361
34.4M
          data -= 3 * (del << 1);
2362
2363
34.4M
          tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2364
34.4M
                                   ixheaacd_mult32_sat(x1i, w1h));
2365
34.4M
          x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2366
34.4M
          x1r = tmp;
2367
2368
34.4M
          tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2369
34.4M
                                   ixheaacd_mult32_sat(x2i, w2l));
2370
34.4M
          x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2371
34.4M
                                   ixheaacd_mult32_sat(x2i, w2h));
2372
34.4M
          x2r = tmp;
2373
2374
34.4M
          tmp = -ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2375
34.4M
                                    ixheaacd_mult32_sat(x3i, w3h));
2376
34.4M
          x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2377
34.4M
          x3r = tmp;
2378
2379
34.4M
          x0r = (*data);
2380
34.4M
          x0i = (*(data + 1));
2381
2382
34.4M
          x0r = ixheaac_add32_sat(x0r, x2r);
2383
34.4M
          x0i = ixheaac_add32_sat(x0i, x2i);
2384
34.4M
          x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
2385
34.4M
          x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
2386
34.4M
          x1r = ixheaac_add32_sat(x1r, x3r);
2387
34.4M
          x1i = ixheaac_sub32_sat(x1i, x3i);
2388
34.4M
          x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
2389
34.4M
          x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
2390
2391
34.4M
          x0r = ixheaac_add32_sat(x0r, x1r);
2392
34.4M
          x0i = ixheaac_add32_sat(x0i, x1i);
2393
34.4M
          x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
2394
34.4M
          x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
2395
34.4M
          x2r = ixheaac_sub32_sat(x2r, x3i);
2396
34.4M
          x2i = ixheaac_add32_sat(x2i, x3r);
2397
34.4M
          x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
2398
34.4M
          x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
2399
2400
34.4M
          *data = x0r;
2401
34.4M
          *(data + 1) = x0i;
2402
34.4M
          data += (del << 1);
2403
2404
34.4M
          *data = x2r;
2405
34.4M
          *(data + 1) = x2i;
2406
34.4M
          data += (del << 1);
2407
2408
34.4M
          *data = x1r;
2409
34.4M
          *(data + 1) = x1i;
2410
34.4M
          data += (del << 1);
2411
2412
34.4M
          *data = x3i;
2413
34.4M
          *(data + 1) = x3r;
2414
34.4M
          data += (del << 1);
2415
34.4M
        }
2416
11.4M
        data -= 2 * npoints;
2417
11.4M
        data += 2;
2418
11.4M
      }
2419
2.52M
      nodespacing >>= 2;
2420
2.52M
      del <<= 2;
2421
2.52M
      in_loop_cnt >>= 2;
2422
2.52M
    }
2423
1.22M
    if (not_power_4) {
2424
307k
      const WORD32 *twiddles = ptr_w;
2425
307k
      nodespacing <<= 1;
2426
307k
      shift += 1;
2427
32.0M
      for (j = del / 2; j != 0; j--) {
2428
31.7M
        WORD32 w1h = *twiddles;
2429
31.7M
        WORD32 w1l = *(twiddles + 1);
2430
2431
31.7M
        WORD32 tmp;
2432
31.7M
        twiddles += nodespacing * 2;
2433
2434
31.7M
        x0r = *ptr_y;
2435
31.7M
        x0i = *(ptr_y + 1);
2436
31.7M
        ptr_y += (del << 1);
2437
2438
31.7M
        x1r = *ptr_y;
2439
31.7M
        x1i = *(ptr_y + 1);
2440
2441
31.7M
        tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2442
31.7M
                                 ixheaacd_mult32_sat(x1i, w1h));
2443
31.7M
        x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2444
31.7M
        x1r = tmp;
2445
2446
31.7M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
2447
31.7M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2448
31.7M
        ptr_y -= (del << 1);
2449
2450
31.7M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
2451
31.7M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2452
31.7M
        ptr_y += 2;
2453
31.7M
      }
2454
307k
      twiddles = ptr_w;
2455
32.0M
      for (j = del / 2; j != 0; j--) {
2456
31.7M
        WORD32 w1h = *twiddles;
2457
31.7M
        WORD32 w1l = *(twiddles + 1);
2458
31.7M
        WORD32 tmp;
2459
31.7M
        twiddles += nodespacing * 2;
2460
2461
31.7M
        x0r = *ptr_y;
2462
31.7M
        x0i = *(ptr_y + 1);
2463
31.7M
        ptr_y += (del << 1);
2464
2465
31.7M
        x1r = *ptr_y;
2466
31.7M
        x1i = *(ptr_y + 1);
2467
2468
31.7M
        tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
2469
31.7M
                                 ixheaacd_mult32_sat(x1i, w1l));
2470
31.7M
        x1i = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2471
31.7M
                                 ixheaacd_mult32_sat(x1i, w1h));
2472
31.7M
        x1r = tmp;
2473
2474
31.7M
        *ptr_y = (x0r) / 2 - (x1r) / 2;
2475
31.7M
        *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2476
31.7M
        ptr_y -= (del << 1);
2477
2478
31.7M
        *ptr_y = (x0r) / 2 + (x1r) / 2;
2479
31.7M
        *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2480
31.7M
        ptr_y += 2;
2481
31.7M
      }
2482
307k
    }
2483
1.22M
  }
2484
2485
315M
  for (i = 0; i < nlength; i++) {
2486
302M
    xr[i] = y[2 * i];
2487
302M
    xi[i] = y[2 * i + 1];
2488
302M
  }
2489
2490
13.2M
  *preshift = shift - *preshift;
2491
13.2M
  return;
2492
13.2M
}
2493
2494
/*
 * Three-point complex butterfly on interleaved fixed-point data.
 *
 * inp      - six words read as (re0, im0, re1, im1, re2, im2).
 * op       - six words written in the same interleaved layout.
 * sign_dir - multiplier applied to the rotation constant; the caller in this
 *            file passes its fft_mode value (presumably +1 / -1 - confirm).
 *
 * All additions and subtractions saturate except the single ixheaac_sub32()
 * that forms the shared real term.
 */
static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
                                                        WORD32 sign_dir) {
  /* 1859775393 is approximately 0.8660254 * 2^31, i.e. sin(60 deg) scaled. */
  const WORD32 sin_term = -1859775393 * sign_dir;

  /* Sum of points 0 and 1; point 2 is added when op[0]/op[1] are written. */
  const WORD32 sum01_re = ixheaac_add32_sat(inp[0], inp[2]);
  const WORD32 sum01_im = ixheaac_add32_sat(inp[1], inp[3]);

  /* Sum and difference of points 1 and 2. */
  const WORD32 sum12_re = ixheaac_add32_sat(inp[2], inp[4]);
  const WORD32 sum12_im = ixheaac_add32_sat(inp[3], inp[5]);
  const WORD32 diff12_re = ixheaac_sub32_sat(inp[2], inp[4]);
  const WORD32 diff12_im = ixheaac_sub32_sat(inp[3], inp[5]);

  /* Halved sums and the differences scaled by the rotation constant. */
  const WORD32 half_sum_re = sum12_re >> 1;
  const WORD32 half_sum_im = sum12_im >> 1;
  const WORD32 rot_from_im = ixheaac_mult32_shl(diff12_im, sin_term);
  const WORD32 rot_from_re = ixheaac_mult32_shl(diff12_re, sin_term);

  /* Real term shared by outputs 1 and 2 (plain, non-saturating subtract). */
  const WORD32 base_re = ixheaac_sub32(inp[0], half_sum_re);

  /* Output 0: sum of all three points. */
  op[0] = ixheaac_add32_sat(sum01_re, inp[4]);
  op[1] = ixheaac_add32_sat(sum01_im, inp[5]);

  /* Output 1. */
  op[2] = ixheaac_add32_sat(base_re, rot_from_im);
  op[3] = ixheaac_sub32_sat(ixheaac_sub32_sat(inp[1], rot_from_re), half_sum_im);

  /* Output 2: same terms as output 1 with the rotated part negated. */
  op[4] = ixheaac_sub32_sat(base_re, rot_from_im);
  op[5] = ixheaac_sub32_sat(ixheaac_add32_sat(inp[1], rot_from_re), half_sum_im);
}
2530
2531
/*
 * Complex FFT for lengths that contain a factor of three.
 *
 * Steps, in order:
 *   1. Divide every factor of three out of nlength, leaving mpass.
 *   2. For each of the first 3 * cnfac offsets, gather every third sample,
 *      run the selected radix-2 FFT (ixheaacd_complex_fft_p2) over mpass
 *      points and scatter the result back in place.
 *   3. Update *preshift from n = number of right shifts that reduce mpass
 *      to 1.
 *   4. Halve and interleave the data, rotate the 2nd and 3rd point of each
 *      triple by entries of the 3-point twiddle tables, and run mpass
 *      three-point butterflies.
 *   5. Write the butterfly outputs back as three consecutive runs of mpass
 *      values in xr / xi.
 *
 * xr, xi   - real / imaginary data, transformed in place.
 * nlength  - number of complex points.
 * fft_mode - negative selects one rotation sense, any other value the
 *            conjugate one; it is also forwarded to the radix-2 FFT and to
 *            the three-point butterfly.
 * preshift - in/out; replaced by (shift - *preshift + 1).
 *
 * Nothing here is range-checked. The scratch arrays are fixed-size: the
 * interleaved buffers hold 1024 words (so 2 * nlength must fit), and the
 * gather buffers hold 384 words (so mpass must fit). nlength == 0 never
 * leaves the factor-stripping loop, because 0 % 3 == 0 stays true.
 */
VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
                             WORD32 fft_mode, WORD32 *preshift) {
  WORD32 sub_re[384];
  WORD32 sub_im[384];
  WORD32 x[1024];
  WORD32 y[1024];
  WORD32 i, j;
  WORD32 p2_shift = 0; /* in/out scratch for the radix-2 FFT; carried across calls */
  WORD32 shift;
  WORD32 cnfac = 0;
  WORD32 mpass = nlength;
  WORD32 npts;
  WORD32 n = 0;
  WORD32 tw_step;
  const WORD32 *w_re = ixheaacd_twiddle_table_3pr;
  const WORD32 *w_im = ixheaacd_twiddle_table_3pi;

  /* Strip the factors of three; cnfac counts them, mpass is what is left. */
  for (; mpass % 3 == 0; mpass /= 3) {
    cnfac++;
  }

  /* Radix-2 FFT over each stride-3 sub-sequence, done via scratch buffers. */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      sub_re[j] = xr[3 * j + i];
      sub_im[j] = xi[3 * j + i];
    }

    (*ixheaacd_complex_fft_p2)(sub_re, sub_im, mpass, fft_mode, &p2_shift);

    for (j = 0; j < mpass; j++) {
      xr[3 * j + i] = sub_re[j];
      xi[3 * j + i] = sub_im[j];
    }
  }

  /* n = how many times mpass can be halved before it reaches 1. */
  for (npts = mpass; npts >> 1; npts = npts >> 1) {
    n++;
  }

  shift = (n % 2 == 0) ? ((n + 4) / 2) : ((n + 5) / 2);
  *preshift = shift - *preshift + 1;

  /* Interleave as (re, im) pairs, dropping one bit from every sample. */
  for (i = 0; i < nlength; i++) {
    x[2 * i] = xr[i] >> 1;
    x[2 * i + 1] = xi[i] >> 1;
  }

  /*
   * Each triple uses table entries [1] and [2] relative to its base; the base
   * then advances by 3 * (128 / mpass) entries, which is the net effect of
   * stepping by 1, 1 and 3 * (128 / mpass - 1) + 1.
   */
  tw_step = 3 * (128 / mpass);

  if (fft_mode < 0) {
    for (i = 0; i < nlength; i += 3) {
      WORD32 *pt = &x[2 * i];
      WORD32 re;

      /* Second point of the triple: (re + j*im) * (w_re + j*w_im). */
      re = ixheaac_sub32_sat(ixheaacd_mult32_sat(pt[2], w_re[1]),
                             ixheaacd_mult32_sat(pt[3], w_im[1]));
      pt[3] = ixheaac_add32_sat(ixheaacd_mult32_sat(pt[2], w_im[1]),
                                ixheaacd_mult32_sat(pt[3], w_re[1]));
      pt[2] = re;

      /* Third point of the triple, next table entry. */
      re = ixheaac_sub32_sat(ixheaacd_mult32_sat(pt[4], w_re[2]),
                             ixheaacd_mult32_sat(pt[5], w_im[2]));
      pt[5] = ixheaac_add32_sat(ixheaacd_mult32_sat(pt[4], w_im[2]),
                                ixheaacd_mult32_sat(pt[5], w_re[2]));
      pt[4] = re;

      w_re += tw_step;
      w_im += tw_step;
    }
  } else {
    for (i = 0; i < nlength; i += 3) {
      WORD32 *pt = &x[2 * i];
      WORD32 re;

      /* Second point of the triple: (re + j*im) * (w_re - j*w_im). */
      re = ixheaac_add32_sat(ixheaacd_mult32_sat(pt[2], w_re[1]),
                             ixheaacd_mult32_sat(pt[3], w_im[1]));
      pt[3] = ixheaac_sub32_sat(ixheaacd_mult32_sat(pt[3], w_re[1]),
                                ixheaacd_mult32_sat(pt[2], w_im[1]));
      pt[2] = re;

      /* Third point of the triple, next table entry. */
      re = ixheaac_add32_sat(ixheaacd_mult32_sat(pt[4], w_re[2]),
                             ixheaacd_mult32_sat(pt[5], w_im[2]));
      pt[5] = ixheaac_sub32_sat(ixheaacd_mult32_sat(pt[5], w_re[2]),
                                ixheaacd_mult32_sat(pt[4], w_im[2]));
      pt[4] = re;

      w_re += tw_step;
      w_im += tw_step;
    }
  }

  /* One three-point butterfly per group of six interleaved words. */
  for (i = 0; i < mpass; i++) {
    ixheaacd_complex_3point_fft(&x[6 * i], &y[6 * i], fft_mode);
  }

  /* Butterfly output k of group i lands at index k * mpass + i. */
  for (i = 0; i < mpass; i++) {
    const WORD32 *grp = &y[6 * i];

    xr[i] = grp[0];
    xi[i] = grp[1];
    xr[mpass + i] = grp[2];
    xi[mpass + i] = grp[3];
    xr[2 * mpass + i] = grp[4];
    xi[2 * mpass + i] = grp[5];
  }
}
2663
2664
/*
 * Complex FFT entry point: picks the implementation from the length.
 *
 * When (nlength & (nlength - 1)) is zero - a single set bit, or nlength == 0 -
 * the selected radix-2 routine (ixheaacd_complex_fft_p2) is called. Every
 * other length is handed to ixheaacd_complex_fft_p3. All arguments are passed
 * through unchanged.
 */
VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
                          WORD32 *preshift) {
  if ((nlength & (nlength - 1)) == 0) {
    (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
    return;
  }

  ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
}