Coverage Report

Created: 2025-08-03 06:57

/src/libxaac/decoder/ixheaacd_fft_ifft_32x32.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
#include <stdlib.h>
21
#include <math.h>
22
#include "ixheaac_type_def.h"
23
#include "ixheaac_constants.h"
24
#include "ixheaac_basic_ops32.h"
25
#include "ixheaac_fft_ifft_rom.h"
26
#include "ixheaacd_dsp_fft32x32s.h"
27
28
/*
 * DIG_REV(i, m, j)
 *
 * Takes (i) as an unsigned value and swaps neighbouring 2-bit groups, then
 * neighbouring nibbles, then neighbouring bytes. The combined effect is that
 * the order of the 2-bit digits inside each 16-bit half of the value is
 * reversed. The result is shifted right by (m) bits and assigned to (j).
 *
 * (i) is evaluated once; (j) must be an assignable lvalue.
 */
#define DIG_REV(i, m, j)                                                  \
  do {                                                                    \
    unsigned rev_ = (i);                                                  \
    rev_ = ((rev_ & 0x33333333u) << 2) | ((rev_ & 0xCCCCCCCCu) >> 2);     \
    rev_ = ((rev_ & 0x0F0F0F0Fu) << 4) | ((rev_ & 0xF0F0F0F0u) >> 4);     \
    rev_ = ((rev_ & 0x00FF00FFu) << 8) | ((rev_ & 0xFF00FF00u) >> 8);     \
    (j) = rev_ >> (m);                                                    \
  } while (0)
36
37
0
/* Multiplies two FLOAT64 operands and returns the FLOAT64 product. */
FLOAT64 ixheaacd_mult32X32float(FLOAT64 a, FLOAT64 b) {
  /* Stored in a named FLOAT64 first so the value is of the declared type
   * before it is returned. */
  const FLOAT64 product = a * b;
  return product;
}
44
45
0
/* Multiply-accumulate: returns a + (b * c), computed in FLOAT64. */
FLOAT64 ixheaacd_mac32X32float(FLOAT64 a, FLOAT64 b, FLOAT64 c) {
  /* Kept as a single expression, then stored in a FLOAT64 before returning. */
  const FLOAT64 accumulated = a + b * c;
  return accumulated;
}
52
53
0
/*
 * Seven-point complex transform on interleaved (re, im) data.
 *
 * inp : 14 FLOAT32 values, read as seven complex numbers inp[2k], inp[2k + 1].
 * op  : 14 FLOAT32 values, written as seven complex numbers in the same layout.
 *
 * Every element of inp is read before the first store to op. The constants
 * C70..C77 come from a header outside this block; their values are not visible
 * here. NOTE(review): the structure looks like a Winograd-style small-N
 * butterfly - confirm against the constant table.
 */
VOID ixheaacd_hbe_apply_ifft_7(FLOAT32 *inp, FLOAT32 *op) {
  /* Stage 1: pair up inputs (1,6), (2,5) and (4,3) as sums and differences. */
  const FLOAT32 in0_re = inp[0];
  const FLOAT32 in0_im = inp[1];
  const FLOAT32 s16_re = inp[2] + inp[12];
  const FLOAT32 s16_im = inp[3] + inp[13];
  const FLOAT32 d16_re = inp[2] - inp[12];
  const FLOAT32 d16_im = inp[3] - inp[13];
  const FLOAT32 s25_re = inp[4] + inp[10];
  const FLOAT32 s25_im = inp[5] + inp[11];
  const FLOAT32 d25_re = inp[4] - inp[10];
  const FLOAT32 d25_im = inp[5] - inp[11];
  const FLOAT32 s43_re = inp[8] + inp[6];
  const FLOAT32 s43_im = inp[9] + inp[7];
  const FLOAT32 d43_re = inp[8] - inp[6];
  const FLOAT32 d43_im = inp[9] - inp[7];

  /* Stage 2: combinations of the pair sums (t1..t4) and pair differences
   * (t5..t8). */
  const FLOAT32 t1_re = s16_re + s25_re + s43_re;
  const FLOAT32 t1_im = s16_im + s25_im + s43_im;
  const FLOAT32 t2_re = s16_re - s25_re;
  const FLOAT32 t2_im = s16_im - s25_im;
  const FLOAT32 t3_re = s43_re - s16_re;
  const FLOAT32 t3_im = s43_im - s16_im;
  const FLOAT32 t4_re = s25_re - s43_re;
  const FLOAT32 t4_im = s25_im - s43_im;
  const FLOAT32 t5_re = d16_re + d25_re + d43_re;
  const FLOAT32 t5_im = d16_im + d25_im + d43_im;
  const FLOAT32 t6_re = d16_re - d25_re;
  const FLOAT32 t6_im = d16_im - d25_im;
  const FLOAT32 t7_re = d43_re - d16_re;
  const FLOAT32 t7_im = d43_im - d16_im;
  const FLOAT32 t8_re = d25_re - d43_re;
  const FLOAT32 t8_im = d25_im - d43_im;

  /* Stage 3: scale by the C7x constants. For m5..m8 the real and imaginary
   * parts are exchanged and the new imaginary part is negated. */
  const FLOAT32 m0_re = in0_re + t1_re;
  const FLOAT32 m0_im = in0_im + t1_im;
  const FLOAT32 m1_re = in0_re + C70 * t1_re;
  const FLOAT32 m1_im = in0_im + C70 * t1_im;
  const FLOAT32 m2_re = C71 * t2_re;
  const FLOAT32 m2_im = C71 * t2_im;
  const FLOAT32 m3_re = C72 * t3_re;
  const FLOAT32 m3_im = C72 * t3_im;
  const FLOAT32 m4_re = C73 * t4_re;
  const FLOAT32 m4_im = C73 * t4_im;
  const FLOAT32 m5_re = C74 * t5_im;
  const FLOAT32 m5_im = -C74 * t5_re;
  const FLOAT32 m6_re = C75 * t6_im;
  const FLOAT32 m6_im = -C75 * t6_re;
  const FLOAT32 m7_re = C76 * t7_im;
  const FLOAT32 m7_im = -C76 * t7_re;
  const FLOAT32 m8_re = C77 * t8_im;
  const FLOAT32 m8_im = -C77 * t8_re;

  /* Stage 4: recombine. e1..e3 are built from m1..m4, o1..o3 from m5..m8. */
  const FLOAT32 e1_re = m1_re + m2_re + m4_re;
  const FLOAT32 e1_im = m1_im + m2_im + m4_im;
  const FLOAT32 e2_re = m1_re - m2_re - m3_re;
  const FLOAT32 e2_im = m1_im - m2_im - m3_im;
  const FLOAT32 e3_re = m1_re + m3_re - m4_re;
  const FLOAT32 e3_im = m1_im + m3_im - m4_im;
  const FLOAT32 o1_re = m5_re + m6_re + m8_re;
  const FLOAT32 o1_im = m5_im + m6_im + m8_im;
  const FLOAT32 o2_re = m5_re - m6_re - m7_re;
  const FLOAT32 o2_im = m5_im - m6_im - m7_im;
  const FLOAT32 o3_re = m5_re + m7_re - m8_re;
  const FLOAT32 o3_im = m5_im + m7_im - m8_im;

  /* Stage 5: final butterflies, stored straight into the output. */
  op[0] = m0_re;
  op[1] = m0_im;
  op[2] = e1_re + o1_re;
  op[3] = e1_im + o1_im;
  op[4] = e3_re + o3_re;
  op[5] = e3_im + o3_im;
  op[6] = e2_re - o2_re;
  op[7] = e2_im - o2_im;
  op[8] = e2_re + o2_re;
  op[9] = e2_im + o2_im;
  op[10] = e3_re - o3_re;
  op[11] = e3_im - o3_im;
  op[12] = e1_re - o1_re;
  op[13] = e1_im - o1_im;
}
159
160
0
/*
 * Three-point complex butterfly on interleaved (re, im) data.
 *
 * inp    : six FLOAT32 values, i.e. three complex numbers.
 * op     : six FLOAT32 values receiving the three complex results.
 * i_sign : multiplies the rotation constant, so it selects the sign of the
 *          cross terms. NOTE(review): callers presumably pass +1 or -1 -
 *          confirm.
 */
VOID ixheaacd_hbe_apply_fft_3(FLOAT32 *inp, FLOAT32 *op, WORD32 i_sign) {
  /* 0.866025403784439 is approximately sqrt(3) / 2. */
  const FLOAT64 rot = -0.866025403784439 * (FLOAT64)i_sign;

  const FLOAT32 sum01_re = inp[0] + inp[2];
  const FLOAT32 sum01_im = inp[1] + inp[3];

  const FLOAT32 sum12_re = inp[2] + inp[4];
  const FLOAT32 sum12_im = inp[3] + inp[5];

  const FLOAT32 dif12_re = inp[2] - inp[4];
  const FLOAT32 dif12_im = inp[3] - inp[5];

  /* Halving by an exact power of two gives the same value as dividing by 2. */
  const FLOAT32 half_re = (FLOAT32)0.5 * sum12_re;
  const FLOAT32 half_im = (FLOAT32)0.5 * sum12_im;
  const FLOAT32 cross_im = (FLOAT32)((FLOAT64)dif12_im * rot);
  const FLOAT32 cross_re = (FLOAT32)((FLOAT64)dif12_re * rot);

  const FLOAT32 base_re = inp[0] - half_re;

  /* The stores below still read inp[4], inp[5] and inp[1] at the same points
   * as before, so the order of memory accesses is unchanged. */
  op[0] = sum01_re + inp[4];
  op[1] = sum01_im + inp[5];
  op[2] = base_re + cross_im;
  op[3] = (inp[1] - cross_re) - half_im;
  op[4] = base_re - cross_im;
  op[5] = (inp[1] + cross_re) - half_im;
}
195
196
/*
 * Applies twiddle factors to a dim1 x dim2 grid of interleaved complex values
 * using CPLX_MPY_IFFT.
 *
 * inp  : source, walked as dim1 rows of dim2 (re, im) pairs.
 * op   : destination with the same layout.
 * dim1 : number of rows.
 * dim2 : number of complex values per row.
 * tw   : twiddle table, consumed as (dim1 - 1) rows of (dim2 - 1) pairs.
 *
 * The first row, and the first element of every later row, are copied without
 * multiplication. Every other element goes through CPLX_MPY_IFFT with the pair
 * (tw[2 * i + 1], tw[2 * i]); the macro is defined outside this block, so the
 * meaning of that argument order is not visible here.
 */
VOID ixheaacd_hbe_apply_tw_mult_ifft(FLOAT32 *inp, FLOAT32 *op, WORD32 dim1, WORD32 dim2,
                                     const FLOAT32 *tw) {
  FLOAT32 accu1, accu2;
  WORD32 i, j;
  /* FLOAT32 count of the (dim2 - 1) twiddled pairs in a row. Written as a
   * multiplication because left-shifting a negative value (dim2 <= 0) is
   * undefined behaviour in C. */
  const WORD32 step_val = (dim2 - 1) * 2;

  /* Row 0 is copied unchanged. */
  for (i = 0; i < dim2; i++) {
    op[0] = inp[0];
    op[1] = inp[1];
    op += 2;
    inp += 2;
  }

  for (j = 0; j < (dim1 - 1); j++) {
    /* Column 0 of each remaining row is copied unchanged. */
    op[0] = inp[0];
    op[1] = inp[1];
    inp += 2;
    op += 2;

    for (i = 0; i < (dim2 - 1); i++) {
      CPLX_MPY_IFFT(accu1, accu2, inp[2 * i + 0], inp[2 * i + 1], tw[2 * i + 1], tw[2 * i]);
      op[2 * i + 0] = accu1;
      op[2 * i + 1] = accu2;
    }

    /* Move all three pointers past the pairs just processed. */
    inp += step_val;
    op += step_val;
    tw += step_val;
  }
}
223
224
/*
 * Applies twiddle factors to a dim1 x dim2 grid of interleaved complex values
 * using CPLX_MPY_FFT.
 *
 * inp  : source, walked as dim1 rows of dim2 (re, im) pairs.
 * op   : destination with the same layout.
 * dim1 : number of rows.
 * dim2 : number of complex values per row.
 * tw   : twiddle table, consumed as (dim1 - 1) rows of (dim2 - 1) pairs.
 *
 * The first row, and the first element of every later row, are copied without
 * multiplication. Every other element goes through CPLX_MPY_FFT with the pair
 * (tw[2 * i + 1], tw[2 * i]); the macro is defined outside this block, so the
 * meaning of that argument order is not visible here.
 */
VOID ixheaacd_hbe_apply_tw_mult_fft(FLOAT32 *inp, FLOAT32 *op, WORD32 dim1, WORD32 dim2,
                                    const FLOAT32 *tw) {
  FLOAT32 accu1, accu2;
  WORD32 i, j;
  /* FLOAT32 count of the (dim2 - 1) twiddled pairs in a row. Written as a
   * multiplication because left-shifting a negative value (dim2 <= 0) is
   * undefined behaviour in C. */
  const WORD32 step_val = (dim2 - 1) * 2;

  /* Row 0 is copied unchanged. */
  for (i = 0; i < dim2; i++) {
    op[0] = inp[0];
    op[1] = inp[1];
    op += 2;
    inp += 2;
  }

  for (j = 0; j < (dim1 - 1); j++) {
    /* Column 0 of each remaining row is copied unchanged. */
    op[0] = inp[0];
    op[1] = inp[1];
    inp += 2;
    op += 2;

    for (i = 0; i < (dim2 - 1); i++) {
      CPLX_MPY_FFT(accu1, accu2, inp[2 * i + 0], inp[2 * i + 1], tw[2 * i + 1], tw[2 * i]);
      op[2 * i + 0] = accu1;
      op[2 * i + 1] = accu2;
    }

    /* Move all three pointers past the pairs just processed. */
    inp += step_val;
    op += step_val;
    tw += step_val;
  }
}
251
252
0
VOID ixheaacd_hbe_apply_cfftn(FLOAT32 re[], FLOAT32 *scratch, WORD32 n_pass, WORD32 i_sign) {
253
0
  WORD32 i, j, k, n_stages, h2;
254
0
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
255
0
  WORD32 del, nodespacing, in_loop_cnt;
256
0
  WORD32 not_power_4;
257
0
  WORD32 dig_rev_shift;
258
0
  WORD32 mpass = n_pass;
259
0
  WORD32 npoints = n_pass;
260
0
  const FLOAT64 *ptr_w;
261
0
  FLOAT32 *ptr_x = scratch;
262
0
  FLOAT32 *y = scratch + (2 * n_pass);
263
0
  FLOAT32 *ptr_y = y;
264
265
0
  dig_rev_shift = ixheaac_norm32(mpass) + 1 - 16;
266
0
  n_stages = 30 - ixheaac_norm32(mpass);
267
0
  not_power_4 = n_stages & 1;
268
269
0
  n_stages = n_stages >> 1;
270
271
0
  ptr_w = ixheaac_twid_tbl_fft_double;
272
0
  ptr_x = re;
273
274
0
  if (i_sign == -1) {
275
0
    for (i = 0; i < npoints; i += 4) {
276
0
      FLOAT32 *inp = ptr_x;
277
0
      FLOAT32 tmk;
278
279
0
      DIG_REV(i, dig_rev_shift, h2);
280
0
      if (not_power_4) {
281
0
        h2 += 1;
282
0
        h2 &= ~1;
283
0
      }
284
0
      inp += (h2);
285
286
0
      x0r = *inp;
287
0
      x0i = *(inp + 1);
288
0
      inp += (npoints >> 1);
289
290
0
      x1r = *inp;
291
0
      x1i = *(inp + 1);
292
0
      inp += (npoints >> 1);
293
294
0
      x2r = *inp;
295
0
      x2i = *(inp + 1);
296
0
      inp += (npoints >> 1);
297
298
0
      x3r = *inp;
299
0
      x3i = *(inp + 1);
300
301
0
      x0r = x0r + x2r;
302
0
      x0i = x0i + x2i;
303
304
0
      tmk = x0r - x2r;
305
0
      x2r = tmk - x2r;
306
0
      tmk = x0i - x2i;
307
0
      x2i = tmk - x2i;
308
309
0
      x1r = x1r + x3r;
310
0
      x1i = x1i + x3i;
311
312
0
      tmk = x1r - x3r;
313
0
      x3r = tmk - x3r;
314
0
      tmk = x1i - x3i;
315
0
      x3i = tmk - x3i;
316
317
0
      x0r = x0r + x1r;
318
0
      x0i = x0i + x1i;
319
320
0
      tmk = x0r - x1r;
321
0
      x1r = tmk - x1r;
322
0
      tmk = x0i - x1i;
323
0
      x1i = tmk - x1i;
324
325
0
      x2r = x2r + x3i;
326
0
      x2i = x2i - x3r;
327
328
0
      tmk = x2r - x3i;
329
0
      x3i = tmk - x3i;
330
0
      tmk = x2i + x3r;
331
0
      x3r = tmk + x3r;
332
333
0
      *ptr_y++ = x0r;
334
0
      *ptr_y++ = x0i;
335
0
      *ptr_y++ = x2r;
336
0
      *ptr_y++ = x2i;
337
0
      *ptr_y++ = x1r;
338
0
      *ptr_y++ = x1i;
339
0
      *ptr_y++ = x3i;
340
0
      *ptr_y++ = x3r;
341
0
    }
342
0
    ptr_y -= 2 * npoints;
343
0
    del = 4;
344
0
    nodespacing = 64;
345
0
    in_loop_cnt = npoints >> 4;
346
0
    for (i = n_stages - 1; i > 0; i--) {
347
0
      const FLOAT64 *twiddles = ptr_w;
348
0
      FLOAT32 *data = ptr_y;
349
0
      FLOAT64 W1, W2, W3, W4, W5, W6;
350
0
      WORD32 sec_loop_cnt;
351
352
0
      for (k = in_loop_cnt; k != 0; k--) {
353
0
        x0r = (*data);
354
0
        x0i = (*(data + 1));
355
0
        data += (del << 1);
356
357
0
        x1r = (*data);
358
0
        x1i = (*(data + 1));
359
0
        data += (del << 1);
360
361
0
        x2r = (*data);
362
0
        x2i = (*(data + 1));
363
0
        data += (del << 1);
364
365
0
        x3r = (*data);
366
0
        x3i = (*(data + 1));
367
0
        data -= 3 * (del << 1);
368
369
0
        x0r = x0r + x2r;
370
0
        x0i = x0i + x2i;
371
0
        x2r = x0r - (x2r * 2);
372
0
        x2i = x0i - (x2i * 2);
373
0
        x1r = x1r + x3r;
374
0
        x1i = x1i + x3i;
375
0
        x3r = x1r - (x3r * 2);
376
0
        x3i = x1i - (x3i * 2);
377
378
0
        x0r = x0r + x1r;
379
0
        x0i = x0i + x1i;
380
0
        x1r = x0r - (x1r * 2);
381
0
        x1i = x0i - (x1i * 2);
382
0
        x2r = x2r + x3i;
383
0
        x2i = x2i - x3r;
384
0
        x3i = x2r - (x3i * 2);
385
0
        x3r = x2i + (x3r * 2);
386
387
0
        *data = x0r;
388
0
        *(data + 1) = x0i;
389
0
        data += (del << 1);
390
391
0
        *data = x2r;
392
0
        *(data + 1) = x2i;
393
0
        data += (del << 1);
394
395
0
        *data = x1r;
396
0
        *(data + 1) = x1i;
397
0
        data += (del << 1);
398
399
0
        *data = x3i;
400
0
        *(data + 1) = x3r;
401
0
        data += (del << 1);
402
0
      }
403
0
      data = ptr_y + 2;
404
405
0
      sec_loop_cnt = (nodespacing * del);
406
0
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
407
0
                     (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
408
0
                     (sec_loop_cnt / 256);
409
0
      j = nodespacing;
410
411
0
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
412
0
        W1 = *(twiddles + j);
413
0
        W4 = *(twiddles + j + 257);
414
0
        W2 = *(twiddles + (j << 1));
415
0
        W5 = *(twiddles + (j << 1) + 257);
416
0
        W3 = *(twiddles + j + (j << 1));
417
0
        W6 = *(twiddles + j + (j << 1) + 257);
418
419
0
        for (k = in_loop_cnt; k != 0; k--) {
420
0
          FLOAT32 tmp;
421
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
422
423
0
          data += (del << 1);
424
425
0
          x1r = *data;
426
0
          x1i = *(data + 1);
427
0
          data += (del << 1);
428
429
0
          x2r = *data;
430
0
          x2i = *(data + 1);
431
0
          data += (del << 1);
432
433
0
          x3r = *data;
434
0
          x3i = *(data + 1);
435
0
          data -= 3 * (del << 1);
436
437
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
438
0
                          ixheaacd_mult32X32float((FLOAT64)x1i, W4));
439
0
          x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
440
0
                                                      (FLOAT64)x1i, W1);
441
0
          x1r = tmp;
442
443
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W2) -
444
0
                          ixheaacd_mult32X32float((FLOAT64)x2i, W5));
445
0
          x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x2r, W5),
446
0
                                                      (FLOAT64)x2i, W2);
447
0
          x2r = tmp;
448
449
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W3) -
450
0
                          ixheaacd_mult32X32float((FLOAT64)x3i, W6));
451
0
          x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x3r, W6),
452
0
                                                      (FLOAT64)x3i, W3);
453
0
          x3r = tmp;
454
455
0
          x0r = (*data);
456
0
          x0i = (*(data + 1));
457
458
0
          x0r = x0r + (x2r);
459
0
          x0i = x0i + (x2i);
460
0
          x2r = x0r - (x2r * 2);
461
0
          x2i = x0i - (x2i * 2);
462
0
          x1r = x1r + x3r;
463
0
          x1i = x1i + x3i;
464
0
          x3r = x1r - (x3r * 2);
465
0
          x3i = x1i - (x3i * 2);
466
467
0
          x0r = x0r + (x1r);
468
0
          x0i = x0i + (x1i);
469
0
          x1r = x0r - (x1r * 2);
470
0
          x1i = x0i - (x1i * 2);
471
0
          x2r = x2r + (x3i);
472
0
          x2i = x2i - (x3r);
473
0
          x3i = x2r - (x3i * 2);
474
0
          x3r = x2i + (x3r * 2);
475
476
0
          *data = x0r;
477
0
          *(data + 1) = x0i;
478
0
          data += (del << 1);
479
480
0
          *data = x2r;
481
0
          *(data + 1) = x2i;
482
0
          data += (del << 1);
483
484
0
          *data = x1r;
485
0
          *(data + 1) = x1i;
486
0
          data += (del << 1);
487
488
0
          *data = x3i;
489
0
          *(data + 1) = x3r;
490
0
          data += (del << 1);
491
0
        }
492
0
        data -= 2 * npoints;
493
0
        data += 2;
494
0
      }
495
0
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
496
0
        W1 = *(twiddles + j);
497
0
        W4 = *(twiddles + j + 257);
498
0
        W2 = *(twiddles + (j << 1));
499
0
        W5 = *(twiddles + (j << 1) + 257);
500
0
        W3 = *(twiddles + j + (j << 1) - 256);
501
0
        W6 = *(twiddles + j + (j << 1) + 1);
502
503
0
        for (k = in_loop_cnt; k != 0; k--) {
504
0
          FLOAT32 tmp;
505
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
506
507
0
          data += (del << 1);
508
509
0
          x1r = *data;
510
0
          x1i = *(data + 1);
511
0
          data += (del << 1);
512
513
0
          x2r = *data;
514
0
          x2i = *(data + 1);
515
0
          data += (del << 1);
516
517
0
          x3r = *data;
518
0
          x3i = *(data + 1);
519
0
          data -= 3 * (del << 1);
520
521
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
522
0
                          ixheaacd_mult32X32float((FLOAT64)x1i, W4));
523
0
          x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
524
0
                                                      (FLOAT64)x1i, W1);
525
0
          x1r = tmp;
526
527
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W2) -
528
0
                          ixheaacd_mult32X32float((FLOAT64)x2i, W5));
529
0
          x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x2r, W5),
530
0
                                                      (FLOAT64)x2i, W2);
531
0
          x2r = tmp;
532
533
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W6) +
534
0
                          ixheaacd_mult32X32float((FLOAT64)x3i, W3));
535
0
          x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) +
536
0
                          ixheaacd_mult32X32float((FLOAT64)x3i, W6));
537
0
          x3r = tmp;
538
539
0
          x0r = (*data);
540
0
          x0i = (*(data + 1));
541
542
0
          x0r = x0r + (x2r);
543
0
          x0i = x0i + (x2i);
544
0
          x2r = x0r - (x2r * 2);
545
0
          x2i = x0i - (x2i * 2);
546
0
          x1r = x1r + x3r;
547
0
          x1i = x1i + x3i;
548
0
          x3r = x1r - (x3r * 2);
549
0
          x3i = x1i - (x3i * 2);
550
551
0
          x0r = x0r + (x1r);
552
0
          x0i = x0i + (x1i);
553
0
          x1r = x0r - (x1r * 2);
554
0
          x1i = x0i - (x1i * 2);
555
0
          x2r = x2r + (x3i);
556
0
          x2i = x2i - (x3r);
557
0
          x3i = x2r - (x3i * 2);
558
0
          x3r = x2i + (x3r * 2);
559
560
0
          *data = x0r;
561
0
          *(data + 1) = x0i;
562
0
          data += (del << 1);
563
564
0
          *data = x2r;
565
0
          *(data + 1) = x2i;
566
0
          data += (del << 1);
567
568
0
          *data = x1r;
569
0
          *(data + 1) = x1i;
570
0
          data += (del << 1);
571
572
0
          *data = x3i;
573
0
          *(data + 1) = x3r;
574
0
          data += (del << 1);
575
0
        }
576
0
        data -= 2 * npoints;
577
0
        data += 2;
578
0
      }
579
0
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
580
0
        W1 = *(twiddles + j);
581
0
        W4 = *(twiddles + j + 257);
582
0
        W2 = *(twiddles + (j << 1) - 256);
583
0
        W5 = *(twiddles + (j << 1) + 1);
584
0
        W3 = *(twiddles + j + (j << 1) - 256);
585
0
        W6 = *(twiddles + j + (j << 1) + 1);
586
587
0
        for (k = in_loop_cnt; k != 0; k--) {
588
0
          FLOAT32 tmp;
589
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
590
591
0
          data += (del << 1);
592
593
0
          x1r = *data;
594
0
          x1i = *(data + 1);
595
0
          data += (del << 1);
596
597
0
          x2r = *data;
598
0
          x2i = *(data + 1);
599
0
          data += (del << 1);
600
601
0
          x3r = *data;
602
0
          x3i = *(data + 1);
603
0
          data -= 3 * (del << 1);
604
605
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
606
0
                          ixheaacd_mult32X32float((FLOAT64)x1i, W4));
607
0
          x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, W4), x1i, W1);
608
0
          x1r = tmp;
609
610
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W5) +
611
0
                          ixheaacd_mult32X32float((FLOAT64)x2i, W2));
612
0
          x2i = (FLOAT32)(-ixheaacd_mult32X32float(x2r, W2) + ixheaacd_mult32X32float(x2i, W5));
613
0
          x2r = tmp;
614
615
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W6) +
616
0
                          ixheaacd_mult32X32float((FLOAT64)x3i, W3));
617
0
          x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) +
618
0
                          ixheaacd_mult32X32float((FLOAT64)x3i, W6));
619
0
          x3r = tmp;
620
621
0
          x0r = (*data);
622
0
          x0i = (*(data + 1));
623
624
0
          x0r = x0r + (x2r);
625
0
          x0i = x0i + (x2i);
626
0
          x2r = x0r - (x2r * 2);
627
0
          x2i = x0i - (x2i * 2);
628
0
          x1r = x1r + x3r;
629
0
          x1i = x1i + x3i;
630
0
          x3r = x1r - (x3r * 2);
631
0
          x3i = x1i - (x3i * 2);
632
633
0
          x0r = x0r + (x1r);
634
0
          x0i = x0i + (x1i);
635
0
          x1r = x0r - (x1r * 2);
636
0
          x1i = x0i - (x1i * 2);
637
0
          x2r = x2r + (x3i);
638
0
          x2i = x2i - (x3r);
639
0
          x3i = x2r - (x3i * 2);
640
0
          x3r = x2i + (x3r * 2);
641
642
0
          *data = x0r;
643
0
          *(data + 1) = x0i;
644
0
          data += (del << 1);
645
646
0
          *data = x2r;
647
0
          *(data + 1) = x2i;
648
0
          data += (del << 1);
649
650
0
          *data = x1r;
651
0
          *(data + 1) = x1i;
652
0
          data += (del << 1);
653
654
0
          *data = x3i;
655
0
          *(data + 1) = x3r;
656
0
          data += (del << 1);
657
0
        }
658
0
        data -= 2 * npoints;
659
0
        data += 2;
660
0
      }
661
0
      for (; j < nodespacing * del; j += nodespacing) {
662
0
        W1 = *(twiddles + j);
663
0
        W4 = *(twiddles + j + 257);
664
0
        W2 = *(twiddles + (j << 1) - 256);
665
0
        W5 = *(twiddles + (j << 1) + 1);
666
0
        W3 = *(twiddles + j + (j << 1) - 512);
667
0
        W6 = *(twiddles + j + (j << 1) - 512 + 257);
668
669
0
        for (k = in_loop_cnt; k != 0; k--) {
670
0
          FLOAT32 tmp;
671
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
672
673
0
          data += (del << 1);
674
675
0
          x1r = *data;
676
0
          x1i = *(data + 1);
677
0
          data += (del << 1);
678
679
0
          x2r = *data;
680
0
          x2i = *(data + 1);
681
0
          data += (del << 1);
682
683
0
          x3r = *data;
684
0
          x3i = *(data + 1);
685
0
          data -= 3 * (del << 1);
686
687
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
688
0
                          ixheaacd_mult32X32float((FLOAT64)x1i, W4));
689
0
          x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
690
0
                                                      (FLOAT64)x1i, W1);
691
0
          x1r = tmp;
692
693
0
          tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W5) +
694
0
                          ixheaacd_mult32X32float((FLOAT64)x2i, W2));
695
0
          x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x2r, W2) +
696
0
                          ixheaacd_mult32X32float((FLOAT64)x2i, W5));
697
0
          x2r = tmp;
698
699
0
          tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) +
700
0
                          ixheaacd_mult32X32float((FLOAT64)x3i, W6));
701
0
          x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x3r, W6),
702
0
                                                      (FLOAT64)x3i, W3);
703
0
          x3r = tmp;
704
705
0
          x0r = (*data);
706
0
          x0i = (*(data + 1));
707
708
0
          x0r = x0r + (x2r);
709
0
          x0i = x0i + (x2i);
710
0
          x2r = x0r - (x2r * 2);
711
0
          x2i = x0i - (x2i * 2);
712
0
          x1r = x1r + x3r;
713
0
          x1i = x1i - x3i;
714
0
          x3r = x1r - (x3r * 2);
715
0
          x3i = x1i + (x3i * 2);
716
717
0
          x0r = x0r + (x1r);
718
0
          x0i = x0i + (x1i);
719
0
          x1r = x0r - (x1r * 2);
720
0
          x1i = x0i - (x1i * 2);
721
0
          x2r = x2r + (x3i);
722
0
          x2i = x2i - (x3r);
723
0
          x3i = x2r - (x3i * 2);
724
0
          x3r = x2i + (x3r * 2);
725
726
0
          *data = x0r;
727
0
          *(data + 1) = x0i;
728
0
          data += (del << 1);
729
730
0
          *data = x2r;
731
0
          *(data + 1) = x2i;
732
0
          data += (del << 1);
733
734
0
          *data = x1r;
735
0
          *(data + 1) = x1i;
736
0
          data += (del << 1);
737
738
0
          *data = x3i;
739
0
          *(data + 1) = x3r;
740
0
          data += (del << 1);
741
0
        }
742
0
        data -= 2 * npoints;
743
0
        data += 2;
744
0
      }
745
0
      nodespacing >>= 2;
746
0
      del <<= 2;
747
0
      in_loop_cnt >>= 2;
748
0
    }
749
0
    if (not_power_4) {
750
0
      const FLOAT64 *twiddles = ptr_w;
751
0
      nodespacing <<= 1;
752
753
0
      for (j = del / 2; j != 0; j--) {
754
0
        FLOAT64 W1 = *twiddles;
755
0
        FLOAT64 W4 = *(twiddles + 257);
756
0
        FLOAT32 tmp;
757
0
        twiddles += nodespacing;
758
759
0
        x0r = *ptr_y;
760
0
        x0i = *(ptr_y + 1);
761
0
        ptr_y += (del << 1);
762
763
0
        x1r = *ptr_y;
764
0
        x1i = *(ptr_y + 1);
765
766
0
        tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
767
0
                        ixheaacd_mult32X32float((FLOAT64)x1i, W4));
768
0
        x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
769
0
                                                    (FLOAT64)x1i, W1);
770
0
        x1r = tmp;
771
772
0
        *ptr_y = (x0r) - (x1r);
773
0
        *(ptr_y + 1) = (x0i) - (x1i);
774
0
        ptr_y -= (del << 1);
775
776
0
        *ptr_y = (x0r) + (x1r);
777
0
        *(ptr_y + 1) = (x0i) + (x1i);
778
0
        ptr_y += 2;
779
0
      }
780
0
      twiddles = ptr_w;
781
0
      for (j = del / 2; j != 0; j--) {
782
0
        FLOAT64 W1 = *twiddles;
783
0
        FLOAT64 W4 = *(twiddles + 257);
784
0
        FLOAT32 tmp;
785
0
        twiddles += nodespacing;
786
787
0
        x0r = *ptr_y;
788
0
        x0i = *(ptr_y + 1);
789
0
        ptr_y += (del << 1);
790
791
0
        x1r = *ptr_y;
792
0
        x1i = *(ptr_y + 1);
793
794
0
        tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W4) +
795
0
                        ixheaacd_mult32X32float((FLOAT64)x1i, W1));
796
0
        x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x1r, W1) +
797
0
                        ixheaacd_mult32X32float((FLOAT64)x1i, W4));
798
0
        x1r = tmp;
799
800
0
        *ptr_y = (x0r) - (x1r);
801
0
        *(ptr_y + 1) = (x0i) - (x1i);
802
0
        ptr_y -= (del << 1);
803
804
0
        *ptr_y = (x0r) + (x1r);
805
0
        *(ptr_y + 1) = (x0i) + (x1i);
806
0
        ptr_y += 2;
807
0
      }
808
0
    }
809
0
  } else {
810
0
    for (i = 0; i < npoints; i += 4) {
811
0
      FLOAT32 *inp = ptr_x;
812
813
0
      DIG_REV(i, dig_rev_shift, h2);
814
0
      if (not_power_4) {
815
0
        h2 += 1;
816
0
        h2 &= ~1;
817
0
      }
818
0
      inp += (h2);
819
820
0
      x0r = *inp;
821
0
      x0i = *(inp + 1);
822
0
      inp += (npoints >> 1);
823
824
0
      x1r = *inp;
825
0
      x1i = *(inp + 1);
826
0
      inp += (npoints >> 1);
827
828
0
      x2r = *inp;
829
0
      x2i = *(inp + 1);
830
0
      inp += (npoints >> 1);
831
832
0
      x3r = *inp;
833
0
      x3i = *(inp + 1);
834
835
0
      x0r = x0r + x2r;
836
0
      x0i = x0i + x2i;
837
0
      x2r = x0r - (x2r * 2);
838
0
      x2i = x0i - (x2i * 2);
839
0
      x1r = x1r + x3r;
840
0
      x1i = x1i + x3i;
841
0
      x3r = x1r - (x3r * 2);
842
0
      x3i = x1i - (x3i * 2);
843
844
0
      x0r = x0r + x1r;
845
0
      x0i = x0i + x1i;
846
0
      x1r = x0r - (x1r * 2);
847
0
      x1i = x0i - (x1i * 2);
848
0
      x2r = x2r - x3i;
849
0
      x2i = x2i + x3r;
850
0
      x3i = x2r + (x3i * 2);
851
0
      x3r = x2i - (x3r * 2);
852
853
0
      *ptr_y++ = x0r;
854
0
      *ptr_y++ = x0i;
855
0
      *ptr_y++ = x2r;
856
0
      *ptr_y++ = x2i;
857
0
      *ptr_y++ = x1r;
858
0
      *ptr_y++ = x1i;
859
0
      *ptr_y++ = x3i;
860
0
      *ptr_y++ = x3r;
861
0
    }
862
0
    ptr_y -= 2 * npoints;
863
0
    del = 4;
864
0
    nodespacing = 64;
865
0
    in_loop_cnt = npoints >> 4;
866
0
    for (i = n_stages - 1; i > 0; i--) {
867
0
      const FLOAT64 *twiddles = ptr_w;
868
0
      FLOAT32 *data = ptr_y;
869
0
      FLOAT64 W1, W2, W3, W4, W5, W6;
870
0
      WORD32 sec_loop_cnt;
871
872
0
      for (k = in_loop_cnt; k != 0; k--) {
873
0
        x0r = (*data);
874
0
        x0i = (*(data + 1));
875
0
        data += (del << 1);
876
877
0
        x1r = (*data);
878
0
        x1i = (*(data + 1));
879
0
        data += (del << 1);
880
881
0
        x2r = (*data);
882
0
        x2i = (*(data + 1));
883
0
        data += (del << 1);
884
885
0
        x3r = (*data);
886
0
        x3i = (*(data + 1));
887
0
        data -= 3 * (del << 1);
888
889
0
        x0r = x0r + x2r;
890
0
        x0i = x0i + x2i;
891
0
        x2r = x0r - (x2r * 2);
892
0
        x2i = x0i - (x2i * 2);
893
0
        x1r = x1r + x3r;
894
0
        x1i = x1i + x3i;
895
0
        x3r = x1r - (x3r * 2);
896
0
        x3i = x1i - (x3i * 2);
897
898
0
        x0r = x0r + x1r;
899
0
        x0i = x0i + x1i;
900
0
        x1r = x0r - (x1r * 2);
901
0
        x1i = x0i - (x1i * 2);
902
0
        x2r = x2r - x3i;
903
0
        x2i = x2i + x3r;
904
0
        x3i = x2r + (x3i * 2);
905
0
        x3r = x2i - (x3r * 2);
906
907
0
        *data = x0r;
908
0
        *(data + 1) = x0i;
909
0
        data += (del << 1);
910
911
0
        *data = x2r;
912
0
        *(data + 1) = x2i;
913
0
        data += (del << 1);
914
915
0
        *data = x1r;
916
0
        *(data + 1) = x1i;
917
0
        data += (del << 1);
918
919
0
        *data = x3i;
920
0
        *(data + 1) = x3r;
921
0
        data += (del << 1);
922
0
      }
923
0
      data = ptr_y + 2;
924
925
0
      sec_loop_cnt = (nodespacing * del);
926
0
      sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
927
0
                     (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
928
0
                     (sec_loop_cnt / 256);
929
0
      j = nodespacing;
930
931
0
      for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
932
0
        W1 = *(twiddles + j);
933
0
        W4 = *(twiddles + j + 257);
934
0
        W2 = *(twiddles + (j << 1));
935
0
        W5 = *(twiddles + (j << 1) + 257);
936
0
        W3 = *(twiddles + j + (j << 1));
937
0
        W6 = *(twiddles + j + (j << 1) + 257);
938
939
0
        for (k = in_loop_cnt; k != 0; k--) {
940
0
          FLOAT32 tmp;
941
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
942
943
0
          data += (del << 1);
944
945
0
          x1r = *data;
946
0
          x1i = *(data + 1);
947
0
          data += (del << 1);
948
949
0
          x2r = *data;
950
0
          x2i = *(data + 1);
951
0
          data += (del << 1);
952
953
0
          x3r = *data;
954
0
          x3i = *(data + 1);
955
0
          data -= 3 * (del << 1);
956
957
0
          tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
958
0
          x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
959
0
          x1r = tmp;
960
961
0
          tmp = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
962
0
          x2i = (FLOAT32)(-((FLOAT64)x2r * W5) + (FLOAT64)x2i * W2);
963
0
          x2r = tmp;
964
965
0
          tmp = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6));
966
0
          x3i = (FLOAT32)(-((FLOAT64)x3r * W6) + (FLOAT64)x3i * W3);
967
0
          x3r = tmp;
968
969
0
          x0r = (*data);
970
0
          x0i = (*(data + 1));
971
972
0
          x0r = x0r + (x2r);
973
0
          x0i = x0i + (x2i);
974
0
          x2r = x0r - (x2r * 2);
975
0
          x2i = x0i - (x2i * 2);
976
0
          x1r = x1r + x3r;
977
0
          x1i = x1i + x3i;
978
0
          x3r = x1r - (x3r * 2);
979
0
          x3i = x1i - (x3i * 2);
980
981
0
          x0r = x0r + (x1r);
982
0
          x0i = x0i + (x1i);
983
0
          x1r = x0r - (x1r * 2);
984
0
          x1i = x0i - (x1i * 2);
985
0
          x2r = x2r - (x3i);
986
0
          x2i = x2i + (x3r);
987
0
          x3i = x2r + (x3i * 2);
988
0
          x3r = x2i - (x3r * 2);
989
990
0
          *data = x0r;
991
0
          *(data + 1) = x0i;
992
0
          data += (del << 1);
993
994
0
          *data = x2r;
995
0
          *(data + 1) = x2i;
996
0
          data += (del << 1);
997
998
0
          *data = x1r;
999
0
          *(data + 1) = x1i;
1000
0
          data += (del << 1);
1001
1002
0
          *data = x3i;
1003
0
          *(data + 1) = x3r;
1004
0
          data += (del << 1);
1005
0
        }
1006
0
        data -= 2 * npoints;
1007
0
        data += 2;
1008
0
      }
1009
0
      for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1010
0
        W1 = *(twiddles + j);
1011
0
        W4 = *(twiddles + j + 257);
1012
0
        W2 = *(twiddles + (j << 1));
1013
0
        W5 = *(twiddles + (j << 1) + 257);
1014
0
        W3 = *(twiddles + j + (j << 1) - 256);
1015
0
        W6 = *(twiddles + j + (j << 1) + 1);
1016
1017
0
        for (k = in_loop_cnt; k != 0; k--) {
1018
0
          FLOAT32 tmp;
1019
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1020
1021
0
          data += (del << 1);
1022
1023
0
          x1r = *data;
1024
0
          x1i = *(data + 1);
1025
0
          data += (del << 1);
1026
1027
0
          x2r = *data;
1028
0
          x2i = *(data + 1);
1029
0
          data += (del << 1);
1030
1031
0
          x3r = *data;
1032
0
          x3i = *(data + 1);
1033
0
          data -= 3 * (del << 1);
1034
1035
0
          tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1036
0
          x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1037
0
          x1r = tmp;
1038
1039
0
          tmp = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
1040
0
          x2i = (FLOAT32)(-((FLOAT64)x2r * W5) + (FLOAT64)x2i * W2);
1041
0
          x2r = tmp;
1042
1043
0
          tmp = (FLOAT32)(((FLOAT64)x3r * W6) - ((FLOAT64)x3i * W3));
1044
0
          x3i = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6));
1045
0
          x3r = tmp;
1046
1047
0
          x0r = (*data);
1048
0
          x0i = (*(data + 1));
1049
1050
0
          x0r = x0r + (x2r);
1051
0
          x0i = x0i + (x2i);
1052
0
          x2r = x0r - (x2r * 2);
1053
0
          x2i = x0i - (x2i * 2);
1054
0
          x1r = x1r + x3r;
1055
0
          x1i = x1i + x3i;
1056
0
          x3r = x1r - (x3r * 2);
1057
0
          x3i = x1i - (x3i * 2);
1058
1059
0
          x0r = x0r + (x1r);
1060
0
          x0i = x0i + (x1i);
1061
0
          x1r = x0r - (x1r * 2);
1062
0
          x1i = x0i - (x1i * 2);
1063
0
          x2r = x2r - (x3i);
1064
0
          x2i = x2i + (x3r);
1065
0
          x3i = x2r + (x3i * 2);
1066
0
          x3r = x2i - (x3r * 2);
1067
1068
0
          *data = x0r;
1069
0
          *(data + 1) = x0i;
1070
0
          data += (del << 1);
1071
1072
0
          *data = x2r;
1073
0
          *(data + 1) = x2i;
1074
0
          data += (del << 1);
1075
1076
0
          *data = x1r;
1077
0
          *(data + 1) = x1i;
1078
0
          data += (del << 1);
1079
1080
0
          *data = x3i;
1081
0
          *(data + 1) = x3r;
1082
0
          data += (del << 1);
1083
0
        }
1084
0
        data -= 2 * npoints;
1085
0
        data += 2;
1086
0
      }
1087
0
      for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1088
0
        W1 = *(twiddles + j);
1089
0
        W4 = *(twiddles + j + 257);
1090
0
        W2 = *(twiddles + (j << 1) - 256);
1091
0
        W5 = *(twiddles + (j << 1) + 1);
1092
0
        W3 = *(twiddles + j + (j << 1) - 256);
1093
0
        W6 = *(twiddles + j + (j << 1) + 1);
1094
1095
0
        for (k = in_loop_cnt; k != 0; k--) {
1096
0
          FLOAT32 tmp;
1097
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1098
1099
0
          data += (del << 1);
1100
1101
0
          x1r = *data;
1102
0
          x1i = *(data + 1);
1103
0
          data += (del << 1);
1104
1105
0
          x2r = *data;
1106
0
          x2i = *(data + 1);
1107
0
          data += (del << 1);
1108
1109
0
          x3r = *data;
1110
0
          x3i = *(data + 1);
1111
0
          data -= 3 * (del << 1);
1112
1113
0
          tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1114
0
          x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1115
0
          x1r = tmp;
1116
1117
0
          tmp = (FLOAT32)(((FLOAT64)x2r * W5) - ((FLOAT64)x2i * W2));
1118
0
          x2i = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
1119
0
          x2r = tmp;
1120
1121
0
          tmp = (FLOAT32)(((FLOAT64)x3r * W6) - ((FLOAT64)x3i * W3));
1122
0
          x3i = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6));
1123
0
          x3r = tmp;
1124
1125
0
          x0r = (*data);
1126
0
          x0i = (*(data + 1));
1127
1128
0
          x0r = x0r + (x2r);
1129
0
          x0i = x0i + (x2i);
1130
0
          x2r = x0r - (x2r * 2);
1131
0
          x2i = x0i - (x2i * 2);
1132
0
          x1r = x1r + x3r;
1133
0
          x1i = x1i + x3i;
1134
0
          x3r = x1r - (x3r * 2);
1135
0
          x3i = x1i - (x3i * 2);
1136
1137
0
          x0r = x0r + (x1r);
1138
0
          x0i = x0i + (x1i);
1139
0
          x1r = x0r - (x1r * 2);
1140
0
          x1i = x0i - (x1i * 2);
1141
0
          x2r = x2r - (x3i);
1142
0
          x2i = x2i + (x3r);
1143
0
          x3i = x2r + (x3i * 2);
1144
0
          x3r = x2i - (x3r * 2);
1145
1146
0
          *data = x0r;
1147
0
          *(data + 1) = x0i;
1148
0
          data += (del << 1);
1149
1150
0
          *data = x2r;
1151
0
          *(data + 1) = x2i;
1152
0
          data += (del << 1);
1153
1154
0
          *data = x1r;
1155
0
          *(data + 1) = x1i;
1156
0
          data += (del << 1);
1157
1158
0
          *data = x3i;
1159
0
          *(data + 1) = x3r;
1160
0
          data += (del << 1);
1161
0
        }
1162
0
        data -= 2 * npoints;
1163
0
        data += 2;
1164
0
      }
1165
0
      for (; j < nodespacing * del; j += nodespacing) {
1166
0
        W1 = *(twiddles + j);
1167
0
        W4 = *(twiddles + j + 257);
1168
0
        W2 = *(twiddles + (j << 1) - 256);
1169
0
        W5 = *(twiddles + (j << 1) + 1);
1170
0
        W3 = *(twiddles + j + (j << 1) - 512);
1171
0
        W6 = *(twiddles + j + (j << 1) - 512 + 257);
1172
1173
0
        for (k = in_loop_cnt; k != 0; k--) {
1174
0
          FLOAT32 tmp;
1175
0
          FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1176
1177
0
          data += (del << 1);
1178
1179
0
          x1r = *data;
1180
0
          x1i = *(data + 1);
1181
0
          data += (del << 1);
1182
1183
0
          x2r = *data;
1184
0
          x2i = *(data + 1);
1185
0
          data += (del << 1);
1186
1187
0
          x3r = *data;
1188
0
          x3i = *(data + 1);
1189
0
          data -= 3 * (del << 1);
1190
1191
0
          tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1192
0
          x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1193
0
          x1r = tmp;
1194
1195
0
          tmp = (FLOAT32)(((FLOAT64)x2r * W5) - ((FLOAT64)x2i * W2));
1196
0
          x2i = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
1197
0
          x2r = tmp;
1198
1199
0
          tmp = (FLOAT32)(-((FLOAT64)x3r * W3) - ((FLOAT64)x3i * W6));
1200
0
          x3i = (FLOAT32)(-((FLOAT64)x3r * W6) + (FLOAT64)x3i * W3);
1201
0
          x3r = tmp;
1202
1203
0
          x0r = (*data);
1204
0
          x0i = (*(data + 1));
1205
1206
0
          x0r = x0r + (x2r);
1207
0
          x0i = x0i + (x2i);
1208
0
          x2r = x0r - (x2r * 2);
1209
0
          x2i = x0i - (x2i * 2);
1210
0
          x1r = x1r + x3r;
1211
0
          x1i = x1i - x3i;
1212
0
          x3r = x1r - (x3r * 2);
1213
0
          x3i = x1i + (x3i * 2);
1214
1215
0
          x0r = x0r + (x1r);
1216
0
          x0i = x0i + (x1i);
1217
0
          x1r = x0r - (x1r * 2);
1218
0
          x1i = x0i - (x1i * 2);
1219
0
          x2r = x2r - (x3i);
1220
0
          x2i = x2i + (x3r);
1221
0
          x3i = x2r + (x3i * 2);
1222
0
          x3r = x2i - (x3r * 2);
1223
1224
0
          *data = x0r;
1225
0
          *(data + 1) = x0i;
1226
0
          data += (del << 1);
1227
1228
0
          *data = x2r;
1229
0
          *(data + 1) = x2i;
1230
0
          data += (del << 1);
1231
1232
0
          *data = x1r;
1233
0
          *(data + 1) = x1i;
1234
0
          data += (del << 1);
1235
1236
0
          *data = x3i;
1237
0
          *(data + 1) = x3r;
1238
0
          data += (del << 1);
1239
0
        }
1240
0
        data -= 2 * npoints;
1241
0
        data += 2;
1242
0
      }
1243
0
      nodespacing >>= 2;
1244
0
      del <<= 2;
1245
0
      in_loop_cnt >>= 2;
1246
0
    }
1247
1248
0
    if (not_power_4) {
1249
0
      const FLOAT64 *twiddles = ptr_w;
1250
0
      nodespacing <<= 1;
1251
1252
0
      for (j = del / 2; j != 0; j--) {
1253
0
        FLOAT64 W1 = *twiddles;
1254
0
        FLOAT64 W4 = *(twiddles + 257);
1255
0
        FLOAT32 tmp;
1256
0
        twiddles += nodespacing;
1257
1258
0
        x0r = *ptr_y;
1259
0
        x0i = *(ptr_y + 1);
1260
0
        ptr_y += (del << 1);
1261
1262
0
        x1r = *ptr_y;
1263
0
        x1i = *(ptr_y + 1);
1264
1265
0
        tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1266
0
        x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1267
0
        x1r = tmp;
1268
1269
0
        *ptr_y = (x0r) - (x1r);
1270
0
        *(ptr_y + 1) = (x0i) - (x1i);
1271
0
        ptr_y -= (del << 1);
1272
1273
0
        *ptr_y = (x0r) + (x1r);
1274
0
        *(ptr_y + 1) = (x0i) + (x1i);
1275
0
        ptr_y += 2;
1276
0
      }
1277
0
      twiddles = ptr_w;
1278
0
      for (j = del / 2; j != 0; j--) {
1279
0
        FLOAT64 W1 = *twiddles;
1280
0
        FLOAT64 W4 = *(twiddles + 257);
1281
0
        FLOAT32 tmp;
1282
0
        twiddles += nodespacing;
1283
1284
0
        x0r = *ptr_y;
1285
0
        x0i = *(ptr_y + 1);
1286
0
        ptr_y += (del << 1);
1287
1288
0
        x1r = *ptr_y;
1289
0
        x1i = *(ptr_y + 1);
1290
1291
0
        tmp = (FLOAT32)(((FLOAT64)x1r * W4) - ((FLOAT64)x1i * W1));
1292
0
        x1i = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1293
0
        x1r = tmp;
1294
1295
0
        *ptr_y = (x0r) - (x1r);
1296
0
        *(ptr_y + 1) = (x0i) - (x1i);
1297
0
        ptr_y -= (del << 1);
1298
1299
0
        *ptr_y = (x0r) + (x1r);
1300
0
        *(ptr_y + 1) = (x0i) + (x1i);
1301
0
        ptr_y += 2;
1302
0
      }
1303
0
    }
1304
0
  }
1305
1306
0
  for (i = 0; i < n_pass; i++) {
1307
0
    re[2 * i + 0] = y[2 * i + 0];
1308
0
    re[2 * i + 1] = y[2 * i + 1];
1309
0
  }
1310
0
}
1311
1312
/*
 * In-place complex FFT for lengths that contain a factor of three.
 *
 * The factors of 3 are stripped from n_pass to obtain mpass. The data is then
 * processed in three stages:
 *   1. column-wise transforms of length mpass via ixheaacd_hbe_apply_cfftn(),
 *   2. multiplication by the ixheaac_twid_tbl_fft_ntwt3r/i twiddle tables,
 *   3. mpass calls to ixheaacd_hbe_apply_fft_3(), followed by a de-interleave
 *      of the three outputs of each call back into re[].
 *
 * re       interleaved (real, imag) samples, overwritten with the result
 * scratch  work area; the first 8 * n_pass floats are partitioned here and the
 *          remainder is handed on to ixheaacd_hbe_apply_cfftn()
 * n_pass   number of complex points; values <= 0 are ignored
 * i_sign   negative selects the (wr, +wi) twiddle rotation, otherwise the
 *          conjugate one; also forwarded to the helper transforms
 *
 * NOTE(review): the fixed stride of 6 floats used for the column gather looks
 * like it assumes exactly one factor of 3 (n_pass == 3 * mpass) - confirm
 * against the callers.
 */
VOID ixheaacd_hbe_apply_cfftn_gen(FLOAT32 re[], FLOAT32 *scratch, WORD32 n_pass,
                                  WORD32 i_sign) {
  WORD32 i, j, k;
  WORD32 cnfac = 0;
  WORD32 mpass = n_pass;
  FLOAT32 *ptr_x, *ptr_y, *y, *re3;

  /* With n_pass == 0 the factor-stripping loop below would never terminate
   * (0 % 3 == 0 and 0 / 3 == 0); there is nothing to transform anyway. */
  if (n_pass <= 0) return;

  /* Scratch layout, in floats: x[2n] | y[4n] | re3[2n] | rest for the callee. */
  ptr_x = scratch;
  y = scratch + 2 * n_pass;
  re3 = y + 4 * n_pass;
  scratch = re3 + 2 * n_pass;
  ptr_y = y;

  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }

  /* Stage 1: gather every column (stride of 3 complex points), transform it
   * with the length-mpass FFT and scatter it back. */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      re3[2 * j + 0] = re[6 * j + 2 * i + 0];
      re3[2 * j + 1] = re[6 * j + 2 * i + 1];
    }

    ixheaacd_hbe_apply_cfftn(re3, scratch, mpass, i_sign);

    for (j = 0; j < mpass; j++) {
      re[6 * j + 2 * i + 0] = re3[2 * j + 0];
      re[6 * j + 2 * i + 1] = re3[2 * j + 1];
    }
  }

  /* Stage 2: twiddle multiplication. Each group of three consecutive points
   * uses three consecutive table entries; successive groups are
   * 3 * (128 / mpass) entries apart in the tables. */
  {
    const FLOAT64 *w1r = (const FLOAT64 *)ixheaac_twid_tbl_fft_ntwt3r;
    const FLOAT64 *w1i = (const FLOAT64 *)ixheaac_twid_tbl_fft_ntwt3i;
    const WORD32 tw_step = 3 * (128 / mpass);

    for (i = 0; i < n_pass; i += 3) {
      const WORD32 tw_base = (i / 3) * tw_step;

      for (k = 0; k < 3; k++) {
        FLOAT32 *pt = &re[2 * (i + k)];
        const FLOAT64 xr = (FLOAT64)pt[0];
        const FLOAT64 xi = (FLOAT64)pt[1];
        const FLOAT64 wr = w1r[tw_base + k];
        const FLOAT64 wi = w1i[tw_base + k];

        if (i_sign < 0) {
          pt[0] = (FLOAT32)(xr * wr - xi * wi);
          pt[1] = (FLOAT32)(xr * wi + xi * wr);
        } else {
          pt[0] = (FLOAT32)(xr * wr + xi * wi);
          pt[1] = (FLOAT32)(-xr * wi + xi * wr);
        }
      }
    }
  }

  /* Stage 3: copy into the x buffer and run one radix-3 transform per group
   * of three complex points, writing into y. */
  for (i = 0; i < n_pass; i++) {
    ptr_x[2 * i + 0] = re[2 * i + 0];
    ptr_x[2 * i + 1] = re[2 * i + 1];
  }

  for (i = 0; i < mpass; i++) {
    ixheaacd_hbe_apply_fft_3(ptr_x, ptr_y, i_sign);

    ptr_x += 6;
    ptr_y += 6;
  }

  /* De-interleave: output k of every radix-3 call goes to the k-th block of
   * mpass complex points in re[]. */
  for (k = 0; k < 3; k++) {
    for (i = 0; i < mpass; i++) {
      re[2 * k * mpass + 2 * i + 0] = y[6 * i + 2 * k + 0];
      re[2 * k * mpass + 2 * i + 1] = y[6 * i + 2 * k + 1];
    }
  }
}
1435
1436
0
/*
 * In-place complex FFT built from len/3-point sub-transforms, a twiddle
 * multiplication with ixheaac_twid_tbl_fft_288 and a final pass of
 * ixheaacd_hbe_apply_fft_3().
 *
 * inp      interleaved (real, imag) samples, overwritten with the result
 * scratch  work area; the first 2 * len floats hold the gathered column and
 *          later the twiddled data, the area behind it is used as output of
 *          the radix-3 pass and as scratch for the sub-transforms
 * len      number of complex points
 * i_sign   forwarded unchanged to the sub-transforms
 */
VOID ixheaacd_hbe_apply_fft_288(FLOAT32 *inp, FLOAT32 *scratch, WORD32 len, WORD32 i_sign) {
  const WORD32 num_cols = len / 96;
  const WORD32 sub_len = len / 3;
  const WORD32 row_stride = 2 * num_cols; /* floats between successive rows */
  FLOAT32 *const work = scratch;
  FLOAT32 *const aux = scratch + 2 * len;
  WORD32 col, idx, part;

  /* Transform every column: gather with stride num_cols, run the sub-FFT,
   * scatter back to the same positions. */
  for (col = 0; col < num_cols; col++) {
    FLOAT32 *const column = inp + 2 * col;

    for (idx = 0; idx < sub_len; idx++) {
      work[2 * idx + 0] = column[row_stride * idx + 0];
      work[2 * idx + 1] = column[row_stride * idx + 1];
    }

    /* A non power-of-two length needs the generic (factor 3) transform. */
    if ((sub_len & (sub_len - 1)) != 0) {
      ixheaacd_hbe_apply_cfftn_gen(work, aux, sub_len, i_sign);
    } else {
      ixheaacd_hbe_apply_cfftn(work, aux, sub_len, i_sign);
    }

    for (idx = 0; idx < sub_len; idx++) {
      column[row_stride * idx + 0] = work[2 * idx + 0];
      column[row_stride * idx + 1] = work[2 * idx + 1];
    }
  }

  ixheaacd_hbe_apply_tw_mult_fft(inp, work, sub_len, num_cols, ixheaac_twid_tbl_fft_288);

  /* One radix-3 transform per row of the twiddled data. */
  for (idx = 0; idx < sub_len; idx++) {
    ixheaacd_hbe_apply_fft_3(work + idx * row_stride, aux + idx * row_stride, i_sign);
  }

  /* Output "part" of every radix-3 call lands in the part-th block of
   * sub_len complex points of inp. */
  for (part = 0; part < 3; part++) {
    FLOAT32 *const dst = inp + 2 * part * sub_len;

    for (idx = 0; idx < sub_len; idx++) {
      dst[2 * idx + 0] = aux[6 * idx + 2 * part + 0];
      dst[2 * idx + 1] = aux[6 * idx + 2 * part + 1];
    }
  }
}
1483
1484
0
/*
 * In-place complex transform built from len/7-point sub-transforms, a twiddle
 * multiplication with ixheaac_twid_tbl_fft_224 and a final pass of
 * ixheaacd_hbe_apply_ifft_7(), followed by a transposition back into inp.
 *
 * inp      interleaved (real, imag) samples, overwritten with the result
 * scratch  work area; the first 2 * len floats hold the gathered column and
 *          later the twiddled data, the area behind it receives the output of
 *          the ixheaacd_hbe_apply_ifft_7() pass and serves as scratch for the
 *          sub-transforms
 * len      number of complex points
 * i_sign   forwarded unchanged to the sub-transforms
 */
VOID ixheaacd_hbe_apply_ifft_224(FLOAT32 *inp, FLOAT32 *scratch, WORD32 len, WORD32 i_sign) {
  const WORD32 num_cols = len / 32;
  const WORD32 sub_len = len / 7;
  const WORD32 row_stride = 2 * num_cols; /* floats between successive rows */
  FLOAT32 *const work = scratch;
  FLOAT32 *const aux = scratch + 2 * len;
  WORD32 col, row;

  /* Transform every column: gather with stride num_cols, run the sub-FFT,
   * scatter back to the same positions. */
  for (col = 0; col < num_cols; col++) {
    FLOAT32 *const column = inp + 2 * col;

    for (row = 0; row < sub_len; row++) {
      work[2 * row + 0] = column[row_stride * row + 0];
      work[2 * row + 1] = column[row_stride * row + 1];
    }

    /* A non power-of-two length needs the generic transform. */
    if ((sub_len & (sub_len - 1)) != 0) {
      ixheaacd_hbe_apply_cfftn_gen(work, aux, sub_len, i_sign);
    } else {
      ixheaacd_hbe_apply_cfftn(work, aux, sub_len, i_sign);
    }

    for (row = 0; row < sub_len; row++) {
      column[row_stride * row + 0] = work[2 * row + 0];
      column[row_stride * row + 1] = work[2 * row + 1];
    }
  }

  ixheaacd_hbe_apply_tw_mult_ifft(inp, work, sub_len, num_cols, ixheaac_twid_tbl_fft_224);

  /* One ixheaacd_hbe_apply_ifft_7() call per row of the twiddled data. */
  for (row = 0; row < sub_len; row++) {
    ixheaacd_hbe_apply_ifft_7(work + row * row_stride, aux + row * row_stride);
  }

  /* Transpose: element (row, col) of aux becomes element (col, row) of inp. */
  for (row = 0; row < sub_len; row++) {
    const FLOAT32 *const src = aux + row_stride * row;

    for (col = 0; col < num_cols; col++) {
      inp[sub_len * col * 2 + 2 * row + 0] = src[2 * col + 0];
      inp[sub_len * col * 2 + 2 * row + 1] = src[2 * col + 1];
    }
  }
}
1525
1526
/*
 * In-place complex transform of length 7 * m_points: a pass of
 * ixheaacd_hbe_apply_ifft_7() over every column, a twiddle multiplication,
 * seven m_points-long transforms via ixheaacd_hbe_apply_cfftn_gen() and a
 * final transposition back into inp.
 *
 * inp          interleaved (real, imag) samples, overwritten with the result
 * ptr_scratch  work area; the first 5 * len floats are partitioned here and
 *              the remainder is handed to the helper transforms
 * len          number of complex points; the twiddle table for 336 points is
 *              used when len / 7 == 48, the one for 168 points otherwise
 * i_sign       forwarded unchanged to ixheaacd_hbe_apply_cfftn_gen()
 */
VOID ixheaacd_hbe_apply_ifft_336(FLOAT32 *inp, FLOAT32 *ptr_scratch, WORD32 len,
                                 WORD32 i_sign) {
  WORD32 i, j;
  /* Fixed at 7 because the first stage is always ixheaacd_hbe_apply_ifft_7()
   * (presumably a 7-point transform - confirm). Deriving it as len / 48 gives
   * 7 for len == 336 but truncates to 3 for the len == 168 path selected
   * below, which would leave most of the data untouched. */
  const WORD32 n_points = 7;
  const WORD32 m_points = len / 7;

  /* Scratch layout, in floats: ptr_real at 0, p_scratch at 3 * len, p_real_1
   * at 4 * len, callee scratch from 5 * len. The copy out of p_scratch below
   * reads 2 * len floats and therefore reaches into the p_real_1 area, which
   * is no longer needed at that point. */
  FLOAT32 *ptr_real = ptr_scratch;
  FLOAT32 *p_scratch = ptr_scratch + 3 * len;
  FLOAT32 *p_real_1 = ptr_scratch + 4 * len;
  ptr_scratch += 5 * len;

  /* Stage 1: gather each column (stride m_points), transform, scatter back. */
  for (i = 0; i < m_points; i++) {
    for (j = 0; j < n_points; j++) {
      p_real_1[2 * j + 0] = inp[m_points * 2 * j + 2 * i + 0];
      p_real_1[2 * j + 1] = inp[m_points * 2 * j + 2 * i + 1];
    }

    ixheaacd_hbe_apply_ifft_7(p_real_1, ptr_scratch);

    for (j = 0; j < n_points; j++) {
      inp[m_points * 2 * j + 2 * i + 0] = ptr_scratch[2 * j + 0];
      inp[m_points * 2 * j + 2 * i + 1] = ptr_scratch[2 * j + 1];
    }
  }

  /* Stage 2: twiddle multiplication, table chosen by the sub-transform size. */
  if (m_points == 48) {
    ixheaacd_hbe_apply_tw_mult_ifft(inp, p_scratch, n_points, m_points,
                                    ixheaac_twid_tbl_fft_336);
  } else {
    ixheaacd_hbe_apply_tw_mult_ifft(inp, p_scratch, n_points, m_points,
                                    ixheaac_twid_tbl_fft_168);
  }

  for (i = 0; i < len; i++) {
    ptr_real[2 * i + 0] = p_scratch[2 * i + 0];
    ptr_real[2 * i + 1] = p_scratch[2 * i + 1];
  }

  /* Stage 3: one m_points-long transform per row, in place in ptr_real. */
  for (i = 0; i < n_points; i++) {
    ixheaacd_hbe_apply_cfftn_gen(ptr_real + 2 * m_points * i, ptr_scratch, m_points,
                                 i_sign);
  }

  /* Transpose: element (row j, column i) of ptr_real becomes element
   * (row i, column j) of inp. */
  for (j = 0; j < n_points; j++) {
    for (i = 0; i < m_points; i++) {
      inp[n_points * 2 * i + 2 * j + 0] = ptr_real[2 * m_points * j + 2 * i + 0];
      inp[n_points * 2 * i + 2 * j + 1] = ptr_real[2 * m_points * j + 2 * i + 1];
    }
  }
}
1587