Coverage Report

Created: 2025-08-03 06:57

/src/libxaac/common/ixheaac_esbr_fft.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
#include <stdio.h>
22
#include <stdlib.h>
23
#include "ixheaac_type_def.h"
24
#include "ixheaac_constants.h"
25
#include "ixheaac_basic_ops32.h"
26
27
#define PLATFORM_INLINE __inline
28
29
/*
 * DIG_REV(i, m, j): reverse the order of the 2-bit groups inside each 16-bit
 * half of `i`, shift the result right by `m`, and store it in `j`.
 *
 * The three steps swap, in turn, adjacent 2-bit groups within every nibble,
 * adjacent nibbles within every byte, and adjacent bytes within every 16-bit
 * half-word.
 *
 *   i - input index; evaluated exactly once and converted to unsigned.
 *   m - right-shift count applied after the reversal; must be in [0, 31].
 *   j - lvalue that receives the result.
 *
 * All masks are explicit unsigned constants so that no bitwise complement is
 * taken of a signed literal, and the temporary has a distinctive name so an
 * argument expression cannot accidentally refer to it.
 */
#define DIG_REV(i, m, j)                                              \
  do {                                                                \
    unsigned dig_rev_tmp_ = (unsigned)(i);                            \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x33333333u) << 2) |              \
                   ((dig_rev_tmp_ & 0xCCCCCCCCu) >> 2);               \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x0F0F0F0Fu) << 4) |              \
                   ((dig_rev_tmp_ & 0xF0F0F0F0u) >> 4);               \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x00FF00FFu) << 8) |              \
                   ((dig_rev_tmp_ & 0xFF00FF00u) >> 8);               \
    (j) = dig_rev_tmp_ >> (m);                                        \
  } while (0)
37
38
/* Twiddle-factor tables defined in another translation unit.
 * ixheaac_twiddle_table_fft_float is the table read by the FFT routines in
 * this file; they access it in pairs at offsets k and k + 257.
 * NOTE(review): the two smaller tables are not referenced in the part of the
 * file visible here - presumably used by non-power-of-two transforms; confirm. */
extern const FLOAT32 ixheaac_twiddle_table_fft_float[514];
39
extern const FLOAT32 ixheaac_twidle_tbl_48[64];
40
extern const FLOAT32 ixheaac_twidle_tbl_24[32];
41
42
29.9M
/*
 * Twiddle addressing variants for one column of a radix-4 pass.  Column index
 * j selects three table entries at j, 2*j and 3*j; once 2*j or 3*j would run
 * past the stored part of the table, the entry is fetched from a shifted
 * position instead and the rotation formula changes accordingly.
 */
enum {
  IXHEAAC_RSFFT_TW_DIRECT = 0,       /* j, 2*j and 3*j are all read as-is        */
  IXHEAAC_RSFFT_TW_FOLD_3J = 1,      /* 3*j is read at 3*j - 256                 */
  IXHEAAC_RSFFT_TW_FOLD_2J_3J = 2,   /* 2*j and 3*j are read at (..) - 256       */
  IXHEAAC_RSFFT_TW_FOLD_3J_TWICE = 3 /* 2*j at 2*j - 256, 3*j at 3*j - 512       */
};

/*
 * Runs one twiddled column of a radix-4 pass in place.
 *
 *   data        - first complex value (re, im) of the column
 *   del         - distance, in complex values, between the four butterfly legs
 *   in_loop_cnt - number of butterflies in the column
 *   twiddles    - base of the twiddle table (pairs live at idx and idx + 257)
 *   j           - twiddle index of this column
 *   tw_mode     - one of the IXHEAAC_RSFFT_TW_* addressing variants
 *
 * Returns the pointer one butterfly group (4 * 2 * del floats) past the last
 * group processed, i.e. where the original inline loops left `data`.
 *
 * Every table index is computed as an integer before the table is touched, so
 * no pointer outside the table is ever formed (3*j alone can exceed the table
 * length before the -512 fold is applied).
 */
static FLOAT32 *ixheaac_real_synth_r4_column(FLOAT32 *data, WORD32 del, WORD32 in_loop_cnt,
                                             const FLOAT32 *twiddles, WORD32 j,
                                             WORD32 tw_mode) {
  const SIZE_T stride = (SIZE_T)del << 1;
  const WORD32 j2 = 2 * j;
  const WORD32 j3 = 3 * j;
  const FLOAT32 W1 = twiddles[j];
  const FLOAT32 W4 = twiddles[j + 257];
  FLOAT32 W2, W3, W5, W6;
  WORD32 k;

  if (tw_mode >= IXHEAAC_RSFFT_TW_FOLD_2J_3J) {
    W2 = twiddles[j2 - 256];
    W5 = twiddles[j2 + 1];
  } else {
    W2 = twiddles[j2];
    W5 = twiddles[j2 + 257];
  }

  switch (tw_mode) {
    case IXHEAAC_RSFFT_TW_DIRECT:
      W3 = twiddles[j3];
      W6 = twiddles[j3 + 257];
      break;
    case IXHEAAC_RSFFT_TW_FOLD_3J:
    case IXHEAAC_RSFFT_TW_FOLD_2J_3J:
      W3 = twiddles[j3 - 256];
      W6 = twiddles[j3 + 1];
      break;
    default: /* IXHEAAC_RSFFT_TW_FOLD_3J_TWICE */
      W3 = twiddles[j3 - 512];
      W6 = twiddles[j3 - 512 + 257];
      break;
  }

  for (k = in_loop_cnt; k != 0; k--) {
    FLOAT32 *const p0 = data;
    FLOAT32 *const p1 = p0 + stride;
    FLOAT32 *const p2 = p1 + stride;
    FLOAT32 *const p3 = p2 + stride;
    FLOAT32 x0r, x0i;
    FLOAT32 x1r = p1[0], x1i = p1[1];
    FLOAT32 x2r = p2[0], x2i = p2[1];
    FLOAT32 x3r = p3[0], x3i = p3[1];
    FLOAT32 tmp;

    /* Rotate leg 1 (same formula in every addressing variant). */
    tmp = (x1r * W1) + (x1i * W4);
    x1i = -(x1r * W4) + (x1i * W1);
    x1r = tmp;

    /* Rotate leg 2. */
    if (tw_mode >= IXHEAAC_RSFFT_TW_FOLD_2J_3J) {
      tmp = (x2r * W5) - (x2i * W2);
      x2i = (x2r * W2) + (x2i * W5);
    } else {
      tmp = (x2r * W2) + (x2i * W5);
      x2i = -(x2r * W5) + (x2i * W2);
    }
    x2r = tmp;

    /* Rotate leg 3. */
    switch (tw_mode) {
      case IXHEAAC_RSFFT_TW_DIRECT:
        tmp = (x3r * W3) + (x3i * W6);
        x3i = -(x3r * W6) + (x3i * W3);
        break;
      case IXHEAAC_RSFFT_TW_FOLD_3J:
      case IXHEAAC_RSFFT_TW_FOLD_2J_3J:
        tmp = (x3r * W6) - (x3i * W3);
        x3i = (x3r * W3) + (x3i * W6);
        break;
      default: /* IXHEAAC_RSFFT_TW_FOLD_3J_TWICE */
        tmp = -(x3r * W3) - (x3i * W6);
        x3i = -(x3r * W6) + (x3i * W3);
        break;
    }
    x3r = tmp;

    x0r = p0[0];
    x0i = p0[1];

    /* Radix-4 butterfly using the "a+b, then (a+b)-2b" sum/difference form. */
    x0r = x0r + x2r;
    x0i = x0i + x2i;
    x2r = x0r - (x2r * 2);
    x2i = x0i - (x2i * 2);
    x1r = x1r + x3r;
    if (tw_mode == IXHEAAC_RSFFT_TW_FOLD_3J_TWICE) {
      /* In this variant the imaginary part of leg 3 enters with opposite sign. */
      x1i = x1i - x3i;
      x3r = x1r - (x3r * 2);
      x3i = x1i + (x3i * 2);
    } else {
      x1i = x1i + x3i;
      x3r = x1r - (x3r * 2);
      x3i = x1i - (x3i * 2);
    }

    x0r = x0r + x1r;
    x0i = x0i + x1i;
    x1r = x0r - (x1r * 2);
    x1i = x0i - (x1i * 2);
    x2r = x2r - x3i;
    x2i = x2i + x3r;
    x3i = x2r + (x3i * 2);
    x3r = x2i - (x3r * 2);

    /* Outputs are stored in the order 0, 2, 1, 3; after the last step x3i
     * holds the real part and x3r the imaginary part of the fourth output. */
    p0[0] = x0r;
    p0[1] = x0i;
    p1[0] = x2r;
    p1[1] = x2i;
    p2[0] = x1r;
    p2[1] = x1i;
    p3[0] = x3i;
    p3[1] = x3r;

    data = p3 + stride;
  }

  return data;
}

/*
 * Mixed radix-4 / radix-2 FFT of a real-valued input.
 *
 *   ptr_x   - input; one FLOAT32 is read per point (npoints values in total)
 *   ptr_y   - output/work buffer; 2 * npoints FLOAT32 values are written as
 *             interleaved (re, im) pairs and then transformed in place
 *   npoints - transform length
 *             NOTE(review): presumably a power of two (>= 4) small enough for
 *             the 514-entry twiddle table - confirm against callers.
 *
 * Structure:
 *   1. A first radix-4 pass reads the input in digit-reversed order and
 *      writes complex output with zero imaginary parts where applicable.
 *   2. (n_stages - 1) further radix-4 passes run in place; column 0 needs no
 *      twiddle, the remaining columns are split into four index ranges that
 *      differ only in how the twiddle table is addressed.
 *   3. If the stage count derived from npoints is odd, a final radix-2 pass
 *      is applied.
 */
void ixheaac_real_synth_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) {
  WORD32 i, j, k, n_stages, h2;
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
  WORD32 del, nodespacing, in_loop_cnt;
  WORD32 not_power_4;
  WORD32 dig_rev_shift;
  const FLOAT32 *ptr_w;

  /* NOTE(review): 30 - ixheaac_norm32(npoints) is presumably log2(npoints);
   * ixheaac_norm32 is defined elsewhere - confirm. */
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
  n_stages = 30 - ixheaac_norm32(npoints);
  not_power_4 = n_stages & 1;

  n_stages = n_stages >> 1;

  ptr_w = ixheaac_twiddle_table_fft_float;

  /* Pass 1: radix-4 butterflies on real input fetched in digit-reversed order. */
  for (i = 0; i < npoints; i += 4) {
    FLOAT32 *inp = ptr_x;

    DIG_REV(i, dig_rev_shift, h2);
    if (not_power_4) {
      /* Round the reversed index up to the next even value. */
      h2 += 1;
      h2 &= ~1;
    }
    inp += (h2 >> 1);

    x0r = *inp;
    inp += (npoints >> 2);

    x1r = *inp;
    inp += (npoints >> 2);

    x2r = *inp;
    inp += (npoints >> 2);

    x3r = *inp;

    x0r = x0r + x2r;
    x2r = x0r - (x2r * 2);
    x1r = x1r + x3r;
    x3r = x1r - (x3r * 2);
    x0r = x0r + x1r;
    x1r = x0r - (x1r * 2);

    /* Four complex outputs: (x0r, 0), (x2r, x3r), (x1r, 0), (x2r, -x3r). */
    *ptr_y++ = x0r;
    *ptr_y++ = 0;
    *ptr_y++ = x2r;
    *ptr_y++ = x3r;
    *ptr_y++ = x1r;
    *ptr_y++ = 0;
    *ptr_y++ = x2r;
    *ptr_y++ = -x3r;
  }
  ptr_y -= 2 * npoints;
  del = 4;
  nodespacing = 64;
  in_loop_cnt = npoints >> 4;

  /* Remaining radix-4 passes, performed in place on ptr_y. */
  for (i = n_stages - 1; i > 0; i--) {
    const SIZE_T stride = (SIZE_T)del << 1;
    FLOAT32 *data = ptr_y;
    WORD32 sec_loop_cnt;

    /* Column 0: plain butterflies, no twiddle rotation. */
    for (k = in_loop_cnt; k != 0; k--) {
      FLOAT32 *const p0 = data;
      FLOAT32 *const p1 = p0 + stride;
      FLOAT32 *const p2 = p1 + stride;
      FLOAT32 *const p3 = p2 + stride;

      x0r = p0[0];
      x0i = p0[1];
      x1r = p1[0];
      x1i = p1[1];
      x2r = p2[0];
      x2i = p2[1];
      x3r = p3[0];
      x3i = p3[1];

      x0r = x0r + x2r;
      x0i = x0i + x2i;
      x2r = x0r - (x2r * 2);
      x2i = x0i - (x2i * 2);
      x1r = x1r + x3r;
      x1i = x1i + x3i;
      x3r = x1r - (x3r * 2);
      x3i = x1i - (x3i * 2);

      x0r = x0r + x1r;
      x0i = x0i + x1i;
      x1r = x0r - (x1r * 2);
      x1i = x0i - (x1i * 2);
      x2r = x2r - x3i;
      x2i = x2i + x3r;
      x3i = x2r + (x3i * 2);
      x3r = x2i - (x3r * 2);

      p0[0] = x0r;
      p0[1] = x0i;
      p1[0] = x2r;
      p1[1] = x2i;
      p2[0] = x1r;
      p2[1] = x1i;
      p3[0] = x3i;
      p3[1] = x3r;

      data = p3 + stride;
    }
    data = ptr_y + 2;

    /* Integer approximation of one third of (nodespacing * del): the
     * alternating series below sums to 85/256 of its argument. */
    sec_loop_cnt = (nodespacing * del);
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
                   (sec_loop_cnt / 256);

    /* Columns 1..: four consecutive index ranges sharing the running j.  After
     * each column the pointer is rewound by 2 * npoints floats and advanced by
     * one complex value, exactly as the column loop left it. */
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
      data = ixheaac_real_synth_r4_column(data, del, in_loop_cnt, ptr_w, j,
                                          IXHEAAC_RSFFT_TW_DIRECT);
      data -= 2 * npoints;
      data += 2;
    }
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
      data = ixheaac_real_synth_r4_column(data, del, in_loop_cnt, ptr_w, j,
                                          IXHEAAC_RSFFT_TW_FOLD_3J);
      data -= 2 * npoints;
      data += 2;
    }
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
      data = ixheaac_real_synth_r4_column(data, del, in_loop_cnt, ptr_w, j,
                                          IXHEAAC_RSFFT_TW_FOLD_2J_3J);
      data -= 2 * npoints;
      data += 2;
    }
    for (; j < nodespacing * del; j += nodespacing) {
      data = ixheaac_real_synth_r4_column(data, del, in_loop_cnt, ptr_w, j,
                                          IXHEAAC_RSFFT_TW_FOLD_3J_TWICE);
      data -= 2 * npoints;
      data += 2;
    }

    nodespacing >>= 2;
    del <<= 2;
    in_loop_cnt >>= 2;
  }

  /* Final radix-2 pass, needed when the stage count derived from npoints is
   * odd.  It is done as two runs of del / 2 butterflies; both runs walk the
   * twiddle table from its start but use different rotation formulas. */
  if (not_power_4) {
    const SIZE_T stride = (SIZE_T)del << 1;
    const FLOAT32 *twiddles = ptr_w;
    nodespacing <<= 1;

    for (j = del / 2; j != 0; j--) {
      FLOAT32 *const lo = ptr_y;
      FLOAT32 *const hi = ptr_y + stride;
      const FLOAT32 W1 = twiddles[0];
      const FLOAT32 W4 = twiddles[257];
      FLOAT32 tmp;

      x0r = lo[0];
      x0i = lo[1];
      x1r = hi[0];
      x1i = hi[1];

      tmp = (x1r * W1) + (x1i * W4);
      x1i = -(x1r * W4) + (x1i * W1);
      x1r = tmp;

      hi[0] = x0r - x1r;
      hi[1] = x0i - x1i;
      lo[0] = x0r + x1r;
      lo[1] = x0i + x1i;

      twiddles += nodespacing;
      ptr_y += 2;
    }

    twiddles = ptr_w;
    for (j = del / 2; j != 0; j--) {
      FLOAT32 *const lo = ptr_y;
      FLOAT32 *const hi = ptr_y + stride;
      const FLOAT32 W1 = twiddles[0];
      const FLOAT32 W4 = twiddles[257];
      FLOAT32 tmp;

      x0r = lo[0];
      x0i = lo[1];
      x1r = hi[0];
      x1i = hi[1];

      tmp = (x1r * W4) - (x1i * W1);
      x1i = (x1r * W1) + (x1i * W4);
      x1r = tmp;

      hi[0] = x0r - x1r;
      hi[1] = x0i - x1i;
      lo[0] = x0r + x1r;
      lo[1] = x0i + x1i;

      twiddles += nodespacing;
      ptr_y += 2;
    }
  }
}
536
537
13.6M
void ixheaac_cmplx_anal_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) {
538
13.6M
  WORD32 i, j, k, n_stages, h2;
539
13.6M
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
540
13.6M
  WORD32 del, nodespacing, in_loop_cnt;
541
13.6M
  WORD32 not_power_4;
542
13.6M
  WORD32 dig_rev_shift;
543
13.6M
  const FLOAT32 *ptr_w;
544
545
13.6M
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
546
13.6M
  n_stages = 30 - ixheaac_norm32(npoints);
547
13.6M
  not_power_4 = n_stages & 1;
548
549
13.6M
  n_stages = n_stages >> 1;
550
551
13.6M
  ptr_w = ixheaac_twiddle_table_fft_float;
552
553
94.1M
  for (i = 0; i < npoints; i += 4) {
554
80.4M
    FLOAT32 *inp = ptr_x;
555
556
80.4M
    DIG_REV(i, dig_rev_shift, h2);
557
80.4M
    if (not_power_4) {
558
24.8M
      h2 += 1;
559
24.8M
      h2 &= ~1;
560
24.8M
    }
561
80.4M
    inp += (h2);
562
563
80.4M
    x0r = *inp;
564
80.4M
    x0i = *(inp + 1);
565
80.4M
    inp += (npoints >> 1);
566
567
80.4M
    x1r = *inp;
568
80.4M
    x1i = *(inp + 1);
569
80.4M
    inp += (npoints >> 1);
570
571
80.4M
    x2r = *inp;
572
80.4M
    x2i = *(inp + 1);
573
80.4M
    inp += (npoints >> 1);
574
575
80.4M
    x3r = *inp;
576
80.4M
    x3i = *(inp + 1);
577
578
80.4M
    x0r = x0r + x2r;
579
80.4M
    x0i = x0i + x2i;
580
80.4M
    x2r = x0r - (x2r * 2);
581
80.4M
    x2i = x0i - (x2i * 2);
582
80.4M
    x1r = x1r + x3r;
583
80.4M
    x1i = x1i + x3i;
584
80.4M
    x3r = x1r - (x3r * 2);
585
80.4M
    x3i = x1i - (x3i * 2);
586
587
80.4M
    x0r = x0r + x1r;
588
80.4M
    x0i = x0i + x1i;
589
80.4M
    x1r = x0r - (x1r * 2);
590
80.4M
    x1i = x0i - (x1i * 2);
591
80.4M
    x2r = x2r - x3i;
592
80.4M
    x2i = x2i + x3r;
593
80.4M
    x3i = x2r + (x3i * 2);
594
80.4M
    x3r = x2i - (x3r * 2);
595
596
80.4M
    *ptr_y++ = x0r;
597
80.4M
    *ptr_y++ = x0i;
598
80.4M
    *ptr_y++ = x2r;
599
80.4M
    *ptr_y++ = x2i;
600
80.4M
    *ptr_y++ = x1r;
601
80.4M
    *ptr_y++ = x1i;
602
80.4M
    *ptr_y++ = x3i;
603
80.4M
    *ptr_y++ = x3r;
604
80.4M
  }
605
13.6M
  ptr_y -= 2 * npoints;
606
13.6M
  del = 4;
607
13.6M
  nodespacing = 64;
608
13.6M
  in_loop_cnt = npoints >> 4;
609
28.4M
  for (i = n_stages - 1; i > 0; i--) {
610
14.7M
    const FLOAT32 *twiddles = ptr_w;
611
14.7M
    FLOAT32 *data = ptr_y;
612
14.7M
    FLOAT32 W1, W2, W3, W4, W5, W6;
613
14.7M
    WORD32 sec_loop_cnt;
614
615
36.0M
    for (k = in_loop_cnt; k != 0; k--) {
616
21.2M
      x0r = (*data);
617
21.2M
      x0i = (*(data + 1));
618
21.2M
      data += ((SIZE_T)del << 1);
619
620
21.2M
      x1r = (*data);
621
21.2M
      x1i = (*(data + 1));
622
21.2M
      data += ((SIZE_T)del << 1);
623
624
21.2M
      x2r = (*data);
625
21.2M
      x2i = (*(data + 1));
626
21.2M
      data += ((SIZE_T)del << 1);
627
628
21.2M
      x3r = (*data);
629
21.2M
      x3i = (*(data + 1));
630
21.2M
      data -= 3 * ((SIZE_T)del << 1);
631
632
21.2M
      x0r = x0r + x2r;
633
21.2M
      x0i = x0i + x2i;
634
21.2M
      x2r = x0r - (x2r * 2);
635
21.2M
      x2i = x0i - (x2i * 2);
636
21.2M
      x1r = x1r + x3r;
637
21.2M
      x1i = x1i + x3i;
638
21.2M
      x3r = x1r - (x3r * 2);
639
21.2M
      x3i = x1i - (x3i * 2);
640
641
21.2M
      x0r = x0r + x1r;
642
21.2M
      x0i = x0i + x1i;
643
21.2M
      x1r = x0r - (x1r * 2);
644
21.2M
      x1i = x0i - (x1i * 2);
645
21.2M
      x2r = x2r - x3i;
646
21.2M
      x2i = x2i + x3r;
647
21.2M
      x3i = x2r + (x3i * 2);
648
21.2M
      x3r = x2i - (x3r * 2);
649
650
21.2M
      *data = x0r;
651
21.2M
      *(data + 1) = x0i;
652
21.2M
      data += ((SIZE_T)del << 1);
653
654
21.2M
      *data = x2r;
655
21.2M
      *(data + 1) = x2i;
656
21.2M
      data += ((SIZE_T)del << 1);
657
658
21.2M
      *data = x1r;
659
21.2M
      *(data + 1) = x1i;
660
21.2M
      data += ((SIZE_T)del << 1);
661
662
21.2M
      *data = x3i;
663
21.2M
      *(data + 1) = x3r;
664
21.2M
      data += ((SIZE_T)del << 1);
665
21.2M
    }
666
14.7M
    data = ptr_y + 2;
667
668
14.7M
    sec_loop_cnt = (nodespacing * del);
669
14.7M
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
670
14.7M
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
671
14.7M
                   (sec_loop_cnt / 256);
672
673
34.0M
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
674
19.2M
      W1 = *(twiddles + j);
675
19.2M
      W4 = *(twiddles + j + 257);
676
19.2M
      W2 = *(twiddles + ((SIZE_T)j << 1));
677
19.2M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
678
19.2M
      W3 = *(twiddles + j + ((SIZE_T)j << 1));
679
19.2M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
680
681
44.9M
      for (k = in_loop_cnt; k != 0; k--) {
682
25.6M
        FLOAT32 tmp;
683
25.6M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
684
685
25.6M
        data += ((SIZE_T)del << 1);
686
687
25.6M
        x1r = *data;
688
25.6M
        x1i = *(data + 1);
689
25.6M
        data += ((SIZE_T)del << 1);
690
691
25.6M
        x2r = *data;
692
25.6M
        x2i = *(data + 1);
693
25.6M
        data += ((SIZE_T)del << 1);
694
695
25.6M
        x3r = *data;
696
25.6M
        x3i = *(data + 1);
697
25.6M
        data -= 3 * ((SIZE_T)del << 1);
698
699
25.6M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
700
25.6M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
701
25.6M
        x1r = tmp;
702
703
25.6M
        tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
704
25.6M
        x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
705
25.6M
        x2r = tmp;
706
707
25.6M
        tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
708
25.6M
        x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
709
25.6M
        x3r = tmp;
710
711
25.6M
        x0r = (*data);
712
25.6M
        x0i = (*(data + 1));
713
714
25.6M
        x0r = x0r + (x2r);
715
25.6M
        x0i = x0i + (x2i);
716
25.6M
        x2r = x0r - (x2r * 2);
717
25.6M
        x2i = x0i - (x2i * 2);
718
25.6M
        x1r = x1r + x3r;
719
25.6M
        x1i = x1i + x3i;
720
25.6M
        x3r = x1r - (x3r * 2);
721
25.6M
        x3i = x1i - (x3i * 2);
722
723
25.6M
        x0r = x0r + (x1r);
724
25.6M
        x0i = x0i + (x1i);
725
25.6M
        x1r = x0r - (x1r * 2);
726
25.6M
        x1i = x0i - (x1i * 2);
727
25.6M
        x2r = x2r - (x3i);
728
25.6M
        x2i = x2i + (x3r);
729
25.6M
        x3i = x2r + (x3i * 2);
730
25.6M
        x3r = x2i - (x3r * 2);
731
732
25.6M
        *data = x0r;
733
25.6M
        *(data + 1) = x0i;
734
25.6M
        data += ((SIZE_T)del << 1);
735
736
25.6M
        *data = x2r;
737
25.6M
        *(data + 1) = x2i;
738
25.6M
        data += ((SIZE_T)del << 1);
739
740
25.6M
        *data = x1r;
741
25.6M
        *(data + 1) = x1i;
742
25.6M
        data += ((SIZE_T)del << 1);
743
744
25.6M
        *data = x3i;
745
25.6M
        *(data + 1) = x3r;
746
25.6M
        data += ((SIZE_T)del << 1);
747
25.6M
      }
748
19.2M
      data -= 2 * npoints;
749
19.2M
      data += 2;
750
19.2M
    }
751
31.8M
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
752
17.0M
      W1 = *(twiddles + j);
753
17.0M
      W4 = *(twiddles + j + 257);
754
17.0M
      W2 = *(twiddles + ((SIZE_T)j << 1));
755
17.0M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
756
17.0M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
757
17.0M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
758
759
40.4M
      for (k = in_loop_cnt; k != 0; k--) {
760
23.4M
        FLOAT32 tmp;
761
23.4M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
762
763
23.4M
        data += ((SIZE_T)del << 1);
764
765
23.4M
        x1r = *data;
766
23.4M
        x1i = *(data + 1);
767
23.4M
        data += ((SIZE_T)del << 1);
768
769
23.4M
        x2r = *data;
770
23.4M
        x2i = *(data + 1);
771
23.4M
        data += ((SIZE_T)del << 1);
772
773
23.4M
        x3r = *data;
774
23.4M
        x3i = *(data + 1);
775
23.4M
        data -= 3 * ((SIZE_T)del << 1);
776
777
23.4M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
778
23.4M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
779
23.4M
        x1r = tmp;
780
781
23.4M
        tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
782
23.4M
        x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
783
23.4M
        x2r = tmp;
784
785
23.4M
        tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
786
23.4M
        x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
787
23.4M
        x3r = tmp;
788
789
23.4M
        x0r = (*data);
790
23.4M
        x0i = (*(data + 1));
791
792
23.4M
        x0r = x0r + (x2r);
793
23.4M
        x0i = x0i + (x2i);
794
23.4M
        x2r = x0r - (x2r * 2);
795
23.4M
        x2i = x0i - (x2i * 2);
796
23.4M
        x1r = x1r + x3r;
797
23.4M
        x1i = x1i + x3i;
798
23.4M
        x3r = x1r - (x3r * 2);
799
23.4M
        x3i = x1i - (x3i * 2);
800
801
23.4M
        x0r = x0r + (x1r);
802
23.4M
        x0i = x0i + (x1i);
803
23.4M
        x1r = x0r - (x1r * 2);
804
23.4M
        x1i = x0i - (x1i * 2);
805
23.4M
        x2r = x2r - (x3i);
806
23.4M
        x2i = x2i + (x3r);
807
23.4M
        x3i = x2r + (x3i * 2);
808
23.4M
        x3r = x2i - (x3r * 2);
809
810
23.4M
        *data = x0r;
811
23.4M
        *(data + 1) = x0i;
812
23.4M
        data += ((SIZE_T)del << 1);
813
814
23.4M
        *data = x2r;
815
23.4M
        *(data + 1) = x2i;
816
23.4M
        data += ((SIZE_T)del << 1);
817
818
23.4M
        *data = x1r;
819
23.4M
        *(data + 1) = x1i;
820
23.4M
        data += ((SIZE_T)del << 1);
821
822
23.4M
        *data = x3i;
823
23.4M
        *(data + 1) = x3r;
824
23.4M
        data += ((SIZE_T)del << 1);
825
23.4M
      }
826
17.0M
      data -= 2 * npoints;
827
17.0M
      data += 2;
828
17.0M
    }
829
17.0M
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
830
2.22M
      W1 = *(twiddles + j);
831
2.22M
      W4 = *(twiddles + j + 257);
832
2.22M
      W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
833
2.22M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
834
2.22M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
835
2.22M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
836
837
4.44M
      for (k = in_loop_cnt; k != 0; k--) {
838
2.22M
        FLOAT32 tmp;
839
2.22M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
840
841
2.22M
        data += ((SIZE_T)del << 1);
842
843
2.22M
        x1r = *data;
844
2.22M
        x1i = *(data + 1);
845
2.22M
        data += ((SIZE_T)del << 1);
846
847
2.22M
        x2r = *data;
848
2.22M
        x2i = *(data + 1);
849
2.22M
        data += ((SIZE_T)del << 1);
850
851
2.22M
        x3r = *data;
852
2.22M
        x3i = *(data + 1);
853
2.22M
        data -= 3 * ((SIZE_T)del << 1);
854
855
2.22M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
856
2.22M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
857
2.22M
        x1r = tmp;
858
859
2.22M
        tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
860
2.22M
        x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
861
2.22M
        x2r = tmp;
862
863
2.22M
        tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
864
2.22M
        x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
865
2.22M
        x3r = tmp;
866
867
2.22M
        x0r = (*data);
868
2.22M
        x0i = (*(data + 1));
869
870
2.22M
        x0r = x0r + (x2r);
871
2.22M
        x0i = x0i + (x2i);
872
2.22M
        x2r = x0r - (x2r * 2);
873
2.22M
        x2i = x0i - (x2i * 2);
874
2.22M
        x1r = x1r + x3r;
875
2.22M
        x1i = x1i + x3i;
876
2.22M
        x3r = x1r - (x3r * 2);
877
2.22M
        x3i = x1i - (x3i * 2);
878
879
2.22M
        x0r = x0r + (x1r);
880
2.22M
        x0i = x0i + (x1i);
881
2.22M
        x1r = x0r - (x1r * 2);
882
2.22M
        x1i = x0i - (x1i * 2);
883
2.22M
        x2r = x2r - (x3i);
884
2.22M
        x2i = x2i + (x3r);
885
2.22M
        x3i = x2r + (x3i * 2);
886
2.22M
        x3r = x2i - (x3r * 2);
887
888
2.22M
        *data = x0r;
889
2.22M
        *(data + 1) = x0i;
890
2.22M
        data += ((SIZE_T)del << 1);
891
892
2.22M
        *data = x2r;
893
2.22M
        *(data + 1) = x2i;
894
2.22M
        data += ((SIZE_T)del << 1);
895
896
2.22M
        *data = x1r;
897
2.22M
        *(data + 1) = x1i;
898
2.22M
        data += ((SIZE_T)del << 1);
899
900
2.22M
        *data = x3i;
901
2.22M
        *(data + 1) = x3r;
902
2.22M
        data += ((SIZE_T)del << 1);
903
2.22M
      }
904
2.22M
      data -= 2 * npoints;
905
2.22M
      data += 2;
906
2.22M
    }
907
34.0M
    for (; j < nodespacing * del; j += nodespacing) {
908
19.2M
      W1 = *(twiddles + j);
909
19.2M
      W4 = *(twiddles + j + 257);
910
19.2M
      W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
911
19.2M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
912
19.2M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
913
19.2M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
914
915
44.9M
      for (k = in_loop_cnt; k != 0; k--) {
916
25.6M
        FLOAT32 tmp;
917
25.6M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
918
919
25.6M
        data += ((SIZE_T)del << 1);
920
921
25.6M
        x1r = *data;
922
25.6M
        x1i = *(data + 1);
923
25.6M
        data += ((SIZE_T)del << 1);
924
925
25.6M
        x2r = *data;
926
25.6M
        x2i = *(data + 1);
927
25.6M
        data += ((SIZE_T)del << 1);
928
929
25.6M
        x3r = *data;
930
25.6M
        x3i = *(data + 1);
931
25.6M
        data -= 3 * ((SIZE_T)del << 1);
932
933
25.6M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
934
25.6M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
935
25.6M
        x1r = tmp;
936
937
25.6M
        tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
938
25.6M
        x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
939
25.6M
        x2r = tmp;
940
941
25.6M
        tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6));
942
25.6M
        x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
943
25.6M
        x3r = tmp;
944
945
25.6M
        x0r = (*data);
946
25.6M
        x0i = (*(data + 1));
947
948
25.6M
        x0r = x0r + (x2r);
949
25.6M
        x0i = x0i + (x2i);
950
25.6M
        x2r = x0r - (x2r * 2);
951
25.6M
        x2i = x0i - (x2i * 2);
952
25.6M
        x1r = x1r + x3r;
953
25.6M
        x1i = x1i - x3i;
954
25.6M
        x3r = x1r - (x3r * 2);
955
25.6M
        x3i = x1i + (x3i * 2);
956
957
25.6M
        x0r = x0r + (x1r);
958
25.6M
        x0i = x0i + (x1i);
959
25.6M
        x1r = x0r - (x1r * 2);
960
25.6M
        x1i = x0i - (x1i * 2);
961
25.6M
        x2r = x2r - (x3i);
962
25.6M
        x2i = x2i + (x3r);
963
25.6M
        x3i = x2r + (x3i * 2);
964
25.6M
        x3r = x2i - (x3r * 2);
965
966
25.6M
        *data = x0r;
967
25.6M
        *(data + 1) = x0i;
968
25.6M
        data += ((SIZE_T)del << 1);
969
970
25.6M
        *data = x2r;
971
25.6M
        *(data + 1) = x2i;
972
25.6M
        data += ((SIZE_T)del << 1);
973
974
25.6M
        *data = x1r;
975
25.6M
        *(data + 1) = x1i;
976
25.6M
        data += ((SIZE_T)del << 1);
977
978
25.6M
        *data = x3i;
979
25.6M
        *(data + 1) = x3r;
980
25.6M
        data += ((SIZE_T)del << 1);
981
25.6M
      }
982
19.2M
      data -= 2 * npoints;
983
19.2M
      data += 2;
984
19.2M
    }
985
14.7M
    nodespacing >>= 2;
986
14.7M
    del <<= 2;
987
14.7M
    in_loop_cnt >>= 2;
988
14.7M
  }
989
990
13.6M
  if (not_power_4) {
991
3.10M
    const FLOAT32 *twiddles = ptr_w;
992
3.10M
    nodespacing <<= 1;
993
994
27.9M
    for (j = del / 2; j != 0; j--) {
995
24.8M
      FLOAT32 W1 = *twiddles;
996
24.8M
      FLOAT32 W4 = *(twiddles + 257);
997
24.8M
      FLOAT32 tmp;
998
24.8M
      twiddles += nodespacing;
999
1000
24.8M
      x0r = *ptr_y;
1001
24.8M
      x0i = *(ptr_y + 1);
1002
24.8M
      ptr_y += ((SIZE_T)del << 1);
1003
1004
24.8M
      x1r = *ptr_y;
1005
24.8M
      x1i = *(ptr_y + 1);
1006
1007
24.8M
      tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
1008
24.8M
      x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
1009
24.8M
      x1r = tmp;
1010
1011
24.8M
      *ptr_y = (x0r) - (x1r);
1012
24.8M
      *(ptr_y + 1) = (x0i) - (x1i);
1013
24.8M
      ptr_y -= ((SIZE_T)del << 1);
1014
1015
24.8M
      *ptr_y = (x0r) + (x1r);
1016
24.8M
      *(ptr_y + 1) = (x0i) + (x1i);
1017
24.8M
      ptr_y += 2;
1018
24.8M
    }
1019
3.10M
    twiddles = ptr_w;
1020
27.9M
    for (j = del / 2; j != 0; j--) {
1021
24.8M
      FLOAT32 W1 = *twiddles;
1022
24.8M
      FLOAT32 W4 = *(twiddles + 257);
1023
24.8M
      FLOAT32 tmp;
1024
24.8M
      twiddles += nodespacing;
1025
1026
24.8M
      x0r = *ptr_y;
1027
24.8M
      x0i = *(ptr_y + 1);
1028
24.8M
      ptr_y += ((SIZE_T)del << 1);
1029
1030
24.8M
      x1r = *ptr_y;
1031
24.8M
      x1i = *(ptr_y + 1);
1032
1033
24.8M
      tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1));
1034
24.8M
      x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
1035
24.8M
      x1r = tmp;
1036
1037
24.8M
      *ptr_y = (x0r) - (x1r);
1038
24.8M
      *(ptr_y + 1) = (x0i) - (x1i);
1039
24.8M
      ptr_y -= ((SIZE_T)del << 1);
1040
1041
24.8M
      *ptr_y = (x0r) + (x1r);
1042
24.8M
      *(ptr_y + 1) = (x0i) + (x1i);
1043
24.8M
      ptr_y += 2;
1044
24.8M
    }
1045
3.10M
  }
1046
13.6M
}
1047
1048
106M
/*
 * Three-point complex butterfly.
 *
 * inp : six floats read as three interleaved (re, im) pairs
 *       x0 = (inp[0], inp[1]), x1 = (inp[2], inp[3]), x2 = (inp[4], inp[5]).
 * op  : six floats written as three interleaved (re, im) pairs:
 *         op[0..1] = x0 + x1 + x2
 *         op[2..3] = x0 - (x1 + x2) / 2, with re += c * (x1.im - x2.im)
 *                                        and im -= c * (x1.re - x2.re)
 *         op[4..5] = same as op[2..3] with the sign of the c terms flipped
 *       where c = -0.866025403784439f (i.e. -sqrt(3) / 2).
 */
static PLATFORM_INLINE void ixheaac_aac_ld_dec_fft_3_float(FLOAT32 *inp, FLOAT32 *op) {
  const FLOAT32 neg_half_sqrt3 = -0.866025403784439f;

  /* Partial sums x0 + x1; x2 is added when the first output is stored. */
  const FLOAT32 sum01_re = inp[0] + inp[2];
  const FLOAT32 sum01_im = inp[1] + inp[3];

  /* Half of (x1 + x2) and the scaled difference (x1 - x2). */
  const FLOAT32 half_sum_re = (inp[2] + inp[4]) / 2.0f;
  const FLOAT32 half_sum_im = (inp[3] + inp[5]) / 2.0f;
  const FLOAT32 scaled_diff_im = (inp[3] - inp[5]) * neg_half_sqrt3;
  const FLOAT32 scaled_diff_re = (inp[2] - inp[4]) * neg_half_sqrt3;

  /* Real part shared by the second and third outputs. */
  const FLOAT32 centre_re = inp[0] - half_sum_re;

  op[0] = sum01_re + inp[4];
  op[1] = sum01_im + inp[5];

  op[2] = centre_re + scaled_diff_im;
  op[3] = (inp[1] - scaled_diff_re) - half_sum_im;

  op[4] = centre_re - scaled_diff_im;
  op[5] = (inp[1] + scaled_diff_re) - half_sum_im;
}
1083
1084
7.04M
/*
 * Real-input synthesis transform assembled from three 8-point sub-transforms
 * (ixheaac_real_synth_fft_p2) followed by a twiddle stage and 3-point
 * butterflies (ixheaac_aac_ld_dec_fft_3_float).
 *
 * x_in    : input samples; element 3 * k + phase is read for phase in [0, 3)
 *           and k in [0, npoints / 3). Not modified.
 * x_out   : receives 48 floats, written as three 16-float sections starting
 *           at offsets 0, 16 and 32.
 * npoints : transform length. The scratch buffers and the fixed sub-transform
 *           size of 8 are dimensioned for npoints / 3 == 8 (npoints == 24);
 *           a larger value would index past the local arrays.
 */
void ixheaac_real_synth_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) {
  const WORD32 num_groups = npoints / 3;
  const FLOAT32 *tw = ixheaac_twidle_tbl_24;
  FLOAT32 sub_in[8];
  FLOAT32 sub_out[16];
  FLOAT32 stage_in[48];
  FLOAT32 stage_out[48];
  WORD32 phase, grp, k;

  /* Transform each of the three decimated input sequences and interleave
   * the complex results so that every group of six floats holds one output
   * bin from each sub-transform. */
  for (phase = 0; phase < 3; phase++) {
    for (k = 0; k < num_groups; k++) {
      sub_in[k] = x_in[3 * k + phase];
    }

    ixheaac_real_synth_fft_p2(sub_in, sub_out, 8);

    for (k = 0; k < 8; k++) {
      stage_in[6 * k + 2 * phase] = sub_out[2 * k];
      stage_in[6 * k + 2 * phase + 1] = sub_out[2 * k + 1];
    }
  }

  /* Rotate the second and third complex value of every group by consecutive
   * (re, im) entries of the twiddle table; the first value is left as is. */
  for (grp = 0; grp < num_groups; grp++) {
    FLOAT32 *cplx = &stage_in[6 * grp + 2];

    for (k = 0; k < 2; k++) {
      const FLOAT32 re = cplx[0];
      const FLOAT32 im = cplx[1];
      const FLOAT32 w_re = tw[0];
      const FLOAT32 w_im = tw[1];

      cplx[0] = (re * w_re + im * w_im);
      cplx[1] = (-re * w_im + im * w_re);

      cplx += 2;
      tw += 2;
    }
  }

  /* One 3-point butterfly per group. */
  for (grp = 0; grp < num_groups; grp++) {
    ixheaac_aac_ld_dec_fft_3_float(&stage_in[6 * grp], &stage_out[6 * grp]);
  }

  /* De-interleave: the three complex outputs of group g go to complex slot g
   * of the first, second and third 16-float section of x_out. */
  for (grp = 0; grp < 8; grp++) {
    const FLOAT32 *src = &stage_out[6 * grp];
    FLOAT32 *dst = &x_out[2 * grp];

    dst[0] = src[0];
    dst[1] = src[1];
    dst[16] = src[2];
    dst[16 + 1] = src[3];
    dst[32] = src[4];
    dst[32 + 1] = src[5];
  }
}
1147
1148
3.14M
/*
 * Complex analysis transform assembled from three 16-point complex
 * sub-transforms (ixheaac_cmplx_anal_fft_p2) followed by a twiddle stage and
 * 3-point butterflies (ixheaac_aac_ld_dec_fft_3_float).
 *
 * x_in    : 96 floats of interleaved (re, im) input. The buffer is used as
 *           working storage: it is overwritten with the sub-transform
 *           results and then with the twiddled values.
 * x_out   : receives 96 floats, written as three 32-float sections starting
 *           at offsets 0, 32 and 64.
 * npoints : transform length; npoints / 3 sets the number of groups handled
 *           by the twiddle and butterfly stages. The gather/scatter stages
 *           and the scratch buffers are fixed at 16 groups (npoints == 48).
 */
void ixheaac_cmplx_anal_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) {
  const WORD32 num_groups = npoints / 3;
  const FLOAT32 *tw = ixheaac_twidle_tbl_48;
  FLOAT32 sub_in[32];
  FLOAT32 sub_out[32];
  FLOAT32 stage_out[96];
  WORD32 phase, grp, k;

  /* Pull every third complex sample into a contiguous buffer, transform it,
   * and scatter the result back to the positions it was read from. */
  for (phase = 0; phase < 3; phase++) {
    for (k = 0; k < 16; k++) {
      sub_in[2 * k] = x_in[6 * k + 2 * phase];
      sub_in[2 * k + 1] = x_in[6 * k + 2 * phase + 1];
    }

    ixheaac_cmplx_anal_fft_p2(sub_in, sub_out, 16);

    for (k = 0; k < 16; k++) {
      x_in[6 * k + 2 * phase] = sub_out[2 * k];
      x_in[6 * k + 2 * phase + 1] = sub_out[2 * k + 1];
    }
  }

  /* Rotate the second and third complex value of every group by consecutive
   * (re, im) entries of the twiddle table; the first value is left as is. */
  for (grp = 0; grp < num_groups; grp++) {
    FLOAT32 *cplx = &x_in[6 * grp + 2];

    for (k = 0; k < 2; k++) {
      const FLOAT32 re = cplx[0];
      const FLOAT32 im = cplx[1];
      const FLOAT32 w_re = tw[0];
      const FLOAT32 w_im = tw[1];

      cplx[0] = (re * w_re + im * w_im);
      cplx[1] = (-re * w_im + im * w_re);

      cplx += 2;
      tw += 2;
    }
  }

  /* One 3-point butterfly per group, from x_in into the local buffer. */
  for (grp = 0; grp < num_groups; grp++) {
    ixheaac_aac_ld_dec_fft_3_float(&x_in[6 * grp], &stage_out[6 * grp]);
  }

  /* De-interleave: the three complex outputs of group g go to complex slot g
   * of the first, second and third 32-float section of x_out. */
  for (grp = 0; grp < 16; grp++) {
    const FLOAT32 *src = &stage_out[6 * grp];
    FLOAT32 *dst = &x_out[2 * grp];

    dst[0] = src[0];
    dst[1] = src[1];
    dst[32] = src[2];
    dst[32 + 1] = src[3];
    dst[64] = src[4];
    dst[64 + 1] = src[5];
  }
}