Coverage Report

Created: 2026-01-09 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/common/ixheaac_esbr_fft.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
#include <stdio.h>
22
#include <stdlib.h>
23
#include "ixheaac_type_def.h"
24
#include "ixheaac_constants.h"
25
#include "ixheaac_basic_ops32.h"
26
27
#define PLATFORM_INLINE __inline
28
29
#define DIG_REV(i, m, j)                                    \
30
167M
  do {                                                      \
31
167M
    unsigned _ = (i);                                       \
32
167M
    _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
33
167M
    _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
34
167M
    _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
35
167M
    (j) = _ >> (m);                                         \
36
167M
  } while (0)
37
38
extern const FLOAT32 ixheaac_twiddle_table_fft_float[514];
39
extern const FLOAT32 ixheaac_twidle_tbl_48[64];
40
extern const FLOAT32 ixheaac_twidle_tbl_24[32];
41
42
28.9M
void ixheaac_real_synth_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) {
43
28.9M
  WORD32 i, j, k, n_stages, h2;
44
28.9M
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
45
28.9M
  WORD32 del, nodespacing, in_loop_cnt;
46
28.9M
  WORD32 not_power_4;
47
28.9M
  WORD32 dig_rev_shift;
48
28.9M
  const FLOAT32 *ptr_w;
49
50
28.9M
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
51
28.9M
  n_stages = 30 - ixheaac_norm32(npoints);
52
28.9M
  not_power_4 = n_stages & 1;
53
54
28.9M
  n_stages = n_stages >> 1;
55
56
28.9M
  ptr_w = ixheaac_twiddle_table_fft_float;
57
58
116M
  for (i = 0; i < npoints; i += 4) {
59
87.5M
    FLOAT32 *inp = ptr_x;
60
61
87.5M
    DIG_REV(i, dig_rev_shift, h2);
62
87.5M
    if (not_power_4) {
63
68.0M
      h2 += 1;
64
68.0M
      h2 &= ~1;
65
68.0M
    }
66
87.5M
    inp += (h2 >> 1);
67
68
87.5M
    x0r = *inp;
69
87.5M
    inp += (npoints >> 2);
70
71
87.5M
    x1r = *inp;
72
87.5M
    inp += (npoints >> 2);
73
74
87.5M
    x2r = *inp;
75
87.5M
    inp += (npoints >> 2);
76
77
87.5M
    x3r = *inp;
78
79
87.5M
    x0r = x0r + x2r;
80
87.5M
    x2r = x0r - (x2r * 2);
81
87.5M
    x1r = x1r + x3r;
82
87.5M
    x3r = x1r - (x3r * 2);
83
87.5M
    x0r = x0r + x1r;
84
87.5M
    x1r = x0r - (x1r * 2);
85
86
87.5M
    *ptr_y++ = x0r;
87
87.5M
    *ptr_y++ = 0;
88
87.5M
    *ptr_y++ = x2r;
89
87.5M
    *ptr_y++ = x3r;
90
87.5M
    *ptr_y++ = x1r;
91
87.5M
    *ptr_y++ = 0;
92
87.5M
    *ptr_y++ = x2r;
93
87.5M
    *ptr_y++ = -x3r;
94
87.5M
  }
95
28.9M
  ptr_y -= 2 * npoints;
96
28.9M
  del = 4;
97
28.9M
  nodespacing = 64;
98
28.9M
  in_loop_cnt = npoints >> 4;
99
37.1M
  for (i = n_stages - 1; i > 0; i--) {
100
8.19M
    const FLOAT32 *twiddles = ptr_w;
101
8.19M
    FLOAT32 *data = ptr_y;
102
8.19M
    FLOAT32 W1, W2, W3, W4, W5, W6;
103
8.19M
    WORD32 sec_loop_cnt;
104
105
19.7M
    for (k = in_loop_cnt; k != 0; k--) {
106
11.5M
      x0r = (*data);
107
11.5M
      x0i = (*(data + 1));
108
11.5M
      data += ((SIZE_T)del << 1);
109
110
11.5M
      x1r = (*data);
111
11.5M
      x1i = (*(data + 1));
112
11.5M
      data += ((SIZE_T)del << 1);
113
114
11.5M
      x2r = (*data);
115
11.5M
      x2i = (*(data + 1));
116
11.5M
      data += ((SIZE_T)del << 1);
117
118
11.5M
      x3r = (*data);
119
11.5M
      x3i = (*(data + 1));
120
11.5M
      data -= 3 * ((SIZE_T)del << 1);
121
122
11.5M
      x0r = x0r + x2r;
123
11.5M
      x0i = x0i + x2i;
124
11.5M
      x2r = x0r - (x2r * 2);
125
11.5M
      x2i = x0i - (x2i * 2);
126
11.5M
      x1r = x1r + x3r;
127
11.5M
      x1i = x1i + x3i;
128
11.5M
      x3r = x1r - (x3r * 2);
129
11.5M
      x3i = x1i - (x3i * 2);
130
131
11.5M
      x0r = x0r + x1r;
132
11.5M
      x0i = x0i + x1i;
133
11.5M
      x1r = x0r - (x1r * 2);
134
11.5M
      x1i = x0i - (x1i * 2);
135
11.5M
      x2r = x2r - x3i;
136
11.5M
      x2i = x2i + x3r;
137
11.5M
      x3i = x2r + (x3i * 2);
138
11.5M
      x3r = x2i - (x3r * 2);
139
140
11.5M
      *data = x0r;
141
11.5M
      *(data + 1) = x0i;
142
11.5M
      data += ((SIZE_T)del << 1);
143
144
11.5M
      *data = x2r;
145
11.5M
      *(data + 1) = x2i;
146
11.5M
      data += ((SIZE_T)del << 1);
147
148
11.5M
      *data = x1r;
149
11.5M
      *(data + 1) = x1i;
150
11.5M
      data += ((SIZE_T)del << 1);
151
152
11.5M
      *data = x3i;
153
11.5M
      *(data + 1) = x3r;
154
11.5M
      data += ((SIZE_T)del << 1);
155
11.5M
    }
156
8.19M
    data = ptr_y + 2;
157
158
8.19M
    sec_loop_cnt = (nodespacing * del);
159
8.19M
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
160
8.19M
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
161
8.19M
                   (sec_loop_cnt / 256);
162
163
16.3M
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
164
8.19M
      W1 = *(twiddles + j);
165
8.19M
      W4 = *(twiddles + j + 257);
166
8.19M
      W2 = *(twiddles + ((SIZE_T)j << 1));
167
8.19M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
168
8.19M
      W3 = *(twiddles + j + ((SIZE_T)j << 1));
169
8.19M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
170
171
19.7M
      for (k = in_loop_cnt; k != 0; k--) {
172
11.5M
        FLOAT32 tmp;
173
11.5M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
174
175
11.5M
        data += ((SIZE_T)del << 1);
176
177
11.5M
        x1r = *data;
178
11.5M
        x1i = *(data + 1);
179
11.5M
        data += ((SIZE_T)del << 1);
180
181
11.5M
        x2r = *data;
182
11.5M
        x2i = *(data + 1);
183
11.5M
        data += ((SIZE_T)del << 1);
184
185
11.5M
        x3r = *data;
186
11.5M
        x3i = *(data + 1);
187
11.5M
        data -= 3 * ((SIZE_T)del << 1);
188
189
11.5M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
190
11.5M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
191
11.5M
        x1r = tmp;
192
193
11.5M
        tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
194
11.5M
        x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
195
11.5M
        x2r = tmp;
196
197
11.5M
        tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
198
11.5M
        x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
199
11.5M
        x3r = tmp;
200
201
11.5M
        x0r = (*data);
202
11.5M
        x0i = (*(data + 1));
203
204
11.5M
        x0r = x0r + (x2r);
205
11.5M
        x0i = x0i + (x2i);
206
11.5M
        x2r = x0r - (x2r * 2);
207
11.5M
        x2i = x0i - (x2i * 2);
208
11.5M
        x1r = x1r + x3r;
209
11.5M
        x1i = x1i + x3i;
210
11.5M
        x3r = x1r - (x3r * 2);
211
11.5M
        x3i = x1i - (x3i * 2);
212
213
11.5M
        x0r = x0r + (x1r);
214
11.5M
        x0i = x0i + (x1i);
215
11.5M
        x1r = x0r - (x1r * 2);
216
11.5M
        x1i = x0i - (x1i * 2);
217
11.5M
        x2r = x2r - (x3i);
218
11.5M
        x2i = x2i + (x3r);
219
11.5M
        x3i = x2r + (x3i * 2);
220
11.5M
        x3r = x2i - (x3r * 2);
221
222
11.5M
        *data = x0r;
223
11.5M
        *(data + 1) = x0i;
224
11.5M
        data += ((SIZE_T)del << 1);
225
226
11.5M
        *data = x2r;
227
11.5M
        *(data + 1) = x2i;
228
11.5M
        data += ((SIZE_T)del << 1);
229
230
11.5M
        *data = x1r;
231
11.5M
        *(data + 1) = x1i;
232
11.5M
        data += ((SIZE_T)del << 1);
233
234
11.5M
        *data = x3i;
235
11.5M
        *(data + 1) = x3r;
236
11.5M
        data += ((SIZE_T)del << 1);
237
11.5M
      }
238
8.19M
      data -= 2 * npoints;
239
8.19M
      data += 2;
240
8.19M
    }
241
16.3M
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
242
8.19M
      W1 = *(twiddles + j);
243
8.19M
      W4 = *(twiddles + j + 257);
244
8.19M
      W2 = *(twiddles + ((SIZE_T)j << 1));
245
8.19M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
246
8.19M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
247
8.19M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
248
249
19.7M
      for (k = in_loop_cnt; k != 0; k--) {
250
11.5M
        FLOAT32 tmp;
251
11.5M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
252
253
11.5M
        data += ((SIZE_T)del << 1);
254
255
11.5M
        x1r = *data;
256
11.5M
        x1i = *(data + 1);
257
11.5M
        data += ((SIZE_T)del << 1);
258
259
11.5M
        x2r = *data;
260
11.5M
        x2i = *(data + 1);
261
11.5M
        data += ((SIZE_T)del << 1);
262
263
11.5M
        x3r = *data;
264
11.5M
        x3i = *(data + 1);
265
11.5M
        data -= 3 * ((SIZE_T)del << 1);
266
267
11.5M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
268
11.5M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
269
11.5M
        x1r = tmp;
270
271
11.5M
        tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
272
11.5M
        x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
273
11.5M
        x2r = tmp;
274
275
11.5M
        tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
276
11.5M
        x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
277
11.5M
        x3r = tmp;
278
279
11.5M
        x0r = (*data);
280
11.5M
        x0i = (*(data + 1));
281
282
11.5M
        x0r = x0r + (x2r);
283
11.5M
        x0i = x0i + (x2i);
284
11.5M
        x2r = x0r - (x2r * 2);
285
11.5M
        x2i = x0i - (x2i * 2);
286
11.5M
        x1r = x1r + x3r;
287
11.5M
        x1i = x1i + x3i;
288
11.5M
        x3r = x1r - (x3r * 2);
289
11.5M
        x3i = x1i - (x3i * 2);
290
291
11.5M
        x0r = x0r + (x1r);
292
11.5M
        x0i = x0i + (x1i);
293
11.5M
        x1r = x0r - (x1r * 2);
294
11.5M
        x1i = x0i - (x1i * 2);
295
11.5M
        x2r = x2r - (x3i);
296
11.5M
        x2i = x2i + (x3r);
297
11.5M
        x3i = x2r + (x3i * 2);
298
11.5M
        x3r = x2i - (x3r * 2);
299
300
11.5M
        *data = x0r;
301
11.5M
        *(data + 1) = x0i;
302
11.5M
        data += ((SIZE_T)del << 1);
303
304
11.5M
        *data = x2r;
305
11.5M
        *(data + 1) = x2i;
306
11.5M
        data += ((SIZE_T)del << 1);
307
308
11.5M
        *data = x1r;
309
11.5M
        *(data + 1) = x1i;
310
11.5M
        data += ((SIZE_T)del << 1);
311
312
11.5M
        *data = x3i;
313
11.5M
        *(data + 1) = x3r;
314
11.5M
        data += ((SIZE_T)del << 1);
315
11.5M
      }
316
8.19M
      data -= 2 * npoints;
317
8.19M
      data += 2;
318
8.19M
    }
319
8.19M
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
320
0
      W1 = *(twiddles + j);
321
0
      W4 = *(twiddles + j + 257);
322
0
      W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
323
0
      W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
324
0
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
325
0
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
326
327
0
      for (k = in_loop_cnt; k != 0; k--) {
328
0
        FLOAT32 tmp;
329
0
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
330
331
0
        data += ((SIZE_T)del << 1);
332
333
0
        x1r = *data;
334
0
        x1i = *(data + 1);
335
0
        data += ((SIZE_T)del << 1);
336
337
0
        x2r = *data;
338
0
        x2i = *(data + 1);
339
0
        data += ((SIZE_T)del << 1);
340
341
0
        x3r = *data;
342
0
        x3i = *(data + 1);
343
0
        data -= 3 * ((SIZE_T)del << 1);
344
345
0
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
346
0
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
347
0
        x1r = tmp;
348
349
0
        tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
350
0
        x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
351
0
        x2r = tmp;
352
353
0
        tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
354
0
        x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
355
0
        x3r = tmp;
356
357
0
        x0r = (*data);
358
0
        x0i = (*(data + 1));
359
360
0
        x0r = x0r + (x2r);
361
0
        x0i = x0i + (x2i);
362
0
        x2r = x0r - (x2r * 2);
363
0
        x2i = x0i - (x2i * 2);
364
0
        x1r = x1r + x3r;
365
0
        x1i = x1i + x3i;
366
0
        x3r = x1r - (x3r * 2);
367
0
        x3i = x1i - (x3i * 2);
368
369
0
        x0r = x0r + (x1r);
370
0
        x0i = x0i + (x1i);
371
0
        x1r = x0r - (x1r * 2);
372
0
        x1i = x0i - (x1i * 2);
373
0
        x2r = x2r - (x3i);
374
0
        x2i = x2i + (x3r);
375
0
        x3i = x2r + (x3i * 2);
376
0
        x3r = x2i - (x3r * 2);
377
378
0
        *data = x0r;
379
0
        *(data + 1) = x0i;
380
0
        data += ((SIZE_T)del << 1);
381
382
0
        *data = x2r;
383
0
        *(data + 1) = x2i;
384
0
        data += ((SIZE_T)del << 1);
385
386
0
        *data = x1r;
387
0
        *(data + 1) = x1i;
388
0
        data += ((SIZE_T)del << 1);
389
390
0
        *data = x3i;
391
0
        *(data + 1) = x3r;
392
0
        data += ((SIZE_T)del << 1);
393
0
      }
394
0
      data -= 2 * npoints;
395
0
      data += 2;
396
0
    }
397
16.3M
    for (; j < nodespacing * del; j += nodespacing) {
398
8.19M
      W1 = *(twiddles + j);
399
8.19M
      W4 = *(twiddles + j + 257);
400
8.19M
      W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
401
8.19M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
402
8.19M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
403
8.19M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
404
405
19.7M
      for (k = in_loop_cnt; k != 0; k--) {
406
11.5M
        FLOAT32 tmp;
407
11.5M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
408
409
11.5M
        data += ((SIZE_T)del << 1);
410
411
11.5M
        x1r = *data;
412
11.5M
        x1i = *(data + 1);
413
11.5M
        data += ((SIZE_T)del << 1);
414
415
11.5M
        x2r = *data;
416
11.5M
        x2i = *(data + 1);
417
11.5M
        data += ((SIZE_T)del << 1);
418
419
11.5M
        x3r = *data;
420
11.5M
        x3i = *(data + 1);
421
11.5M
        data -= 3 * ((SIZE_T)del << 1);
422
423
11.5M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
424
11.5M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
425
11.5M
        x1r = tmp;
426
427
11.5M
        tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
428
11.5M
        x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
429
11.5M
        x2r = tmp;
430
431
11.5M
        tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6));
432
11.5M
        x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
433
11.5M
        x3r = tmp;
434
435
11.5M
        x0r = (*data);
436
11.5M
        x0i = (*(data + 1));
437
438
11.5M
        x0r = x0r + (x2r);
439
11.5M
        x0i = x0i + (x2i);
440
11.5M
        x2r = x0r - (x2r * 2);
441
11.5M
        x2i = x0i - (x2i * 2);
442
11.5M
        x1r = x1r + x3r;
443
11.5M
        x1i = x1i - x3i;
444
11.5M
        x3r = x1r - (x3r * 2);
445
11.5M
        x3i = x1i + (x3i * 2);
446
447
11.5M
        x0r = x0r + (x1r);
448
11.5M
        x0i = x0i + (x1i);
449
11.5M
        x1r = x0r - (x1r * 2);
450
11.5M
        x1i = x0i - (x1i * 2);
451
11.5M
        x2r = x2r - (x3i);
452
11.5M
        x2i = x2i + (x3r);
453
11.5M
        x3i = x2r + (x3i * 2);
454
11.5M
        x3r = x2i - (x3r * 2);
455
456
11.5M
        *data = x0r;
457
11.5M
        *(data + 1) = x0i;
458
11.5M
        data += ((SIZE_T)del << 1);
459
460
11.5M
        *data = x2r;
461
11.5M
        *(data + 1) = x2i;
462
11.5M
        data += ((SIZE_T)del << 1);
463
464
11.5M
        *data = x1r;
465
11.5M
        *(data + 1) = x1i;
466
11.5M
        data += ((SIZE_T)del << 1);
467
468
11.5M
        *data = x3i;
469
11.5M
        *(data + 1) = x3r;
470
11.5M
        data += ((SIZE_T)del << 1);
471
11.5M
      }
472
8.19M
      data -= 2 * npoints;
473
8.19M
      data += 2;
474
8.19M
    }
475
8.19M
    nodespacing >>= 2;
476
8.19M
    del <<= 2;
477
8.19M
    in_loop_cnt >>= 2;
478
8.19M
  }
479
480
28.9M
  if (not_power_4) {
481
24.0M
    const FLOAT32 *twiddles = ptr_w;
482
24.0M
    nodespacing <<= 1;
483
484
92.0M
    for (j = del / 2; j != 0; j--) {
485
68.0M
      FLOAT32 W1 = *twiddles;
486
68.0M
      FLOAT32 W4 = *(twiddles + 257);
487
68.0M
      FLOAT32 tmp;
488
68.0M
      twiddles += nodespacing;
489
490
68.0M
      x0r = *ptr_y;
491
68.0M
      x0i = *(ptr_y + 1);
492
68.0M
      ptr_y += ((SIZE_T)del << 1);
493
494
68.0M
      x1r = *ptr_y;
495
68.0M
      x1i = *(ptr_y + 1);
496
497
68.0M
      tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
498
68.0M
      x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
499
68.0M
      x1r = tmp;
500
501
68.0M
      *ptr_y = (x0r) - (x1r);
502
68.0M
      *(ptr_y + 1) = (x0i) - (x1i);
503
68.0M
      ptr_y -= ((SIZE_T)del << 1);
504
505
68.0M
      *ptr_y = (x0r) + (x1r);
506
68.0M
      *(ptr_y + 1) = (x0i) + (x1i);
507
68.0M
      ptr_y += 2;
508
68.0M
    }
509
24.0M
    twiddles = ptr_w;
510
92.0M
    for (j = del / 2; j != 0; j--) {
511
68.0M
      FLOAT32 W1 = *twiddles;
512
68.0M
      FLOAT32 W4 = *(twiddles + 257);
513
68.0M
      FLOAT32 tmp;
514
68.0M
      twiddles += nodespacing;
515
516
68.0M
      x0r = *ptr_y;
517
68.0M
      x0i = *(ptr_y + 1);
518
68.0M
      ptr_y += ((SIZE_T)del << 1);
519
520
68.0M
      x1r = *ptr_y;
521
68.0M
      x1i = *(ptr_y + 1);
522
68.0M
      tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1));
523
68.0M
      x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
524
68.0M
      x1r = tmp;
525
526
68.0M
      *ptr_y = (x0r) - (x1r);
527
68.0M
      *(ptr_y + 1) = (x0i) - (x1i);
528
68.0M
      ptr_y -= ((SIZE_T)del << 1);
529
530
68.0M
      *ptr_y = (x0r) + (x1r);
531
68.0M
      *(ptr_y + 1) = (x0i) + (x1i);
532
68.0M
      ptr_y += 2;
533
68.0M
    }
534
24.0M
  }
535
28.9M
}
536
537
13.2M
void ixheaac_cmplx_anal_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) {
538
13.2M
  WORD32 i, j, k, n_stages, h2;
539
13.2M
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
540
13.2M
  WORD32 del, nodespacing, in_loop_cnt;
541
13.2M
  WORD32 not_power_4;
542
13.2M
  WORD32 dig_rev_shift;
543
13.2M
  const FLOAT32 *ptr_w;
544
545
13.2M
  dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
546
13.2M
  n_stages = 30 - ixheaac_norm32(npoints);
547
13.2M
  not_power_4 = n_stages & 1;
548
549
13.2M
  n_stages = n_stages >> 1;
550
551
13.2M
  ptr_w = ixheaac_twiddle_table_fft_float;
552
553
93.6M
  for (i = 0; i < npoints; i += 4) {
554
80.4M
    FLOAT32 *inp = ptr_x;
555
556
80.4M
    DIG_REV(i, dig_rev_shift, h2);
557
80.4M
    if (not_power_4) {
558
19.4M
      h2 += 1;
559
19.4M
      h2 &= ~1;
560
19.4M
    }
561
80.4M
    inp += (h2);
562
563
80.4M
    x0r = *inp;
564
80.4M
    x0i = *(inp + 1);
565
80.4M
    inp += (npoints >> 1);
566
567
80.4M
    x1r = *inp;
568
80.4M
    x1i = *(inp + 1);
569
80.4M
    inp += (npoints >> 1);
570
571
80.4M
    x2r = *inp;
572
80.4M
    x2i = *(inp + 1);
573
80.4M
    inp += (npoints >> 1);
574
575
80.4M
    x3r = *inp;
576
80.4M
    x3i = *(inp + 1);
577
578
80.4M
    x0r = x0r + x2r;
579
80.4M
    x0i = x0i + x2i;
580
80.4M
    x2r = x0r - (x2r * 2);
581
80.4M
    x2i = x0i - (x2i * 2);
582
80.4M
    x1r = x1r + x3r;
583
80.4M
    x1i = x1i + x3i;
584
80.4M
    x3r = x1r - (x3r * 2);
585
80.4M
    x3i = x1i - (x3i * 2);
586
587
80.4M
    x0r = x0r + x1r;
588
80.4M
    x0i = x0i + x1i;
589
80.4M
    x1r = x0r - (x1r * 2);
590
80.4M
    x1i = x0i - (x1i * 2);
591
80.4M
    x2r = x2r - x3i;
592
80.4M
    x2i = x2i + x3r;
593
80.4M
    x3i = x2r + (x3i * 2);
594
80.4M
    x3r = x2i - (x3r * 2);
595
596
80.4M
    *ptr_y++ = x0r;
597
80.4M
    *ptr_y++ = x0i;
598
80.4M
    *ptr_y++ = x2r;
599
80.4M
    *ptr_y++ = x2i;
600
80.4M
    *ptr_y++ = x1r;
601
80.4M
    *ptr_y++ = x1i;
602
80.4M
    *ptr_y++ = x3i;
603
80.4M
    *ptr_y++ = x3r;
604
80.4M
  }
605
13.2M
  ptr_y -= 2 * npoints;
606
13.2M
  del = 4;
607
13.2M
  nodespacing = 64;
608
13.2M
  in_loop_cnt = npoints >> 4;
609
27.9M
  for (i = n_stages - 1; i > 0; i--) {
610
14.7M
    const FLOAT32 *twiddles = ptr_w;
611
14.7M
    FLOAT32 *data = ptr_y;
612
14.7M
    FLOAT32 W1, W2, W3, W4, W5, W6;
613
14.7M
    WORD32 sec_loop_cnt;
614
615
36.3M
    for (k = in_loop_cnt; k != 0; k--) {
616
21.6M
      x0r = (*data);
617
21.6M
      x0i = (*(data + 1));
618
21.6M
      data += ((SIZE_T)del << 1);
619
620
21.6M
      x1r = (*data);
621
21.6M
      x1i = (*(data + 1));
622
21.6M
      data += ((SIZE_T)del << 1);
623
624
21.6M
      x2r = (*data);
625
21.6M
      x2i = (*(data + 1));
626
21.6M
      data += ((SIZE_T)del << 1);
627
628
21.6M
      x3r = (*data);
629
21.6M
      x3i = (*(data + 1));
630
21.6M
      data -= 3 * ((SIZE_T)del << 1);
631
632
21.6M
      x0r = x0r + x2r;
633
21.6M
      x0i = x0i + x2i;
634
21.6M
      x2r = x0r - (x2r * 2);
635
21.6M
      x2i = x0i - (x2i * 2);
636
21.6M
      x1r = x1r + x3r;
637
21.6M
      x1i = x1i + x3i;
638
21.6M
      x3r = x1r - (x3r * 2);
639
21.6M
      x3i = x1i - (x3i * 2);
640
641
21.6M
      x0r = x0r + x1r;
642
21.6M
      x0i = x0i + x1i;
643
21.6M
      x1r = x0r - (x1r * 2);
644
21.6M
      x1i = x0i - (x1i * 2);
645
21.6M
      x2r = x2r - x3i;
646
21.6M
      x2i = x2i + x3r;
647
21.6M
      x3i = x2r + (x3i * 2);
648
21.6M
      x3r = x2i - (x3r * 2);
649
650
21.6M
      *data = x0r;
651
21.6M
      *(data + 1) = x0i;
652
21.6M
      data += ((SIZE_T)del << 1);
653
654
21.6M
      *data = x2r;
655
21.6M
      *(data + 1) = x2i;
656
21.6M
      data += ((SIZE_T)del << 1);
657
658
21.6M
      *data = x1r;
659
21.6M
      *(data + 1) = x1i;
660
21.6M
      data += ((SIZE_T)del << 1);
661
662
21.6M
      *data = x3i;
663
21.6M
      *(data + 1) = x3r;
664
21.6M
      data += ((SIZE_T)del << 1);
665
21.6M
    }
666
14.7M
    data = ptr_y + 2;
667
668
14.7M
    sec_loop_cnt = (nodespacing * del);
669
14.7M
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
670
14.7M
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
671
14.7M
                   (sec_loop_cnt / 256);
672
673
35.3M
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
674
20.6M
      W1 = *(twiddles + j);
675
20.6M
      W4 = *(twiddles + j + 257);
676
20.6M
      W2 = *(twiddles + ((SIZE_T)j << 1));
677
20.6M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
678
20.6M
      W3 = *(twiddles + j + ((SIZE_T)j << 1));
679
20.6M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
680
681
48.2M
      for (k = in_loop_cnt; k != 0; k--) {
682
27.5M
        FLOAT32 tmp;
683
27.5M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
684
685
27.5M
        data += ((SIZE_T)del << 1);
686
687
27.5M
        x1r = *data;
688
27.5M
        x1i = *(data + 1);
689
27.5M
        data += ((SIZE_T)del << 1);
690
691
27.5M
        x2r = *data;
692
27.5M
        x2i = *(data + 1);
693
27.5M
        data += ((SIZE_T)del << 1);
694
695
27.5M
        x3r = *data;
696
27.5M
        x3i = *(data + 1);
697
27.5M
        data -= 3 * ((SIZE_T)del << 1);
698
699
27.5M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
700
27.5M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
701
27.5M
        x1r = tmp;
702
703
27.5M
        tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
704
27.5M
        x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
705
27.5M
        x2r = tmp;
706
707
27.5M
        tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
708
27.5M
        x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
709
27.5M
        x3r = tmp;
710
711
27.5M
        x0r = (*data);
712
27.5M
        x0i = (*(data + 1));
713
714
27.5M
        x0r = x0r + (x2r);
715
27.5M
        x0i = x0i + (x2i);
716
27.5M
        x2r = x0r - (x2r * 2);
717
27.5M
        x2i = x0i - (x2i * 2);
718
27.5M
        x1r = x1r + x3r;
719
27.5M
        x1i = x1i + x3i;
720
27.5M
        x3r = x1r - (x3r * 2);
721
27.5M
        x3i = x1i - (x3i * 2);
722
723
27.5M
        x0r = x0r + (x1r);
724
27.5M
        x0i = x0i + (x1i);
725
27.5M
        x1r = x0r - (x1r * 2);
726
27.5M
        x1i = x0i - (x1i * 2);
727
27.5M
        x2r = x2r - (x3i);
728
27.5M
        x2i = x2i + (x3r);
729
27.5M
        x3i = x2r + (x3i * 2);
730
27.5M
        x3r = x2i - (x3r * 2);
731
732
27.5M
        *data = x0r;
733
27.5M
        *(data + 1) = x0i;
734
27.5M
        data += ((SIZE_T)del << 1);
735
736
27.5M
        *data = x2r;
737
27.5M
        *(data + 1) = x2i;
738
27.5M
        data += ((SIZE_T)del << 1);
739
740
27.5M
        *data = x1r;
741
27.5M
        *(data + 1) = x1i;
742
27.5M
        data += ((SIZE_T)del << 1);
743
744
27.5M
        *data = x3i;
745
27.5M
        *(data + 1) = x3r;
746
27.5M
        data += ((SIZE_T)del << 1);
747
27.5M
      }
748
20.6M
      data -= 2 * npoints;
749
20.6M
      data += 2;
750
20.6M
    }
751
32.3M
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
752
17.6M
      W1 = *(twiddles + j);
753
17.6M
      W4 = *(twiddles + j + 257);
754
17.6M
      W2 = *(twiddles + ((SIZE_T)j << 1));
755
17.6M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
756
17.6M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
757
17.6M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
758
759
42.2M
      for (k = in_loop_cnt; k != 0; k--) {
760
24.5M
        FLOAT32 tmp;
761
24.5M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
762
763
24.5M
        data += ((SIZE_T)del << 1);
764
765
24.5M
        x1r = *data;
766
24.5M
        x1i = *(data + 1);
767
24.5M
        data += ((SIZE_T)del << 1);
768
769
24.5M
        x2r = *data;
770
24.5M
        x2i = *(data + 1);
771
24.5M
        data += ((SIZE_T)del << 1);
772
773
24.5M
        x3r = *data;
774
24.5M
        x3i = *(data + 1);
775
24.5M
        data -= 3 * ((SIZE_T)del << 1);
776
777
24.5M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
778
24.5M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
779
24.5M
        x1r = tmp;
780
781
24.5M
        tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
782
24.5M
        x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
783
24.5M
        x2r = tmp;
784
785
24.5M
        tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
786
24.5M
        x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
787
24.5M
        x3r = tmp;
788
789
24.5M
        x0r = (*data);
790
24.5M
        x0i = (*(data + 1));
791
792
24.5M
        x0r = x0r + (x2r);
793
24.5M
        x0i = x0i + (x2i);
794
24.5M
        x2r = x0r - (x2r * 2);
795
24.5M
        x2i = x0i - (x2i * 2);
796
24.5M
        x1r = x1r + x3r;
797
24.5M
        x1i = x1i + x3i;
798
24.5M
        x3r = x1r - (x3r * 2);
799
24.5M
        x3i = x1i - (x3i * 2);
800
801
24.5M
        x0r = x0r + (x1r);
802
24.5M
        x0i = x0i + (x1i);
803
24.5M
        x1r = x0r - (x1r * 2);
804
24.5M
        x1i = x0i - (x1i * 2);
805
24.5M
        x2r = x2r - (x3i);
806
24.5M
        x2i = x2i + (x3r);
807
24.5M
        x3i = x2r + (x3i * 2);
808
24.5M
        x3r = x2i - (x3r * 2);
809
810
24.5M
        *data = x0r;
811
24.5M
        *(data + 1) = x0i;
812
24.5M
        data += ((SIZE_T)del << 1);
813
814
24.5M
        *data = x2r;
815
24.5M
        *(data + 1) = x2i;
816
24.5M
        data += ((SIZE_T)del << 1);
817
818
24.5M
        *data = x1r;
819
24.5M
        *(data + 1) = x1i;
820
24.5M
        data += ((SIZE_T)del << 1);
821
822
24.5M
        *data = x3i;
823
24.5M
        *(data + 1) = x3r;
824
24.5M
        data += ((SIZE_T)del << 1);
825
24.5M
      }
826
17.6M
      data -= 2 * npoints;
827
17.6M
      data += 2;
828
17.6M
    }
829
17.6M
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
830
2.98M
      W1 = *(twiddles + j);
831
2.98M
      W4 = *(twiddles + j + 257);
832
2.98M
      W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
833
2.98M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
834
2.98M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
835
2.98M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
836
837
5.96M
      for (k = in_loop_cnt; k != 0; k--) {
838
2.98M
        FLOAT32 tmp;
839
2.98M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
840
841
2.98M
        data += ((SIZE_T)del << 1);
842
843
2.98M
        x1r = *data;
844
2.98M
        x1i = *(data + 1);
845
2.98M
        data += ((SIZE_T)del << 1);
846
847
2.98M
        x2r = *data;
848
2.98M
        x2i = *(data + 1);
849
2.98M
        data += ((SIZE_T)del << 1);
850
851
2.98M
        x3r = *data;
852
2.98M
        x3i = *(data + 1);
853
2.98M
        data -= 3 * ((SIZE_T)del << 1);
854
855
2.98M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
856
2.98M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
857
2.98M
        x1r = tmp;
858
859
2.98M
        tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
860
2.98M
        x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
861
2.98M
        x2r = tmp;
862
863
2.98M
        tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
864
2.98M
        x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
865
2.98M
        x3r = tmp;
866
867
2.98M
        x0r = (*data);
868
2.98M
        x0i = (*(data + 1));
869
870
2.98M
        x0r = x0r + (x2r);
871
2.98M
        x0i = x0i + (x2i);
872
2.98M
        x2r = x0r - (x2r * 2);
873
2.98M
        x2i = x0i - (x2i * 2);
874
2.98M
        x1r = x1r + x3r;
875
2.98M
        x1i = x1i + x3i;
876
2.98M
        x3r = x1r - (x3r * 2);
877
2.98M
        x3i = x1i - (x3i * 2);
878
879
2.98M
        x0r = x0r + (x1r);
880
2.98M
        x0i = x0i + (x1i);
881
2.98M
        x1r = x0r - (x1r * 2);
882
2.98M
        x1i = x0i - (x1i * 2);
883
2.98M
        x2r = x2r - (x3i);
884
2.98M
        x2i = x2i + (x3r);
885
2.98M
        x3i = x2r + (x3i * 2);
886
2.98M
        x3r = x2i - (x3r * 2);
887
888
2.98M
        *data = x0r;
889
2.98M
        *(data + 1) = x0i;
890
2.98M
        data += ((SIZE_T)del << 1);
891
892
2.98M
        *data = x2r;
893
2.98M
        *(data + 1) = x2i;
894
2.98M
        data += ((SIZE_T)del << 1);
895
896
2.98M
        *data = x1r;
897
2.98M
        *(data + 1) = x1i;
898
2.98M
        data += ((SIZE_T)del << 1);
899
900
2.98M
        *data = x3i;
901
2.98M
        *(data + 1) = x3r;
902
2.98M
        data += ((SIZE_T)del << 1);
903
2.98M
      }
904
2.98M
      data -= 2 * npoints;
905
2.98M
      data += 2;
906
2.98M
    }
907
35.3M
    for (; j < nodespacing * del; j += nodespacing) {
908
20.6M
      W1 = *(twiddles + j);
909
20.6M
      W4 = *(twiddles + j + 257);
910
20.6M
      W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
911
20.6M
      W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
912
20.6M
      W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
913
20.6M
      W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
914
915
48.2M
      for (k = in_loop_cnt; k != 0; k--) {
916
27.5M
        FLOAT32 tmp;
917
27.5M
        FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
918
919
27.5M
        data += ((SIZE_T)del << 1);
920
921
27.5M
        x1r = *data;
922
27.5M
        x1i = *(data + 1);
923
27.5M
        data += ((SIZE_T)del << 1);
924
925
27.5M
        x2r = *data;
926
27.5M
        x2i = *(data + 1);
927
27.5M
        data += ((SIZE_T)del << 1);
928
929
27.5M
        x3r = *data;
930
27.5M
        x3i = *(data + 1);
931
27.5M
        data -= 3 * ((SIZE_T)del << 1);
932
933
27.5M
        tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
934
27.5M
        x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
935
27.5M
        x1r = tmp;
936
937
27.5M
        tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
938
27.5M
        x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
939
27.5M
        x2r = tmp;
940
941
27.5M
        tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6));
942
27.5M
        x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
943
27.5M
        x3r = tmp;
944
945
27.5M
        x0r = (*data);
946
27.5M
        x0i = (*(data + 1));
947
948
27.5M
        x0r = x0r + (x2r);
949
27.5M
        x0i = x0i + (x2i);
950
27.5M
        x2r = x0r - (x2r * 2);
951
27.5M
        x2i = x0i - (x2i * 2);
952
27.5M
        x1r = x1r + x3r;
953
27.5M
        x1i = x1i - x3i;
954
27.5M
        x3r = x1r - (x3r * 2);
955
27.5M
        x3i = x1i + (x3i * 2);
956
957
27.5M
        x0r = x0r + (x1r);
958
27.5M
        x0i = x0i + (x1i);
959
27.5M
        x1r = x0r - (x1r * 2);
960
27.5M
        x1i = x0i - (x1i * 2);
961
27.5M
        x2r = x2r - (x3i);
962
27.5M
        x2i = x2i + (x3r);
963
27.5M
        x3i = x2r + (x3i * 2);
964
27.5M
        x3r = x2i - (x3r * 2);
965
966
27.5M
        *data = x0r;
967
27.5M
        *(data + 1) = x0i;
968
27.5M
        data += ((SIZE_T)del << 1);
969
970
27.5M
        *data = x2r;
971
27.5M
        *(data + 1) = x2i;
972
27.5M
        data += ((SIZE_T)del << 1);
973
974
27.5M
        *data = x1r;
975
27.5M
        *(data + 1) = x1i;
976
27.5M
        data += ((SIZE_T)del << 1);
977
978
27.5M
        *data = x3i;
979
27.5M
        *(data + 1) = x3r;
980
27.5M
        data += ((SIZE_T)del << 1);
981
27.5M
      }
982
20.6M
      data -= 2 * npoints;
983
20.6M
      data += 2;
984
20.6M
    }
985
14.7M
    nodespacing >>= 2;
986
14.7M
    del <<= 2;
987
14.7M
    in_loop_cnt >>= 2;
988
14.7M
  }
989
990
13.2M
  if (not_power_4) {
991
2.43M
    const FLOAT32 *twiddles = ptr_w;
992
2.43M
    nodespacing <<= 1;
993
994
21.8M
    for (j = del / 2; j != 0; j--) {
995
19.4M
      FLOAT32 W1 = *twiddles;
996
19.4M
      FLOAT32 W4 = *(twiddles + 257);
997
19.4M
      FLOAT32 tmp;
998
19.4M
      twiddles += nodespacing;
999
1000
19.4M
      x0r = *ptr_y;
1001
19.4M
      x0i = *(ptr_y + 1);
1002
19.4M
      ptr_y += ((SIZE_T)del << 1);
1003
1004
19.4M
      x1r = *ptr_y;
1005
19.4M
      x1i = *(ptr_y + 1);
1006
1007
19.4M
      tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
1008
19.4M
      x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
1009
19.4M
      x1r = tmp;
1010
1011
19.4M
      *ptr_y = (x0r) - (x1r);
1012
19.4M
      *(ptr_y + 1) = (x0i) - (x1i);
1013
19.4M
      ptr_y -= ((SIZE_T)del << 1);
1014
1015
19.4M
      *ptr_y = (x0r) + (x1r);
1016
19.4M
      *(ptr_y + 1) = (x0i) + (x1i);
1017
19.4M
      ptr_y += 2;
1018
19.4M
    }
1019
2.43M
    twiddles = ptr_w;
1020
21.8M
    for (j = del / 2; j != 0; j--) {
1021
19.4M
      FLOAT32 W1 = *twiddles;
1022
19.4M
      FLOAT32 W4 = *(twiddles + 257);
1023
19.4M
      FLOAT32 tmp;
1024
19.4M
      twiddles += nodespacing;
1025
1026
19.4M
      x0r = *ptr_y;
1027
19.4M
      x0i = *(ptr_y + 1);
1028
19.4M
      ptr_y += ((SIZE_T)del << 1);
1029
1030
19.4M
      x1r = *ptr_y;
1031
19.4M
      x1i = *(ptr_y + 1);
1032
1033
19.4M
      tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1));
1034
19.4M
      x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
1035
19.4M
      x1r = tmp;
1036
1037
19.4M
      *ptr_y = (x0r) - (x1r);
1038
19.4M
      *(ptr_y + 1) = (x0i) - (x1i);
1039
19.4M
      ptr_y -= ((SIZE_T)del << 1);
1040
1041
19.4M
      *ptr_y = (x0r) + (x1r);
1042
19.4M
      *(ptr_y + 1) = (x0i) + (x1i);
1043
19.4M
      ptr_y += 2;
1044
19.4M
    }
1045
2.43M
  }
1046
13.2M
}
1047
1048
104M
static PLATFORM_INLINE void ixheaac_aac_ld_dec_fft_3_float(FLOAT32 *inp, FLOAT32 *op) {
1049
104M
  FLOAT32 add_r, sub_r;
1050
104M
  FLOAT32 add_i, sub_i;
1051
104M
  FLOAT32 temp_real, temp_imag, temp;
1052
1053
104M
  FLOAT32 p1, p2, p3, p4;
1054
1055
104M
  FLOAT32 sinmu;
1056
104M
  sinmu = -0.866025403784439f;
1057
1058
104M
  temp_real = inp[0] + inp[2];
1059
104M
  temp_imag = inp[1] + inp[3];
1060
1061
104M
  add_r = inp[2] + inp[4];
1062
104M
  add_i = inp[3] + inp[5];
1063
1064
104M
  sub_r = inp[2] - inp[4];
1065
104M
  sub_i = inp[3] - inp[5];
1066
1067
104M
  p1 = add_r / 2.0f;
1068
104M
  p4 = add_i / 2.0f;
1069
104M
  p2 = sub_i * sinmu;
1070
104M
  p3 = sub_r * sinmu;
1071
1072
104M
  temp = inp[0] - p1;
1073
1074
104M
  op[0] = temp_real + inp[4];
1075
104M
  op[1] = temp_imag + inp[5];
1076
104M
  op[2] = temp + p2;
1077
104M
  op[3] = (inp[1] - p3) - p4;
1078
104M
  op[4] = temp - p2;
1079
104M
  op[5] = (inp[1] + p3) - p4;
1080
1081
104M
  return;
1082
104M
}
1083
1084
6.88M
void ixheaac_real_synth_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) {
1085
6.88M
  WORD32 i, j;
1086
6.88M
  FLOAT32 x_3[8];
1087
6.88M
  FLOAT32 y_3[16];
1088
6.88M
  FLOAT32 y[48];
1089
6.88M
  FLOAT32 x[48];
1090
6.88M
  FLOAT32 *ptr_y = y;
1091
6.88M
  FLOAT32 *y_p3 = y;
1092
6.88M
  FLOAT32 *x_p3 = x;
1093
1094
27.5M
  for (i = 0; i < 3; i += 1) {
1095
185M
    for (j = 0; j < (npoints / 3); j++) {
1096
165M
      x_3[j] = x_in[3 * j + i];
1097
165M
    }
1098
1099
20.6M
    ixheaac_real_synth_fft_p2(x_3, y_3, 8);
1100
1101
185M
    for (j = 0; j < 16; j += 2) {
1102
165M
      x[3 * j + 2 * i] = y_3[j];
1103
165M
      x[3 * j + 2 * i + 1] = y_3[j + 1];
1104
165M
    }
1105
20.6M
  }
1106
1107
6.88M
  {
1108
6.88M
    FLOAT32 *wr;
1109
6.88M
    FLOAT32 tmp;
1110
6.88M
    FLOAT32 *x_tw = x;
1111
6.88M
    wr = (FLOAT32 *)ixheaac_twidle_tbl_24;
1112
6.88M
    x_tw += 2;
1113
1114
61.9M
    for (i = 0; i < (npoints / 3); i++) {
1115
55.0M
      tmp = ((*x_tw) * (*wr) + (*(x_tw + 1)) * (*(wr + 1)));
1116
55.0M
      *(x_tw + 1) = (-(*x_tw) * (*(wr + 1)) + (*(x_tw + 1)) * (*wr));
1117
55.0M
      *x_tw = tmp;
1118
1119
55.0M
      wr += 2;
1120
55.0M
      x_tw += 2;
1121
1122
55.0M
      tmp = ((*x_tw) * (*wr) + (*(x_tw + 1)) * (*(wr + 1)));
1123
55.0M
      *(x_tw + 1) = (-(*x_tw) * (*(wr + 1)) + (*(x_tw + 1)) * (*wr));
1124
55.0M
      *x_tw = tmp;
1125
1126
55.0M
      wr += 2;
1127
55.0M
      x_tw += 4;
1128
55.0M
    }
1129
6.88M
  }
1130
1131
61.9M
  for (i = 0; i < (npoints / 3); i++) {
1132
55.0M
    ixheaac_aac_ld_dec_fft_3_float(x_p3, y_p3);
1133
1134
55.0M
    x_p3 = x_p3 + 6;
1135
55.0M
    y_p3 = y_p3 + 6;
1136
55.0M
  }
1137
1138
61.9M
  for (i = 0; i < 16; i += 2) {
1139
55.0M
    x_out[i] = *ptr_y++;
1140
55.0M
    x_out[i + 1] = *ptr_y++;
1141
55.0M
    x_out[16 + i] = *ptr_y++;
1142
55.0M
    x_out[16 + i + 1] = *ptr_y++;
1143
55.0M
    x_out[32 + i] = *ptr_y++;
1144
55.0M
    x_out[32 + i + 1] = *ptr_y++;
1145
55.0M
  }
1146
6.88M
}
1147
1148
3.08M
void ixheaac_cmplx_anal_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) {
1149
3.08M
  WORD32 i, j;
1150
3.08M
  FLOAT32 x_3[32];
1151
3.08M
  FLOAT32 y_3[32];
1152
3.08M
  FLOAT32 y[96];
1153
3.08M
  FLOAT32 *ptr_x = x_in;
1154
3.08M
  FLOAT32 *ptr_y = y;
1155
3.08M
  FLOAT32 *y_p3 = y;
1156
1157
12.3M
  for (i = 0; i < 6; i += 2) {
1158
157M
    for (j = 0; j < 32; j += 2) {
1159
148M
      x_3[j] = x_in[3 * j + i];
1160
148M
      x_3[j + 1] = x_in[3 * j + i + 1];
1161
148M
    }
1162
1163
9.25M
    ixheaac_cmplx_anal_fft_p2(x_3, y_3, 16);
1164
1165
157M
    for (j = 0; j < 32; j += 2) {
1166
148M
      x_in[3 * j + i] = y_3[j];
1167
148M
      x_in[3 * j + i + 1] = y_3[j + 1];
1168
148M
    }
1169
9.25M
  }
1170
1171
3.08M
  {
1172
3.08M
    FLOAT32 *wr;
1173
3.08M
    FLOAT32 tmp;
1174
3.08M
    wr = (FLOAT32 *)ixheaac_twidle_tbl_48;
1175
3.08M
    x_in += 2;
1176
1177
52.4M
    for (i = 0; i < (npoints / 3); i++) {
1178
49.3M
      tmp = ((*x_in) * (*wr) + (*(x_in + 1)) * (*(wr + 1)));
1179
49.3M
      *(x_in + 1) = (-(*x_in) * (*(wr + 1)) + (*(x_in + 1)) * (*wr));
1180
49.3M
      *x_in = tmp;
1181
1182
49.3M
      wr += 2;
1183
49.3M
      x_in += 2;
1184
1185
49.3M
      tmp = ((*x_in) * (*wr) + (*(x_in + 1)) * (*(wr + 1)));
1186
49.3M
      *(x_in + 1) = (-(*x_in) * (*(wr + 1)) + (*(x_in + 1)) * (*wr));
1187
49.3M
      *x_in = tmp;
1188
1189
49.3M
      wr += 2;
1190
49.3M
      x_in += 4;
1191
49.3M
    }
1192
3.08M
  }
1193
1194
52.4M
  for (i = 0; i < (npoints / 3); i++) {
1195
49.3M
    ixheaac_aac_ld_dec_fft_3_float(ptr_x, ptr_y);
1196
1197
49.3M
    ptr_x = ptr_x + 6;
1198
49.3M
    ptr_y = ptr_y + 6;
1199
49.3M
  }
1200
1201
52.4M
  for (i = 0; i < 32; i += 2) {
1202
49.3M
    x_out[i] = *y_p3++;
1203
49.3M
    x_out[i + 1] = *y_p3++;
1204
49.3M
    x_out[32 + i] = *y_p3++;
1205
49.3M
    x_out[32 + i + 1] = *y_p3++;
1206
49.3M
    x_out[64 + i] = *y_p3++;
1207
49.3M
    x_out[64 + i + 1] = *y_p3++;
1208
49.3M
  }
1209
3.08M
}