Coverage Report

Created: 2026-01-17 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/encoder/ixheaace_fft.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2023 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
21
#include <string.h>
22
23
#include "ixheaac_type_def.h"
24
#include "ixheaac_constants.h"
25
#include "ixheaace_psy_const.h"
26
#include "ixheaace_tns.h"
27
#include "ixheaace_tns_params.h"
28
#include "ixheaace_rom.h"
29
#include "ixheaace_common_rom.h"
30
#include "ixheaace_bitbuffer.h"
31
#include "ixheaace_aac_constants.h"
32
#include "ixheaace_fft.h"
33
#include "ixheaac_basic_ops32.h"
34
#include "ixheaac_basic_ops40.h"
35
#include "ixheaac_basic_ops.h"
36
#include "iusace_basic_ops_flt.h"
37
38
static VOID ia_enhaacplus_enc_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
39
                                                      const FLOAT32 *ptr_time_signal,
40
                                                      WORD32 ch_increment,
41
93.2k
                                                      WORD32 long_frame_len) {
42
93.2k
  WORD32 i;
43
93.2k
  FLOAT32 *ptr_mdct_buff = ptr_mdct_delay_buffer;
44
93.2k
  if (ch_increment == 2) {
45
92.3k
    const FLOAT32 *ptr_input = ptr_time_signal;
46
92.3k
    FLOAT32 temp1, temp2, temp3, temp4;
47
92.3k
    temp1 = *ptr_input++;
48
92.3k
    ptr_input++;
49
92.3k
    temp2 = *ptr_input++;
50
92.3k
    ptr_input++;
51
92.3k
    temp3 = *ptr_input++;
52
92.3k
    ptr_input++;
53
11.0M
    for (i = ((long_frame_len >> 2) - 2); i >= 0; i--) {
54
10.9M
      *ptr_mdct_buff++ = temp1;
55
10.9M
      temp4 = *ptr_input++;
56
10.9M
      ptr_input++;
57
58
10.9M
      *ptr_mdct_buff++ = temp2;
59
10.9M
      *ptr_mdct_buff++ = temp3;
60
10.9M
      *ptr_mdct_buff++ = temp4;
61
62
10.9M
      temp1 = *ptr_input++;
63
10.9M
      ptr_input++;
64
10.9M
      temp2 = *ptr_input++;
65
10.9M
      ptr_input++;
66
10.9M
      temp3 = *ptr_input++;
67
10.9M
      ptr_input++;
68
10.9M
    }
69
92.3k
    *ptr_mdct_buff++ = temp1;
70
92.3k
    temp4 = *ptr_input;
71
92.3k
    *ptr_mdct_buff++ = temp2;
72
92.3k
    *ptr_mdct_buff++ = temp3;
73
92.3k
    *ptr_mdct_buff++ = temp4;
74
92.3k
  } else {
75
214k
    for (i = 0; i < long_frame_len; i += 2) {
76
213k
      *ptr_mdct_buff++ = ptr_time_signal[i * ch_increment];
77
213k
      *ptr_mdct_buff++ = ptr_time_signal[(i + 1) * ch_increment];
78
213k
    }
79
889
  }
80
93.2k
}
81
82
static VOID ia_eaacp_enc_inverse_transform_512(FLOAT32 *ptr_data, FLOAT32 *ptr_win_buf,
83
                                               const FLOAT32 *ptr_cos_sin_tbl,
84
0
                                               WORD8 *ptr_scratch) {
85
0
  WORD32 n = FRAME_LEN_512;
86
0
  WORD32 n_by_2 = n >> 1;
87
88
0
  ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
89
90
0
  ia_eaacp_enc_pre_twiddle_aac(ptr_win_buf, ptr_data, n, ptr_cos_sin_tbl);
91
92
0
  ia_enhaacplus_enc_complex_fft(ptr_win_buf, n_by_2, pstr_scratch);
93
94
0
  ia_enhaacplus_enc_post_twiddle(ptr_data, ptr_win_buf, ptr_cos_sin_tbl, n);
95
0
}
96
97
544k
static VOID ixheaace_pre_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_sine_window) {
98
544k
  WORD32 i;
99
544k
  FLOAT32 wre, wim, re1, re2, im1, im2;
100
101
58.0M
  for (i = 0; i < m / 4; i++) {
102
57.5M
    re1 = ptr_x[2 * i];
103
57.5M
    im2 = ptr_x[2 * i + 1];
104
57.5M
    re2 = ptr_x[m - 2 - 2 * i];
105
57.5M
    im1 = ptr_x[m - 1 - 2 * i];
106
107
57.5M
    wim = ptr_sine_window[i * 2];
108
57.5M
    wre = ptr_sine_window[m - 1 - 2 * i];
109
110
57.5M
    ptr_x[2 * i] = im1 * wim + re1 * wre;
111
112
57.5M
    ptr_x[2 * i + 1] = im1 * wre - re1 * wim;
113
114
57.5M
    wim = ptr_sine_window[m - 2 - 2 * i];
115
57.5M
    wre = ptr_sine_window[2 * i + 1];
116
117
57.5M
    ptr_x[m - 2 - 2 * i] = im2 * wim + re2 * wre;
118
119
57.5M
    ptr_x[m - 1 - 2 * i] = im2 * wre - re2 * wim;
120
57.5M
  }
121
544k
}
122
123
static VOID ia_enhaacplus_enc_tranform_mac4(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
124
                                            FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
125
                                            FLOAT32 *ptr_buf3, FLOAT32 *ptr_buf4, UWORD32 len,
126
186k
                                            WORD32 increment) {
127
186k
  WORD32 i;
128
129
186k
  if (increment > 0) {
130
5.68M
    for (i = len >> 2; i > 0; i--) {
131
5.59M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
132
5.59M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
133
5.59M
      *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
134
5.59M
      ptr_op++;
135
136
5.59M
      *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
137
5.59M
      *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
138
5.59M
      *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
139
5.59M
      ptr_op++;
140
141
5.59M
      *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
142
5.59M
      *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
143
5.59M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
144
5.59M
      ptr_op++;
145
146
5.59M
      *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
147
5.59M
      *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
148
5.59M
      *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
149
5.59M
      ptr_op++;
150
5.59M
      ptr_win += 16;
151
5.59M
    }
152
93.2k
  } else {
153
2.88M
    for (i = len >> 2; i > 0; i--) {
154
2.79M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
155
2.79M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
156
2.79M
      *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
157
2.79M
      ptr_op--;
158
159
2.79M
      *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
160
2.79M
      *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
161
2.79M
      *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
162
2.79M
      ptr_op--;
163
164
2.79M
      *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
165
2.79M
      *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
166
2.79M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
167
2.79M
      ptr_op--;
168
169
2.79M
      *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
170
2.79M
      *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
171
2.79M
      *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
172
2.79M
      ptr_op--;
173
2.79M
      ptr_win += 16;
174
2.79M
    }
175
93.2k
  }
176
186k
}
177
178
static VOID ia_enhaacplus_enc_tranform_mac3(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
179
                                            FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
180
93.2k
                                            FLOAT32 *ptr_buf3, UWORD32 len, WORD32 increment) {
181
93.2k
  WORD32 i;
182
183
93.2k
  if (increment > 0) {
184
0
    for (i = len >> 2; i > 0; i--) {
185
0
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
186
0
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
187
0
      ptr_op++;
188
189
0
      *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
190
0
      *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
191
0
      ptr_op++;
192
193
0
      *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
194
0
      *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
195
0
      ptr_op++;
196
197
0
      *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
198
0
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
199
0
      ptr_op++;
200
0
      ptr_win += 12;
201
0
    }
202
93.2k
  } else {
203
2.88M
    for (i = len >> 2; i > 0; i--) {
204
2.79M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
205
2.79M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
206
2.79M
      ptr_op--;
207
208
2.79M
      *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
209
2.79M
      *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
210
2.79M
      ptr_op--;
211
212
2.79M
      *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
213
2.79M
      *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
214
2.79M
      ptr_op--;
215
216
2.79M
      *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
217
2.79M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
218
2.79M
      ptr_op--;
219
2.79M
      ptr_win += 12;
220
2.79M
    }
221
93.2k
  }
222
93.2k
}
223
224
VOID ia_enhaacplus_enc_transform_real(FLOAT32 *ptr_mdct_delay_buffer,
225
                                      const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
226
                                      FLOAT32 *ptr_real_out, ixheaace_mdct_tables *pstr_mdct_tab,
227
                                      FLOAT32 *ptr_shared_buffer1, WORD8 *ptr_shared_buffer5,
228
93.2k
                                      WORD32 long_frame_len) {
229
93.2k
  WORD32 n, n1;
230
93.2k
  FLOAT32 *ptr_windowed_buf = ptr_shared_buffer1;
231
93.2k
  const FLOAT32 *ptr_ws1;
232
93.2k
  WORD32 i, len = long_frame_len;
233
93.2k
  FLOAT32 *ptr_real_in;
234
93.2k
  FLOAT32 *ptr_data1, *ptr_data2, *ptr_data3, *ptr_data4;
235
93.2k
  FLOAT32 *ptr_op1;
236
237
93.2k
  ptr_real_in = ptr_mdct_delay_buffer;
238
239
93.2k
  n = long_frame_len << 1;
240
93.2k
  n1 = long_frame_len >> 1;
241
242
93.2k
  ptr_ws1 =
243
93.2k
      (long_frame_len == FRAME_LEN_512) ? pstr_mdct_tab->win_512_ld : pstr_mdct_tab->win_480_ld;
244
245
93.2k
  ptr_op1 = ptr_real_out;
246
93.2k
  ptr_data1 = &ptr_real_in[n1];
247
93.2k
  ptr_data2 = &ptr_real_in[n + n1];
248
93.2k
  ptr_data3 = &ptr_real_in[n1 - 1];
249
93.2k
  ptr_data4 = &ptr_real_in[n + n1 - 1];
250
251
93.2k
  ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
252
93.2k
                                  n1, 1);
253
93.2k
  ptr_ws1 += ((SIZE_T)n1 << 2);
254
255
89.5M
  for (i = 0; i < long_frame_len << 1; i++) {
256
89.4M
    ptr_mdct_delay_buffer[i] = ptr_mdct_delay_buffer[long_frame_len + i];
257
89.4M
  }
258
93.2k
  ia_enhaacplus_enc_shift_mdct_delay_buffer(&ptr_mdct_delay_buffer[2 * long_frame_len],
259
93.2k
                                            ptr_time_signal, ch_increment, long_frame_len);
260
261
93.2k
  ptr_op1 = &ptr_real_out[long_frame_len - 1];
262
93.2k
  ptr_data1 = &ptr_real_in[n + len - n1];
263
93.2k
  ptr_data2 = &ptr_real_in[len - n1];
264
93.2k
  ptr_data3 = &ptr_real_in[len - n1 - 1];
265
93.2k
  ptr_data4 = &ptr_real_in[n + len - n1 - 1];
266
267
93.2k
  ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
268
93.2k
                                  (n1 >> 1), -1);
269
93.2k
  ptr_op1 -= (n1 >> 1);
270
93.2k
  ptr_ws1 += ((SIZE_T)n1 << 1);
271
93.2k
  ptr_data2 += (n1 >> 1);
272
93.2k
  ptr_data3 -= (n1 >> 1);
273
93.2k
  ptr_data4 -= (n1 >> 1);
274
93.2k
  ia_enhaacplus_enc_tranform_mac3(ptr_op1, ptr_ws1, ptr_data2, ptr_data3, ptr_data4, (n1 >> 1),
275
93.2k
                                  -1);
276
277
93.2k
  if (long_frame_len == FRAME_LEN_480) {
278
93.2k
    ia_aac_ld_enc_mdct_480(ptr_real_out, ptr_windowed_buf, 1, pstr_mdct_tab);
279
93.2k
  } else {
280
0
    ia_eaacp_enc_inverse_transform_512(ptr_real_out, ptr_windowed_buf,
281
0
                                       pstr_mdct_tab->cosine_array_1024, ptr_shared_buffer5);
282
0
  }
283
93.2k
}
284
285
static VOID ia_eaacp_enc_pre_twiddle_compute(FLOAT32 *ptr_in1, FLOAT32 *ptr_in2, FLOAT32 *ptr_x,
286
93.2k
                                             const FLOAT32 *ptr_cos_sin, WORD n_by_4) {
287
93.2k
  WORD32 i;
288
93.2k
  FLOAT32 temp_r, temp_i;
289
93.2k
  FLOAT32 temp_r1, temp_i1;
290
93.2k
  FLOAT32 *ptr_x1 = ptr_x + (SIZE_T)((n_by_4 << 2) - 1);
291
93.2k
  FLOAT32 c, c1, s, s1;
292
293
11.2M
  for (i = 0; i < n_by_4; i++) {
294
11.1M
    c = *ptr_cos_sin++;
295
11.1M
    s = *ptr_cos_sin++;
296
11.1M
    s1 = *ptr_cos_sin++;
297
11.1M
    c1 = *ptr_cos_sin++;
298
299
11.1M
    temp_r = *ptr_in1++;
300
11.1M
    temp_i1 = *ptr_in1++;
301
11.1M
    temp_i = *ptr_in2--;
302
11.1M
    temp_r1 = *ptr_in2--;
303
11.1M
    *ptr_x = ((temp_r * c) + (temp_i * s));
304
11.1M
    ptr_x++;
305
306
11.1M
    *ptr_x = ((temp_i * c) - (temp_r * s));
307
11.1M
    ptr_x++;
308
309
11.1M
    *ptr_x1 = ((temp_i1 * c1) - (temp_r1 * s1));
310
11.1M
    ptr_x1--;
311
312
11.1M
    *ptr_x1 = ((temp_r1 * c1) + (temp_i1 * s1));
313
11.1M
    ptr_x1--;
314
11.1M
  }
315
93.2k
}
316
317
VOID ia_enhaacplus_enc_post_twiddle(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
318
93.2k
                                    const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
319
93.2k
  WORD i;
320
93.2k
  FLOAT32 c, c1, s, s1;
321
93.2k
  FLOAT32 tmp_var;
322
93.2k
  FLOAT32 tempr, tempr1, tempi, tempi1;
323
93.2k
  FLOAT32 *ptr_out1 = ptr_out + m - 1;
324
93.2k
  FLOAT32 *ptr_x1 = ptr_x + m - 1;
325
326
11.2M
  for (i = 0; i < (m >> 2); i++) {
327
11.1M
    c = *ptr_cos_sin_tbl++;
328
11.1M
    s = *ptr_cos_sin_tbl++;
329
11.1M
    s1 = *ptr_cos_sin_tbl++;
330
11.1M
    c1 = *ptr_cos_sin_tbl++;
331
11.1M
    tempr = *ptr_x++;
332
11.1M
    tempi = *ptr_x++;
333
11.1M
    tempi1 = *ptr_x1--;
334
11.1M
    tempr1 = *ptr_x1--;
335
336
11.1M
    tmp_var = ((tempr * c) + (tempi * s));
337
11.1M
    *ptr_out++ = tmp_var;
338
339
11.1M
    tmp_var = ((tempr * s) - (tempi * c));
340
11.1M
    *ptr_out1-- = tmp_var;
341
342
11.1M
    tmp_var = ((tempr1 * c1) + (tempi1 * s1));
343
11.1M
    *ptr_out1-- = tmp_var;
344
345
11.1M
    tmp_var = ((tempr1 * s1) - (tempi1 * c1));
346
11.1M
    *ptr_out++ = tmp_var;
347
11.1M
  }
348
93.2k
}
349
350
VOID ia_eaacp_enc_pre_twiddle_aac(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
351
93.2k
                                  const FLOAT32 *ptr_cos_array) {
352
93.2k
  WORD n_by_4;
353
93.2k
  FLOAT32 *ptr_in1, *ptr_in2;
354
355
93.2k
  n_by_4 = n >> 2;
356
357
93.2k
  ptr_in1 = ptr_data;
358
93.2k
  ptr_in2 = ptr_data + n - 1;
359
360
93.2k
  ia_eaacp_enc_pre_twiddle_compute(ptr_in1, ptr_in2, ptr_x, ptr_cos_array, n_by_4);
361
93.2k
}
362
363
38.6M
static PLATFORM_INLINE WORD8 ia_enhaacplus_enc_calc_norm(WORD32 a) {
364
38.6M
  WORD8 norm_val;
365
366
38.6M
  if (a == 0) {
367
0
    norm_val = 31;
368
38.6M
  } else {
369
38.6M
    if (a == (WORD32)0xffffffffL) {
370
0
      norm_val = 31;
371
38.6M
    } else {
372
38.6M
      if (a < 0) {
373
0
        a = ~a;
374
0
      }
375
1.00G
      for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
376
963M
        a <<= 1;
377
963M
      }
378
38.6M
    }
379
38.6M
  }
380
381
38.6M
  return norm_val;
382
38.6M
}
383
384
static PLATFORM_INLINE VOID ia_enhaacplus_enc_complex_3point_fft(FLOAT32 *ptr_in,
385
0
                                                                 FLOAT32 *ptr_out) {
386
0
  FLOAT32 add_r, sub_r;
387
0
  FLOAT32 add_i, sub_i;
388
0
  FLOAT32 x_01_r, x_01_i, temp;
389
0
  FLOAT32 p1, p2, p3, p4;
390
0
  FLOAT64 sin_mu = 0.866025403784439f;
391
392
0
  x_01_r = ptr_in[0] + ptr_in[2];
393
0
  x_01_i = ptr_in[1] + ptr_in[3];
394
395
0
  add_r = ptr_in[2] + ptr_in[4];
396
0
  add_i = ptr_in[3] + ptr_in[5];
397
398
0
  sub_r = ptr_in[2] - ptr_in[4];
399
0
  sub_i = ptr_in[3] - ptr_in[5];
400
401
0
  p1 = add_r / (FLOAT32)2.0f;
402
0
  p4 = add_i / (FLOAT32)2.0f;
403
0
  p2 = (FLOAT32)((FLOAT64)sub_i * sin_mu);
404
0
  p3 = (FLOAT32)((FLOAT64)sub_r * sin_mu);
405
406
0
  temp = ptr_in[0] - p1;
407
408
0
  ptr_out[0] = x_01_r + ptr_in[4];
409
0
  ptr_out[1] = x_01_i + ptr_in[5];
410
0
  ptr_out[2] = temp + p2;
411
0
  ptr_out[3] = (ptr_in[1] - p3) - p4;
412
0
  ptr_out[4] = temp - p2;
413
0
  ptr_out[5] = (ptr_in[1] + p3) - p4;
414
0
}
415
416
VOID ia_enhaacplus_enc_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength,
417
19.3M
                                      FLOAT32 *ptr_scratch_fft_p2_y) {
418
19.3M
  WORD32 i, j, k, n_stages, h2;
419
19.3M
  FLOAT32 x0_r, x0_i, x1_r, x1_i, x2_r, x2_i, x3_r, x3_i;
420
19.3M
  WORD32 del, nodespacing, in_loop_cnt;
421
19.3M
  WORD32 not_power_4;
422
19.3M
  WORD32 dig_rev_shift;
423
19.3M
  FLOAT32 *ptr_p2_y = ptr_scratch_fft_p2_y;
424
19.3M
  WORD32 mpass = nlength;
425
19.3M
  WORD32 npoints = nlength;
426
19.3M
  FLOAT32 *ptr_y = ptr_p2_y;
427
19.3M
  const FLOAT64 *ptr_w;
428
19.3M
  FLOAT32 *ptr_inp;
429
19.3M
  FLOAT32 tmk;
430
19.3M
  const FLOAT64 *ptr_twiddles;
431
19.3M
  FLOAT32 *ptr_data;
432
19.3M
  FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
433
19.3M
  WORD32 sec_loop_cnt;
434
19.3M
  FLOAT32 tmp;
435
436
19.3M
  memset(ptr_y, 0, nlength * 2 * sizeof(*ptr_y));
437
438
19.3M
  dig_rev_shift = ia_enhaacplus_enc_calc_norm(mpass) + 1 - 16;
439
19.3M
  n_stages = 30 - ia_enhaacplus_enc_calc_norm(mpass);
440
19.3M
  not_power_4 = n_stages & 1;
441
442
19.3M
  n_stages = n_stages >> 1;
443
444
19.3M
  ptr_w = ia_enhaacplus_enc_twiddle_table_fft_32x32;
445
446
19.3M
  dig_rev_shift = MAX(dig_rev_shift, 0);
447
448
198M
  for (i = 0; i < npoints; i += 4) {
449
178M
    ptr_inp = ptr_x;
450
178M
    DIG_REV_NEW(i, dig_rev_shift, h2);
451
178M
    if (not_power_4) {
452
168M
      h2 += 1;
453
168M
      h2 &= ~1;
454
168M
    }
455
178M
    ptr_inp += (h2);
456
457
178M
    x0_r = *ptr_inp;
458
178M
    x0_i = *(ptr_inp + 1);
459
178M
    ptr_inp += (npoints >> 1);
460
461
178M
    x1_r = *ptr_inp;
462
178M
    x1_i = *(ptr_inp + 1);
463
178M
    ptr_inp += (npoints >> 1);
464
465
178M
    x2_r = *ptr_inp;
466
178M
    x2_i = *(ptr_inp + 1);
467
178M
    ptr_inp += (npoints >> 1);
468
469
178M
    x3_r = *ptr_inp;
470
178M
    x3_i = *(ptr_inp + 1);
471
472
178M
    x0_r = x0_r + x2_r;
473
178M
    x0_i = x0_i + x2_i;
474
475
178M
    tmk = x0_r - x2_r;
476
178M
    x2_r = tmk - x2_r;
477
178M
    tmk = x0_i - x2_i;
478
178M
    x2_i = tmk - x2_i;
479
480
178M
    x1_r = x1_r + x3_r;
481
178M
    x1_i = x1_i + x3_i;
482
483
178M
    tmk = x1_r - x3_r;
484
178M
    x3_r = tmk - x3_r;
485
178M
    tmk = x1_i - x3_i;
486
178M
    x3_i = tmk - x3_i;
487
488
178M
    x0_r = x0_r + x1_r;
489
178M
    x0_i = x0_i + x1_i;
490
491
178M
    tmk = x0_r - x1_r;
492
178M
    x1_r = tmk - x1_r;
493
178M
    tmk = x0_i - x1_i;
494
178M
    x1_i = tmk - x1_i;
495
496
178M
    x2_r = x2_r + x3_i;
497
178M
    x2_i = x2_i - x3_r;
498
499
178M
    tmk = x2_r - x3_i;
500
178M
    x3_i = tmk - x3_i;
501
178M
    tmk = x2_i + x3_r;
502
178M
    x3_r = tmk + x3_r;
503
504
178M
    *ptr_y++ = x0_r;
505
178M
    *ptr_y++ = x0_i;
506
178M
    *ptr_y++ = x2_r;
507
178M
    *ptr_y++ = x2_i;
508
178M
    *ptr_y++ = x1_r;
509
178M
    *ptr_y++ = x1_i;
510
178M
    *ptr_y++ = x3_i;
511
178M
    *ptr_y++ = x3_r;
512
178M
  }
513
19.3M
  ptr_y -= 2 * npoints;
514
19.3M
  del = 4;
515
19.3M
  nodespacing = 64;
516
19.3M
  in_loop_cnt = npoints >> 4;
517
39.4M
  for (i = n_stages - 1; i > 0; i--) {
518
20.1M
    ptr_twiddles = ptr_w;
519
20.1M
    ptr_data = ptr_y;
520
67.0M
    for (k = in_loop_cnt; k != 0; k--) {
521
46.9M
      x0_r = (*ptr_data);
522
46.9M
      x0_i = (*(ptr_data + 1));
523
46.9M
      ptr_data += ((SIZE_T)del << 1);
524
525
46.9M
      x1_r = (*ptr_data);
526
46.9M
      x1_i = (*(ptr_data + 1));
527
46.9M
      ptr_data += ((SIZE_T)del << 1);
528
529
46.9M
      x2_r = (*ptr_data);
530
46.9M
      x2_i = (*(ptr_data + 1));
531
46.9M
      ptr_data += ((SIZE_T)del << 1);
532
533
46.9M
      x3_r = (*ptr_data);
534
46.9M
      x3_i = (*(ptr_data + 1));
535
46.9M
      ptr_data -= 3 * (del << 1);
536
537
46.9M
      x0_r = x0_r + x2_r;
538
46.9M
      x0_i = x0_i + x2_i;
539
46.9M
      x2_r = x0_r - (x2_r * 2);
540
46.9M
      x2_i = x0_i - (x2_i * 2);
541
46.9M
      x1_r = x1_r + x3_r;
542
46.9M
      x1_i = x1_i + x3_i;
543
46.9M
      x3_r = x1_r - (x3_r * 2);
544
46.9M
      x3_i = x1_i - (x3_i * 2);
545
546
46.9M
      x0_r = x0_r + x1_r;
547
46.9M
      x0_i = x0_i + x1_i;
548
46.9M
      x1_r = x0_r - (x1_r * 2);
549
46.9M
      x1_i = x0_i - (x1_i * 2);
550
46.9M
      x2_r = x2_r + x3_i;
551
46.9M
      x2_i = x2_i - x3_r;
552
46.9M
      x3_i = x2_r - (x3_i * 2);
553
46.9M
      x3_r = x2_i + (x3_r * 2);
554
555
46.9M
      *ptr_data = x0_r;
556
46.9M
      *(ptr_data + 1) = x0_i;
557
46.9M
      ptr_data += ((SIZE_T)del << 1);
558
559
46.9M
      *ptr_data = x2_r;
560
46.9M
      *(ptr_data + 1) = x2_i;
561
46.9M
      ptr_data += ((SIZE_T)del << 1);
562
563
46.9M
      *ptr_data = x1_r;
564
46.9M
      *(ptr_data + 1) = x1_i;
565
46.9M
      ptr_data += ((SIZE_T)del << 1);
566
567
46.9M
      *ptr_data = x3_i;
568
46.9M
      *(ptr_data + 1) = x3_r;
569
46.9M
      ptr_data += ((SIZE_T)del << 1);
570
46.9M
    }
571
20.1M
    ptr_data = ptr_y + 2;
572
573
20.1M
    sec_loop_cnt = (nodespacing * del);
574
20.1M
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
575
20.1M
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
576
20.1M
                   (sec_loop_cnt / 256);
577
578
47.0M
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
579
26.9M
      w_1 = *(ptr_twiddles + j);
580
26.9M
      w_4 = *(ptr_twiddles + j + 257);
581
26.9M
      w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
582
26.9M
      w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
583
26.9M
      w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1));
584
26.9M
      w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 257);
585
586
88.4M
      for (k = in_loop_cnt; k != 0; k--) {
587
61.5M
        ptr_data += ((SIZE_T)del << 1);
588
589
61.5M
        x1_r = *ptr_data;
590
61.5M
        x1_i = *(ptr_data + 1);
591
61.5M
        ptr_data += ((SIZE_T)del << 1);
592
593
61.5M
        x2_r = *ptr_data;
594
61.5M
        x2_i = *(ptr_data + 1);
595
61.5M
        ptr_data += ((SIZE_T)del << 1);
596
597
61.5M
        x3_r = *ptr_data;
598
61.5M
        x3_i = *(ptr_data + 1);
599
61.5M
        ptr_data -= 3 * (del << 1);
600
601
61.5M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
602
61.5M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
603
61.5M
        x1_r = tmp;
604
605
61.5M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
606
61.5M
        x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
607
61.5M
        x2_r = tmp;
608
609
61.5M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_3) - ixheaace_dmult((FLOAT64)x3_i, w_6));
610
61.5M
        x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
611
61.5M
        x3_r = tmp;
612
613
61.5M
        x0_r = (*ptr_data);
614
61.5M
        x0_i = (*(ptr_data + 1));
615
616
61.5M
        x0_r = x0_r + (x2_r);
617
61.5M
        x0_i = x0_i + (x2_i);
618
61.5M
        x2_r = x0_r - (x2_r * 2);
619
61.5M
        x2_i = x0_i - (x2_i * 2);
620
61.5M
        x1_r = x1_r + x3_r;
621
61.5M
        x1_i = x1_i + x3_i;
622
61.5M
        x3_r = x1_r - (x3_r * 2);
623
61.5M
        x3_i = x1_i - (x3_i * 2);
624
625
61.5M
        x0_r = x0_r + (x1_r);
626
61.5M
        x0_i = x0_i + (x1_i);
627
61.5M
        x1_r = x0_r - (x1_r * 2);
628
61.5M
        x1_i = x0_i - (x1_i * 2);
629
61.5M
        x2_r = x2_r + (x3_i);
630
61.5M
        x2_i = x2_i - (x3_r);
631
61.5M
        x3_i = x2_r - (x3_i * 2);
632
61.5M
        x3_r = x2_i + (x3_r * 2);
633
634
61.5M
        *ptr_data = x0_r;
635
61.5M
        *(ptr_data + 1) = x0_i;
636
61.5M
        ptr_data += ((SIZE_T)del << 1);
637
638
61.5M
        *ptr_data = x2_r;
639
61.5M
        *(ptr_data + 1) = x2_i;
640
61.5M
        ptr_data += ((SIZE_T)del << 1);
641
642
61.5M
        *ptr_data = x1_r;
643
61.5M
        *(ptr_data + 1) = x1_i;
644
61.5M
        ptr_data += ((SIZE_T)del << 1);
645
646
61.5M
        *ptr_data = x3_i;
647
61.5M
        *(ptr_data + 1) = x3_r;
648
61.5M
        ptr_data += ((SIZE_T)del << 1);
649
61.5M
      }
650
26.9M
      ptr_data -= 2 * npoints;
651
26.9M
      ptr_data += 2;
652
26.9M
    }
653
43.6M
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
654
23.5M
      w_1 = *(ptr_twiddles + j);
655
23.5M
      w_4 = *(ptr_twiddles + j + 257);
656
23.5M
      w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
657
23.5M
      w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
658
23.5M
      w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1) - 256);
659
23.5M
      w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 1);
660
661
77.7M
      for (k = in_loop_cnt; k != 0; k--) {
662
54.2M
        ptr_data += ((SIZE_T)del << 1);
663
664
54.2M
        x1_r = *ptr_data;
665
54.2M
        x1_i = *(ptr_data + 1);
666
54.2M
        ptr_data += ((SIZE_T)del << 1);
667
668
54.2M
        x2_r = *ptr_data;
669
54.2M
        x2_i = *(ptr_data + 1);
670
54.2M
        ptr_data += ((SIZE_T)del << 1);
671
672
54.2M
        x3_r = *ptr_data;
673
54.2M
        x3_i = *(ptr_data + 1);
674
54.2M
        ptr_data -= 3 * (del << 1);
675
676
54.2M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
677
54.2M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
678
54.2M
        x1_r = tmp;
679
680
54.2M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
681
54.2M
        x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
682
54.2M
        x2_r = tmp;
683
684
54.2M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
685
54.2M
        x3_i =
686
54.2M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
687
54.2M
        x3_r = tmp;
688
689
54.2M
        x0_r = (*ptr_data);
690
54.2M
        x0_i = (*(ptr_data + 1));
691
692
54.2M
        x0_r = x0_r + (x2_r);
693
54.2M
        x0_i = x0_i + (x2_i);
694
54.2M
        x2_r = x0_r - (x2_r * 2);
695
54.2M
        x2_i = x0_i - (x2_i * 2);
696
54.2M
        x1_r = x1_r + x3_r;
697
54.2M
        x1_i = x1_i + x3_i;
698
54.2M
        x3_r = x1_r - (x3_r * 2);
699
54.2M
        x3_i = x1_i - (x3_i * 2);
700
701
54.2M
        x0_r = x0_r + (x1_r);
702
54.2M
        x0_i = x0_i + (x1_i);
703
54.2M
        x1_r = x0_r - (x1_r * 2);
704
54.2M
        x1_i = x0_i - (x1_i * 2);
705
54.2M
        x2_r = x2_r + (x3_i);
706
54.2M
        x2_i = x2_i - (x3_r);
707
54.2M
        x3_i = x2_r - (x3_i * 2);
708
54.2M
        x3_r = x2_i + (x3_r * 2);
709
710
54.2M
        *ptr_data = x0_r;
711
54.2M
        *(ptr_data + 1) = x0_i;
712
54.2M
        ptr_data += ((SIZE_T)del << 1);
713
714
54.2M
        *ptr_data = x2_r;
715
54.2M
        *(ptr_data + 1) = x2_i;
716
54.2M
        ptr_data += ((SIZE_T)del << 1);
717
718
54.2M
        *ptr_data = x1_r;
719
54.2M
        *(ptr_data + 1) = x1_i;
720
54.2M
        ptr_data += ((SIZE_T)del << 1);
721
722
54.2M
        *ptr_data = x3_i;
723
54.2M
        *(ptr_data + 1) = x3_r;
724
54.2M
        ptr_data += ((SIZE_T)del << 1);
725
54.2M
      }
726
23.5M
      ptr_data -= 2 * npoints;
727
23.5M
      ptr_data += 2;
728
23.5M
    }
729
23.5M
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
730
3.41M
      w_1 = *(ptr_twiddles + j);
731
3.41M
      w_4 = *(ptr_twiddles + j + 257);
732
3.41M
      w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
733
3.41M
      w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
734
3.41M
      w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 256));
735
3.41M
      w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) + 1));
736
737
10.7M
      for (k = in_loop_cnt; k != 0; k--) {
738
7.30M
        ptr_data += ((SIZE_T)del << 1);
739
740
7.30M
        x1_r = *ptr_data;
741
7.30M
        x1_i = *(ptr_data + 1);
742
7.30M
        ptr_data += ((SIZE_T)del << 1);
743
744
7.30M
        x2_r = *ptr_data;
745
7.30M
        x2_i = *(ptr_data + 1);
746
7.30M
        ptr_data += ((SIZE_T)del << 1);
747
748
7.30M
        x3_r = *ptr_data;
749
7.30M
        x3_i = *(ptr_data + 1);
750
7.30M
        ptr_data -= 3 * (del << 1);
751
752
7.30M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
753
7.30M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1_r, w_4), x1_i, w_1);
754
7.30M
        x1_r = tmp;
755
756
7.30M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
757
7.30M
        x2_i = (FLOAT32)(-ixheaace_dmult(x2_r, w_2) + ixheaace_dmult(x2_i, w_5));
758
7.30M
        x2_r = tmp;
759
760
7.30M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
761
7.30M
        x3_i =
762
7.30M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
763
7.30M
        x3_r = tmp;
764
765
7.30M
        x0_r = (*ptr_data);
766
7.30M
        x0_i = (*(ptr_data + 1));
767
768
7.30M
        x0_r = x0_r + (x2_r);
769
7.30M
        x0_i = x0_i + (x2_i);
770
7.30M
        x2_r = x0_r - (x2_r * 2);
771
7.30M
        x2_i = x0_i - (x2_i * 2);
772
7.30M
        x1_r = x1_r + x3_r;
773
7.30M
        x1_i = x1_i + x3_i;
774
7.30M
        x3_r = x1_r - (x3_r * 2);
775
7.30M
        x3_i = x1_i - (x3_i * 2);
776
777
7.30M
        x0_r = x0_r + (x1_r);
778
7.30M
        x0_i = x0_i + (x1_i);
779
7.30M
        x1_r = x0_r - (x1_r * 2);
780
7.30M
        x1_i = x0_i - (x1_i * 2);
781
7.30M
        x2_r = x2_r + (x3_i);
782
7.30M
        x2_i = x2_i - (x3_r);
783
7.30M
        x3_i = x2_r - (x3_i * 2);
784
7.30M
        x3_r = x2_i + (x3_r * 2);
785
786
7.30M
        *ptr_data = x0_r;
787
7.30M
        *(ptr_data + 1) = x0_i;
788
7.30M
        ptr_data += ((SIZE_T)del << 1);
789
790
7.30M
        *ptr_data = x2_r;
791
7.30M
        *(ptr_data + 1) = x2_i;
792
7.30M
        ptr_data += ((SIZE_T)del << 1);
793
794
7.30M
        *ptr_data = x1_r;
795
7.30M
        *(ptr_data + 1) = x1_i;
796
7.30M
        ptr_data += ((SIZE_T)del << 1);
797
798
7.30M
        *ptr_data = x3_i;
799
7.30M
        *(ptr_data + 1) = x3_r;
800
7.30M
        ptr_data += ((SIZE_T)del << 1);
801
7.30M
      }
802
3.41M
      ptr_data -= 2 * npoints;
803
3.41M
      ptr_data += 2;
804
3.41M
    }
805
47.0M
    for (; j < nodespacing * del; j += nodespacing) {
806
26.9M
      w_1 = *(ptr_twiddles + j);
807
26.9M
      w_4 = *(ptr_twiddles + j + 257);
808
26.9M
      w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
809
26.9M
      w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
810
26.9M
      w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512));
811
26.9M
      w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512 + 257));
812
813
88.4M
      for (k = in_loop_cnt; k != 0; k--) {
814
61.5M
        ptr_data += ((SIZE_T)del << 1);
815
816
61.5M
        x1_r = *ptr_data;
817
61.5M
        x1_i = *(ptr_data + 1);
818
61.5M
        ptr_data += ((SIZE_T)del << 1);
819
820
61.5M
        x2_r = *ptr_data;
821
61.5M
        x2_i = *(ptr_data + 1);
822
61.5M
        ptr_data += ((SIZE_T)del << 1);
823
824
61.5M
        x3_r = *ptr_data;
825
61.5M
        x3_i = *(ptr_data + 1);
826
61.5M
        ptr_data -= 3 * (del << 1);
827
828
61.5M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
829
61.5M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
830
61.5M
        x1_r = tmp;
831
832
61.5M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
833
61.5M
        x2_i =
834
61.5M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x2_r, w_2) + ixheaace_dmult((FLOAT64)x2_i, w_5));
835
61.5M
        x2_r = tmp;
836
837
61.5M
        tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
838
61.5M
        x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
839
61.5M
        x3_r = tmp;
840
841
61.5M
        x0_r = (*ptr_data);
842
61.5M
        x0_i = (*(ptr_data + 1));
843
844
61.5M
        x0_r = x0_r + (x2_r);
845
61.5M
        x0_i = x0_i + (x2_i);
846
61.5M
        x2_r = x0_r - (x2_r * 2);
847
61.5M
        x2_i = x0_i - (x2_i * 2);
848
61.5M
        x1_r = x1_r + x3_r;
849
61.5M
        x1_i = x1_i - x3_i;
850
61.5M
        x3_r = x1_r - (x3_r * 2);
851
61.5M
        x3_i = x1_i + (x3_i * 2);
852
853
61.5M
        x0_r = x0_r + (x1_r);
854
61.5M
        x0_i = x0_i + (x1_i);
855
61.5M
        x1_r = x0_r - (x1_r * 2);
856
61.5M
        x1_i = x0_i - (x1_i * 2);
857
61.5M
        x2_r = x2_r + (x3_i);
858
61.5M
        x2_i = x2_i - (x3_r);
859
61.5M
        x3_i = x2_r - (x3_i * 2);
860
61.5M
        x3_r = x2_i + (x3_r * 2);
861
862
61.5M
        *ptr_data = x0_r;
863
61.5M
        *(ptr_data + 1) = x0_i;
864
61.5M
        ptr_data += ((SIZE_T)del << 1);
865
866
61.5M
        *ptr_data = x2_r;
867
61.5M
        *(ptr_data + 1) = x2_i;
868
61.5M
        ptr_data += ((SIZE_T)del << 1);
869
870
61.5M
        *ptr_data = x1_r;
871
61.5M
        *(ptr_data + 1) = x1_i;
872
61.5M
        ptr_data += ((SIZE_T)del << 1);
873
874
61.5M
        *ptr_data = x3_i;
875
61.5M
        *(ptr_data + 1) = x3_r;
876
61.5M
        ptr_data += ((SIZE_T)del << 1);
877
61.5M
      }
878
26.9M
      ptr_data -= 2 * npoints;
879
26.9M
      ptr_data += 2;
880
26.9M
    }
881
20.1M
    nodespacing >>= 2;
882
20.1M
    del <<= 2;
883
20.1M
    in_loop_cnt >>= 2;
884
20.1M
  }
885
19.3M
  if (not_power_4) {
886
18.9M
    ptr_twiddles = ptr_w;
887
18.9M
    nodespacing <<= 1;
888
889
186M
    for (j = del / 2; j != 0; j--) {
890
168M
      w_1 = *ptr_twiddles;
891
168M
      w_4 = *(ptr_twiddles + 257);
892
168M
      ptr_twiddles += nodespacing;
893
894
168M
      x0_r = *ptr_y;
895
168M
      x0_i = *(ptr_y + 1);
896
168M
      ptr_y += ((SIZE_T)del << 1);
897
898
168M
      x1_r = *ptr_y;
899
168M
      x1_i = *(ptr_y + 1);
900
901
168M
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
902
168M
      x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
903
168M
      x1_r = tmp;
904
905
168M
      *ptr_y = (x0_r) - (x1_r);
906
168M
      *(ptr_y + 1) = (x0_i) - (x1_i);
907
168M
      ptr_y -= ((SIZE_T)del << 1);
908
909
168M
      *ptr_y = (x0_r) + (x1_r);
910
168M
      *(ptr_y + 1) = (x0_i) + (x1_i);
911
168M
      ptr_y += 2;
912
168M
    }
913
18.9M
    ptr_twiddles = ptr_w;
914
186M
    for (j = del / 2; j != 0; j--) {
915
168M
      w_1 = *ptr_twiddles;
916
168M
      w_4 = *(ptr_twiddles + 257);
917
168M
      ptr_twiddles += nodespacing;
918
919
168M
      x0_r = *ptr_y;
920
168M
      x0_i = *(ptr_y + 1);
921
168M
      ptr_y += ((SIZE_T)del << 1);
922
923
168M
      x1_r = *ptr_y;
924
168M
      x1_i = *(ptr_y + 1);
925
926
168M
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_4) +
927
168M
                      ixheaace_dmult((FLOAT64)x1_i, w_1)) /*/2*/;
928
168M
      x1_i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1_r, w_1) +
929
168M
                       ixheaace_dmult((FLOAT64)x1_i, w_4)) /*/2*/;
930
168M
      x1_r = tmp;
931
932
168M
      *ptr_y = (x0_r) - (x1_r);
933
168M
      *(ptr_y + 1) = (x0_i) - (x1_i);
934
168M
      ptr_y -= ((SIZE_T)del << 1);
935
936
168M
      *ptr_y = (x0_r) + (x1_r);
937
168M
      *(ptr_y + 1) = (x0_i) + (x1_i);
938
168M
      ptr_y += 2;
939
168M
    }
940
18.9M
  }
941
942
735M
  for (i = 0; i < nlength; i++) {
943
715M
    *(ptr_x + 2 * i) = ptr_p2_y[2 * i];
944
715M
    *(ptr_x + 2 * i + 1) = ptr_p2_y[2 * i + 1];
945
715M
  }
946
19.3M
}
947
948
static VOID ia_enhaacplus_enc_complex_fft_p3(FLOAT32 *ptr_data, WORD32 nlength,
949
0
                                             ixheaace_scratch_mem *pstr_scratch) {
950
0
  WORD32 i, j;
951
0
  FLOAT32 *ptr_data_3 = pstr_scratch->p_fft_p3_data_3;
952
0
  FLOAT32 *ptr_p3_y = pstr_scratch->p_fft_p3_y;
953
0
  WORD32 cnfac;
954
0
  WORD32 mpass = nlength;
955
0
  FLOAT32 *ptr_x = ptr_data;
956
0
  FLOAT32 *ptr_y = ptr_p3_y;
957
0
  cnfac = 0;
958
0
  const FLOAT64 *ptr_w1_r, *ptr_w1_i;
959
0
  FLOAT32 tmp;
960
0
  ptr_w1_r = ia_enhaacplus_enc_twiddle_table_3pr;
961
0
  ptr_w1_i = ia_enhaacplus_enc_twiddle_table_3pi;
962
963
0
  while (mpass % 3 == 0) {
964
0
    mpass /= 3;
965
0
    cnfac++;
966
0
  }
967
968
0
  for (i = 0; i < 3 * cnfac; i++) {
969
0
    for (j = 0; j < mpass; j++) {
970
0
      ptr_data_3[2 * j] = ptr_data[3 * (2 * j) + (2 * i)];
971
0
      ptr_data_3[2 * j + 1] = ptr_data[3 * (2 * j) + 1 + (2 * i)];
972
0
    }
973
0
    ia_enhaacplus_enc_complex_fft_p2(ptr_data_3, mpass, pstr_scratch->p_fft_p2_y);
974
975
0
    for (j = 0; j < mpass; j++) {
976
0
      ptr_data[3 * (2 * j) + (2 * i)] = ptr_data_3[2 * j];
977
0
      ptr_data[3 * (2 * j) + 1 + (2 * i)] = ptr_data_3[2 * j + 1];
978
0
    }
979
0
  }
980
981
0
  {
982
0
    for (i = 0; i < nlength; i += 3) {
983
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_r) -
984
0
                      (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_i));
985
0
      ptr_data[2 * i + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_i) +
986
0
                                      (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_r));
987
0
      ptr_data[2 * i] = tmp;
988
989
0
      ptr_w1_r++;
990
0
      ptr_w1_i++;
991
992
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_r) -
993
0
                      (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_i));
994
0
      ptr_data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_i) +
995
0
                                            (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_r));
996
0
      ptr_data[2 * (i + 1)] = tmp;
997
998
0
      ptr_w1_r++;
999
0
      ptr_w1_i++;
1000
1001
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_r) -
1002
0
                      (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_i));
1003
0
      ptr_data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_i) +
1004
0
                                            (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_r));
1005
0
      ptr_data[2 * (i + 2)] = tmp;
1006
1007
0
      ptr_w1_r += 3 * (128 / mpass - 1) + 1;
1008
0
      ptr_w1_i += 3 * (128 / mpass - 1) + 1;
1009
0
    }
1010
0
  }
1011
1012
0
  for (i = 0; i < mpass; i++) {
1013
0
    ia_enhaacplus_enc_complex_3point_fft(ptr_x, ptr_y);
1014
1015
0
    ptr_x = ptr_x + 6;
1016
0
    ptr_y = ptr_y + 6;
1017
0
  }
1018
1019
0
  for (i = 0; i < mpass; i++) {
1020
0
    ptr_data[2 * i] = ptr_p3_y[6 * i];
1021
0
    ptr_data[2 * i + 1] = ptr_p3_y[6 * i + 1];
1022
0
  }
1023
1024
0
  for (i = 0; i < mpass; i++) {
1025
0
    ptr_data[2 * (i + mpass)] = ptr_p3_y[6 * i + 2];
1026
0
    ptr_data[2 * (i + mpass) + 1] = ptr_p3_y[6 * i + 3];
1027
0
  }
1028
1029
0
  for (i = 0; i < mpass; i++) {
1030
0
    ptr_data[2 * (i + 2 * mpass)] = ptr_p3_y[6 * i + 4];
1031
0
    ptr_data[2 * (i + 2 * mpass) + 1] = ptr_p3_y[6 * i + 5];
1032
0
  }
1033
0
}
1034
1035
VOID ia_enhaacplus_enc_complex_fft(FLOAT32 *ptr_data, WORD32 len,
1036
14.8M
                                   ixheaace_scratch_mem *pstr_scratch) {
1037
14.8M
  if (len & (len - 1)) {
1038
0
    ia_enhaacplus_enc_complex_fft_p3(ptr_data, len, pstr_scratch);
1039
14.8M
  } else {
1040
14.8M
    ia_enhaacplus_enc_complex_fft_p2(ptr_data, len, pstr_scratch->p_fft_p2_y);
1041
14.8M
  }
1042
14.8M
}
1043
1044
static VOID ixheaace_post_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_trig_data,
1045
544k
                               WORD32 step, WORD32 trig_data_size) {
1046
544k
  WORD32 i;
1047
544k
  FLOAT32 w_re, w_im, re1, re2, im1, im2;
1048
544k
  const FLOAT32 *ptr_sin = ptr_trig_data;
1049
544k
  const FLOAT32 *ptr_cos = ptr_trig_data + trig_data_size;
1050
1051
544k
  w_im = *ptr_sin;
1052
544k
  w_re = *ptr_cos;
1053
1054
58.0M
  for (i = 0; i < m / 4; i++) {
1055
57.5M
    re1 = ptr_x[2 * i];
1056
57.5M
    im1 = ptr_x[2 * i + 1];
1057
57.5M
    re2 = ptr_x[m - 2 - 2 * i];
1058
57.5M
    im2 = ptr_x[m - 1 - 2 * i];
1059
1060
57.5M
    ptr_x[2 * i] = (re1 * w_re + im1 * w_im);
1061
1062
57.5M
    ptr_x[m - 1 - 2 * i] = (re1 * w_im - im1 * w_re);
1063
1064
57.5M
    ptr_sin += step;
1065
57.5M
    ptr_cos -= step;
1066
1067
57.5M
    w_im = *ptr_sin;
1068
57.5M
    w_re = *ptr_cos;
1069
1070
57.5M
    ptr_x[m - 2 - 2 * i] = (re2 * w_im + im2 * w_re);
1071
1072
57.5M
    ptr_x[2 * i + 1] = (re2 * w_re - im2 * w_im);
1073
57.5M
  }
1074
544k
}
1075
1076
static VOID ixheaace_cplx_mult_twid(FLOAT32 *ptr_re, FLOAT32 *ptr_im, FLOAT32 a, FLOAT32 b,
1077
59.1M
                                    FLOAT32 twid_table, FLOAT32 twid_table_h) {
1078
59.1M
  *ptr_re = (a * twid_table) - (b * twid_table_h);
1079
59.1M
  *ptr_im = (a * twid_table_h) + (b * twid_table);
1080
59.1M
}
1081
1082
2.74M
static VOID ixheaace_cfft_15_twiddle(FLOAT32 *ptr_inp) {
1083
2.74M
  const FLOAT32 *ptr_tw_flt = &ixheaace_mix_rad_twid_tbl[0];
1084
2.74M
  const FLOAT32 *ptr_tw_flt_h = &ixheaace_mix_rad_twid_tbl_h[0];
1085
2.74M
  FLOAT32 accu1, accu2;
1086
2.74M
  WORD32 i, j;
1087
2.74M
  ptr_inp += 12;
1088
1089
8.22M
  for (j = 0; j < 2; j++) {
1090
27.4M
    for (i = 0; i < 4; i++) {
1091
21.9M
      ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1092
21.9M
                              ptr_tw_flt[i], ptr_tw_flt_h[i]);
1093
21.9M
      ptr_inp[2 * i + 0] = accu1;
1094
21.9M
      ptr_inp[2 * i + 1] = accu2;
1095
21.9M
    }
1096
5.48M
    ptr_inp += 10;
1097
5.48M
    ptr_tw_flt += 4;
1098
5.48M
    ptr_tw_flt_h += 4;
1099
5.48M
  }
1100
2.74M
}
1101
1102
2.74M
static VOID ixheaace_cfft_15_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, FLOAT32 *ptr_fft3_out) {
1103
2.74M
  WORD32 i, idx;
1104
2.74M
  FLOAT32 *ptr_buf1, *ptr_buf2, *ptr_buf3;
1105
2.74M
  FLOAT32 add_r, sub_r;
1106
2.74M
  FLOAT32 add_i, sub_i;
1107
2.74M
  FLOAT32 x_01_r, x_01_i, temp;
1108
2.74M
  FLOAT32 p1, p2, p3, p4;
1109
1110
2.74M
  FLOAT32 sin_mu_flt = 0.866027832f;
1111
2.74M
  FLOAT32 c51_flt = 0.951049805f;
1112
2.74M
  FLOAT32 c52_flt = -0.76940918f;
1113
2.74M
  FLOAT32 c53_flt = -0.36328125f;
1114
2.74M
  FLOAT32 c54_flt = 0.559020996f;
1115
2.74M
  FLOAT32 c55_flt = -0.625f;
1116
1117
2.74M
  FLOAT32 r1, r2, r3, r4;
1118
2.74M
  FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1119
2.74M
  FLOAT32 *ptr_out_fft3 = ptr_fft3_out;
1120
1121
2.74M
  FLOAT32 xr_0, xr_1, xr_2;
1122
2.74M
  FLOAT32 xi_0, xi_1, xi_2;
1123
1124
2.74M
  ptr_buf2 = ptr_fft3_out;
1125
2.74M
  ptr_buf1 = ptr_buf3 = ptr_fft3_out;
1126
1127
10.9M
  for (i = 0; i < FFT3; i++) {
1128
8.22M
    *ptr_buf1++ = ptr_inp[0 + 64 * i];
1129
8.22M
    *ptr_buf1++ = ptr_inp[1 + 64 * i];
1130
1131
8.22M
    *ptr_buf1++ = ptr_inp[192 + 64 * i];
1132
8.22M
    *ptr_buf1++ = ptr_inp[193 + 64 * i];
1133
1134
8.22M
    *ptr_buf1++ = ptr_inp[384 + 64 * i];
1135
8.22M
    *ptr_buf1++ = ptr_inp[385 + 64 * i];
1136
1137
8.22M
    *ptr_buf1++ = ptr_inp[576 + 64 * i];
1138
8.22M
    *ptr_buf1++ = ptr_inp[577 + 64 * i];
1139
1140
8.22M
    *ptr_buf1++ = ptr_inp[768 + 64 * i];
1141
8.22M
    *ptr_buf1++ = ptr_inp[769 + 64 * i];
1142
1143
8.22M
    r1 = ptr_buf3[2] + ptr_buf3[8];
1144
8.22M
    r4 = ptr_buf3[2] - ptr_buf3[8];
1145
8.22M
    r3 = ptr_buf3[4] + ptr_buf3[6];
1146
8.22M
    r2 = ptr_buf3[4] - ptr_buf3[6];
1147
8.22M
    t = ((r1 - r3) * c54_flt);
1148
1149
8.22M
    r1 = r1 + r3;
1150
1151
8.22M
    temp1 = ptr_buf3[0] + r1;
1152
1153
8.22M
    r1 = temp1 + ((r1 * c55_flt) * 2);
1154
1155
8.22M
    r3 = r1 - t;
1156
8.22M
    r1 = r1 + t;
1157
1158
8.22M
    t = ((r4 + r2) * c51_flt);
1159
8.22M
    r4 = t + ((r4 * c52_flt) * 2);
1160
8.22M
    r2 = t + (r2 * c53_flt);
1161
1162
8.22M
    s1 = ptr_buf3[3] + ptr_buf3[9];
1163
8.22M
    s4 = ptr_buf3[3] - ptr_buf3[9];
1164
8.22M
    s3 = ptr_buf3[5] + ptr_buf3[7];
1165
8.22M
    s2 = ptr_buf3[5] - ptr_buf3[7];
1166
1167
8.22M
    t = ((s1 - s3) * c54_flt);
1168
1169
8.22M
    s1 = s1 + s3;
1170
1171
8.22M
    temp2 = ptr_buf3[1] + s1;
1172
1173
8.22M
    s1 = temp2 + (((s1 * c55_flt)) * 2);
1174
1175
8.22M
    s3 = s1 - t;
1176
8.22M
    s1 = s1 + t;
1177
1178
8.22M
    t = ((s4 + s2) * c51_flt);
1179
8.22M
    s4 = t + (((s4 * c52_flt)) * 2);
1180
8.22M
    s2 = t + ((s2 * c53_flt));
1181
1182
8.22M
    *ptr_buf2++ = temp1;
1183
8.22M
    *ptr_buf2++ = temp2;
1184
8.22M
    *ptr_buf2++ = r1 + s2;
1185
8.22M
    *ptr_buf2++ = s1 - r2;
1186
8.22M
    *ptr_buf2++ = r3 - s4;
1187
8.22M
    *ptr_buf2++ = s3 + r4;
1188
8.22M
    *ptr_buf2++ = r3 + s4;
1189
8.22M
    *ptr_buf2++ = s3 - r4;
1190
8.22M
    *ptr_buf2++ = r1 - s2;
1191
8.22M
    *ptr_buf2++ = s1 + r2;
1192
8.22M
    ptr_buf3 = ptr_buf1;
1193
8.22M
  }
1194
1195
2.74M
  idx = 0;
1196
2.74M
  ixheaace_cfft_15_twiddle(ptr_out_fft3);
1197
1198
16.4M
  for (i = 0; i < FFT5; i++) {
1199
13.7M
    xr_0 = ptr_out_fft3[0];
1200
13.7M
    xi_0 = ptr_out_fft3[1];
1201
1202
13.7M
    xr_1 = ptr_out_fft3[10];
1203
13.7M
    xi_1 = ptr_out_fft3[11];
1204
1205
13.7M
    xr_2 = ptr_out_fft3[20];
1206
13.7M
    xi_2 = ptr_out_fft3[21];
1207
1208
13.7M
    x_01_r = (xr_0 + xr_1);
1209
13.7M
    x_01_i = (xi_0 + xi_1);
1210
1211
13.7M
    add_r = (xr_1 + xr_2);
1212
13.7M
    add_i = (xi_1 + xi_2);
1213
1214
13.7M
    sub_r = (xr_1 - xr_2);
1215
13.7M
    sub_i = (xi_1 - xi_2);
1216
1217
13.7M
    p1 = add_r / 2;
1218
1219
13.7M
    p2 = (sub_i * sin_mu_flt);
1220
13.7M
    p3 = (sub_r * sin_mu_flt);
1221
1222
13.7M
    p4 = add_i / 2;
1223
1224
13.7M
    temp = (xr_0 - p1);
1225
13.7M
    temp1 = (xi_0 + p3);
1226
13.7M
    temp2 = (xi_0 - p3);
1227
1228
13.7M
    ptr_op[idx] = (x_01_r + xr_2);
1229
13.7M
    ptr_op[idx + 1] = (x_01_i + xi_2);
1230
1231
13.7M
    idx = idx + 320;
1232
13.7M
    ptr_op[idx] = (temp + p2);
1233
13.7M
    ptr_op[idx + 1] = (temp2 - p4);
1234
1235
13.7M
    idx = idx + 320;
1236
13.7M
    ptr_op[idx] = (temp - p2);
1237
13.7M
    ptr_op[idx + 1] = (temp1 - p4);
1238
13.7M
    ptr_out_fft3 += 2;
1239
13.7M
    idx = idx - 576;
1240
13.7M
  }
1241
2.74M
}
1242
1243
static VOID ixheaace_cfft_twiddle_mult(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1,
1244
                                       WORD32 dim2, const FLOAT32 *ptr_tw_flt,
1245
85.6k
                                       const FLOAT32 *ptr_tw_h_flt) {
1246
85.6k
  FLOAT32 accu1, accu2;
1247
85.6k
  WORD32 i, j;
1248
85.6k
  WORD32 step_val = (dim2 - 1) << 1;
1249
2.82M
  for (i = 0; i < dim2; i++) {
1250
2.74M
    ptr_op[0] = ptr_inp[0];
1251
2.74M
    ptr_op[1] = ptr_inp[1];
1252
2.74M
    ptr_op += 2;
1253
2.74M
    ptr_inp += 2;
1254
2.74M
  }
1255
1256
1.28M
  for (j = 0; j < (dim1 - 1); j++) {
1257
1.19M
    ptr_op[0] = ptr_inp[0];
1258
1.19M
    ptr_op[1] = ptr_inp[1];
1259
1.19M
    ptr_inp += 2;
1260
1.19M
    ptr_op += 2;
1261
38.3M
    for (i = 0; i < (dim2 - 1); i++) {
1262
37.1M
      ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1263
37.1M
                              ptr_tw_flt[i], ptr_tw_h_flt[i]);
1264
37.1M
      ptr_op[2 * i + 0] = accu1;
1265
37.1M
      ptr_op[2 * i + 1] = accu2;
1266
37.1M
    }
1267
1.19M
    ptr_inp += step_val;
1268
1.19M
    ptr_op += step_val;
1269
1.19M
    ptr_tw_flt += (dim2 - 1);
1270
1.19M
    ptr_tw_h_flt += (dim2 - 1);
1271
1.19M
  }
1272
85.6k
}
1273
1274
1.28M
static VOID ixheaace_cfft_32_480(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1275
1.28M
  WORD32 i, l1, l2, h2;
1276
1.28M
  FLOAT32 xh0_0, xh1_0, xl0_0, xl1_0;
1277
1.28M
  FLOAT32 xh0_1, xh1_1, xl0_1, xl1_1;
1278
1.28M
  FLOAT32 x_0, x_1, x_2, x_3;
1279
1.28M
  FLOAT32 x_4, x_5, x_6, x_7;
1280
1.28M
  FLOAT32 *ptr_x;
1281
1.28M
  FLOAT32 *ptr_y;
1282
1.28M
  FLOAT32 interm_y[FFT32X2];
1283
1.28M
  FLOAT32 n00, n10, n20, n30, n01, n11, n21, n31;
1284
1285
1.28M
  FLOAT32 inp_0qi, inp_0qr;
1286
1.28M
  FLOAT32 inp_1qi, inp_1qr;
1287
1.28M
  FLOAT32 inp_2qi, inp_2qr;
1288
1.28M
  FLOAT32 inp_3qi, inp_3qr;
1289
1.28M
  FLOAT32 mul_0qi, mul_0qr;
1290
1.28M
  FLOAT32 mul_1qi, mul_1qr;
1291
1.28M
  FLOAT32 mul_2qi, mul_2qr;
1292
1.28M
  FLOAT32 mul_3qi, mul_3qr;
1293
1.28M
  FLOAT32 sum_0qi, sum_0qr;
1294
1.28M
  FLOAT32 sum_1qi, sum_1qr;
1295
1.28M
  FLOAT32 sum_2qi, sum_2qr;
1296
1.28M
  FLOAT32 sum_3qi, sum_3qr;
1297
1.28M
  WORD32 idx1 = 0, idx2 = FFT15 * FFT32;
1298
1.28M
  FLOAT32 mul_i, mul_r;
1299
1300
1.28M
  ptr_x = ptr_in;
1301
1302
  // This computes first and second stage butterflies. So, 4-point FFT is done.
1303
11.5M
  for (i = 0; i < 8; i++) {
1304
10.2M
    x_0 = ptr_x[0];
1305
10.2M
    x_1 = ptr_x[1];
1306
10.2M
    x_2 = ptr_x[16];
1307
10.2M
    x_3 = ptr_x[16 + 1];
1308
10.2M
    x_4 = ptr_x[32];
1309
10.2M
    x_5 = ptr_x[32 + 1];
1310
10.2M
    x_6 = ptr_x[48];
1311
10.2M
    x_7 = ptr_x[48 + 1];
1312
1313
10.2M
    xh0_0 = x_0 + x_4;
1314
10.2M
    xh1_0 = x_1 + x_5;
1315
10.2M
    xl0_0 = x_0 - x_4;
1316
10.2M
    xl1_0 = x_1 - x_5;
1317
10.2M
    xh0_1 = x_2 + x_6;
1318
10.2M
    xh1_1 = x_3 + x_7;
1319
10.2M
    xl0_1 = x_2 - x_6;
1320
10.2M
    xl1_1 = x_3 - x_7;
1321
1322
10.2M
    n00 = xh0_0 + xh0_1;
1323
10.2M
    n01 = xh1_0 + xh1_1;
1324
10.2M
    n10 = xl0_0 + xl1_1;
1325
10.2M
    n11 = xl1_0 - xl0_1;
1326
10.2M
    n20 = xh0_0 - xh0_1;
1327
10.2M
    n21 = xh1_0 - xh1_1;
1328
10.2M
    n30 = xl0_0 - xl1_1;
1329
10.2M
    n31 = xl1_0 + xl0_1;
1330
1331
10.2M
    ptr_x[0] = n00;
1332
10.2M
    ptr_x[1] = n01;
1333
10.2M
    ptr_x[16] = n10;
1334
10.2M
    ptr_x[16 + 1] = n11;
1335
10.2M
    ptr_x[32] = n20;
1336
10.2M
    ptr_x[32 + 1] = n21;
1337
10.2M
    ptr_x[48] = n30;
1338
10.2M
    ptr_x[48 + 1] = n31;
1339
1340
10.2M
    ptr_x += 2;
1341
10.2M
  }
1342
1343
  // This computes third and fourth stage butterflies. So, next 4-point FFT is done.
1344
1.28M
  {
1345
1.28M
    h2 = 16 >> 1;
1346
1.28M
    l1 = 16;
1347
1.28M
    l2 = 16 + (16 >> 1);
1348
1349
1.28M
    ptr_x = ptr_in;
1350
1.28M
    ptr_y = &interm_y[0];
1351
1352
    /* Butter fly summation in 2 steps */
1353
1.28M
    inp_0qr = ptr_x[0];
1354
1.28M
    inp_0qi = ptr_x[1];
1355
1.28M
    inp_1qr = ptr_x[4];
1356
1.28M
    inp_1qi = ptr_x[5];
1357
1.28M
    inp_2qr = ptr_x[8];
1358
1.28M
    inp_2qi = ptr_x[9];
1359
1.28M
    inp_3qr = ptr_x[12];
1360
1.28M
    inp_3qi = ptr_x[13];
1361
1362
1.28M
    mul_0qr = inp_0qr;
1363
1.28M
    mul_0qi = inp_0qi;
1364
1.28M
    mul_1qr = inp_1qr;
1365
1.28M
    mul_1qi = inp_1qi;
1366
1.28M
    mul_2qr = inp_2qr;
1367
1.28M
    mul_2qi = inp_2qi;
1368
1.28M
    mul_3qr = inp_3qr;
1369
1.28M
    mul_3qi = inp_3qi;
1370
1371
1.28M
    sum_0qr = mul_0qr + mul_2qr;
1372
1.28M
    sum_0qi = mul_0qi + mul_2qi;
1373
1.28M
    sum_1qr = mul_0qr - mul_2qr;
1374
1.28M
    sum_1qi = mul_0qi - mul_2qi;
1375
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1376
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1377
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1378
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1379
1380
1.28M
    ptr_y[0] = sum_0qr + sum_2qr;
1381
1.28M
    ptr_y[1] = sum_0qi + sum_2qi;
1382
1.28M
    ptr_y[h2] = sum_1qr + sum_3qi;
1383
1.28M
    ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1384
1.28M
    ptr_y[l1] = sum_0qr - sum_2qr;
1385
1.28M
    ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1386
1.28M
    ptr_y[l2] = sum_1qr - sum_3qi;
1387
1.28M
    ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1388
1389
1.28M
    ptr_y += 2;
1390
1.28M
    ptr_x += 16;
1391
1392
    /* 2nd butter fly */
1393
1394
1.28M
    inp_0qr = ptr_x[0];
1395
1.28M
    inp_0qi = ptr_x[1];
1396
1.28M
    inp_1qr = ptr_x[4];
1397
1.28M
    inp_1qi = ptr_x[5];
1398
1.28M
    inp_2qr = ptr_x[8];
1399
1.28M
    inp_2qi = ptr_x[9];
1400
1.28M
    inp_3qr = ptr_x[12];
1401
1.28M
    inp_3qi = ptr_x[13];
1402
1403
1.28M
    mul_0qr = inp_0qr;
1404
1.28M
    mul_0qi = inp_0qi;
1405
1406
1.28M
    mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1407
1.28M
    mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1408
1409
1.28M
    mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1410
1.28M
    mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1411
1412
1.28M
    mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1413
1.28M
    mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1414
1415
1.28M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1416
1.28M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1417
1.28M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1418
1.28M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1419
1420
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1421
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1422
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1423
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1424
1425
1.28M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1426
1.28M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1427
1.28M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1428
1.28M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1429
1.28M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1430
1.28M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1431
1.28M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1432
1.28M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1433
1434
1.28M
    ptr_y += 2;
1435
1.28M
    ptr_x += 16;
1436
1437
    /* 3rd butter fly */
1438
1439
1.28M
    inp_0qr = ptr_x[0];
1440
1.28M
    inp_0qi = ptr_x[1];
1441
1.28M
    inp_1qr = ptr_x[4];
1442
1.28M
    inp_1qi = ptr_x[5];
1443
1.28M
    inp_2qr = ptr_x[8];
1444
1.28M
    inp_2qi = ptr_x[9];
1445
1.28M
    inp_3qr = ptr_x[12];
1446
1.28M
    inp_3qi = ptr_x[13];
1447
1448
1.28M
    mul_0qr = inp_0qr;
1449
1.28M
    mul_0qi = inp_0qi;
1450
1451
1.28M
    mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1452
1.28M
    mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1453
1454
1.28M
    mul_2qr = inp_2qi;
1455
1.28M
    mul_2qi = inp_2qr;
1456
1457
1.28M
    mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1458
1.28M
    mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1459
1460
1.28M
    sum_0qr = mul_0qr + mul_2qr;
1461
1.28M
    sum_0qi = mul_0qi - mul_2qi;
1462
1.28M
    sum_1qr = mul_0qr - mul_2qr;
1463
1.28M
    sum_1qi = mul_0qi + mul_2qi;
1464
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1465
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1466
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1467
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1468
1469
1.28M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1470
1.28M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1471
1.28M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1472
1.28M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1473
1.28M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1474
1.28M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1475
1.28M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1476
1.28M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1477
1478
1.28M
    ptr_y += 2;
1479
1.28M
    ptr_x += 16;
1480
1481
    /* 4th butter fly */
1482
1483
1.28M
    inp_0qr = ptr_x[0];
1484
1.28M
    inp_0qi = ptr_x[1];
1485
1.28M
    inp_1qr = ptr_x[4];
1486
1.28M
    inp_1qi = ptr_x[5];
1487
1.28M
    inp_2qr = ptr_x[8];
1488
1.28M
    inp_2qi = ptr_x[9];
1489
1.28M
    inp_3qr = ptr_x[12];
1490
1.28M
    inp_3qi = ptr_x[13];
1491
1492
1.28M
    mul_0qr = inp_0qr;
1493
1.28M
    mul_0qi = inp_0qi;
1494
1495
1.28M
    mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1496
1.28M
    mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1497
1498
1.28M
    mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1499
1.28M
    mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1500
1501
1.28M
    mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1502
1.28M
    mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1503
1504
1.28M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1505
1.28M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1506
1.28M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1507
1.28M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1508
1509
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1510
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1511
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1512
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1513
1514
1.28M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1515
1.28M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1516
1.28M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1517
1.28M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1518
1.28M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1519
1.28M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1520
1.28M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1521
1.28M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1522
1523
1.28M
    ptr_x = ptr_in;
1524
1.28M
    ptr_y = &interm_y[32];
1525
1526
    /* Butter fly summation in 2 steps */
1527
1.28M
    inp_0qr = ptr_x[2];
1528
1.28M
    inp_0qi = ptr_x[3];
1529
1.28M
    inp_1qr = ptr_x[6];
1530
1.28M
    inp_1qi = ptr_x[7];
1531
1.28M
    inp_2qr = ptr_x[10];
1532
1.28M
    inp_2qi = ptr_x[11];
1533
1.28M
    inp_3qr = ptr_x[14];
1534
1.28M
    inp_3qi = ptr_x[15];
1535
1536
1.28M
    mul_0qr = inp_0qr;
1537
1.28M
    mul_0qi = inp_0qi;
1538
1.28M
    mul_1qr = inp_1qr;
1539
1.28M
    mul_1qi = inp_1qi;
1540
1.28M
    mul_2qr = inp_2qr;
1541
1.28M
    mul_2qi = inp_2qi;
1542
1.28M
    mul_3qr = inp_3qr;
1543
1.28M
    mul_3qi = inp_3qi;
1544
1545
1.28M
    sum_0qr = mul_0qr + mul_2qr;
1546
1.28M
    sum_0qi = mul_0qi + mul_2qi;
1547
1.28M
    sum_1qr = mul_0qr - mul_2qr;
1548
1.28M
    sum_1qi = mul_0qi - mul_2qi;
1549
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1550
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1551
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1552
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1553
1554
1.28M
    ptr_y[0] = sum_0qr + sum_2qr;
1555
1.28M
    ptr_y[1] = sum_0qi + sum_2qi;
1556
1.28M
    ptr_y[h2] = sum_1qr + sum_3qi;
1557
1.28M
    ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1558
1.28M
    ptr_y[l1] = sum_0qr - sum_2qr;
1559
1.28M
    ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1560
1.28M
    ptr_y[l2] = sum_1qr - sum_3qi;
1561
1.28M
    ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1562
1563
1.28M
    ptr_y += 2;
1564
1.28M
    ptr_x += 16;
1565
1566
    /* 2nd butter fly */
1567
1568
1.28M
    inp_0qr = ptr_x[2];
1569
1.28M
    inp_0qi = ptr_x[3];
1570
1.28M
    inp_1qr = ptr_x[6];
1571
1.28M
    inp_1qi = ptr_x[7];
1572
1.28M
    inp_2qr = ptr_x[10];
1573
1.28M
    inp_2qi = ptr_x[11];
1574
1.28M
    inp_3qr = ptr_x[14];
1575
1.28M
    inp_3qi = ptr_x[15];
1576
1577
1.28M
    mul_0qr = inp_0qr;
1578
1.28M
    mul_0qi = inp_0qi;
1579
1580
1.28M
    mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1581
1.28M
    mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1582
1583
1.28M
    mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1584
1.28M
    mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1585
1586
1.28M
    mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1587
1.28M
    mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1588
1589
1.28M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1590
1.28M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1591
1.28M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1592
1.28M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1593
1594
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1595
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1596
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1597
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1598
1599
1.28M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1600
1.28M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1601
1.28M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1602
1.28M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1603
1.28M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1604
1.28M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1605
1.28M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1606
1.28M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1607
1608
1.28M
    ptr_y += 2;
1609
1.28M
    ptr_x += 16;
1610
1611
    /* 3rd butter fly */
1612
1613
1.28M
    inp_0qr = ptr_x[2];
1614
1.28M
    inp_0qi = ptr_x[3];
1615
1.28M
    inp_1qr = ptr_x[6];
1616
1.28M
    inp_1qi = ptr_x[7];
1617
1.28M
    inp_2qr = ptr_x[10];
1618
1.28M
    inp_2qi = ptr_x[11];
1619
1.28M
    inp_3qr = ptr_x[14];
1620
1.28M
    inp_3qi = ptr_x[15];
1621
1622
1.28M
    mul_0qr = inp_0qr;
1623
1.28M
    mul_0qi = inp_0qi;
1624
1625
1.28M
    mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1626
1.28M
    mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1627
1628
1.28M
    mul_2qr = inp_2qi;
1629
1.28M
    mul_2qi = inp_2qr;
1630
1631
1.28M
    mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1632
1.28M
    mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1633
1634
1.28M
    sum_0qr = mul_0qr + mul_2qr;
1635
1.28M
    sum_0qi = mul_0qi - mul_2qi;
1636
1.28M
    sum_1qr = mul_0qr - mul_2qr;
1637
1.28M
    sum_1qi = mul_0qi + mul_2qi;
1638
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1639
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1640
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1641
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1642
1643
1.28M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1644
1.28M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1645
1.28M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1646
1.28M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1647
1.28M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1648
1.28M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1649
1.28M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1650
1.28M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1651
1652
1.28M
    ptr_y += 2;
1653
1.28M
    ptr_x += 16;
1654
1655
    /* 4th butter fly */
1656
1657
1.28M
    inp_0qr = ptr_x[2];
1658
1.28M
    inp_0qi = ptr_x[3];
1659
1.28M
    inp_1qr = ptr_x[6];
1660
1.28M
    inp_1qi = ptr_x[7];
1661
1.28M
    inp_2qr = ptr_x[10];
1662
1.28M
    inp_2qi = ptr_x[11];
1663
1.28M
    inp_3qr = ptr_x[14];
1664
1.28M
    inp_3qi = ptr_x[15];
1665
1666
1.28M
    mul_0qr = inp_0qr;
1667
1.28M
    mul_0qi = inp_0qi;
1668
1669
1.28M
    mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1670
1.28M
    mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1671
1672
1.28M
    mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1673
1.28M
    mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1674
1675
1.28M
    mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1676
1.28M
    mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1677
1678
1.28M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1679
1.28M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1680
1.28M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1681
1.28M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1682
1683
1.28M
    sum_2qr = mul_1qr + mul_3qr;
1684
1.28M
    sum_2qi = mul_1qi + mul_3qi;
1685
1.28M
    sum_3qr = mul_1qr - mul_3qr;
1686
1.28M
    sum_3qi = mul_1qi - mul_3qi;
1687
1688
1.28M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1689
1.28M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1690
1.28M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1691
1.28M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1692
1.28M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1693
1.28M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1694
1.28M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1695
1.28M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1696
1.28M
  }
1697
1698
  // Last stage of 32 point FFT
1699
1.28M
  {
1700
1.28M
    ptr_y = ptr_out;
1701
1.28M
    ptr_y[idx1] = interm_y[0] + interm_y[32];
1702
1.28M
    ptr_y[idx1 + 1] = interm_y[1] + interm_y[33];
1703
1.28M
    ptr_y[idx2] = interm_y[0] - interm_y[32];
1704
1.28M
    ptr_y[idx2 + 1] = interm_y[1] - interm_y[33];
1705
1.28M
    idx1 += FFT15X2;
1706
1.28M
    idx2 += FFT15X2;
1707
20.5M
    for (i = 1; i < FFT16; i++) {
1708
19.2M
      mul_r = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]) -
1709
19.2M
              (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]);
1710
19.2M
      mul_i = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]) +
1711
19.2M
              (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]);
1712
1713
19.2M
      mul_r = mul_r / 2;
1714
19.2M
      mul_i = mul_i / 2;
1715
19.2M
      ptr_y[idx1] = interm_y[2 * i + 0] + (mul_r * 2);
1716
19.2M
      ptr_y[idx1 + 1] = interm_y[2 * i + 1] + (mul_i * 2);
1717
19.2M
      ptr_y[idx2] = interm_y[2 * i + 0] - (mul_r * 2);
1718
19.2M
      ptr_y[idx2 + 1] = interm_y[2 * i + 1] - (mul_i * 2);
1719
19.2M
      idx1 += FFT15X2;
1720
19.2M
      idx2 += FFT15X2;
1721
19.2M
    }
1722
1.28M
  }
1723
1.28M
}
1724
1725
static VOID ixheaace_dec_rearrange_short_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out, WORD32 N,
1726
4.54M
                                             const WORD16 *ptr_re_arr_tab) {
1727
4.54M
  WORD32 n, i = 0;
1728
1729
113M
  for (n = 0; n < N; n++) {
1730
108M
    WORD32 idx = ptr_re_arr_tab[n] << 1;
1731
108M
    ptr_out[i++] = ptr_in[idx];
1732
108M
    ptr_out[i++] = ptr_in[idx + 1];
1733
108M
  }
1734
4.54M
}
1735
1736
3.63M
static VOID ixheaace_fft_5_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1737
3.63M
  FLOAT32 C51 = 0.951056516f;
1738
3.63M
  FLOAT32 C52 = -0.769420885f;
1739
3.63M
  FLOAT32 C53 = -0.363271264f;
1740
3.63M
  FLOAT32 C54 = 0.559016994f;
1741
3.63M
  FLOAT32 C55 = -0.625f;
1742
1743
3.63M
  FLOAT32 r1, r2, r3, r4;
1744
3.63M
  FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1745
1746
3.63M
  r1 = (ptr_in[2] + ptr_in[8]);
1747
3.63M
  r4 = (ptr_in[2] - ptr_in[8]);
1748
3.63M
  r3 = (ptr_in[4] + ptr_in[6]);
1749
3.63M
  r2 = (ptr_in[4] - ptr_in[6]);
1750
1751
3.63M
  t = ((r1 - r3) * C54);
1752
3.63M
  r1 = (r1 + r3);
1753
1754
3.63M
  temp1 = (ptr_in[0] + r1);
1755
3.63M
  r1 = (temp1 + (((r1 * C55)) * 2));
1756
1757
3.63M
  r3 = (r1 - t);
1758
3.63M
  r1 = (r1 + t);
1759
1760
3.63M
  t = ((r4 + r2) * C51);
1761
3.63M
  r4 = (t + ((r4 * C52) * 2));
1762
3.63M
  r2 = (t + (r2 * C53));
1763
1764
3.63M
  s1 = (ptr_in[3] + ptr_in[9]);
1765
3.63M
  s4 = (ptr_in[3] - ptr_in[9]);
1766
3.63M
  s3 = (ptr_in[5] + ptr_in[7]);
1767
3.63M
  s2 = (ptr_in[5] - ptr_in[7]);
1768
1769
3.63M
  t = ((s1 - s3) * C54);
1770
3.63M
  s1 = (s1 + s3);
1771
1772
3.63M
  temp2 = (ptr_in[1] + s1);
1773
1774
3.63M
  s1 = (temp2 + (((s1 * C55)) * 2));
1775
1776
3.63M
  s3 = (s1 - t);
1777
3.63M
  s1 = (s1 + t);
1778
1779
3.63M
  t = ((s4 + s2) * C51);
1780
3.63M
  s4 = (t + (((s4 * C52)) * 2));
1781
3.63M
  s2 = (t + ((s2 * C53)));
1782
1783
3.63M
  ptr_out[0] = temp1;
1784
3.63M
  ptr_out[1] = temp2;
1785
3.63M
  ptr_out[2] = (r1 + s2);
1786
3.63M
  ptr_out[3] = (s1 - r2);
1787
3.63M
  ptr_out[4] = (r3 - s4);
1788
3.63M
  ptr_out[5] = (s3 + r4);
1789
3.63M
  ptr_out[6] = (r3 + s4);
1790
3.63M
  ptr_out[7] = (s3 - r4);
1791
3.63M
  ptr_out[8] = (r1 - s2);
1792
3.63M
  ptr_out[9] = (s1 + r2);
1793
3.63M
}
1794
1795
6.05M
static VOID ixheaace_fft_3_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1796
6.05M
  FLOAT32 add_r, sub_r;
1797
6.05M
  FLOAT32 add_i, sub_i;
1798
6.05M
  FLOAT32 x_01_r, x_01_i, temp;
1799
1800
6.05M
  FLOAT32 p1, p2, p3, p4;
1801
6.05M
  FLOAT32 sinmu = 0.866025404f;
1802
1803
6.05M
  x_01_r = (ptr_in[0] + ptr_in[2]);
1804
6.05M
  x_01_i = (ptr_in[1] + ptr_in[3]);
1805
1806
6.05M
  add_r = (ptr_in[2] + ptr_in[4]);
1807
6.05M
  add_i = (ptr_in[3] + ptr_in[5]);
1808
1809
6.05M
  sub_r = (ptr_in[2] - ptr_in[4]);
1810
6.05M
  sub_i = (ptr_in[3] - ptr_in[5]);
1811
1812
6.05M
  p1 = add_r / 2;
1813
6.05M
  p2 = (sub_i * sinmu);
1814
6.05M
  p3 = (sub_r * sinmu);
1815
6.05M
  p4 = add_i / 2;
1816
1817
6.05M
  temp = (ptr_in[0] - p1);
1818
1819
6.05M
  ptr_out[0] = (x_01_r + ptr_in[4]);
1820
6.05M
  ptr_out[1] = (x_01_i + ptr_in[5]);
1821
6.05M
  ptr_out[2] = (temp + p2);
1822
6.05M
  ptr_out[3] = ((ptr_in[1] - p3) - p4);
1823
6.05M
  ptr_out[4] = (temp - p2);
1824
6.05M
  ptr_out[5] = ((ptr_in[1] + p3) - p4);
1825
6.05M
}
1826
1827
static VOID ixheaace_pre_twiddle_120(FLOAT32 *ptr_in, FLOAT32 *ptr_data, WORD32 n,
1828
302k
                                     const FLOAT32 *ptr_cos_sin_tbl) {
1829
302k
  WORD npoints_4, i;
1830
302k
  FLOAT32 tempr, tempi, temp;
1831
302k
  FLOAT32 c, c1, s, s1;
1832
302k
  FLOAT32 *ptr_in1, *ptr_in2;
1833
302k
  FLOAT32 *ptr_x = ptr_in + (n - 1);
1834
1835
302k
  npoints_4 = n >> 2;
1836
1837
302k
  ptr_in1 = ptr_data;
1838
302k
  ptr_in2 = ptr_data + n - 1;
1839
1840
9.38M
  for (i = 0; i < npoints_4; i++) {
1841
9.08M
    c = *ptr_cos_sin_tbl++;
1842
9.08M
    s = *ptr_cos_sin_tbl++;
1843
1844
9.08M
    tempr = *ptr_in1++;
1845
9.08M
    tempi = *ptr_in2--;
1846
1847
9.08M
    temp = -((tempr * c) + (tempi * s));
1848
9.08M
    *ptr_in++ = temp;
1849
1850
9.08M
    temp = -((tempi * c) - (tempr * s));
1851
9.08M
    *ptr_in++ = temp;
1852
1853
9.08M
    c1 = *ptr_cos_sin_tbl++;
1854
9.08M
    s1 = *ptr_cos_sin_tbl++;
1855
1856
9.08M
    tempi = *ptr_in1++;
1857
9.08M
    tempr = *ptr_in2--;
1858
1859
9.08M
    temp = -((tempi * c1) - (tempr * s1));
1860
9.08M
    *ptr_x-- = temp;
1861
1862
9.08M
    temp = -((tempr * c1) + (tempi * s1));
1863
9.08M
    *ptr_x-- = temp;
1864
9.08M
  }
1865
302k
}
1866
1867
static VOID ixheaace_post_twiddle_120(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
1868
302k
                                      const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
1869
302k
  WORD i;
1870
302k
  FLOAT32 c, c1, s, s1;
1871
302k
  FLOAT32 tempr, tempi, temp;
1872
302k
  FLOAT32 *ptr_in2 = ptr_x + (m - 1);
1873
302k
  FLOAT32 *ptr_in1 = ptr_x;
1874
302k
  FLOAT32 *ptr_x1 = ptr_out;
1875
302k
  FLOAT32 *ptr_x2 = ptr_out + (m - 1);
1876
1877
9.38M
  for (i = 0; i < m; i += 4) {
1878
9.08M
    c = *ptr_cos_sin_tbl++;
1879
9.08M
    s = *ptr_cos_sin_tbl++;
1880
9.08M
    c1 = *ptr_cos_sin_tbl++;
1881
9.08M
    s1 = *ptr_cos_sin_tbl++;
1882
1883
9.08M
    tempr = *ptr_in1++;
1884
9.08M
    tempi = *ptr_in1++;
1885
1886
9.08M
    temp = -((tempr * s) - (tempi * c));
1887
9.08M
    *ptr_x2-- = temp;
1888
1889
9.08M
    temp = -((tempr * c) + (tempi * s));
1890
9.08M
    *ptr_x1++ = temp;
1891
1892
9.08M
    tempi = *ptr_in2--;
1893
9.08M
    tempr = *ptr_in2--;
1894
1895
9.08M
    temp = -((tempr * s1) - (tempi * c1));
1896
9.08M
    *ptr_x1++ = temp;
1897
1898
9.08M
    temp = -((tempr * c1) + (tempi * s1));
1899
9.08M
    *ptr_x2-- = temp;
1900
9.08M
  }
1901
302k
}
1902
1903
1.21M
static VOID ixheaace_fft_960_15(FLOAT32 *ptr_in_flt, FLOAT32 *ptr_out_flt) {
1904
1.21M
  WORD32 i;
1905
1.21M
  FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1906
1.21M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_5);
1907
1908
1.21M
  ptr_buf1_flt = ptr_out_flt;
1909
1.21M
  ptr_buf2_flt = ptr_in_flt;
1910
4.84M
  for (i = 0; i < FFT3; i++) {
1911
3.63M
    ixheaace_fft_5_flt(ptr_buf1_flt, ptr_buf2_flt);
1912
1913
3.63M
    ptr_buf1_flt += (FFT5 * 2);
1914
3.63M
    ptr_buf2_flt += (FFT5 * 2);
1915
3.63M
  }
1916
1917
1.21M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_3);
1918
1.21M
  ptr_buf1_flt = ptr_out_flt;
1919
1.21M
  ptr_buf2_flt = ptr_in_flt;
1920
7.26M
  for (i = 0; i < FFT5; i++) {
1921
6.05M
    ixheaace_fft_3_flt(ptr_buf1_flt, ptr_buf2_flt);
1922
1923
6.05M
    ptr_buf1_flt += (FFT3 * 2);
1924
6.05M
    ptr_buf2_flt += (FFT3 * 2);
1925
6.05M
  }
1926
1927
1.21M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_sml);
1928
1.21M
}
1929
1930
302k
static VOID ixheaace_fft_120(WORD32 npoints, FLOAT32 *ptr_x_flt, FLOAT32 *ptr_y_flt) {
1931
302k
  WORD32 i;
1932
302k
  FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1933
302k
  FLOAT32 *ptr_in_flt, *ptr_out_flt;
1934
1935
302k
  ptr_in_flt = ptr_x_flt;
1936
302k
  ptr_out_flt = ptr_y_flt;
1937
302k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_4);
1938
1939
302k
  ptr_buf1_flt = ptr_out_flt;
1940
302k
  ptr_buf2_flt = ptr_in_flt;
1941
1942
4.84M
  for (i = 0; i < FFT15; i++) {
1943
4.54M
    {
1944
4.54M
      FLOAT32 x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7;
1945
4.54M
      FLOAT32 *y0, *y1, *y2, *y3;
1946
4.54M
      FLOAT32 *x0;
1947
4.54M
      FLOAT32 xh0_0, xh1_0, xh0_1, xh1_1, xl0_0, xl1_0, xl0_1, xl1_1;
1948
4.54M
      WORD32 h2;
1949
4.54M
      FLOAT32 n00, n01, n10, n11, n20, n21, n30, n31;
1950
1951
4.54M
      ptr_x_flt = ptr_buf1_flt;
1952
4.54M
      ptr_y_flt = ptr_buf2_flt;
1953
4.54M
      npoints = 4;
1954
4.54M
      h2 = 0;
1955
1956
4.54M
      y0 = ptr_y_flt;
1957
4.54M
      y2 = ptr_y_flt + (WORD32)npoints;
1958
4.54M
      x0 = ptr_x_flt;
1959
4.54M
      y1 = y0 + (WORD32)(npoints >> 1);
1960
4.54M
      y3 = y2 + (WORD32)(npoints >> 1);
1961
1962
4.54M
      x_0 = x0[0];
1963
4.54M
      x_1 = x0[1];
1964
4.54M
      x_2 = x0[2];
1965
4.54M
      x_3 = x0[3];
1966
4.54M
      x_4 = x0[4];
1967
4.54M
      x_5 = x0[5];
1968
4.54M
      x_6 = x0[6];
1969
4.54M
      x_7 = x0[7];
1970
1971
4.54M
      xh0_0 = x_0 + x_4;
1972
4.54M
      xh1_0 = x_1 + x_5;
1973
4.54M
      xl0_0 = x_0 - x_4;
1974
4.54M
      xl1_0 = x_1 - x_5;
1975
4.54M
      xh0_1 = x_2 + x_6;
1976
4.54M
      xh1_1 = x_3 + x_7;
1977
4.54M
      xl0_1 = x_2 - x_6;
1978
4.54M
      xl1_1 = x_3 - x_7;
1979
1980
4.54M
      n00 = xh0_0 + xh0_1;
1981
4.54M
      n01 = xh1_0 + xh1_1;
1982
4.54M
      n10 = xl0_0 + xl1_1;
1983
4.54M
      n11 = xl1_0 - xl0_1;
1984
4.54M
      n20 = xh0_0 - xh0_1;
1985
4.54M
      n21 = xh1_0 - xh1_1;
1986
4.54M
      n30 = xl0_0 - xl1_1;
1987
4.54M
      n31 = xl1_0 + xl0_1;
1988
1989
4.54M
      y0[2 * h2] = n00;
1990
4.54M
      y0[2 * h2 + 1] = n01;
1991
4.54M
      y1[2 * h2] = n10;
1992
4.54M
      y1[2 * h2 + 1] = n11;
1993
4.54M
      y2[2 * h2] = n20;
1994
4.54M
      y2[2 * h2 + 1] = n21;
1995
4.54M
      y3[2 * h2] = n30;
1996
4.54M
      y3[2 * h2 + 1] = n31;
1997
4.54M
    }
1998
1999
4.54M
    ptr_buf1_flt += (FFT4 * 2);
2000
4.54M
    ptr_buf2_flt += (FFT4 * 2);
2001
4.54M
  }
2002
2003
302k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_15_4);
2004
2005
302k
  ptr_buf1_flt = ptr_out_flt;
2006
302k
  ptr_buf2_flt = ptr_in_flt;
2007
1.51M
  for (i = 0; i < FFT4; i++) {
2008
1.21M
    ixheaace_fft_960_15(ptr_buf1_flt, ptr_buf2_flt);
2009
1.21M
    ptr_buf1_flt += (FFT15 * 2);
2010
1.21M
    ptr_buf2_flt += (FFT15 * 2);
2011
1.21M
  }
2012
2013
302k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_120);
2014
302k
}
2015
2016
85.6k
static VOID ixheaace_cfft_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op) {
2017
85.6k
  WORD32 i;
2018
85.6k
  FLOAT32 *ptr_buf1, *ptr_buf2;
2019
85.6k
  FLOAT32 fft5_out[FFT15X2] = {0};
2020
2021
85.6k
  ptr_buf1 = ptr_inp;
2022
85.6k
  ptr_buf2 = ptr_op;
2023
2024
2.82M
  for (i = 0; i < FFT32; i++) {
2025
2.74M
    ixheaace_cfft_15_480(ptr_buf1, ptr_buf2, &fft5_out[0]);
2026
2.74M
    ptr_buf1 += 2;
2027
2.74M
    ptr_buf2 += 2;
2028
2.74M
  }
2029
2030
85.6k
  ixheaace_cfft_twiddle_mult(ptr_op, ptr_inp, FFT15, FFT32, ixheaace_fft_mix_rad_twid_tbl_480,
2031
85.6k
                             ixheaace_fft_mix_rad_twid_h_tbl_480);
2032
2033
85.6k
  ptr_buf1 = ptr_inp;
2034
85.6k
  ptr_buf2 = ptr_op;
2035
2036
1.37M
  for (i = 0; i < FFT15; i++) {
2037
1.28M
    ixheaace_cfft_32_480(ptr_buf1, ptr_buf2);
2038
1.28M
    ptr_buf1 += (FFT32X2);
2039
1.28M
    ptr_buf2 += 2;
2040
1.28M
  }
2041
85.6k
}
2042
2043
static VOID ixheaace_pre_twiddle_960(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
2044
85.6k
                                     const FLOAT32 *ptr_cos_sin_tbl) {
2045
85.6k
  WORD npoints_4, i;
2046
85.6k
  FLOAT32 tempr, tempi, temp;
2047
85.6k
  FLOAT32 c, c1, s, s1;
2048
85.6k
  FLOAT32 *ptr_in_1, *ptr_in_2;
2049
85.6k
  FLOAT32 *ptr_x_1 = ptr_x + (n - 1);
2050
2051
85.6k
  npoints_4 = n >> 2;
2052
2053
85.6k
  ptr_in_1 = ptr_data;
2054
85.6k
  ptr_in_2 = ptr_data + n - 1;
2055
2056
20.6M
  for (i = 0; i < npoints_4; i++) {
2057
20.5M
    c = *ptr_cos_sin_tbl++;
2058
20.5M
    s = *ptr_cos_sin_tbl++;
2059
2060
20.5M
    tempr = *ptr_in_1++;
2061
20.5M
    tempi = *ptr_in_2--;
2062
2063
20.5M
    temp = -((tempr * c) + (tempi * s));
2064
20.5M
    *ptr_x++ = temp;
2065
2066
20.5M
    temp = -((tempi * c) - (tempr * s));
2067
20.5M
    *ptr_x++ = temp;
2068
2069
20.5M
    c1 = *ptr_cos_sin_tbl++;
2070
20.5M
    s1 = *ptr_cos_sin_tbl++;
2071
2072
20.5M
    tempi = *ptr_in_1++;
2073
20.5M
    tempr = *ptr_in_2--;
2074
2075
20.5M
    temp = -((tempi * c1) - (tempr * s1));
2076
20.5M
    *ptr_x_1-- = temp;
2077
2078
20.5M
    temp = -((tempr * c1) + (tempi * s1));
2079
20.5M
    *ptr_x_1-- = temp;
2080
20.5M
  }
2081
85.6k
}
2082
2083
static VOID ixheaace_post_twiddle_960(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
2084
85.6k
                                      const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
2085
85.6k
  WORD i;
2086
85.6k
  FLOAT32 c, c1, s, s1;
2087
85.6k
  FLOAT32 tempr, tempi, temp;
2088
85.6k
  FLOAT32 *ptr_in2 = ptr_x + (m - 1);
2089
85.6k
  FLOAT32 *ptr_in1 = ptr_x;
2090
85.6k
  FLOAT32 *ptr_x1 = ptr_out;
2091
85.6k
  FLOAT32 *ptr_x2 = ptr_out + (m - 1);
2092
2093
20.6M
  for (i = 0; i < m; i += 4) {
2094
20.5M
    c = *ptr_cos_sin_tbl++;
2095
20.5M
    s = *ptr_cos_sin_tbl++;
2096
20.5M
    c1 = *ptr_cos_sin_tbl++;
2097
20.5M
    s1 = *ptr_cos_sin_tbl++;
2098
2099
20.5M
    tempr = *ptr_in1++;
2100
20.5M
    tempi = *ptr_in1++;
2101
2102
20.5M
    temp = -((tempr * s) - (tempi * c));
2103
20.5M
    *ptr_x2-- = temp;
2104
2105
20.5M
    temp = -((tempr * c) + (tempi * s));
2106
20.5M
    *ptr_x1++ = temp;
2107
2108
20.5M
    tempi = *ptr_in2--;
2109
20.5M
    tempr = *ptr_in2--;
2110
2111
20.5M
    temp = -((tempr * s1) - (tempi * c1));
2112
20.5M
    *ptr_x1++ = temp;
2113
2114
20.5M
    temp = -((tempr * c1) + (tempi * s1));
2115
20.5M
    *ptr_x2-- = temp;
2116
20.5M
  }
2117
85.6k
}
2118
2119
85.6k
static VOID ixheaace_mdct_960(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2120
85.6k
  FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2121
85.6k
  FLOAT32 const_mult_fac = 3.142857143f;
2122
85.6k
  FLOAT32 *ptr_data = ptr_input_flt;
2123
85.6k
  WORD32 k;
2124
2125
85.6k
  memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * FRAME_LEN_960);
2126
85.6k
  ixheaace_pre_twiddle_960(ptr_input_flt, ptr_scratch_flt, FRAME_LEN_960, cos_sin_table_flt);
2127
2128
85.6k
  ixheaace_cfft_480(ptr_input_flt, ptr_scratch_flt);
2129
2130
85.6k
  ixheaace_post_twiddle_960(ptr_input_flt, ptr_scratch_flt, cos_sin_table_flt, FRAME_LEN_960);
2131
2132
41.2M
  for (k = FRAME_LEN_960 - 1; k >= 0; k -= 2) {
2133
41.1M
    *ptr_data = (*ptr_data * const_mult_fac);
2134
41.1M
    ptr_data++;
2135
41.1M
    *ptr_data = (*ptr_data * const_mult_fac);
2136
41.1M
    ptr_data++;
2137
41.1M
  }
2138
85.6k
}
2139
2140
302k
static VOID ixheaace_mdct_120(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2141
302k
  WORD32 n, k;
2142
302k
  WORD32 n_by_2;
2143
302k
  FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2144
302k
  FLOAT32 const_mltfac = 3.142857143f;
2145
302k
  FLOAT32 *ptr_data = ptr_input_flt;
2146
302k
  n = 120;
2147
302k
  n_by_2 = n >> 1;
2148
302k
  memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * n);
2149
2150
302k
  ixheaace_pre_twiddle_120(ptr_input_flt, ptr_scratch_flt, n, ixheaace_cosine_array_240);
2151
2152
302k
  ixheaace_fft_120(n_by_2, ptr_input_flt, ptr_scratch_flt);
2153
2154
302k
  ixheaace_post_twiddle_120(ptr_input_flt, ptr_scratch_flt, ixheaace_cosine_array_240, n);
2155
2156
18.4M
  for (k = n - 1; k >= 0; k -= 2) {
2157
18.1M
    *ptr_data = (*ptr_data * const_mltfac);
2158
18.1M
    ptr_data++;
2159
18.1M
    *ptr_data = (*ptr_data * const_mltfac);
2160
18.1M
    ptr_data++;
2161
18.1M
  }
2162
302k
}
2163
2164
static VOID ixheaace_mdct(FLOAT32 *ptr_dct_data, const FLOAT32 *ptr_trig_data,
2165
                          const FLOAT32 *ptr_sine_window, WORD32 n, WORD32 ld_n,
2166
544k
                          WORD8 *ptr_scratch) {
2167
544k
  ixheaace_pre_mdct(ptr_dct_data, n, ptr_sine_window);
2168
2169
544k
  ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
2170
544k
  ia_enhaacplus_enc_complex_fft(ptr_dct_data, n / 2, pstr_scratch);
2171
2172
544k
  ixheaace_post_mdct(ptr_dct_data, n, ptr_trig_data,
2173
544k
                     1 << (LD_FFT_TWIDDLE_TABLE_SIZE - (ld_n - 1)), FFT_TWIDDLE_TABLE_SIZE);
2174
544k
}
2175
2176
static VOID ixheaace_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
2177
                                             const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2178
325k
                                             WORD32 frame_len) {
2179
325k
  WORD32 i;
2180
325k
  WORD32 blk_switch_offset = frame_len;
2181
325k
  switch (frame_len) {
2182
178k
    case FRAME_LEN_1024:
2183
178k
      blk_switch_offset = BLK_SWITCH_OFFSET_LC_128;
2184
178k
      memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2185
178k
              (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2186
178k
      break;
2187
2188
123k
    case FRAME_LEN_960:
2189
123k
      blk_switch_offset = BLK_SWITCH_OFFSET_LC_120;
2190
123k
      memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2191
123k
              (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2192
123k
      break;
2193
2194
24.1k
    case FRAME_LEN_512:
2195
24.1k
    case FRAME_LEN_480:
2196
24.1k
      blk_switch_offset = frame_len;
2197
24.1k
      break;
2198
325k
  }
2199
2200
313M
  for (i = 0; i < frame_len; i++) {
2201
313M
    ptr_mdct_delay_buffer[blk_switch_offset - frame_len + i] = ptr_time_signal[i * ch_increment];
2202
313M
  }
2203
325k
}
2204
2205
VOID ixheaace_transform_real_lc_ld(FLOAT32 *ptr_mdct_delay_buffer, const FLOAT32 *ptr_time_signal,
2206
                                   WORD32 ch_increment, FLOAT32 *ptr_real_out, WORD32 block_type,
2207
325k
                                   WORD32 frame_len, WORD8 *ptr_scratch) {
2208
325k
  WORD32 i, w;
2209
325k
  FLOAT32 ws1, ws2;
2210
325k
  FLOAT32 *ptr_dct_in;
2211
325k
  WORD32 frame_len_short = FRAME_LEN_SHORT_128;
2212
325k
  WORD32 ls_trans = LS_TRANS_128;
2213
325k
  WORD32 trans_offset = TRANSFORM_OFFSET_SHORT_128;
2214
325k
  const FLOAT32 *ptr_window;
2215
325k
  if (frame_len == FRAME_LEN_960) {
2216
123k
    ls_trans = LS_TRANS_120;
2217
123k
    trans_offset = TRANSFORM_OFFSET_SHORT_120;
2218
123k
    frame_len_short = FRAME_LEN_SHORT_120;
2219
123k
  }
2220
325k
  switch (block_type) {
2221
201k
    case LONG_WINDOW:
2222
201k
      ptr_dct_in = ptr_real_out;
2223
201k
      ptr_window = &long_window_KBD[0];
2224
201k
      switch (frame_len) {
2225
111k
        case FRAME_LEN_1024:
2226
111k
          ptr_window = &long_window_KBD[0];
2227
111k
          break;
2228
2229
65.9k
        case FRAME_LEN_960:
2230
65.9k
          ptr_window = &long_window_sine_960[0];
2231
65.9k
          break;
2232
2233
24.1k
        case FRAME_LEN_512:
2234
24.1k
          ptr_window = &long_window_sine_ld[0];
2235
24.1k
          break;
2236
2237
0
        case FRAME_LEN_480:
2238
0
          ptr_window = &long_window_sine_ld_480[0];
2239
0
          break;
2240
201k
      }
2241
95.1M
      for (i = 0; i < frame_len / 2; i++) {
2242
94.9M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2243
2244
94.9M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2245
2246
94.9M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2247
94.9M
      }
2248
2249
201k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2250
201k
                                       frame_len);
2251
2252
95.1M
      for (i = 0; i < frame_len / 2; i++) {
2253
94.9M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2254
2255
94.9M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2256
2257
94.9M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2258
94.9M
      }
2259
201k
      switch (frame_len) {
2260
111k
        case FRAME_LEN_1024:
2261
111k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10,
2262
111k
                        ptr_scratch);
2263
111k
          break;
2264
2265
65.9k
        case FRAME_LEN_960:
2266
65.9k
          ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2267
65.9k
          break;
2268
2269
24.1k
        case FRAME_LEN_512:
2270
24.1k
        case FRAME_LEN_480:
2271
24.1k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, ptr_window, frame_len, 9, ptr_scratch);
2272
24.1k
          break;
2273
201k
      }
2274
201k
      break;
2275
2276
201k
    case START_WINDOW:
2277
24.9k
      ptr_dct_in = ptr_real_out;
2278
24.9k
      ptr_window = &long_window_KBD[0];
2279
24.9k
      switch (frame_len) {
2280
14.5k
        case FRAME_LEN_1024:
2281
14.5k
          ptr_window = &long_window_KBD[0];
2282
14.5k
          break;
2283
2284
10.3k
        case FRAME_LEN_960:
2285
10.3k
          ptr_window = &long_window_sine_960[0];
2286
10.3k
          break;
2287
24.9k
      }
2288
12.4M
      for (i = 0; i < frame_len / 2; i++) {
2289
12.4M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2290
2291
12.4M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2292
2293
12.4M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2294
12.4M
      }
2295
2296
24.9k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2297
24.9k
                                       frame_len);
2298
2299
24.9k
      if (frame_len == FRAME_LEN_1024) {
2300
14.5k
        ptr_window = &short_window_sine[0];
2301
14.5k
      } else if (frame_len == FRAME_LEN_960) {
2302
10.3k
        ptr_window = &short_window_sine_120[0];
2303
10.3k
      }
2304
10.9M
      for (i = 0; i < ls_trans; i++) {
2305
10.8M
        ws1 = ptr_mdct_delay_buffer[i];
2306
10.8M
        ws2 = 0.0f;
2307
2308
10.8M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2309
10.8M
      }
2310
2311
1.58M
      for (i = 0; i < frame_len_short / 2; i++) {
2312
1.55M
        ws1 = ptr_mdct_delay_buffer[i + ls_trans] * ptr_window[frame_len_short - i - 1];
2313
2314
1.55M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1 - ls_trans)] * ptr_window[i];
2315
2316
1.55M
        ptr_dct_in[frame_len / 2 - i - 1 - ls_trans] = -(ws1 + ws2);
2317
1.55M
      }
2318
24.9k
      if (frame_len == FRAME_LEN_960) {
2319
10.3k
        ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2320
14.5k
      } else {
2321
14.5k
        ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2322
14.5k
      }
2323
2324
24.9k
      break;
2325
2326
22.2k
    case STOP_WINDOW:
2327
22.2k
      ptr_window = &long_window_KBD[0];
2328
22.2k
      ptr_dct_in = ptr_real_out;
2329
22.2k
      if (frame_len == FRAME_LEN_1024) {
2330
12.9k
        ptr_window = &short_window_sine[0];
2331
12.9k
      } else if (frame_len == FRAME_LEN_960) {
2332
9.28k
        ptr_window = &short_window_sine_120[0];
2333
9.28k
      }
2334
9.71M
      for (i = 0; i < ls_trans; i++) {
2335
9.69M
        ws1 = 0.0f;
2336
9.69M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)];
2337
9.69M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2338
9.69M
      }
2339
2340
1.40M
      for (i = 0; i < frame_len_short / 2; i++) {
2341
1.38M
        ws1 = ptr_mdct_delay_buffer[(i + ls_trans)] * ptr_window[i];
2342
2343
1.38M
        ws2 = ptr_mdct_delay_buffer[(frame_len - ls_trans - i - 1)] *
2344
1.38M
              ptr_window[frame_len_short - i - 1];
2345
2346
1.38M
        ptr_dct_in[frame_len / 2 + i + ls_trans] = ws1 - ws2;
2347
1.38M
      }
2348
2349
22.2k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2350
22.2k
                                       frame_len);
2351
2352
22.2k
      if (frame_len == FRAME_LEN_1024) {
2353
12.9k
        ptr_window = &long_window_KBD[0];
2354
12.9k
      } else if (frame_len == FRAME_LEN_960) {
2355
9.28k
        ptr_window = &long_window_sine_960[0];
2356
9.28k
      }
2357
11.1M
      for (i = 0; i < frame_len / 2; i++) {
2358
11.0M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2359
2360
11.0M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2361
2362
11.0M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2363
11.0M
      }
2364
2365
22.2k
      if (frame_len == FRAME_LEN_960) {
2366
9.28k
        ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2367
12.9k
      } else {
2368
12.9k
        ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2369
12.9k
      }
2370
2371
22.2k
      break;
2372
2373
76.9k
    case SHORT_WINDOW:
2374
76.9k
      ptr_window = &short_window_sine[0];
2375
76.9k
      if (frame_len == FRAME_LEN_1024) {
2376
39.0k
        ptr_window = &short_window_sine[0];
2377
39.0k
      } else if (frame_len == FRAME_LEN_960) {
2378
37.8k
        ptr_window = &short_window_sine_120[0];
2379
37.8k
      }
2380
692k
      for (w = 0; w < TRANS_FAC; w++) {
2381
615k
        ptr_dct_in = ptr_real_out + w * frame_len_short;
2382
2383
38.7M
        for (i = 0; i < frame_len_short / 2; i++) {
2384
38.1M
          ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + i] * ptr_window[i];
2385
2386
38.1M
          ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short - i -
2387
38.1M
                                      1] *
2388
38.1M
                ptr_window[frame_len_short - i - 1];
2389
2390
38.1M
          ptr_dct_in[frame_len_short / 2 + i] = ws1 - ws2;
2391
2392
38.1M
          ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short + i] *
2393
38.1M
                ptr_window[frame_len_short - i - 1];
2394
2395
38.1M
          ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short * 2 -
2396
38.1M
                                      i - 1] *
2397
38.1M
                ptr_window[i];
2398
2399
38.1M
          ptr_dct_in[frame_len_short / 2 - i - 1] = -(ws1 + ws2);
2400
38.1M
        }
2401
615k
        if (frame_len == FRAME_LEN_960) {
2402
302k
          ixheaace_mdct_120(ptr_dct_in, ptr_scratch);
2403
312k
        } else {
2404
312k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, short_window_sine, frame_len_short, 7,
2405
312k
                        ptr_scratch);
2406
312k
        }
2407
615k
      }
2408
2409
76.9k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2410
76.9k
                                       frame_len);
2411
76.9k
      break;
2412
325k
  }
2413
325k
}
2414
2415
VOID ia_enhaacplus_enc_transform_real_eld(FLOAT32 *ptr_mdct_delay_buffer,
2416
                                          const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2417
                                          FLOAT32 *ptr_real_out, WORD8 *ptr_shared_buffer5,
2418
68.9k
                                          WORD32 frame_len) {
2419
68.9k
  WORD32 i, loop_len;
2420
68.9k
  FLOAT32 w1, w2;
2421
68.9k
  FLOAT32 *ptr_curr_data, *ptr_prev1_data, *ptr_prev2_data, *ptr_prev3_data;
2422
68.9k
  const FLOAT32 *ptr_win0, *ptr_win1, *ptr_win2, *ptr_win3;
2423
2424
68.9k
  loop_len = frame_len / 4;
2425
2426
68.9k
  ptr_curr_data = &ptr_mdct_delay_buffer[3 * frame_len];
2427
68.9k
  ptr_prev1_data = &ptr_mdct_delay_buffer[2 * frame_len];
2428
68.9k
  ptr_prev2_data = &ptr_mdct_delay_buffer[frame_len];
2429
68.9k
  ptr_prev3_data = &ptr_mdct_delay_buffer[0];
2430
2431
68.9k
  ptr_win0 = &low_delay_window_eld[0];
2432
68.9k
  ptr_win1 = &low_delay_window_eld[frame_len];
2433
68.9k
  ptr_win2 = &low_delay_window_eld[2 * frame_len];
2434
68.9k
  ptr_win3 = &low_delay_window_eld[3 * frame_len];
2435
2436
68.9k
  memmove(&ptr_mdct_delay_buffer[0], &ptr_mdct_delay_buffer[frame_len],
2437
68.9k
          (3 * frame_len) * sizeof(ptr_mdct_delay_buffer[0]));
2438
2439
35.3M
  for (i = 0; i < frame_len; i++) {
2440
35.3M
    ptr_curr_data[i] = ptr_time_signal[i * ch_increment];
2441
35.3M
  }
2442
2443
8.89M
  for (i = 0; i < loop_len; i++) {
2444
8.82M
    w1 = ptr_prev3_data[(frame_len / 2) + loop_len + i] * ptr_win3[(frame_len / 2) - 1 - i];
2445
8.82M
    w1 += ptr_prev3_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win3[(frame_len / 2) + i];
2446
2447
8.82M
    w2 = (-ptr_prev1_data[(frame_len / 2) + loop_len + i] * ptr_win1[(frame_len / 2) - 1 - i]);
2448
8.82M
    w2 += (-ptr_prev1_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win1[(frame_len / 2) + i]);
2449
2450
8.82M
    ptr_real_out[i] = w1 + w2;
2451
8.82M
  }
2452
2453
8.89M
  for (i = 0; i < loop_len; i++) {
2454
8.82M
    w1 = (-ptr_prev2_data[(frame_len / 2) + loop_len + i] * ptr_win2[(frame_len / 2) - 1 - i]);
2455
8.82M
    w1 += ptr_prev2_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win2[(frame_len / 2) + i];
2456
2457
8.82M
    w2 = ptr_curr_data[(frame_len / 2) + loop_len + i] * ptr_win0[(frame_len / 2) - 1 - i];
2458
8.82M
    w2 += (-ptr_curr_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win0[(frame_len / 2) + i]);
2459
2460
8.82M
    ptr_real_out[frame_len - 1 - i] = w1 + w2;
2461
8.82M
  }
2462
2463
8.89M
  for (i = 0; i < loop_len; i++) {
2464
8.82M
    w1 = ptr_prev2_data[loop_len - 1 - i] * ptr_win3[i];
2465
8.82M
    w1 += ptr_prev3_data[loop_len + i] * ptr_win3[frame_len - 1 - i];
2466
2467
8.82M
    w2 = (-ptr_curr_data[loop_len - 1 - i] * ptr_win1[i]);
2468
8.82M
    w2 += (-ptr_prev1_data[loop_len + i] * ptr_win1[frame_len - 1 - i]);
2469
2470
8.82M
    ptr_real_out[(frame_len / 2) - 1 - i] = w1 + w2;
2471
8.82M
  }
2472
2473
8.89M
  for (i = 0; i < loop_len; i++) {
2474
8.82M
    w1 = -(ptr_prev1_data[loop_len - 1 - i] * ptr_win2[i]);
2475
8.82M
    w1 += ptr_prev2_data[loop_len + i] * ptr_win2[frame_len - 1 - i];
2476
2477
    /* First 128 coeffcients are zeros in the window table so they are not used in the code here*/
2478
8.82M
    w2 = (-ptr_curr_data[loop_len + i] * ptr_win0[frame_len - 1 - i]);
2479
2480
8.82M
    ptr_real_out[(frame_len / 2) + i] = w1 + w2;
2481
8.82M
  }
2482
2483
68.9k
  ixheaace_mdct(ptr_real_out, fft_twiddle_tab, long_window_sine_ld, frame_len, 9,
2484
68.9k
                ptr_shared_buffer5);
2485
68.9k
}