Coverage Report

Created: 2026-01-25 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/encoder/ixheaace_fft.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2023 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
21
#include <string.h>
22
23
#include "ixheaac_type_def.h"
24
#include "ixheaac_constants.h"
25
#include "ixheaace_psy_const.h"
26
#include "ixheaace_tns.h"
27
#include "ixheaace_tns_params.h"
28
#include "ixheaace_rom.h"
29
#include "ixheaace_common_rom.h"
30
#include "ixheaace_bitbuffer.h"
31
#include "ixheaace_aac_constants.h"
32
#include "ixheaace_fft.h"
33
#include "ixheaac_basic_ops32.h"
34
#include "ixheaac_basic_ops40.h"
35
#include "ixheaac_basic_ops.h"
36
#include "iusace_basic_ops_flt.h"
37
38
static VOID ia_enhaacplus_enc_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
39
                                                      const FLOAT32 *ptr_time_signal,
40
                                                      WORD32 ch_increment,
41
85.7k
                                                      WORD32 long_frame_len) {
42
85.7k
  WORD32 i;
43
85.7k
  FLOAT32 *ptr_mdct_buff = ptr_mdct_delay_buffer;
44
85.7k
  if (ch_increment == 2) {
45
85.2k
    const FLOAT32 *ptr_input = ptr_time_signal;
46
85.2k
    FLOAT32 temp1, temp2, temp3, temp4;
47
85.2k
    temp1 = *ptr_input++;
48
85.2k
    ptr_input++;
49
85.2k
    temp2 = *ptr_input++;
50
85.2k
    ptr_input++;
51
85.2k
    temp3 = *ptr_input++;
52
85.2k
    ptr_input++;
53
10.2M
    for (i = ((long_frame_len >> 2) - 2); i >= 0; i--) {
54
10.1M
      *ptr_mdct_buff++ = temp1;
55
10.1M
      temp4 = *ptr_input++;
56
10.1M
      ptr_input++;
57
58
10.1M
      *ptr_mdct_buff++ = temp2;
59
10.1M
      *ptr_mdct_buff++ = temp3;
60
10.1M
      *ptr_mdct_buff++ = temp4;
61
62
10.1M
      temp1 = *ptr_input++;
63
10.1M
      ptr_input++;
64
10.1M
      temp2 = *ptr_input++;
65
10.1M
      ptr_input++;
66
10.1M
      temp3 = *ptr_input++;
67
10.1M
      ptr_input++;
68
10.1M
    }
69
85.2k
    *ptr_mdct_buff++ = temp1;
70
85.2k
    temp4 = *ptr_input;
71
85.2k
    *ptr_mdct_buff++ = temp2;
72
85.2k
    *ptr_mdct_buff++ = temp3;
73
85.2k
    *ptr_mdct_buff++ = temp4;
74
85.2k
  } else {
75
144k
    for (i = 0; i < long_frame_len; i += 2) {
76
143k
      *ptr_mdct_buff++ = ptr_time_signal[i * ch_increment];
77
143k
      *ptr_mdct_buff++ = ptr_time_signal[(i + 1) * ch_increment];
78
143k
    }
79
598
  }
80
85.7k
}
81
82
static VOID ia_eaacp_enc_inverse_transform_512(FLOAT32 *ptr_data, FLOAT32 *ptr_win_buf,
83
                                               const FLOAT32 *ptr_cos_sin_tbl,
84
0
                                               WORD8 *ptr_scratch) {
85
0
  WORD32 n = FRAME_LEN_512;
86
0
  WORD32 n_by_2 = n >> 1;
87
88
0
  ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
89
90
0
  ia_eaacp_enc_pre_twiddle_aac(ptr_win_buf, ptr_data, n, ptr_cos_sin_tbl);
91
92
0
  ia_enhaacplus_enc_complex_fft(ptr_win_buf, n_by_2, pstr_scratch);
93
94
0
  ia_enhaacplus_enc_post_twiddle(ptr_data, ptr_win_buf, ptr_cos_sin_tbl, n);
95
0
}
96
97
518k
static VOID ixheaace_pre_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_sine_window) {
98
518k
  WORD32 i;
99
518k
  FLOAT32 wre, wim, re1, re2, im1, im2;
100
101
53.7M
  for (i = 0; i < m / 4; i++) {
102
53.1M
    re1 = ptr_x[2 * i];
103
53.1M
    im2 = ptr_x[2 * i + 1];
104
53.1M
    re2 = ptr_x[m - 2 - 2 * i];
105
53.1M
    im1 = ptr_x[m - 1 - 2 * i];
106
107
53.1M
    wim = ptr_sine_window[i * 2];
108
53.1M
    wre = ptr_sine_window[m - 1 - 2 * i];
109
110
53.1M
    ptr_x[2 * i] = im1 * wim + re1 * wre;
111
112
53.1M
    ptr_x[2 * i + 1] = im1 * wre - re1 * wim;
113
114
53.1M
    wim = ptr_sine_window[m - 2 - 2 * i];
115
53.1M
    wre = ptr_sine_window[2 * i + 1];
116
117
53.1M
    ptr_x[m - 2 - 2 * i] = im2 * wim + re2 * wre;
118
119
53.1M
    ptr_x[m - 1 - 2 * i] = im2 * wre - re2 * wim;
120
53.1M
  }
121
518k
}
122
123
static VOID ia_enhaacplus_enc_tranform_mac4(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
124
                                            FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
125
                                            FLOAT32 *ptr_buf3, FLOAT32 *ptr_buf4, UWORD32 len,
126
171k
                                            WORD32 increment) {
127
171k
  WORD32 i;
128
129
171k
  if (increment > 0) {
130
5.23M
    for (i = len >> 2; i > 0; i--) {
131
5.14M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
132
5.14M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
133
5.14M
      *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
134
5.14M
      ptr_op++;
135
136
5.14M
      *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
137
5.14M
      *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
138
5.14M
      *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
139
5.14M
      ptr_op++;
140
141
5.14M
      *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
142
5.14M
      *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
143
5.14M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
144
5.14M
      ptr_op++;
145
146
5.14M
      *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
147
5.14M
      *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
148
5.14M
      *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
149
5.14M
      ptr_op++;
150
5.14M
      ptr_win += 16;
151
5.14M
    }
152
85.7k
  } else {
153
2.65M
    for (i = len >> 2; i > 0; i--) {
154
2.57M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
155
2.57M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
156
2.57M
      *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
157
2.57M
      ptr_op--;
158
159
2.57M
      *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
160
2.57M
      *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
161
2.57M
      *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
162
2.57M
      ptr_op--;
163
164
2.57M
      *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
165
2.57M
      *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
166
2.57M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
167
2.57M
      ptr_op--;
168
169
2.57M
      *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
170
2.57M
      *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
171
2.57M
      *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
172
2.57M
      ptr_op--;
173
2.57M
      ptr_win += 16;
174
2.57M
    }
175
85.7k
  }
176
171k
}
177
178
static VOID ia_enhaacplus_enc_tranform_mac3(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
179
                                            FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
180
85.7k
                                            FLOAT32 *ptr_buf3, UWORD32 len, WORD32 increment) {
181
85.7k
  WORD32 i;
182
183
85.7k
  if (increment > 0) {
184
0
    for (i = len >> 2; i > 0; i--) {
185
0
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
186
0
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
187
0
      ptr_op++;
188
189
0
      *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
190
0
      *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
191
0
      ptr_op++;
192
193
0
      *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
194
0
      *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
195
0
      ptr_op++;
196
197
0
      *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
198
0
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
199
0
      ptr_op++;
200
0
      ptr_win += 12;
201
0
    }
202
85.7k
  } else {
203
2.65M
    for (i = len >> 2; i > 0; i--) {
204
2.57M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
205
2.57M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
206
2.57M
      ptr_op--;
207
208
2.57M
      *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
209
2.57M
      *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
210
2.57M
      ptr_op--;
211
212
2.57M
      *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
213
2.57M
      *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
214
2.57M
      ptr_op--;
215
216
2.57M
      *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
217
2.57M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
218
2.57M
      ptr_op--;
219
2.57M
      ptr_win += 12;
220
2.57M
    }
221
85.7k
  }
222
85.7k
}
223
224
VOID ia_enhaacplus_enc_transform_real(FLOAT32 *ptr_mdct_delay_buffer,
225
                                      const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
226
                                      FLOAT32 *ptr_real_out, ixheaace_mdct_tables *pstr_mdct_tab,
227
                                      FLOAT32 *ptr_shared_buffer1, WORD8 *ptr_shared_buffer5,
228
85.7k
                                      WORD32 long_frame_len) {
229
85.7k
  WORD32 n, n1;
230
85.7k
  FLOAT32 *ptr_windowed_buf = ptr_shared_buffer1;
231
85.7k
  const FLOAT32 *ptr_ws1;
232
85.7k
  WORD32 i, len = long_frame_len;
233
85.7k
  FLOAT32 *ptr_real_in;
234
85.7k
  FLOAT32 *ptr_data1, *ptr_data2, *ptr_data3, *ptr_data4;
235
85.7k
  FLOAT32 *ptr_op1;
236
237
85.7k
  ptr_real_in = ptr_mdct_delay_buffer;
238
239
85.7k
  n = long_frame_len << 1;
240
85.7k
  n1 = long_frame_len >> 1;
241
242
85.7k
  ptr_ws1 =
243
85.7k
      (long_frame_len == FRAME_LEN_512) ? pstr_mdct_tab->win_512_ld : pstr_mdct_tab->win_480_ld;
244
245
85.7k
  ptr_op1 = ptr_real_out;
246
85.7k
  ptr_data1 = &ptr_real_in[n1];
247
85.7k
  ptr_data2 = &ptr_real_in[n + n1];
248
85.7k
  ptr_data3 = &ptr_real_in[n1 - 1];
249
85.7k
  ptr_data4 = &ptr_real_in[n + n1 - 1];
250
251
85.7k
  ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
252
85.7k
                                  n1, 1);
253
85.7k
  ptr_ws1 += ((SIZE_T)n1 << 2);
254
255
82.4M
  for (i = 0; i < long_frame_len << 1; i++) {
256
82.3M
    ptr_mdct_delay_buffer[i] = ptr_mdct_delay_buffer[long_frame_len + i];
257
82.3M
  }
258
85.7k
  ia_enhaacplus_enc_shift_mdct_delay_buffer(&ptr_mdct_delay_buffer[2 * long_frame_len],
259
85.7k
                                            ptr_time_signal, ch_increment, long_frame_len);
260
261
85.7k
  ptr_op1 = &ptr_real_out[long_frame_len - 1];
262
85.7k
  ptr_data1 = &ptr_real_in[n + len - n1];
263
85.7k
  ptr_data2 = &ptr_real_in[len - n1];
264
85.7k
  ptr_data3 = &ptr_real_in[len - n1 - 1];
265
85.7k
  ptr_data4 = &ptr_real_in[n + len - n1 - 1];
266
267
85.7k
  ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
268
85.7k
                                  (n1 >> 1), -1);
269
85.7k
  ptr_op1 -= (n1 >> 1);
270
85.7k
  ptr_ws1 += ((SIZE_T)n1 << 1);
271
85.7k
  ptr_data2 += (n1 >> 1);
272
85.7k
  ptr_data3 -= (n1 >> 1);
273
85.7k
  ptr_data4 -= (n1 >> 1);
274
85.7k
  ia_enhaacplus_enc_tranform_mac3(ptr_op1, ptr_ws1, ptr_data2, ptr_data3, ptr_data4, (n1 >> 1),
275
85.7k
                                  -1);
276
277
85.7k
  if (long_frame_len == FRAME_LEN_480) {
278
85.7k
    ia_aac_ld_enc_mdct_480(ptr_real_out, ptr_windowed_buf, 1, pstr_mdct_tab);
279
85.7k
  } else {
280
0
    ia_eaacp_enc_inverse_transform_512(ptr_real_out, ptr_windowed_buf,
281
0
                                       pstr_mdct_tab->cosine_array_1024, ptr_shared_buffer5);
282
0
  }
283
85.7k
}
284
285
static VOID ia_eaacp_enc_pre_twiddle_compute(FLOAT32 *ptr_in1, FLOAT32 *ptr_in2, FLOAT32 *ptr_x,
286
85.7k
                                             const FLOAT32 *ptr_cos_sin, WORD n_by_4) {
287
85.7k
  WORD32 i;
288
85.7k
  FLOAT32 temp_r, temp_i;
289
85.7k
  FLOAT32 temp_r1, temp_i1;
290
85.7k
  FLOAT32 *ptr_x1 = ptr_x + (SIZE_T)((n_by_4 << 2) - 1);
291
85.7k
  FLOAT32 c, c1, s, s1;
292
293
10.3M
  for (i = 0; i < n_by_4; i++) {
294
10.2M
    c = *ptr_cos_sin++;
295
10.2M
    s = *ptr_cos_sin++;
296
10.2M
    s1 = *ptr_cos_sin++;
297
10.2M
    c1 = *ptr_cos_sin++;
298
299
10.2M
    temp_r = *ptr_in1++;
300
10.2M
    temp_i1 = *ptr_in1++;
301
10.2M
    temp_i = *ptr_in2--;
302
10.2M
    temp_r1 = *ptr_in2--;
303
10.2M
    *ptr_x = ((temp_r * c) + (temp_i * s));
304
10.2M
    ptr_x++;
305
306
10.2M
    *ptr_x = ((temp_i * c) - (temp_r * s));
307
10.2M
    ptr_x++;
308
309
10.2M
    *ptr_x1 = ((temp_i1 * c1) - (temp_r1 * s1));
310
10.2M
    ptr_x1--;
311
312
10.2M
    *ptr_x1 = ((temp_r1 * c1) + (temp_i1 * s1));
313
10.2M
    ptr_x1--;
314
10.2M
  }
315
85.7k
}
316
317
VOID ia_enhaacplus_enc_post_twiddle(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
318
85.7k
                                    const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
319
85.7k
  WORD i;
320
85.7k
  FLOAT32 c, c1, s, s1;
321
85.7k
  FLOAT32 tmp_var;
322
85.7k
  FLOAT32 tempr, tempr1, tempi, tempi1;
323
85.7k
  FLOAT32 *ptr_out1 = ptr_out + m - 1;
324
85.7k
  FLOAT32 *ptr_x1 = ptr_x + m - 1;
325
326
10.3M
  for (i = 0; i < (m >> 2); i++) {
327
10.2M
    c = *ptr_cos_sin_tbl++;
328
10.2M
    s = *ptr_cos_sin_tbl++;
329
10.2M
    s1 = *ptr_cos_sin_tbl++;
330
10.2M
    c1 = *ptr_cos_sin_tbl++;
331
10.2M
    tempr = *ptr_x++;
332
10.2M
    tempi = *ptr_x++;
333
10.2M
    tempi1 = *ptr_x1--;
334
10.2M
    tempr1 = *ptr_x1--;
335
336
10.2M
    tmp_var = ((tempr * c) + (tempi * s));
337
10.2M
    *ptr_out++ = tmp_var;
338
339
10.2M
    tmp_var = ((tempr * s) - (tempi * c));
340
10.2M
    *ptr_out1-- = tmp_var;
341
342
10.2M
    tmp_var = ((tempr1 * c1) + (tempi1 * s1));
343
10.2M
    *ptr_out1-- = tmp_var;
344
345
10.2M
    tmp_var = ((tempr1 * s1) - (tempi1 * c1));
346
10.2M
    *ptr_out++ = tmp_var;
347
10.2M
  }
348
85.7k
}
349
350
VOID ia_eaacp_enc_pre_twiddle_aac(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
351
85.7k
                                  const FLOAT32 *ptr_cos_array) {
352
85.7k
  WORD n_by_4;
353
85.7k
  FLOAT32 *ptr_in1, *ptr_in2;
354
355
85.7k
  n_by_4 = n >> 2;
356
357
85.7k
  ptr_in1 = ptr_data;
358
85.7k
  ptr_in2 = ptr_data + n - 1;
359
360
85.7k
  ia_eaacp_enc_pre_twiddle_compute(ptr_in1, ptr_in2, ptr_x, ptr_cos_array, n_by_4);
361
85.7k
}
362
363
32.6M
static PLATFORM_INLINE WORD8 ia_enhaacplus_enc_calc_norm(WORD32 a) {
364
32.6M
  WORD8 norm_val;
365
366
32.6M
  if (a == 0) {
367
0
    norm_val = 31;
368
32.6M
  } else {
369
32.6M
    if (a == (WORD32)0xffffffffL) {
370
0
      norm_val = 31;
371
32.6M
    } else {
372
32.6M
      if (a < 0) {
373
0
        a = ~a;
374
0
      }
375
846M
      for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
376
813M
        a <<= 1;
377
813M
      }
378
32.6M
    }
379
32.6M
  }
380
381
32.6M
  return norm_val;
382
32.6M
}
383
384
static PLATFORM_INLINE VOID ia_enhaacplus_enc_complex_3point_fft(FLOAT32 *ptr_in,
385
0
                                                                 FLOAT32 *ptr_out) {
386
0
  FLOAT32 add_r, sub_r;
387
0
  FLOAT32 add_i, sub_i;
388
0
  FLOAT32 x_01_r, x_01_i, temp;
389
0
  FLOAT32 p1, p2, p3, p4;
390
0
  FLOAT64 sin_mu = 0.866025403784439f;
391
392
0
  x_01_r = ptr_in[0] + ptr_in[2];
393
0
  x_01_i = ptr_in[1] + ptr_in[3];
394
395
0
  add_r = ptr_in[2] + ptr_in[4];
396
0
  add_i = ptr_in[3] + ptr_in[5];
397
398
0
  sub_r = ptr_in[2] - ptr_in[4];
399
0
  sub_i = ptr_in[3] - ptr_in[5];
400
401
0
  p1 = add_r / (FLOAT32)2.0f;
402
0
  p4 = add_i / (FLOAT32)2.0f;
403
0
  p2 = (FLOAT32)((FLOAT64)sub_i * sin_mu);
404
0
  p3 = (FLOAT32)((FLOAT64)sub_r * sin_mu);
405
406
0
  temp = ptr_in[0] - p1;
407
408
0
  ptr_out[0] = x_01_r + ptr_in[4];
409
0
  ptr_out[1] = x_01_i + ptr_in[5];
410
0
  ptr_out[2] = temp + p2;
411
0
  ptr_out[3] = (ptr_in[1] - p3) - p4;
412
0
  ptr_out[4] = temp - p2;
413
0
  ptr_out[5] = (ptr_in[1] + p3) - p4;
414
0
}
415
416
VOID ia_enhaacplus_enc_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength,
417
16.3M
                                      FLOAT32 *ptr_scratch_fft_p2_y) {
418
16.3M
  WORD32 i, j, k, n_stages, h2;
419
16.3M
  FLOAT32 x0_r, x0_i, x1_r, x1_i, x2_r, x2_i, x3_r, x3_i;
420
16.3M
  WORD32 del, nodespacing, in_loop_cnt;
421
16.3M
  WORD32 not_power_4;
422
16.3M
  WORD32 dig_rev_shift;
423
16.3M
  FLOAT32 *ptr_p2_y = ptr_scratch_fft_p2_y;
424
16.3M
  WORD32 mpass = nlength;
425
16.3M
  WORD32 npoints = nlength;
426
16.3M
  FLOAT32 *ptr_y = ptr_p2_y;
427
16.3M
  const FLOAT64 *ptr_w;
428
16.3M
  FLOAT32 *ptr_inp;
429
16.3M
  FLOAT32 tmk;
430
16.3M
  const FLOAT64 *ptr_twiddles;
431
16.3M
  FLOAT32 *ptr_data;
432
16.3M
  FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
433
16.3M
  WORD32 sec_loop_cnt;
434
16.3M
  FLOAT32 tmp;
435
436
16.3M
  memset(ptr_y, 0, nlength * 2 * sizeof(*ptr_y));
437
438
16.3M
  dig_rev_shift = ia_enhaacplus_enc_calc_norm(mpass) + 1 - 16;
439
16.3M
  n_stages = 30 - ia_enhaacplus_enc_calc_norm(mpass);
440
16.3M
  not_power_4 = n_stages & 1;
441
442
16.3M
  n_stages = n_stages >> 1;
443
444
16.3M
  ptr_w = ia_enhaacplus_enc_twiddle_table_fft_32x32;
445
446
16.3M
  dig_rev_shift = MAX(dig_rev_shift, 0);
447
448
169M
  for (i = 0; i < npoints; i += 4) {
449
153M
    ptr_inp = ptr_x;
450
153M
    DIG_REV_NEW(i, dig_rev_shift, h2);
451
153M
    if (not_power_4) {
452
142M
      h2 += 1;
453
142M
      h2 &= ~1;
454
142M
    }
455
153M
    ptr_inp += (h2);
456
457
153M
    x0_r = *ptr_inp;
458
153M
    x0_i = *(ptr_inp + 1);
459
153M
    ptr_inp += (npoints >> 1);
460
461
153M
    x1_r = *ptr_inp;
462
153M
    x1_i = *(ptr_inp + 1);
463
153M
    ptr_inp += (npoints >> 1);
464
465
153M
    x2_r = *ptr_inp;
466
153M
    x2_i = *(ptr_inp + 1);
467
153M
    ptr_inp += (npoints >> 1);
468
469
153M
    x3_r = *ptr_inp;
470
153M
    x3_i = *(ptr_inp + 1);
471
472
153M
    x0_r = x0_r + x2_r;
473
153M
    x0_i = x0_i + x2_i;
474
475
153M
    tmk = x0_r - x2_r;
476
153M
    x2_r = tmk - x2_r;
477
153M
    tmk = x0_i - x2_i;
478
153M
    x2_i = tmk - x2_i;
479
480
153M
    x1_r = x1_r + x3_r;
481
153M
    x1_i = x1_i + x3_i;
482
483
153M
    tmk = x1_r - x3_r;
484
153M
    x3_r = tmk - x3_r;
485
153M
    tmk = x1_i - x3_i;
486
153M
    x3_i = tmk - x3_i;
487
488
153M
    x0_r = x0_r + x1_r;
489
153M
    x0_i = x0_i + x1_i;
490
491
153M
    tmk = x0_r - x1_r;
492
153M
    x1_r = tmk - x1_r;
493
153M
    tmk = x0_i - x1_i;
494
153M
    x1_i = tmk - x1_i;
495
496
153M
    x2_r = x2_r + x3_i;
497
153M
    x2_i = x2_i - x3_r;
498
499
153M
    tmk = x2_r - x3_i;
500
153M
    x3_i = tmk - x3_i;
501
153M
    tmk = x2_i + x3_r;
502
153M
    x3_r = tmk + x3_r;
503
504
153M
    *ptr_y++ = x0_r;
505
153M
    *ptr_y++ = x0_i;
506
153M
    *ptr_y++ = x2_r;
507
153M
    *ptr_y++ = x2_i;
508
153M
    *ptr_y++ = x1_r;
509
153M
    *ptr_y++ = x1_i;
510
153M
    *ptr_y++ = x3_i;
511
153M
    *ptr_y++ = x3_r;
512
153M
  }
513
16.3M
  ptr_y -= 2 * npoints;
514
16.3M
  del = 4;
515
16.3M
  nodespacing = 64;
516
16.3M
  in_loop_cnt = npoints >> 4;
517
33.3M
  for (i = n_stages - 1; i > 0; i--) {
518
17.0M
    ptr_twiddles = ptr_w;
519
17.0M
    ptr_data = ptr_y;
520
57.3M
    for (k = in_loop_cnt; k != 0; k--) {
521
40.2M
      x0_r = (*ptr_data);
522
40.2M
      x0_i = (*(ptr_data + 1));
523
40.2M
      ptr_data += ((SIZE_T)del << 1);
524
525
40.2M
      x1_r = (*ptr_data);
526
40.2M
      x1_i = (*(ptr_data + 1));
527
40.2M
      ptr_data += ((SIZE_T)del << 1);
528
529
40.2M
      x2_r = (*ptr_data);
530
40.2M
      x2_i = (*(ptr_data + 1));
531
40.2M
      ptr_data += ((SIZE_T)del << 1);
532
533
40.2M
      x3_r = (*ptr_data);
534
40.2M
      x3_i = (*(ptr_data + 1));
535
40.2M
      ptr_data -= 3 * (del << 1);
536
537
40.2M
      x0_r = x0_r + x2_r;
538
40.2M
      x0_i = x0_i + x2_i;
539
40.2M
      x2_r = x0_r - (x2_r * 2);
540
40.2M
      x2_i = x0_i - (x2_i * 2);
541
40.2M
      x1_r = x1_r + x3_r;
542
40.2M
      x1_i = x1_i + x3_i;
543
40.2M
      x3_r = x1_r - (x3_r * 2);
544
40.2M
      x3_i = x1_i - (x3_i * 2);
545
546
40.2M
      x0_r = x0_r + x1_r;
547
40.2M
      x0_i = x0_i + x1_i;
548
40.2M
      x1_r = x0_r - (x1_r * 2);
549
40.2M
      x1_i = x0_i - (x1_i * 2);
550
40.2M
      x2_r = x2_r + x3_i;
551
40.2M
      x2_i = x2_i - x3_r;
552
40.2M
      x3_i = x2_r - (x3_i * 2);
553
40.2M
      x3_r = x2_i + (x3_r * 2);
554
555
40.2M
      *ptr_data = x0_r;
556
40.2M
      *(ptr_data + 1) = x0_i;
557
40.2M
      ptr_data += ((SIZE_T)del << 1);
558
559
40.2M
      *ptr_data = x2_r;
560
40.2M
      *(ptr_data + 1) = x2_i;
561
40.2M
      ptr_data += ((SIZE_T)del << 1);
562
563
40.2M
      *ptr_data = x1_r;
564
40.2M
      *(ptr_data + 1) = x1_i;
565
40.2M
      ptr_data += ((SIZE_T)del << 1);
566
567
40.2M
      *ptr_data = x3_i;
568
40.2M
      *(ptr_data + 1) = x3_r;
569
40.2M
      ptr_data += ((SIZE_T)del << 1);
570
40.2M
    }
571
17.0M
    ptr_data = ptr_y + 2;
572
573
17.0M
    sec_loop_cnt = (nodespacing * del);
574
17.0M
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
575
17.0M
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
576
17.0M
                   (sec_loop_cnt / 256);
577
578
40.4M
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
579
23.3M
      w_1 = *(ptr_twiddles + j);
580
23.3M
      w_4 = *(ptr_twiddles + j + 257);
581
23.3M
      w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
582
23.3M
      w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
583
23.3M
      w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1));
584
23.3M
      w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 257);
585
586
77.0M
      for (k = in_loop_cnt; k != 0; k--) {
587
53.6M
        ptr_data += ((SIZE_T)del << 1);
588
589
53.6M
        x1_r = *ptr_data;
590
53.6M
        x1_i = *(ptr_data + 1);
591
53.6M
        ptr_data += ((SIZE_T)del << 1);
592
593
53.6M
        x2_r = *ptr_data;
594
53.6M
        x2_i = *(ptr_data + 1);
595
53.6M
        ptr_data += ((SIZE_T)del << 1);
596
597
53.6M
        x3_r = *ptr_data;
598
53.6M
        x3_i = *(ptr_data + 1);
599
53.6M
        ptr_data -= 3 * (del << 1);
600
601
53.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
602
53.6M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
603
53.6M
        x1_r = tmp;
604
605
53.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
606
53.6M
        x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
607
53.6M
        x2_r = tmp;
608
609
53.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_3) - ixheaace_dmult((FLOAT64)x3_i, w_6));
610
53.6M
        x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
611
53.6M
        x3_r = tmp;
612
613
53.6M
        x0_r = (*ptr_data);
614
53.6M
        x0_i = (*(ptr_data + 1));
615
616
53.6M
        x0_r = x0_r + (x2_r);
617
53.6M
        x0_i = x0_i + (x2_i);
618
53.6M
        x2_r = x0_r - (x2_r * 2);
619
53.6M
        x2_i = x0_i - (x2_i * 2);
620
53.6M
        x1_r = x1_r + x3_r;
621
53.6M
        x1_i = x1_i + x3_i;
622
53.6M
        x3_r = x1_r - (x3_r * 2);
623
53.6M
        x3_i = x1_i - (x3_i * 2);
624
625
53.6M
        x0_r = x0_r + (x1_r);
626
53.6M
        x0_i = x0_i + (x1_i);
627
53.6M
        x1_r = x0_r - (x1_r * 2);
628
53.6M
        x1_i = x0_i - (x1_i * 2);
629
53.6M
        x2_r = x2_r + (x3_i);
630
53.6M
        x2_i = x2_i - (x3_r);
631
53.6M
        x3_i = x2_r - (x3_i * 2);
632
53.6M
        x3_r = x2_i + (x3_r * 2);
633
634
53.6M
        *ptr_data = x0_r;
635
53.6M
        *(ptr_data + 1) = x0_i;
636
53.6M
        ptr_data += ((SIZE_T)del << 1);
637
638
53.6M
        *ptr_data = x2_r;
639
53.6M
        *(ptr_data + 1) = x2_i;
640
53.6M
        ptr_data += ((SIZE_T)del << 1);
641
642
53.6M
        *ptr_data = x1_r;
643
53.6M
        *(ptr_data + 1) = x1_i;
644
53.6M
        ptr_data += ((SIZE_T)del << 1);
645
646
53.6M
        *ptr_data = x3_i;
647
53.6M
        *(ptr_data + 1) = x3_r;
648
53.6M
        ptr_data += ((SIZE_T)del << 1);
649
53.6M
      }
650
23.3M
      ptr_data -= 2 * npoints;
651
23.3M
      ptr_data += 2;
652
23.3M
    }
653
37.2M
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
654
20.2M
      w_1 = *(ptr_twiddles + j);
655
20.2M
      w_4 = *(ptr_twiddles + j + 257);
656
20.2M
      w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
657
20.2M
      w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
658
20.2M
      w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1) - 256);
659
20.2M
      w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 1);
660
661
67.2M
      for (k = in_loop_cnt; k != 0; k--) {
662
46.9M
        ptr_data += ((SIZE_T)del << 1);
663
664
46.9M
        x1_r = *ptr_data;
665
46.9M
        x1_i = *(ptr_data + 1);
666
46.9M
        ptr_data += ((SIZE_T)del << 1);
667
668
46.9M
        x2_r = *ptr_data;
669
46.9M
        x2_i = *(ptr_data + 1);
670
46.9M
        ptr_data += ((SIZE_T)del << 1);
671
672
46.9M
        x3_r = *ptr_data;
673
46.9M
        x3_i = *(ptr_data + 1);
674
46.9M
        ptr_data -= 3 * (del << 1);
675
676
46.9M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
677
46.9M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
678
46.9M
        x1_r = tmp;
679
680
46.9M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
681
46.9M
        x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
682
46.9M
        x2_r = tmp;
683
684
46.9M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
685
46.9M
        x3_i =
686
46.9M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
687
46.9M
        x3_r = tmp;
688
689
46.9M
        x0_r = (*ptr_data);
690
46.9M
        x0_i = (*(ptr_data + 1));
691
692
46.9M
        x0_r = x0_r + (x2_r);
693
46.9M
        x0_i = x0_i + (x2_i);
694
46.9M
        x2_r = x0_r - (x2_r * 2);
695
46.9M
        x2_i = x0_i - (x2_i * 2);
696
46.9M
        x1_r = x1_r + x3_r;
697
46.9M
        x1_i = x1_i + x3_i;
698
46.9M
        x3_r = x1_r - (x3_r * 2);
699
46.9M
        x3_i = x1_i - (x3_i * 2);
700
701
46.9M
        x0_r = x0_r + (x1_r);
702
46.9M
        x0_i = x0_i + (x1_i);
703
46.9M
        x1_r = x0_r - (x1_r * 2);
704
46.9M
        x1_i = x0_i - (x1_i * 2);
705
46.9M
        x2_r = x2_r + (x3_i);
706
46.9M
        x2_i = x2_i - (x3_r);
707
46.9M
        x3_i = x2_r - (x3_i * 2);
708
46.9M
        x3_r = x2_i + (x3_r * 2);
709
710
46.9M
        *ptr_data = x0_r;
711
46.9M
        *(ptr_data + 1) = x0_i;
712
46.9M
        ptr_data += ((SIZE_T)del << 1);
713
714
46.9M
        *ptr_data = x2_r;
715
46.9M
        *(ptr_data + 1) = x2_i;
716
46.9M
        ptr_data += ((SIZE_T)del << 1);
717
718
46.9M
        *ptr_data = x1_r;
719
46.9M
        *(ptr_data + 1) = x1_i;
720
46.9M
        ptr_data += ((SIZE_T)del << 1);
721
722
46.9M
        *ptr_data = x3_i;
723
46.9M
        *(ptr_data + 1) = x3_r;
724
46.9M
        ptr_data += ((SIZE_T)del << 1);
725
46.9M
      }
726
20.2M
      ptr_data -= 2 * npoints;
727
20.2M
      ptr_data += 2;
728
20.2M
    }
729
20.2M
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
730
3.17M
      w_1 = *(ptr_twiddles + j);
731
3.17M
      w_4 = *(ptr_twiddles + j + 257);
732
3.17M
      w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
733
3.17M
      w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
734
3.17M
      w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 256));
735
3.17M
      w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) + 1));
736
737
9.88M
      for (k = in_loop_cnt; k != 0; k--) {
738
6.71M
        ptr_data += ((SIZE_T)del << 1);
739
740
6.71M
        x1_r = *ptr_data;
741
6.71M
        x1_i = *(ptr_data + 1);
742
6.71M
        ptr_data += ((SIZE_T)del << 1);
743
744
6.71M
        x2_r = *ptr_data;
745
6.71M
        x2_i = *(ptr_data + 1);
746
6.71M
        ptr_data += ((SIZE_T)del << 1);
747
748
6.71M
        x3_r = *ptr_data;
749
6.71M
        x3_i = *(ptr_data + 1);
750
6.71M
        ptr_data -= 3 * (del << 1);
751
752
6.71M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
753
6.71M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1_r, w_4), x1_i, w_1);
754
6.71M
        x1_r = tmp;
755
756
6.71M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
757
6.71M
        x2_i = (FLOAT32)(-ixheaace_dmult(x2_r, w_2) + ixheaace_dmult(x2_i, w_5));
758
6.71M
        x2_r = tmp;
759
760
6.71M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
761
6.71M
        x3_i =
762
6.71M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
763
6.71M
        x3_r = tmp;
764
765
6.71M
        x0_r = (*ptr_data);
766
6.71M
        x0_i = (*(ptr_data + 1));
767
768
6.71M
        x0_r = x0_r + (x2_r);
769
6.71M
        x0_i = x0_i + (x2_i);
770
6.71M
        x2_r = x0_r - (x2_r * 2);
771
6.71M
        x2_i = x0_i - (x2_i * 2);
772
6.71M
        x1_r = x1_r + x3_r;
773
6.71M
        x1_i = x1_i + x3_i;
774
6.71M
        x3_r = x1_r - (x3_r * 2);
775
6.71M
        x3_i = x1_i - (x3_i * 2);
776
777
6.71M
        x0_r = x0_r + (x1_r);
778
6.71M
        x0_i = x0_i + (x1_i);
779
6.71M
        x1_r = x0_r - (x1_r * 2);
780
6.71M
        x1_i = x0_i - (x1_i * 2);
781
6.71M
        x2_r = x2_r + (x3_i);
782
6.71M
        x2_i = x2_i - (x3_r);
783
6.71M
        x3_i = x2_r - (x3_i * 2);
784
6.71M
        x3_r = x2_i + (x3_r * 2);
785
786
6.71M
        *ptr_data = x0_r;
787
6.71M
        *(ptr_data + 1) = x0_i;
788
6.71M
        ptr_data += ((SIZE_T)del << 1);
789
790
6.71M
        *ptr_data = x2_r;
791
6.71M
        *(ptr_data + 1) = x2_i;
792
6.71M
        ptr_data += ((SIZE_T)del << 1);
793
794
6.71M
        *ptr_data = x1_r;
795
6.71M
        *(ptr_data + 1) = x1_i;
796
6.71M
        ptr_data += ((SIZE_T)del << 1);
797
798
6.71M
        *ptr_data = x3_i;
799
6.71M
        *(ptr_data + 1) = x3_r;
800
6.71M
        ptr_data += ((SIZE_T)del << 1);
801
6.71M
      }
802
3.17M
      ptr_data -= 2 * npoints;
803
3.17M
      ptr_data += 2;
804
3.17M
    }
805
40.4M
    for (; j < nodespacing * del; j += nodespacing) {
806
23.3M
      w_1 = *(ptr_twiddles + j);
807
23.3M
      w_4 = *(ptr_twiddles + j + 257);
808
23.3M
      w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
809
23.3M
      w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
810
23.3M
      w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512));
811
23.3M
      w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512 + 257));
812
813
77.0M
      for (k = in_loop_cnt; k != 0; k--) {
814
53.6M
        ptr_data += ((SIZE_T)del << 1);
815
816
53.6M
        x1_r = *ptr_data;
817
53.6M
        x1_i = *(ptr_data + 1);
818
53.6M
        ptr_data += ((SIZE_T)del << 1);
819
820
53.6M
        x2_r = *ptr_data;
821
53.6M
        x2_i = *(ptr_data + 1);
822
53.6M
        ptr_data += ((SIZE_T)del << 1);
823
824
53.6M
        x3_r = *ptr_data;
825
53.6M
        x3_i = *(ptr_data + 1);
826
53.6M
        ptr_data -= 3 * (del << 1);
827
828
53.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
829
53.6M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
830
53.6M
        x1_r = tmp;
831
832
53.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
833
53.6M
        x2_i =
834
53.6M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x2_r, w_2) + ixheaace_dmult((FLOAT64)x2_i, w_5));
835
53.6M
        x2_r = tmp;
836
837
53.6M
        tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
838
53.6M
        x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
839
53.6M
        x3_r = tmp;
840
841
53.6M
        x0_r = (*ptr_data);
842
53.6M
        x0_i = (*(ptr_data + 1));
843
844
53.6M
        x0_r = x0_r + (x2_r);
845
53.6M
        x0_i = x0_i + (x2_i);
846
53.6M
        x2_r = x0_r - (x2_r * 2);
847
53.6M
        x2_i = x0_i - (x2_i * 2);
848
53.6M
        x1_r = x1_r + x3_r;
849
53.6M
        x1_i = x1_i - x3_i;
850
53.6M
        x3_r = x1_r - (x3_r * 2);
851
53.6M
        x3_i = x1_i + (x3_i * 2);
852
853
53.6M
        x0_r = x0_r + (x1_r);
854
53.6M
        x0_i = x0_i + (x1_i);
855
53.6M
        x1_r = x0_r - (x1_r * 2);
856
53.6M
        x1_i = x0_i - (x1_i * 2);
857
53.6M
        x2_r = x2_r + (x3_i);
858
53.6M
        x2_i = x2_i - (x3_r);
859
53.6M
        x3_i = x2_r - (x3_i * 2);
860
53.6M
        x3_r = x2_i + (x3_r * 2);
861
862
53.6M
        *ptr_data = x0_r;
863
53.6M
        *(ptr_data + 1) = x0_i;
864
53.6M
        ptr_data += ((SIZE_T)del << 1);
865
866
53.6M
        *ptr_data = x2_r;
867
53.6M
        *(ptr_data + 1) = x2_i;
868
53.6M
        ptr_data += ((SIZE_T)del << 1);
869
870
53.6M
        *ptr_data = x1_r;
871
53.6M
        *(ptr_data + 1) = x1_i;
872
53.6M
        ptr_data += ((SIZE_T)del << 1);
873
874
53.6M
        *ptr_data = x3_i;
875
53.6M
        *(ptr_data + 1) = x3_r;
876
53.6M
        ptr_data += ((SIZE_T)del << 1);
877
53.6M
      }
878
23.3M
      ptr_data -= 2 * npoints;
879
23.3M
      ptr_data += 2;
880
23.3M
    }
881
17.0M
    nodespacing >>= 2;
882
17.0M
    del <<= 2;
883
17.0M
    in_loop_cnt >>= 2;
884
17.0M
  }
885
16.3M
  if (not_power_4) {
886
15.9M
    ptr_twiddles = ptr_w;
887
15.9M
    nodespacing <<= 1;
888
889
158M
    for (j = del / 2; j != 0; j--) {
890
142M
      w_1 = *ptr_twiddles;
891
142M
      w_4 = *(ptr_twiddles + 257);
892
142M
      ptr_twiddles += nodespacing;
893
894
142M
      x0_r = *ptr_y;
895
142M
      x0_i = *(ptr_y + 1);
896
142M
      ptr_y += ((SIZE_T)del << 1);
897
898
142M
      x1_r = *ptr_y;
899
142M
      x1_i = *(ptr_y + 1);
900
901
142M
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
902
142M
      x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
903
142M
      x1_r = tmp;
904
905
142M
      *ptr_y = (x0_r) - (x1_r);
906
142M
      *(ptr_y + 1) = (x0_i) - (x1_i);
907
142M
      ptr_y -= ((SIZE_T)del << 1);
908
909
142M
      *ptr_y = (x0_r) + (x1_r);
910
142M
      *(ptr_y + 1) = (x0_i) + (x1_i);
911
142M
      ptr_y += 2;
912
142M
    }
913
15.9M
    ptr_twiddles = ptr_w;
914
158M
    for (j = del / 2; j != 0; j--) {
915
142M
      w_1 = *ptr_twiddles;
916
142M
      w_4 = *(ptr_twiddles + 257);
917
142M
      ptr_twiddles += nodespacing;
918
919
142M
      x0_r = *ptr_y;
920
142M
      x0_i = *(ptr_y + 1);
921
142M
      ptr_y += ((SIZE_T)del << 1);
922
923
142M
      x1_r = *ptr_y;
924
142M
      x1_i = *(ptr_y + 1);
925
926
142M
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_4) +
927
142M
                      ixheaace_dmult((FLOAT64)x1_i, w_1)) /*/2*/;
928
142M
      x1_i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1_r, w_1) +
929
142M
                       ixheaace_dmult((FLOAT64)x1_i, w_4)) /*/2*/;
930
142M
      x1_r = tmp;
931
932
142M
      *ptr_y = (x0_r) - (x1_r);
933
142M
      *(ptr_y + 1) = (x0_i) - (x1_i);
934
142M
      ptr_y -= ((SIZE_T)del << 1);
935
936
142M
      *ptr_y = (x0_r) + (x1_r);
937
142M
      *(ptr_y + 1) = (x0_i) + (x1_i);
938
142M
      ptr_y += 2;
939
142M
    }
940
15.9M
  }
941
942
628M
  for (i = 0; i < nlength; i++) {
943
612M
    *(ptr_x + 2 * i) = ptr_p2_y[2 * i];
944
612M
    *(ptr_x + 2 * i + 1) = ptr_p2_y[2 * i + 1];
945
612M
  }
946
16.3M
}
947
948
static VOID ia_enhaacplus_enc_complex_fft_p3(FLOAT32 *ptr_data, WORD32 nlength,
949
0
                                             ixheaace_scratch_mem *pstr_scratch) {
950
0
  WORD32 i, j;
951
0
  FLOAT32 *ptr_data_3 = pstr_scratch->p_fft_p3_data_3;
952
0
  FLOAT32 *ptr_p3_y = pstr_scratch->p_fft_p3_y;
953
0
  WORD32 cnfac;
954
0
  WORD32 mpass = nlength;
955
0
  FLOAT32 *ptr_x = ptr_data;
956
0
  FLOAT32 *ptr_y = ptr_p3_y;
957
0
  cnfac = 0;
958
0
  const FLOAT64 *ptr_w1_r, *ptr_w1_i;
959
0
  FLOAT32 tmp;
960
0
  ptr_w1_r = ia_enhaacplus_enc_twiddle_table_3pr;
961
0
  ptr_w1_i = ia_enhaacplus_enc_twiddle_table_3pi;
962
963
0
  while (mpass % 3 == 0) {
964
0
    mpass /= 3;
965
0
    cnfac++;
966
0
  }
967
968
0
  for (i = 0; i < 3 * cnfac; i++) {
969
0
    for (j = 0; j < mpass; j++) {
970
0
      ptr_data_3[2 * j] = ptr_data[3 * (2 * j) + (2 * i)];
971
0
      ptr_data_3[2 * j + 1] = ptr_data[3 * (2 * j) + 1 + (2 * i)];
972
0
    }
973
0
    ia_enhaacplus_enc_complex_fft_p2(ptr_data_3, mpass, pstr_scratch->p_fft_p2_y);
974
975
0
    for (j = 0; j < mpass; j++) {
976
0
      ptr_data[3 * (2 * j) + (2 * i)] = ptr_data_3[2 * j];
977
0
      ptr_data[3 * (2 * j) + 1 + (2 * i)] = ptr_data_3[2 * j + 1];
978
0
    }
979
0
  }
980
981
0
  {
982
0
    for (i = 0; i < nlength; i += 3) {
983
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_r) -
984
0
                      (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_i));
985
0
      ptr_data[2 * i + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_i) +
986
0
                                      (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_r));
987
0
      ptr_data[2 * i] = tmp;
988
989
0
      ptr_w1_r++;
990
0
      ptr_w1_i++;
991
992
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_r) -
993
0
                      (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_i));
994
0
      ptr_data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_i) +
995
0
                                            (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_r));
996
0
      ptr_data[2 * (i + 1)] = tmp;
997
998
0
      ptr_w1_r++;
999
0
      ptr_w1_i++;
1000
1001
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_r) -
1002
0
                      (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_i));
1003
0
      ptr_data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_i) +
1004
0
                                            (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_r));
1005
0
      ptr_data[2 * (i + 2)] = tmp;
1006
1007
0
      ptr_w1_r += 3 * (128 / mpass - 1) + 1;
1008
0
      ptr_w1_i += 3 * (128 / mpass - 1) + 1;
1009
0
    }
1010
0
  }
1011
1012
0
  for (i = 0; i < mpass; i++) {
1013
0
    ia_enhaacplus_enc_complex_3point_fft(ptr_x, ptr_y);
1014
1015
0
    ptr_x = ptr_x + 6;
1016
0
    ptr_y = ptr_y + 6;
1017
0
  }
1018
1019
0
  for (i = 0; i < mpass; i++) {
1020
0
    ptr_data[2 * i] = ptr_p3_y[6 * i];
1021
0
    ptr_data[2 * i + 1] = ptr_p3_y[6 * i + 1];
1022
0
  }
1023
1024
0
  for (i = 0; i < mpass; i++) {
1025
0
    ptr_data[2 * (i + mpass)] = ptr_p3_y[6 * i + 2];
1026
0
    ptr_data[2 * (i + mpass) + 1] = ptr_p3_y[6 * i + 3];
1027
0
  }
1028
1029
0
  for (i = 0; i < mpass; i++) {
1030
0
    ptr_data[2 * (i + 2 * mpass)] = ptr_p3_y[6 * i + 4];
1031
0
    ptr_data[2 * (i + 2 * mpass) + 1] = ptr_p3_y[6 * i + 5];
1032
0
  }
1033
0
}
1034
1035
VOID ia_enhaacplus_enc_complex_fft(FLOAT32 *ptr_data, WORD32 len,
1036
12.3M
                                   ixheaace_scratch_mem *pstr_scratch) {
1037
12.3M
  if (len & (len - 1)) {
1038
0
    ia_enhaacplus_enc_complex_fft_p3(ptr_data, len, pstr_scratch);
1039
12.3M
  } else {
1040
12.3M
    ia_enhaacplus_enc_complex_fft_p2(ptr_data, len, pstr_scratch->p_fft_p2_y);
1041
12.3M
  }
1042
12.3M
}
1043
1044
static VOID ixheaace_post_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_trig_data,
1045
518k
                               WORD32 step, WORD32 trig_data_size) {
1046
518k
  WORD32 i;
1047
518k
  FLOAT32 w_re, w_im, re1, re2, im1, im2;
1048
518k
  const FLOAT32 *ptr_sin = ptr_trig_data;
1049
518k
  const FLOAT32 *ptr_cos = ptr_trig_data + trig_data_size;
1050
1051
518k
  w_im = *ptr_sin;
1052
518k
  w_re = *ptr_cos;
1053
1054
53.7M
  for (i = 0; i < m / 4; i++) {
1055
53.1M
    re1 = ptr_x[2 * i];
1056
53.1M
    im1 = ptr_x[2 * i + 1];
1057
53.1M
    re2 = ptr_x[m - 2 - 2 * i];
1058
53.1M
    im2 = ptr_x[m - 1 - 2 * i];
1059
1060
53.1M
    ptr_x[2 * i] = (re1 * w_re + im1 * w_im);
1061
1062
53.1M
    ptr_x[m - 1 - 2 * i] = (re1 * w_im - im1 * w_re);
1063
1064
53.1M
    ptr_sin += step;
1065
53.1M
    ptr_cos -= step;
1066
1067
53.1M
    w_im = *ptr_sin;
1068
53.1M
    w_re = *ptr_cos;
1069
1070
53.1M
    ptr_x[m - 2 - 2 * i] = (re2 * w_im + im2 * w_re);
1071
1072
53.1M
    ptr_x[2 * i + 1] = (re2 * w_re - im2 * w_im);
1073
53.1M
  }
1074
518k
}
1075
1076
static VOID ixheaace_cplx_mult_twid(FLOAT32 *ptr_re, FLOAT32 *ptr_im, FLOAT32 a, FLOAT32 b,
1077
69.5M
                                    FLOAT32 twid_table, FLOAT32 twid_table_h) {
1078
69.5M
  *ptr_re = (a * twid_table) - (b * twid_table_h);
1079
69.5M
  *ptr_im = (a * twid_table_h) + (b * twid_table);
1080
69.5M
}
1081
1082
3.22M
static VOID ixheaace_cfft_15_twiddle(FLOAT32 *ptr_inp) {
1083
3.22M
  const FLOAT32 *ptr_tw_flt = &ixheaace_mix_rad_twid_tbl[0];
1084
3.22M
  const FLOAT32 *ptr_tw_flt_h = &ixheaace_mix_rad_twid_tbl_h[0];
1085
3.22M
  FLOAT32 accu1, accu2;
1086
3.22M
  WORD32 i, j;
1087
3.22M
  ptr_inp += 12;
1088
1089
9.67M
  for (j = 0; j < 2; j++) {
1090
32.2M
    for (i = 0; i < 4; i++) {
1091
25.7M
      ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1092
25.7M
                              ptr_tw_flt[i], ptr_tw_flt_h[i]);
1093
25.7M
      ptr_inp[2 * i + 0] = accu1;
1094
25.7M
      ptr_inp[2 * i + 1] = accu2;
1095
25.7M
    }
1096
6.44M
    ptr_inp += 10;
1097
6.44M
    ptr_tw_flt += 4;
1098
6.44M
    ptr_tw_flt_h += 4;
1099
6.44M
  }
1100
3.22M
}
1101
1102
3.22M
static VOID ixheaace_cfft_15_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, FLOAT32 *ptr_fft3_out) {
1103
3.22M
  WORD32 i, idx;
1104
3.22M
  FLOAT32 *ptr_buf1, *ptr_buf2, *ptr_buf3;
1105
3.22M
  FLOAT32 add_r, sub_r;
1106
3.22M
  FLOAT32 add_i, sub_i;
1107
3.22M
  FLOAT32 x_01_r, x_01_i, temp;
1108
3.22M
  FLOAT32 p1, p2, p3, p4;
1109
1110
3.22M
  FLOAT32 sin_mu_flt = 0.866027832f;
1111
3.22M
  FLOAT32 c51_flt = 0.951049805f;
1112
3.22M
  FLOAT32 c52_flt = -0.76940918f;
1113
3.22M
  FLOAT32 c53_flt = -0.36328125f;
1114
3.22M
  FLOAT32 c54_flt = 0.559020996f;
1115
3.22M
  FLOAT32 c55_flt = -0.625f;
1116
1117
3.22M
  FLOAT32 r1, r2, r3, r4;
1118
3.22M
  FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1119
3.22M
  FLOAT32 *ptr_out_fft3 = ptr_fft3_out;
1120
1121
3.22M
  FLOAT32 xr_0, xr_1, xr_2;
1122
3.22M
  FLOAT32 xi_0, xi_1, xi_2;
1123
1124
3.22M
  ptr_buf2 = ptr_fft3_out;
1125
3.22M
  ptr_buf1 = ptr_buf3 = ptr_fft3_out;
1126
1127
12.8M
  for (i = 0; i < FFT3; i++) {
1128
9.67M
    *ptr_buf1++ = ptr_inp[0 + 64 * i];
1129
9.67M
    *ptr_buf1++ = ptr_inp[1 + 64 * i];
1130
1131
9.67M
    *ptr_buf1++ = ptr_inp[192 + 64 * i];
1132
9.67M
    *ptr_buf1++ = ptr_inp[193 + 64 * i];
1133
1134
9.67M
    *ptr_buf1++ = ptr_inp[384 + 64 * i];
1135
9.67M
    *ptr_buf1++ = ptr_inp[385 + 64 * i];
1136
1137
9.67M
    *ptr_buf1++ = ptr_inp[576 + 64 * i];
1138
9.67M
    *ptr_buf1++ = ptr_inp[577 + 64 * i];
1139
1140
9.67M
    *ptr_buf1++ = ptr_inp[768 + 64 * i];
1141
9.67M
    *ptr_buf1++ = ptr_inp[769 + 64 * i];
1142
1143
9.67M
    r1 = ptr_buf3[2] + ptr_buf3[8];
1144
9.67M
    r4 = ptr_buf3[2] - ptr_buf3[8];
1145
9.67M
    r3 = ptr_buf3[4] + ptr_buf3[6];
1146
9.67M
    r2 = ptr_buf3[4] - ptr_buf3[6];
1147
9.67M
    t = ((r1 - r3) * c54_flt);
1148
1149
9.67M
    r1 = r1 + r3;
1150
1151
9.67M
    temp1 = ptr_buf3[0] + r1;
1152
1153
9.67M
    r1 = temp1 + ((r1 * c55_flt) * 2);
1154
1155
9.67M
    r3 = r1 - t;
1156
9.67M
    r1 = r1 + t;
1157
1158
9.67M
    t = ((r4 + r2) * c51_flt);
1159
9.67M
    r4 = t + ((r4 * c52_flt) * 2);
1160
9.67M
    r2 = t + (r2 * c53_flt);
1161
1162
9.67M
    s1 = ptr_buf3[3] + ptr_buf3[9];
1163
9.67M
    s4 = ptr_buf3[3] - ptr_buf3[9];
1164
9.67M
    s3 = ptr_buf3[5] + ptr_buf3[7];
1165
9.67M
    s2 = ptr_buf3[5] - ptr_buf3[7];
1166
1167
9.67M
    t = ((s1 - s3) * c54_flt);
1168
1169
9.67M
    s1 = s1 + s3;
1170
1171
9.67M
    temp2 = ptr_buf3[1] + s1;
1172
1173
9.67M
    s1 = temp2 + (((s1 * c55_flt)) * 2);
1174
1175
9.67M
    s3 = s1 - t;
1176
9.67M
    s1 = s1 + t;
1177
1178
9.67M
    t = ((s4 + s2) * c51_flt);
1179
9.67M
    s4 = t + (((s4 * c52_flt)) * 2);
1180
9.67M
    s2 = t + ((s2 * c53_flt));
1181
1182
9.67M
    *ptr_buf2++ = temp1;
1183
9.67M
    *ptr_buf2++ = temp2;
1184
9.67M
    *ptr_buf2++ = r1 + s2;
1185
9.67M
    *ptr_buf2++ = s1 - r2;
1186
9.67M
    *ptr_buf2++ = r3 - s4;
1187
9.67M
    *ptr_buf2++ = s3 + r4;
1188
9.67M
    *ptr_buf2++ = r3 + s4;
1189
9.67M
    *ptr_buf2++ = s3 - r4;
1190
9.67M
    *ptr_buf2++ = r1 - s2;
1191
9.67M
    *ptr_buf2++ = s1 + r2;
1192
9.67M
    ptr_buf3 = ptr_buf1;
1193
9.67M
  }
1194
1195
3.22M
  idx = 0;
1196
3.22M
  ixheaace_cfft_15_twiddle(ptr_out_fft3);
1197
1198
19.3M
  for (i = 0; i < FFT5; i++) {
1199
16.1M
    xr_0 = ptr_out_fft3[0];
1200
16.1M
    xi_0 = ptr_out_fft3[1];
1201
1202
16.1M
    xr_1 = ptr_out_fft3[10];
1203
16.1M
    xi_1 = ptr_out_fft3[11];
1204
1205
16.1M
    xr_2 = ptr_out_fft3[20];
1206
16.1M
    xi_2 = ptr_out_fft3[21];
1207
1208
16.1M
    x_01_r = (xr_0 + xr_1);
1209
16.1M
    x_01_i = (xi_0 + xi_1);
1210
1211
16.1M
    add_r = (xr_1 + xr_2);
1212
16.1M
    add_i = (xi_1 + xi_2);
1213
1214
16.1M
    sub_r = (xr_1 - xr_2);
1215
16.1M
    sub_i = (xi_1 - xi_2);
1216
1217
16.1M
    p1 = add_r / 2;
1218
1219
16.1M
    p2 = (sub_i * sin_mu_flt);
1220
16.1M
    p3 = (sub_r * sin_mu_flt);
1221
1222
16.1M
    p4 = add_i / 2;
1223
1224
16.1M
    temp = (xr_0 - p1);
1225
16.1M
    temp1 = (xi_0 + p3);
1226
16.1M
    temp2 = (xi_0 - p3);
1227
1228
16.1M
    ptr_op[idx] = (x_01_r + xr_2);
1229
16.1M
    ptr_op[idx + 1] = (x_01_i + xi_2);
1230
1231
16.1M
    idx = idx + 320;
1232
16.1M
    ptr_op[idx] = (temp + p2);
1233
16.1M
    ptr_op[idx + 1] = (temp2 - p4);
1234
1235
16.1M
    idx = idx + 320;
1236
16.1M
    ptr_op[idx] = (temp - p2);
1237
16.1M
    ptr_op[idx + 1] = (temp1 - p4);
1238
16.1M
    ptr_out_fft3 += 2;
1239
16.1M
    idx = idx - 576;
1240
16.1M
  }
1241
3.22M
}
1242
1243
static VOID ixheaace_cfft_twiddle_mult(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1,
1244
                                       WORD32 dim2, const FLOAT32 *ptr_tw_flt,
1245
100k
                                       const FLOAT32 *ptr_tw_h_flt) {
1246
100k
  FLOAT32 accu1, accu2;
1247
100k
  WORD32 i, j;
1248
100k
  WORD32 step_val = (dim2 - 1) << 1;
1249
3.32M
  for (i = 0; i < dim2; i++) {
1250
3.22M
    ptr_op[0] = ptr_inp[0];
1251
3.22M
    ptr_op[1] = ptr_inp[1];
1252
3.22M
    ptr_op += 2;
1253
3.22M
    ptr_inp += 2;
1254
3.22M
  }
1255
1256
1.51M
  for (j = 0; j < (dim1 - 1); j++) {
1257
1.41M
    ptr_op[0] = ptr_inp[0];
1258
1.41M
    ptr_op[1] = ptr_inp[1];
1259
1.41M
    ptr_inp += 2;
1260
1.41M
    ptr_op += 2;
1261
45.1M
    for (i = 0; i < (dim2 - 1); i++) {
1262
43.7M
      ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1263
43.7M
                              ptr_tw_flt[i], ptr_tw_h_flt[i]);
1264
43.7M
      ptr_op[2 * i + 0] = accu1;
1265
43.7M
      ptr_op[2 * i + 1] = accu2;
1266
43.7M
    }
1267
1.41M
    ptr_inp += step_val;
1268
1.41M
    ptr_op += step_val;
1269
1.41M
    ptr_tw_flt += (dim2 - 1);
1270
1.41M
    ptr_tw_h_flt += (dim2 - 1);
1271
1.41M
  }
1272
100k
}
1273
1274
1.51M
static VOID ixheaace_cfft_32_480(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1275
1.51M
  WORD32 i, l1, l2, h2;
1276
1.51M
  FLOAT32 xh0_0, xh1_0, xl0_0, xl1_0;
1277
1.51M
  FLOAT32 xh0_1, xh1_1, xl0_1, xl1_1;
1278
1.51M
  FLOAT32 x_0, x_1, x_2, x_3;
1279
1.51M
  FLOAT32 x_4, x_5, x_6, x_7;
1280
1.51M
  FLOAT32 *ptr_x;
1281
1.51M
  FLOAT32 *ptr_y;
1282
1.51M
  FLOAT32 interm_y[FFT32X2];
1283
1.51M
  FLOAT32 n00, n10, n20, n30, n01, n11, n21, n31;
1284
1285
1.51M
  FLOAT32 inp_0qi, inp_0qr;
1286
1.51M
  FLOAT32 inp_1qi, inp_1qr;
1287
1.51M
  FLOAT32 inp_2qi, inp_2qr;
1288
1.51M
  FLOAT32 inp_3qi, inp_3qr;
1289
1.51M
  FLOAT32 mul_0qi, mul_0qr;
1290
1.51M
  FLOAT32 mul_1qi, mul_1qr;
1291
1.51M
  FLOAT32 mul_2qi, mul_2qr;
1292
1.51M
  FLOAT32 mul_3qi, mul_3qr;
1293
1.51M
  FLOAT32 sum_0qi, sum_0qr;
1294
1.51M
  FLOAT32 sum_1qi, sum_1qr;
1295
1.51M
  FLOAT32 sum_2qi, sum_2qr;
1296
1.51M
  FLOAT32 sum_3qi, sum_3qr;
1297
1.51M
  WORD32 idx1 = 0, idx2 = FFT15 * FFT32;
1298
1.51M
  FLOAT32 mul_i, mul_r;
1299
1300
1.51M
  ptr_x = ptr_in;
1301
1302
  // This computes first and second stage butterflies. So, 4-point FFT is done.
1303
13.6M
  for (i = 0; i < 8; i++) {
1304
12.0M
    x_0 = ptr_x[0];
1305
12.0M
    x_1 = ptr_x[1];
1306
12.0M
    x_2 = ptr_x[16];
1307
12.0M
    x_3 = ptr_x[16 + 1];
1308
12.0M
    x_4 = ptr_x[32];
1309
12.0M
    x_5 = ptr_x[32 + 1];
1310
12.0M
    x_6 = ptr_x[48];
1311
12.0M
    x_7 = ptr_x[48 + 1];
1312
1313
12.0M
    xh0_0 = x_0 + x_4;
1314
12.0M
    xh1_0 = x_1 + x_5;
1315
12.0M
    xl0_0 = x_0 - x_4;
1316
12.0M
    xl1_0 = x_1 - x_5;
1317
12.0M
    xh0_1 = x_2 + x_6;
1318
12.0M
    xh1_1 = x_3 + x_7;
1319
12.0M
    xl0_1 = x_2 - x_6;
1320
12.0M
    xl1_1 = x_3 - x_7;
1321
1322
12.0M
    n00 = xh0_0 + xh0_1;
1323
12.0M
    n01 = xh1_0 + xh1_1;
1324
12.0M
    n10 = xl0_0 + xl1_1;
1325
12.0M
    n11 = xl1_0 - xl0_1;
1326
12.0M
    n20 = xh0_0 - xh0_1;
1327
12.0M
    n21 = xh1_0 - xh1_1;
1328
12.0M
    n30 = xl0_0 - xl1_1;
1329
12.0M
    n31 = xl1_0 + xl0_1;
1330
1331
12.0M
    ptr_x[0] = n00;
1332
12.0M
    ptr_x[1] = n01;
1333
12.0M
    ptr_x[16] = n10;
1334
12.0M
    ptr_x[16 + 1] = n11;
1335
12.0M
    ptr_x[32] = n20;
1336
12.0M
    ptr_x[32 + 1] = n21;
1337
12.0M
    ptr_x[48] = n30;
1338
12.0M
    ptr_x[48 + 1] = n31;
1339
1340
12.0M
    ptr_x += 2;
1341
12.0M
  }
1342
1343
  // This computes third and fourth stage butterflies. So, next 4-point FFT is done.
1344
1.51M
  {
1345
1.51M
    h2 = 16 >> 1;
1346
1.51M
    l1 = 16;
1347
1.51M
    l2 = 16 + (16 >> 1);
1348
1349
1.51M
    ptr_x = ptr_in;
1350
1.51M
    ptr_y = &interm_y[0];
1351
1352
    /* Butter fly summation in 2 steps */
1353
1.51M
    inp_0qr = ptr_x[0];
1354
1.51M
    inp_0qi = ptr_x[1];
1355
1.51M
    inp_1qr = ptr_x[4];
1356
1.51M
    inp_1qi = ptr_x[5];
1357
1.51M
    inp_2qr = ptr_x[8];
1358
1.51M
    inp_2qi = ptr_x[9];
1359
1.51M
    inp_3qr = ptr_x[12];
1360
1.51M
    inp_3qi = ptr_x[13];
1361
1362
1.51M
    mul_0qr = inp_0qr;
1363
1.51M
    mul_0qi = inp_0qi;
1364
1.51M
    mul_1qr = inp_1qr;
1365
1.51M
    mul_1qi = inp_1qi;
1366
1.51M
    mul_2qr = inp_2qr;
1367
1.51M
    mul_2qi = inp_2qi;
1368
1.51M
    mul_3qr = inp_3qr;
1369
1.51M
    mul_3qi = inp_3qi;
1370
1371
1.51M
    sum_0qr = mul_0qr + mul_2qr;
1372
1.51M
    sum_0qi = mul_0qi + mul_2qi;
1373
1.51M
    sum_1qr = mul_0qr - mul_2qr;
1374
1.51M
    sum_1qi = mul_0qi - mul_2qi;
1375
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1376
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1377
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1378
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1379
1380
1.51M
    ptr_y[0] = sum_0qr + sum_2qr;
1381
1.51M
    ptr_y[1] = sum_0qi + sum_2qi;
1382
1.51M
    ptr_y[h2] = sum_1qr + sum_3qi;
1383
1.51M
    ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1384
1.51M
    ptr_y[l1] = sum_0qr - sum_2qr;
1385
1.51M
    ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1386
1.51M
    ptr_y[l2] = sum_1qr - sum_3qi;
1387
1.51M
    ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1388
1389
1.51M
    ptr_y += 2;
1390
1.51M
    ptr_x += 16;
1391
1392
    /* 2nd butter fly */
1393
1394
1.51M
    inp_0qr = ptr_x[0];
1395
1.51M
    inp_0qi = ptr_x[1];
1396
1.51M
    inp_1qr = ptr_x[4];
1397
1.51M
    inp_1qi = ptr_x[5];
1398
1.51M
    inp_2qr = ptr_x[8];
1399
1.51M
    inp_2qi = ptr_x[9];
1400
1.51M
    inp_3qr = ptr_x[12];
1401
1.51M
    inp_3qi = ptr_x[13];
1402
1403
1.51M
    mul_0qr = inp_0qr;
1404
1.51M
    mul_0qi = inp_0qi;
1405
1406
1.51M
    mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1407
1.51M
    mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1408
1409
1.51M
    mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1410
1.51M
    mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1411
1412
1.51M
    mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1413
1.51M
    mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1414
1415
1.51M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1416
1.51M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1417
1.51M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1418
1.51M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1419
1420
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1421
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1422
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1423
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1424
1425
1.51M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1426
1.51M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1427
1.51M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1428
1.51M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1429
1.51M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1430
1.51M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1431
1.51M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1432
1.51M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1433
1434
1.51M
    ptr_y += 2;
1435
1.51M
    ptr_x += 16;
1436
1437
    /* 3rd butter fly */
1438
1439
1.51M
    inp_0qr = ptr_x[0];
1440
1.51M
    inp_0qi = ptr_x[1];
1441
1.51M
    inp_1qr = ptr_x[4];
1442
1.51M
    inp_1qi = ptr_x[5];
1443
1.51M
    inp_2qr = ptr_x[8];
1444
1.51M
    inp_2qi = ptr_x[9];
1445
1.51M
    inp_3qr = ptr_x[12];
1446
1.51M
    inp_3qi = ptr_x[13];
1447
1448
1.51M
    mul_0qr = inp_0qr;
1449
1.51M
    mul_0qi = inp_0qi;
1450
1451
1.51M
    mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1452
1.51M
    mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1453
1454
1.51M
    mul_2qr = inp_2qi;
1455
1.51M
    mul_2qi = inp_2qr;
1456
1457
1.51M
    mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1458
1.51M
    mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1459
1460
1.51M
    sum_0qr = mul_0qr + mul_2qr;
1461
1.51M
    sum_0qi = mul_0qi - mul_2qi;
1462
1.51M
    sum_1qr = mul_0qr - mul_2qr;
1463
1.51M
    sum_1qi = mul_0qi + mul_2qi;
1464
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1465
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1466
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1467
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1468
1469
1.51M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1470
1.51M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1471
1.51M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1472
1.51M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1473
1.51M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1474
1.51M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1475
1.51M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1476
1.51M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1477
1478
1.51M
    ptr_y += 2;
1479
1.51M
    ptr_x += 16;
1480
1481
    /* 4th butter fly */
1482
1483
1.51M
    inp_0qr = ptr_x[0];
1484
1.51M
    inp_0qi = ptr_x[1];
1485
1.51M
    inp_1qr = ptr_x[4];
1486
1.51M
    inp_1qi = ptr_x[5];
1487
1.51M
    inp_2qr = ptr_x[8];
1488
1.51M
    inp_2qi = ptr_x[9];
1489
1.51M
    inp_3qr = ptr_x[12];
1490
1.51M
    inp_3qi = ptr_x[13];
1491
1492
1.51M
    mul_0qr = inp_0qr;
1493
1.51M
    mul_0qi = inp_0qi;
1494
1495
1.51M
    mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1496
1.51M
    mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1497
1498
1.51M
    mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1499
1.51M
    mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1500
1501
1.51M
    mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1502
1.51M
    mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1503
1504
1.51M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1505
1.51M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1506
1.51M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1507
1.51M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1508
1509
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1510
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1511
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1512
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1513
1514
1.51M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1515
1.51M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1516
1.51M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1517
1.51M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1518
1.51M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1519
1.51M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1520
1.51M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1521
1.51M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1522
1523
1.51M
    ptr_x = ptr_in;
1524
1.51M
    ptr_y = &interm_y[32];
1525
1526
    /* Butter fly summation in 2 steps */
1527
1.51M
    inp_0qr = ptr_x[2];
1528
1.51M
    inp_0qi = ptr_x[3];
1529
1.51M
    inp_1qr = ptr_x[6];
1530
1.51M
    inp_1qi = ptr_x[7];
1531
1.51M
    inp_2qr = ptr_x[10];
1532
1.51M
    inp_2qi = ptr_x[11];
1533
1.51M
    inp_3qr = ptr_x[14];
1534
1.51M
    inp_3qi = ptr_x[15];
1535
1536
1.51M
    mul_0qr = inp_0qr;
1537
1.51M
    mul_0qi = inp_0qi;
1538
1.51M
    mul_1qr = inp_1qr;
1539
1.51M
    mul_1qi = inp_1qi;
1540
1.51M
    mul_2qr = inp_2qr;
1541
1.51M
    mul_2qi = inp_2qi;
1542
1.51M
    mul_3qr = inp_3qr;
1543
1.51M
    mul_3qi = inp_3qi;
1544
1545
1.51M
    sum_0qr = mul_0qr + mul_2qr;
1546
1.51M
    sum_0qi = mul_0qi + mul_2qi;
1547
1.51M
    sum_1qr = mul_0qr - mul_2qr;
1548
1.51M
    sum_1qi = mul_0qi - mul_2qi;
1549
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1550
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1551
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1552
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1553
1554
1.51M
    ptr_y[0] = sum_0qr + sum_2qr;
1555
1.51M
    ptr_y[1] = sum_0qi + sum_2qi;
1556
1.51M
    ptr_y[h2] = sum_1qr + sum_3qi;
1557
1.51M
    ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1558
1.51M
    ptr_y[l1] = sum_0qr - sum_2qr;
1559
1.51M
    ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1560
1.51M
    ptr_y[l2] = sum_1qr - sum_3qi;
1561
1.51M
    ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1562
1563
1.51M
    ptr_y += 2;
1564
1.51M
    ptr_x += 16;
1565
1566
    /* 2nd butter fly */
1567
1568
1.51M
    inp_0qr = ptr_x[2];
1569
1.51M
    inp_0qi = ptr_x[3];
1570
1.51M
    inp_1qr = ptr_x[6];
1571
1.51M
    inp_1qi = ptr_x[7];
1572
1.51M
    inp_2qr = ptr_x[10];
1573
1.51M
    inp_2qi = ptr_x[11];
1574
1.51M
    inp_3qr = ptr_x[14];
1575
1.51M
    inp_3qi = ptr_x[15];
1576
1577
1.51M
    mul_0qr = inp_0qr;
1578
1.51M
    mul_0qi = inp_0qi;
1579
1580
1.51M
    mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1581
1.51M
    mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1582
1583
1.51M
    mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1584
1.51M
    mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1585
1586
1.51M
    mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1587
1.51M
    mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1588
1589
1.51M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1590
1.51M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1591
1.51M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1592
1.51M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1593
1594
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1595
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1596
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1597
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1598
1599
1.51M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1600
1.51M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1601
1.51M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1602
1.51M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1603
1.51M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1604
1.51M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1605
1.51M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1606
1.51M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1607
1608
1.51M
    ptr_y += 2;
1609
1.51M
    ptr_x += 16;
1610
1611
    /* 3rd butter fly */
1612
1613
1.51M
    inp_0qr = ptr_x[2];
1614
1.51M
    inp_0qi = ptr_x[3];
1615
1.51M
    inp_1qr = ptr_x[6];
1616
1.51M
    inp_1qi = ptr_x[7];
1617
1.51M
    inp_2qr = ptr_x[10];
1618
1.51M
    inp_2qi = ptr_x[11];
1619
1.51M
    inp_3qr = ptr_x[14];
1620
1.51M
    inp_3qi = ptr_x[15];
1621
1622
1.51M
    mul_0qr = inp_0qr;
1623
1.51M
    mul_0qi = inp_0qi;
1624
1625
1.51M
    mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1626
1.51M
    mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1627
1628
1.51M
    mul_2qr = inp_2qi;
1629
1.51M
    mul_2qi = inp_2qr;
1630
1631
1.51M
    mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1632
1.51M
    mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1633
1634
1.51M
    sum_0qr = mul_0qr + mul_2qr;
1635
1.51M
    sum_0qi = mul_0qi - mul_2qi;
1636
1.51M
    sum_1qr = mul_0qr - mul_2qr;
1637
1.51M
    sum_1qi = mul_0qi + mul_2qi;
1638
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1639
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1640
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1641
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1642
1643
1.51M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1644
1.51M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1645
1.51M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1646
1.51M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1647
1.51M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1648
1.51M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1649
1.51M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1650
1.51M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1651
1652
1.51M
    ptr_y += 2;
1653
1.51M
    ptr_x += 16;
1654
1655
    /* 4th butter fly */
1656
1657
1.51M
    inp_0qr = ptr_x[2];
1658
1.51M
    inp_0qi = ptr_x[3];
1659
1.51M
    inp_1qr = ptr_x[6];
1660
1.51M
    inp_1qi = ptr_x[7];
1661
1.51M
    inp_2qr = ptr_x[10];
1662
1.51M
    inp_2qi = ptr_x[11];
1663
1.51M
    inp_3qr = ptr_x[14];
1664
1.51M
    inp_3qi = ptr_x[15];
1665
1666
1.51M
    mul_0qr = inp_0qr;
1667
1.51M
    mul_0qi = inp_0qi;
1668
1669
1.51M
    mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1670
1.51M
    mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1671
1672
1.51M
    mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1673
1.51M
    mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1674
1675
1.51M
    mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1676
1.51M
    mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1677
1678
1.51M
    sum_0qr = mul_0qr + (mul_2qr * 2);
1679
1.51M
    sum_0qi = mul_0qi + (mul_2qi * 2);
1680
1.51M
    sum_1qr = mul_0qr - (mul_2qr * 2);
1681
1.51M
    sum_1qi = mul_0qi - (mul_2qi * 2);
1682
1683
1.51M
    sum_2qr = mul_1qr + mul_3qr;
1684
1.51M
    sum_2qi = mul_1qi + mul_3qi;
1685
1.51M
    sum_3qr = mul_1qr - mul_3qr;
1686
1.51M
    sum_3qi = mul_1qi - mul_3qi;
1687
1688
1.51M
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1689
1.51M
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1690
1.51M
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1691
1.51M
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1692
1.51M
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1693
1.51M
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1694
1.51M
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1695
1.51M
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1696
1.51M
  }
1697
1698
  // Last stage of 32 point FFT
1699
1.51M
  {
1700
1.51M
    ptr_y = ptr_out;
1701
1.51M
    ptr_y[idx1] = interm_y[0] + interm_y[32];
1702
1.51M
    ptr_y[idx1 + 1] = interm_y[1] + interm_y[33];
1703
1.51M
    ptr_y[idx2] = interm_y[0] - interm_y[32];
1704
1.51M
    ptr_y[idx2 + 1] = interm_y[1] - interm_y[33];
1705
1.51M
    idx1 += FFT15X2;
1706
1.51M
    idx2 += FFT15X2;
1707
24.1M
    for (i = 1; i < FFT16; i++) {
1708
22.6M
      mul_r = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]) -
1709
22.6M
              (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]);
1710
22.6M
      mul_i = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]) +
1711
22.6M
              (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]);
1712
1713
22.6M
      mul_r = mul_r / 2;
1714
22.6M
      mul_i = mul_i / 2;
1715
22.6M
      ptr_y[idx1] = interm_y[2 * i + 0] + (mul_r * 2);
1716
22.6M
      ptr_y[idx1 + 1] = interm_y[2 * i + 1] + (mul_i * 2);
1717
22.6M
      ptr_y[idx2] = interm_y[2 * i + 0] - (mul_r * 2);
1718
22.6M
      ptr_y[idx2 + 1] = interm_y[2 * i + 1] - (mul_i * 2);
1719
22.6M
      idx1 += FFT15X2;
1720
22.6M
      idx2 += FFT15X2;
1721
22.6M
    }
1722
1.51M
  }
1723
1.51M
}
1724
1725
static VOID ixheaace_dec_rearrange_short_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out, WORD32 N,
1726
6.42M
                                             const WORD16 *ptr_re_arr_tab) {
1727
6.42M
  WORD32 n, i = 0;
1728
1729
160M
  for (n = 0; n < N; n++) {
1730
154M
    WORD32 idx = ptr_re_arr_tab[n] << 1;
1731
154M
    ptr_out[i++] = ptr_in[idx];
1732
154M
    ptr_out[i++] = ptr_in[idx + 1];
1733
154M
  }
1734
6.42M
}
1735
1736
5.13M
static VOID ixheaace_fft_5_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1737
5.13M
  FLOAT32 C51 = 0.951056516f;
1738
5.13M
  FLOAT32 C52 = -0.769420885f;
1739
5.13M
  FLOAT32 C53 = -0.363271264f;
1740
5.13M
  FLOAT32 C54 = 0.559016994f;
1741
5.13M
  FLOAT32 C55 = -0.625f;
1742
1743
5.13M
  FLOAT32 r1, r2, r3, r4;
1744
5.13M
  FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1745
1746
5.13M
  r1 = (ptr_in[2] + ptr_in[8]);
1747
5.13M
  r4 = (ptr_in[2] - ptr_in[8]);
1748
5.13M
  r3 = (ptr_in[4] + ptr_in[6]);
1749
5.13M
  r2 = (ptr_in[4] - ptr_in[6]);
1750
1751
5.13M
  t = ((r1 - r3) * C54);
1752
5.13M
  r1 = (r1 + r3);
1753
1754
5.13M
  temp1 = (ptr_in[0] + r1);
1755
5.13M
  r1 = (temp1 + (((r1 * C55)) * 2));
1756
1757
5.13M
  r3 = (r1 - t);
1758
5.13M
  r1 = (r1 + t);
1759
1760
5.13M
  t = ((r4 + r2) * C51);
1761
5.13M
  r4 = (t + ((r4 * C52) * 2));
1762
5.13M
  r2 = (t + (r2 * C53));
1763
1764
5.13M
  s1 = (ptr_in[3] + ptr_in[9]);
1765
5.13M
  s4 = (ptr_in[3] - ptr_in[9]);
1766
5.13M
  s3 = (ptr_in[5] + ptr_in[7]);
1767
5.13M
  s2 = (ptr_in[5] - ptr_in[7]);
1768
1769
5.13M
  t = ((s1 - s3) * C54);
1770
5.13M
  s1 = (s1 + s3);
1771
1772
5.13M
  temp2 = (ptr_in[1] + s1);
1773
1774
5.13M
  s1 = (temp2 + (((s1 * C55)) * 2));
1775
1776
5.13M
  s3 = (s1 - t);
1777
5.13M
  s1 = (s1 + t);
1778
1779
5.13M
  t = ((s4 + s2) * C51);
1780
5.13M
  s4 = (t + (((s4 * C52)) * 2));
1781
5.13M
  s2 = (t + ((s2 * C53)));
1782
1783
5.13M
  ptr_out[0] = temp1;
1784
5.13M
  ptr_out[1] = temp2;
1785
5.13M
  ptr_out[2] = (r1 + s2);
1786
5.13M
  ptr_out[3] = (s1 - r2);
1787
5.13M
  ptr_out[4] = (r3 - s4);
1788
5.13M
  ptr_out[5] = (s3 + r4);
1789
5.13M
  ptr_out[6] = (r3 + s4);
1790
5.13M
  ptr_out[7] = (s3 - r4);
1791
5.13M
  ptr_out[8] = (r1 - s2);
1792
5.13M
  ptr_out[9] = (s1 + r2);
1793
5.13M
}
1794
1795
8.56M
static VOID ixheaace_fft_3_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1796
8.56M
  FLOAT32 add_r, sub_r;
1797
8.56M
  FLOAT32 add_i, sub_i;
1798
8.56M
  FLOAT32 x_01_r, x_01_i, temp;
1799
1800
8.56M
  FLOAT32 p1, p2, p3, p4;
1801
8.56M
  FLOAT32 sinmu = 0.866025404f;
1802
1803
8.56M
  x_01_r = (ptr_in[0] + ptr_in[2]);
1804
8.56M
  x_01_i = (ptr_in[1] + ptr_in[3]);
1805
1806
8.56M
  add_r = (ptr_in[2] + ptr_in[4]);
1807
8.56M
  add_i = (ptr_in[3] + ptr_in[5]);
1808
1809
8.56M
  sub_r = (ptr_in[2] - ptr_in[4]);
1810
8.56M
  sub_i = (ptr_in[3] - ptr_in[5]);
1811
1812
8.56M
  p1 = add_r / 2;
1813
8.56M
  p2 = (sub_i * sinmu);
1814
8.56M
  p3 = (sub_r * sinmu);
1815
8.56M
  p4 = add_i / 2;
1816
1817
8.56M
  temp = (ptr_in[0] - p1);
1818
1819
8.56M
  ptr_out[0] = (x_01_r + ptr_in[4]);
1820
8.56M
  ptr_out[1] = (x_01_i + ptr_in[5]);
1821
8.56M
  ptr_out[2] = (temp + p2);
1822
8.56M
  ptr_out[3] = ((ptr_in[1] - p3) - p4);
1823
8.56M
  ptr_out[4] = (temp - p2);
1824
8.56M
  ptr_out[5] = ((ptr_in[1] + p3) - p4);
1825
8.56M
}
1826
1827
static VOID ixheaace_pre_twiddle_120(FLOAT32 *ptr_in, FLOAT32 *ptr_data, WORD32 n,
1828
428k
                                     const FLOAT32 *ptr_cos_sin_tbl) {
1829
428k
  WORD npoints_4, i;
1830
428k
  FLOAT32 tempr, tempi, temp;
1831
428k
  FLOAT32 c, c1, s, s1;
1832
428k
  FLOAT32 *ptr_in1, *ptr_in2;
1833
428k
  FLOAT32 *ptr_x = ptr_in + (n - 1);
1834
1835
428k
  npoints_4 = n >> 2;
1836
1837
428k
  ptr_in1 = ptr_data;
1838
428k
  ptr_in2 = ptr_data + n - 1;
1839
1840
13.2M
  for (i = 0; i < npoints_4; i++) {
1841
12.8M
    c = *ptr_cos_sin_tbl++;
1842
12.8M
    s = *ptr_cos_sin_tbl++;
1843
1844
12.8M
    tempr = *ptr_in1++;
1845
12.8M
    tempi = *ptr_in2--;
1846
1847
12.8M
    temp = -((tempr * c) + (tempi * s));
1848
12.8M
    *ptr_in++ = temp;
1849
1850
12.8M
    temp = -((tempi * c) - (tempr * s));
1851
12.8M
    *ptr_in++ = temp;
1852
1853
12.8M
    c1 = *ptr_cos_sin_tbl++;
1854
12.8M
    s1 = *ptr_cos_sin_tbl++;
1855
1856
12.8M
    tempi = *ptr_in1++;
1857
12.8M
    tempr = *ptr_in2--;
1858
1859
12.8M
    temp = -((tempi * c1) - (tempr * s1));
1860
12.8M
    *ptr_x-- = temp;
1861
1862
12.8M
    temp = -((tempr * c1) + (tempi * s1));
1863
12.8M
    *ptr_x-- = temp;
1864
12.8M
  }
1865
428k
}
1866
1867
static VOID ixheaace_post_twiddle_120(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
1868
428k
                                      const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
1869
428k
  WORD i;
1870
428k
  FLOAT32 c, c1, s, s1;
1871
428k
  FLOAT32 tempr, tempi, temp;
1872
428k
  FLOAT32 *ptr_in2 = ptr_x + (m - 1);
1873
428k
  FLOAT32 *ptr_in1 = ptr_x;
1874
428k
  FLOAT32 *ptr_x1 = ptr_out;
1875
428k
  FLOAT32 *ptr_x2 = ptr_out + (m - 1);
1876
1877
13.2M
  for (i = 0; i < m; i += 4) {
1878
12.8M
    c = *ptr_cos_sin_tbl++;
1879
12.8M
    s = *ptr_cos_sin_tbl++;
1880
12.8M
    c1 = *ptr_cos_sin_tbl++;
1881
12.8M
    s1 = *ptr_cos_sin_tbl++;
1882
1883
12.8M
    tempr = *ptr_in1++;
1884
12.8M
    tempi = *ptr_in1++;
1885
1886
12.8M
    temp = -((tempr * s) - (tempi * c));
1887
12.8M
    *ptr_x2-- = temp;
1888
1889
12.8M
    temp = -((tempr * c) + (tempi * s));
1890
12.8M
    *ptr_x1++ = temp;
1891
1892
12.8M
    tempi = *ptr_in2--;
1893
12.8M
    tempr = *ptr_in2--;
1894
1895
12.8M
    temp = -((tempr * s1) - (tempi * c1));
1896
12.8M
    *ptr_x1++ = temp;
1897
1898
12.8M
    temp = -((tempr * c1) + (tempi * s1));
1899
12.8M
    *ptr_x2-- = temp;
1900
12.8M
  }
1901
428k
}
1902
1903
1.71M
static VOID ixheaace_fft_960_15(FLOAT32 *ptr_in_flt, FLOAT32 *ptr_out_flt) {
1904
1.71M
  WORD32 i;
1905
1.71M
  FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1906
1.71M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_5);
1907
1908
1.71M
  ptr_buf1_flt = ptr_out_flt;
1909
1.71M
  ptr_buf2_flt = ptr_in_flt;
1910
6.84M
  for (i = 0; i < FFT3; i++) {
1911
5.13M
    ixheaace_fft_5_flt(ptr_buf1_flt, ptr_buf2_flt);
1912
1913
5.13M
    ptr_buf1_flt += (FFT5 * 2);
1914
5.13M
    ptr_buf2_flt += (FFT5 * 2);
1915
5.13M
  }
1916
1917
1.71M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_3);
1918
1.71M
  ptr_buf1_flt = ptr_out_flt;
1919
1.71M
  ptr_buf2_flt = ptr_in_flt;
1920
10.2M
  for (i = 0; i < FFT5; i++) {
1921
8.56M
    ixheaace_fft_3_flt(ptr_buf1_flt, ptr_buf2_flt);
1922
1923
8.56M
    ptr_buf1_flt += (FFT3 * 2);
1924
8.56M
    ptr_buf2_flt += (FFT3 * 2);
1925
8.56M
  }
1926
1927
1.71M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_sml);
1928
1.71M
}
1929
1930
428k
static VOID ixheaace_fft_120(WORD32 npoints, FLOAT32 *ptr_x_flt, FLOAT32 *ptr_y_flt) {
1931
428k
  WORD32 i;
1932
428k
  FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1933
428k
  FLOAT32 *ptr_in_flt, *ptr_out_flt;
1934
1935
428k
  ptr_in_flt = ptr_x_flt;
1936
428k
  ptr_out_flt = ptr_y_flt;
1937
428k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_4);
1938
1939
428k
  ptr_buf1_flt = ptr_out_flt;
1940
428k
  ptr_buf2_flt = ptr_in_flt;
1941
1942
6.84M
  for (i = 0; i < FFT15; i++) {
1943
6.42M
    {
1944
6.42M
      FLOAT32 x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7;
1945
6.42M
      FLOAT32 *y0, *y1, *y2, *y3;
1946
6.42M
      FLOAT32 *x0;
1947
6.42M
      FLOAT32 xh0_0, xh1_0, xh0_1, xh1_1, xl0_0, xl1_0, xl0_1, xl1_1;
1948
6.42M
      WORD32 h2;
1949
6.42M
      FLOAT32 n00, n01, n10, n11, n20, n21, n30, n31;
1950
1951
6.42M
      ptr_x_flt = ptr_buf1_flt;
1952
6.42M
      ptr_y_flt = ptr_buf2_flt;
1953
6.42M
      npoints = 4;
1954
6.42M
      h2 = 0;
1955
1956
6.42M
      y0 = ptr_y_flt;
1957
6.42M
      y2 = ptr_y_flt + (WORD32)npoints;
1958
6.42M
      x0 = ptr_x_flt;
1959
6.42M
      y1 = y0 + (WORD32)(npoints >> 1);
1960
6.42M
      y3 = y2 + (WORD32)(npoints >> 1);
1961
1962
6.42M
      x_0 = x0[0];
1963
6.42M
      x_1 = x0[1];
1964
6.42M
      x_2 = x0[2];
1965
6.42M
      x_3 = x0[3];
1966
6.42M
      x_4 = x0[4];
1967
6.42M
      x_5 = x0[5];
1968
6.42M
      x_6 = x0[6];
1969
6.42M
      x_7 = x0[7];
1970
1971
6.42M
      xh0_0 = x_0 + x_4;
1972
6.42M
      xh1_0 = x_1 + x_5;
1973
6.42M
      xl0_0 = x_0 - x_4;
1974
6.42M
      xl1_0 = x_1 - x_5;
1975
6.42M
      xh0_1 = x_2 + x_6;
1976
6.42M
      xh1_1 = x_3 + x_7;
1977
6.42M
      xl0_1 = x_2 - x_6;
1978
6.42M
      xl1_1 = x_3 - x_7;
1979
1980
6.42M
      n00 = xh0_0 + xh0_1;
1981
6.42M
      n01 = xh1_0 + xh1_1;
1982
6.42M
      n10 = xl0_0 + xl1_1;
1983
6.42M
      n11 = xl1_0 - xl0_1;
1984
6.42M
      n20 = xh0_0 - xh0_1;
1985
6.42M
      n21 = xh1_0 - xh1_1;
1986
6.42M
      n30 = xl0_0 - xl1_1;
1987
6.42M
      n31 = xl1_0 + xl0_1;
1988
1989
6.42M
      y0[2 * h2] = n00;
1990
6.42M
      y0[2 * h2 + 1] = n01;
1991
6.42M
      y1[2 * h2] = n10;
1992
6.42M
      y1[2 * h2 + 1] = n11;
1993
6.42M
      y2[2 * h2] = n20;
1994
6.42M
      y2[2 * h2 + 1] = n21;
1995
6.42M
      y3[2 * h2] = n30;
1996
6.42M
      y3[2 * h2 + 1] = n31;
1997
6.42M
    }
1998
1999
6.42M
    ptr_buf1_flt += (FFT4 * 2);
2000
6.42M
    ptr_buf2_flt += (FFT4 * 2);
2001
6.42M
  }
2002
2003
428k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_15_4);
2004
2005
428k
  ptr_buf1_flt = ptr_out_flt;
2006
428k
  ptr_buf2_flt = ptr_in_flt;
2007
2.14M
  for (i = 0; i < FFT4; i++) {
2008
1.71M
    ixheaace_fft_960_15(ptr_buf1_flt, ptr_buf2_flt);
2009
1.71M
    ptr_buf1_flt += (FFT15 * 2);
2010
1.71M
    ptr_buf2_flt += (FFT15 * 2);
2011
1.71M
  }
2012
2013
428k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_120);
2014
428k
}
2015
2016
100k
static VOID ixheaace_cfft_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op) {
2017
100k
  WORD32 i;
2018
100k
  FLOAT32 *ptr_buf1, *ptr_buf2;
2019
100k
  FLOAT32 fft5_out[FFT15X2] = {0};
2020
2021
100k
  ptr_buf1 = ptr_inp;
2022
100k
  ptr_buf2 = ptr_op;
2023
2024
3.32M
  for (i = 0; i < FFT32; i++) {
2025
3.22M
    ixheaace_cfft_15_480(ptr_buf1, ptr_buf2, &fft5_out[0]);
2026
3.22M
    ptr_buf1 += 2;
2027
3.22M
    ptr_buf2 += 2;
2028
3.22M
  }
2029
2030
100k
  ixheaace_cfft_twiddle_mult(ptr_op, ptr_inp, FFT15, FFT32, ixheaace_fft_mix_rad_twid_tbl_480,
2031
100k
                             ixheaace_fft_mix_rad_twid_h_tbl_480);
2032
2033
100k
  ptr_buf1 = ptr_inp;
2034
100k
  ptr_buf2 = ptr_op;
2035
2036
1.61M
  for (i = 0; i < FFT15; i++) {
2037
1.51M
    ixheaace_cfft_32_480(ptr_buf1, ptr_buf2);
2038
1.51M
    ptr_buf1 += (FFT32X2);
2039
1.51M
    ptr_buf2 += 2;
2040
1.51M
  }
2041
100k
}
2042
2043
static VOID ixheaace_pre_twiddle_960(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
2044
100k
                                     const FLOAT32 *ptr_cos_sin_tbl) {
2045
100k
  WORD npoints_4, i;
2046
100k
  FLOAT32 tempr, tempi, temp;
2047
100k
  FLOAT32 c, c1, s, s1;
2048
100k
  FLOAT32 *ptr_in_1, *ptr_in_2;
2049
100k
  FLOAT32 *ptr_x_1 = ptr_x + (n - 1);
2050
2051
100k
  npoints_4 = n >> 2;
2052
2053
100k
  ptr_in_1 = ptr_data;
2054
100k
  ptr_in_2 = ptr_data + n - 1;
2055
2056
24.2M
  for (i = 0; i < npoints_4; i++) {
2057
24.1M
    c = *ptr_cos_sin_tbl++;
2058
24.1M
    s = *ptr_cos_sin_tbl++;
2059
2060
24.1M
    tempr = *ptr_in_1++;
2061
24.1M
    tempi = *ptr_in_2--;
2062
2063
24.1M
    temp = -((tempr * c) + (tempi * s));
2064
24.1M
    *ptr_x++ = temp;
2065
2066
24.1M
    temp = -((tempi * c) - (tempr * s));
2067
24.1M
    *ptr_x++ = temp;
2068
2069
24.1M
    c1 = *ptr_cos_sin_tbl++;
2070
24.1M
    s1 = *ptr_cos_sin_tbl++;
2071
2072
24.1M
    tempi = *ptr_in_1++;
2073
24.1M
    tempr = *ptr_in_2--;
2074
2075
24.1M
    temp = -((tempi * c1) - (tempr * s1));
2076
24.1M
    *ptr_x_1-- = temp;
2077
2078
24.1M
    temp = -((tempr * c1) + (tempi * s1));
2079
24.1M
    *ptr_x_1-- = temp;
2080
24.1M
  }
2081
100k
}
2082
2083
static VOID ixheaace_post_twiddle_960(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
2084
100k
                                      const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
2085
100k
  WORD i;
2086
100k
  FLOAT32 c, c1, s, s1;
2087
100k
  FLOAT32 tempr, tempi, temp;
2088
100k
  FLOAT32 *ptr_in2 = ptr_x + (m - 1);
2089
100k
  FLOAT32 *ptr_in1 = ptr_x;
2090
100k
  FLOAT32 *ptr_x1 = ptr_out;
2091
100k
  FLOAT32 *ptr_x2 = ptr_out + (m - 1);
2092
2093
24.2M
  for (i = 0; i < m; i += 4) {
2094
24.1M
    c = *ptr_cos_sin_tbl++;
2095
24.1M
    s = *ptr_cos_sin_tbl++;
2096
24.1M
    c1 = *ptr_cos_sin_tbl++;
2097
24.1M
    s1 = *ptr_cos_sin_tbl++;
2098
2099
24.1M
    tempr = *ptr_in1++;
2100
24.1M
    tempi = *ptr_in1++;
2101
2102
24.1M
    temp = -((tempr * s) - (tempi * c));
2103
24.1M
    *ptr_x2-- = temp;
2104
2105
24.1M
    temp = -((tempr * c) + (tempi * s));
2106
24.1M
    *ptr_x1++ = temp;
2107
2108
24.1M
    tempi = *ptr_in2--;
2109
24.1M
    tempr = *ptr_in2--;
2110
2111
24.1M
    temp = -((tempr * s1) - (tempi * c1));
2112
24.1M
    *ptr_x1++ = temp;
2113
2114
24.1M
    temp = -((tempr * c1) + (tempi * s1));
2115
24.1M
    *ptr_x2-- = temp;
2116
24.1M
  }
2117
100k
}
2118
2119
100k
static VOID ixheaace_mdct_960(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2120
100k
  FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2121
100k
  FLOAT32 const_mult_fac = 3.142857143f;
2122
100k
  FLOAT32 *ptr_data = ptr_input_flt;
2123
100k
  WORD32 k;
2124
2125
100k
  memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * FRAME_LEN_960);
2126
100k
  ixheaace_pre_twiddle_960(ptr_input_flt, ptr_scratch_flt, FRAME_LEN_960, cos_sin_table_flt);
2127
2128
100k
  ixheaace_cfft_480(ptr_input_flt, ptr_scratch_flt);
2129
2130
100k
  ixheaace_post_twiddle_960(ptr_input_flt, ptr_scratch_flt, cos_sin_table_flt, FRAME_LEN_960);
2131
2132
48.4M
  for (k = FRAME_LEN_960 - 1; k >= 0; k -= 2) {
2133
48.3M
    *ptr_data = (*ptr_data * const_mult_fac);
2134
48.3M
    ptr_data++;
2135
48.3M
    *ptr_data = (*ptr_data * const_mult_fac);
2136
48.3M
    ptr_data++;
2137
48.3M
  }
2138
100k
}
2139
2140
428k
static VOID ixheaace_mdct_120(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2141
428k
  WORD32 n, k;
2142
428k
  WORD32 n_by_2;
2143
428k
  FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2144
428k
  FLOAT32 const_mltfac = 3.142857143f;
2145
428k
  FLOAT32 *ptr_data = ptr_input_flt;
2146
428k
  n = 120;
2147
428k
  n_by_2 = n >> 1;
2148
428k
  memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * n);
2149
2150
428k
  ixheaace_pre_twiddle_120(ptr_input_flt, ptr_scratch_flt, n, ixheaace_cosine_array_240);
2151
2152
428k
  ixheaace_fft_120(n_by_2, ptr_input_flt, ptr_scratch_flt);
2153
2154
428k
  ixheaace_post_twiddle_120(ptr_input_flt, ptr_scratch_flt, ixheaace_cosine_array_240, n);
2155
2156
26.1M
  for (k = n - 1; k >= 0; k -= 2) {
2157
25.6M
    *ptr_data = (*ptr_data * const_mltfac);
2158
25.6M
    ptr_data++;
2159
25.6M
    *ptr_data = (*ptr_data * const_mltfac);
2160
25.6M
    ptr_data++;
2161
25.6M
  }
2162
428k
}
2163
2164
static VOID ixheaace_mdct(FLOAT32 *ptr_dct_data, const FLOAT32 *ptr_trig_data,
2165
                          const FLOAT32 *ptr_sine_window, WORD32 n, WORD32 ld_n,
2166
518k
                          WORD8 *ptr_scratch) {
2167
518k
  ixheaace_pre_mdct(ptr_dct_data, n, ptr_sine_window);
2168
2169
518k
  ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
2170
518k
  ia_enhaacplus_enc_complex_fft(ptr_dct_data, n / 2, pstr_scratch);
2171
2172
518k
  ixheaace_post_mdct(ptr_dct_data, n, ptr_trig_data,
2173
518k
                     1 << (LD_FFT_TWIDDLE_TABLE_SIZE - (ld_n - 1)), FFT_TWIDDLE_TABLE_SIZE);
2174
518k
}
2175
2176
static VOID ixheaace_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
2177
                                             const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2178
342k
                                             WORD32 frame_len) {
2179
342k
  WORD32 i;
2180
342k
  WORD32 blk_switch_offset = frame_len;
2181
342k
  switch (frame_len) {
2182
163k
    case FRAME_LEN_1024:
2183
163k
      blk_switch_offset = BLK_SWITCH_OFFSET_LC_128;
2184
163k
      memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2185
163k
              (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2186
163k
      break;
2187
2188
154k
    case FRAME_LEN_960:
2189
154k
      blk_switch_offset = BLK_SWITCH_OFFSET_LC_120;
2190
154k
      memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2191
154k
              (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2192
154k
      break;
2193
2194
24.7k
    case FRAME_LEN_512:
2195
24.7k
    case FRAME_LEN_480:
2196
24.7k
      blk_switch_offset = frame_len;
2197
24.7k
      break;
2198
342k
  }
2199
2200
328M
  for (i = 0; i < frame_len; i++) {
2201
328M
    ptr_mdct_delay_buffer[blk_switch_offset - frame_len + i] = ptr_time_signal[i * ch_increment];
2202
328M
  }
2203
342k
}
2204
2205
VOID ixheaace_transform_real_lc_ld(FLOAT32 *ptr_mdct_delay_buffer, const FLOAT32 *ptr_time_signal,
2206
                                   WORD32 ch_increment, FLOAT32 *ptr_real_out, WORD32 block_type,
2207
342k
                                   WORD32 frame_len, WORD8 *ptr_scratch) {
2208
342k
  WORD32 i, w;
2209
342k
  FLOAT32 ws1, ws2;
2210
342k
  FLOAT32 *ptr_dct_in;
2211
342k
  WORD32 frame_len_short = FRAME_LEN_SHORT_128;
2212
342k
  WORD32 ls_trans = LS_TRANS_128;
2213
342k
  WORD32 trans_offset = TRANSFORM_OFFSET_SHORT_128;
2214
342k
  const FLOAT32 *ptr_window;
2215
342k
  if (frame_len == FRAME_LEN_960) {
2216
154k
    ls_trans = LS_TRANS_120;
2217
154k
    trans_offset = TRANSFORM_OFFSET_SHORT_120;
2218
154k
    frame_len_short = FRAME_LEN_SHORT_120;
2219
154k
  }
2220
342k
  switch (block_type) {
2221
205k
    case LONG_WINDOW:
2222
205k
      ptr_dct_in = ptr_real_out;
2223
205k
      ptr_window = &long_window_KBD[0];
2224
205k
      switch (frame_len) {
2225
99.6k
        case FRAME_LEN_1024:
2226
99.6k
          ptr_window = &long_window_KBD[0];
2227
99.6k
          break;
2228
2229
80.7k
        case FRAME_LEN_960:
2230
80.7k
          ptr_window = &long_window_sine_960[0];
2231
80.7k
          break;
2232
2233
24.7k
        case FRAME_LEN_512:
2234
24.7k
          ptr_window = &long_window_sine_ld[0];
2235
24.7k
          break;
2236
2237
0
        case FRAME_LEN_480:
2238
0
          ptr_window = &long_window_sine_ld_480[0];
2239
0
          break;
2240
205k
      }
2241
96.2M
      for (i = 0; i < frame_len / 2; i++) {
2242
96.0M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2243
2244
96.0M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2245
2246
96.0M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2247
96.0M
      }
2248
2249
205k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2250
205k
                                       frame_len);
2251
2252
96.2M
      for (i = 0; i < frame_len / 2; i++) {
2253
96.0M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2254
2255
96.0M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2256
2257
96.0M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2258
96.0M
      }
2259
205k
      switch (frame_len) {
2260
99.6k
        case FRAME_LEN_1024:
2261
99.6k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10,
2262
99.6k
                        ptr_scratch);
2263
99.6k
          break;
2264
2265
80.7k
        case FRAME_LEN_960:
2266
80.7k
          ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2267
80.7k
          break;
2268
2269
24.7k
        case FRAME_LEN_512:
2270
24.7k
        case FRAME_LEN_480:
2271
24.7k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, ptr_window, frame_len, 9, ptr_scratch);
2272
24.7k
          break;
2273
205k
      }
2274
205k
      break;
2275
2276
205k
    case START_WINDOW:
2277
24.3k
      ptr_dct_in = ptr_real_out;
2278
24.3k
      ptr_window = &long_window_KBD[0];
2279
24.3k
      switch (frame_len) {
2280
13.8k
        case FRAME_LEN_1024:
2281
13.8k
          ptr_window = &long_window_KBD[0];
2282
13.8k
          break;
2283
2284
10.5k
        case FRAME_LEN_960:
2285
10.5k
          ptr_window = &long_window_sine_960[0];
2286
10.5k
          break;
2287
24.3k
      }
2288
12.1M
      for (i = 0; i < frame_len / 2; i++) {
2289
12.1M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2290
2291
12.1M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2292
2293
12.1M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2294
12.1M
      }
2295
2296
24.3k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2297
24.3k
                                       frame_len);
2298
2299
24.3k
      if (frame_len == FRAME_LEN_1024) {
2300
13.8k
        ptr_window = &short_window_sine[0];
2301
13.8k
      } else if (frame_len == FRAME_LEN_960) {
2302
10.5k
        ptr_window = &short_window_sine_120[0];
2303
10.5k
      }
2304
10.6M
      for (i = 0; i < ls_trans; i++) {
2305
10.6M
        ws1 = ptr_mdct_delay_buffer[i];
2306
10.6M
        ws2 = 0.0f;
2307
2308
10.6M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2309
10.6M
      }
2310
2311
1.54M
      for (i = 0; i < frame_len_short / 2; i++) {
2312
1.51M
        ws1 = ptr_mdct_delay_buffer[i + ls_trans] * ptr_window[frame_len_short - i - 1];
2313
2314
1.51M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1 - ls_trans)] * ptr_window[i];
2315
2316
1.51M
        ptr_dct_in[frame_len / 2 - i - 1 - ls_trans] = -(ws1 + ws2);
2317
1.51M
      }
2318
24.3k
      if (frame_len == FRAME_LEN_960) {
2319
10.5k
        ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2320
13.8k
      } else {
2321
13.8k
        ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2322
13.8k
      }
2323
2324
24.3k
      break;
2325
2326
21.8k
    case STOP_WINDOW:
2327
21.8k
      ptr_window = &long_window_KBD[0];
2328
21.8k
      ptr_dct_in = ptr_real_out;
2329
21.8k
      if (frame_len == FRAME_LEN_1024) {
2330
12.3k
        ptr_window = &short_window_sine[0];
2331
12.3k
      } else if (frame_len == FRAME_LEN_960) {
2332
9.51k
        ptr_window = &short_window_sine_120[0];
2333
9.51k
      }
2334
9.53M
      for (i = 0; i < ls_trans; i++) {
2335
9.50M
        ws1 = 0.0f;
2336
9.50M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)];
2337
9.50M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2338
9.50M
      }
2339
2340
1.38M
      for (i = 0; i < frame_len_short / 2; i++) {
2341
1.35M
        ws1 = ptr_mdct_delay_buffer[(i + ls_trans)] * ptr_window[i];
2342
2343
1.35M
        ws2 = ptr_mdct_delay_buffer[(frame_len - ls_trans - i - 1)] *
2344
1.35M
              ptr_window[frame_len_short - i - 1];
2345
2346
1.35M
        ptr_dct_in[frame_len / 2 + i + ls_trans] = ws1 - ws2;
2347
1.35M
      }
2348
2349
21.8k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2350
21.8k
                                       frame_len);
2351
2352
21.8k
      if (frame_len == FRAME_LEN_1024) {
2353
12.3k
        ptr_window = &long_window_KBD[0];
2354
12.3k
      } else if (frame_len == FRAME_LEN_960) {
2355
9.51k
        ptr_window = &long_window_sine_960[0];
2356
9.51k
      }
2357
10.8M
      for (i = 0; i < frame_len / 2; i++) {
2358
10.8M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2359
2360
10.8M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2361
2362
10.8M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2363
10.8M
      }
2364
2365
21.8k
      if (frame_len == FRAME_LEN_960) {
2366
9.51k
        ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2367
12.3k
      } else {
2368
12.3k
        ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2369
12.3k
      }
2370
2371
21.8k
      break;
2372
2373
91.5k
    case SHORT_WINDOW:
2374
91.5k
      ptr_window = &short_window_sine[0];
2375
91.5k
      if (frame_len == FRAME_LEN_1024) {
2376
38.0k
        ptr_window = &short_window_sine[0];
2377
53.5k
      } else if (frame_len == FRAME_LEN_960) {
2378
53.5k
        ptr_window = &short_window_sine_120[0];
2379
53.5k
      }
2380
824k
      for (w = 0; w < TRANS_FAC; w++) {
2381
732k
        ptr_dct_in = ptr_real_out + w * frame_len_short;
2382
2383
45.9M
        for (i = 0; i < frame_len_short / 2; i++) {
2384
45.1M
          ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + i] * ptr_window[i];
2385
2386
45.1M
          ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short - i -
2387
45.1M
                                      1] *
2388
45.1M
                ptr_window[frame_len_short - i - 1];
2389
2390
45.1M
          ptr_dct_in[frame_len_short / 2 + i] = ws1 - ws2;
2391
2392
45.1M
          ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short + i] *
2393
45.1M
                ptr_window[frame_len_short - i - 1];
2394
2395
45.1M
          ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short * 2 -
2396
45.1M
                                      i - 1] *
2397
45.1M
                ptr_window[i];
2398
2399
45.1M
          ptr_dct_in[frame_len_short / 2 - i - 1] = -(ws1 + ws2);
2400
45.1M
        }
2401
732k
        if (frame_len == FRAME_LEN_960) {
2402
428k
          ixheaace_mdct_120(ptr_dct_in, ptr_scratch);
2403
428k
        } else {
2404
304k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, short_window_sine, frame_len_short, 7,
2405
304k
                        ptr_scratch);
2406
304k
        }
2407
732k
      }
2408
2409
91.5k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2410
91.5k
                                       frame_len);
2411
91.5k
      break;
2412
342k
  }
2413
342k
}
2414
2415
VOID ia_enhaacplus_enc_transform_real_eld(FLOAT32 *ptr_mdct_delay_buffer,
2416
                                          const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2417
                                          FLOAT32 *ptr_real_out, WORD8 *ptr_shared_buffer5,
2418
63.0k
                                          WORD32 frame_len) {
2419
63.0k
  WORD32 i, loop_len;
2420
63.0k
  FLOAT32 w1, w2;
2421
63.0k
  FLOAT32 *ptr_curr_data, *ptr_prev1_data, *ptr_prev2_data, *ptr_prev3_data;
2422
63.0k
  const FLOAT32 *ptr_win0, *ptr_win1, *ptr_win2, *ptr_win3;
2423
2424
63.0k
  loop_len = frame_len / 4;
2425
2426
63.0k
  ptr_curr_data = &ptr_mdct_delay_buffer[3 * frame_len];
2427
63.0k
  ptr_prev1_data = &ptr_mdct_delay_buffer[2 * frame_len];
2428
63.0k
  ptr_prev2_data = &ptr_mdct_delay_buffer[frame_len];
2429
63.0k
  ptr_prev3_data = &ptr_mdct_delay_buffer[0];
2430
2431
63.0k
  ptr_win0 = &low_delay_window_eld[0];
2432
63.0k
  ptr_win1 = &low_delay_window_eld[frame_len];
2433
63.0k
  ptr_win2 = &low_delay_window_eld[2 * frame_len];
2434
63.0k
  ptr_win3 = &low_delay_window_eld[3 * frame_len];
2435
2436
63.0k
  memmove(&ptr_mdct_delay_buffer[0], &ptr_mdct_delay_buffer[frame_len],
2437
63.0k
          (3 * frame_len) * sizeof(ptr_mdct_delay_buffer[0]));
2438
2439
32.3M
  for (i = 0; i < frame_len; i++) {
2440
32.2M
    ptr_curr_data[i] = ptr_time_signal[i * ch_increment];
2441
32.2M
  }
2442
2443
8.13M
  for (i = 0; i < loop_len; i++) {
2444
8.06M
    w1 = ptr_prev3_data[(frame_len / 2) + loop_len + i] * ptr_win3[(frame_len / 2) - 1 - i];
2445
8.06M
    w1 += ptr_prev3_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win3[(frame_len / 2) + i];
2446
2447
8.06M
    w2 = (-ptr_prev1_data[(frame_len / 2) + loop_len + i] * ptr_win1[(frame_len / 2) - 1 - i]);
2448
8.06M
    w2 += (-ptr_prev1_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win1[(frame_len / 2) + i]);
2449
2450
8.06M
    ptr_real_out[i] = w1 + w2;
2451
8.06M
  }
2452
2453
8.13M
  for (i = 0; i < loop_len; i++) {
2454
8.06M
    w1 = (-ptr_prev2_data[(frame_len / 2) + loop_len + i] * ptr_win2[(frame_len / 2) - 1 - i]);
2455
8.06M
    w1 += ptr_prev2_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win2[(frame_len / 2) + i];
2456
2457
8.06M
    w2 = ptr_curr_data[(frame_len / 2) + loop_len + i] * ptr_win0[(frame_len / 2) - 1 - i];
2458
8.06M
    w2 += (-ptr_curr_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win0[(frame_len / 2) + i]);
2459
2460
8.06M
    ptr_real_out[frame_len - 1 - i] = w1 + w2;
2461
8.06M
  }
2462
2463
8.13M
  for (i = 0; i < loop_len; i++) {
2464
8.06M
    w1 = ptr_prev2_data[loop_len - 1 - i] * ptr_win3[i];
2465
8.06M
    w1 += ptr_prev3_data[loop_len + i] * ptr_win3[frame_len - 1 - i];
2466
2467
8.06M
    w2 = (-ptr_curr_data[loop_len - 1 - i] * ptr_win1[i]);
2468
8.06M
    w2 += (-ptr_prev1_data[loop_len + i] * ptr_win1[frame_len - 1 - i]);
2469
2470
8.06M
    ptr_real_out[(frame_len / 2) - 1 - i] = w1 + w2;
2471
8.06M
  }
2472
2473
8.13M
  for (i = 0; i < loop_len; i++) {
2474
8.06M
    w1 = -(ptr_prev1_data[loop_len - 1 - i] * ptr_win2[i]);
2475
8.06M
    w1 += ptr_prev2_data[loop_len + i] * ptr_win2[frame_len - 1 - i];
2476
2477
    /* First 128 coeffcients are zeros in the window table so they are not used in the code here*/
2478
8.06M
    w2 = (-ptr_curr_data[loop_len + i] * ptr_win0[frame_len - 1 - i]);
2479
2480
8.06M
    ptr_real_out[(frame_len / 2) + i] = w1 + w2;
2481
8.06M
  }
2482
2483
63.0k
  ixheaace_mdct(ptr_real_out, fft_twiddle_tab, long_window_sine_ld, frame_len, 9,
2484
63.0k
                ptr_shared_buffer5);
2485
63.0k
}