Coverage Report

Created: 2025-11-11 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/encoder/ixheaace_fft.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2023 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
21
#include <string.h>
22
23
#include "ixheaac_type_def.h"
24
#include "ixheaac_constants.h"
25
#include "ixheaace_psy_const.h"
26
#include "ixheaace_tns.h"
27
#include "ixheaace_tns_params.h"
28
#include "ixheaace_rom.h"
29
#include "ixheaace_common_rom.h"
30
#include "ixheaace_bitbuffer.h"
31
#include "ixheaace_aac_constants.h"
32
#include "ixheaace_fft.h"
33
#include "ixheaac_basic_ops32.h"
34
#include "ixheaac_basic_ops40.h"
35
#include "ixheaac_basic_ops.h"
36
#include "iusace_basic_ops_flt.h"
37
38
static VOID ia_enhaacplus_enc_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
39
                                                      const FLOAT32 *ptr_time_signal,
40
                                                      WORD32 ch_increment,
41
93.9k
                                                      WORD32 long_frame_len) {
42
93.9k
  WORD32 i;
43
93.9k
  FLOAT32 *ptr_mdct_buff = ptr_mdct_delay_buffer;
44
93.9k
  if (ch_increment == 2) {
45
92.8k
    const FLOAT32 *ptr_input = ptr_time_signal;
46
92.8k
    FLOAT32 temp1, temp2, temp3, temp4;
47
92.8k
    temp1 = *ptr_input++;
48
92.8k
    ptr_input++;
49
92.8k
    temp2 = *ptr_input++;
50
92.8k
    ptr_input++;
51
92.8k
    temp3 = *ptr_input++;
52
92.8k
    ptr_input++;
53
11.1M
    for (i = ((long_frame_len >> 2) - 2); i >= 0; i--) {
54
11.0M
      *ptr_mdct_buff++ = temp1;
55
11.0M
      temp4 = *ptr_input++;
56
11.0M
      ptr_input++;
57
58
11.0M
      *ptr_mdct_buff++ = temp2;
59
11.0M
      *ptr_mdct_buff++ = temp3;
60
11.0M
      *ptr_mdct_buff++ = temp4;
61
62
11.0M
      temp1 = *ptr_input++;
63
11.0M
      ptr_input++;
64
11.0M
      temp2 = *ptr_input++;
65
11.0M
      ptr_input++;
66
11.0M
      temp3 = *ptr_input++;
67
11.0M
      ptr_input++;
68
11.0M
    }
69
92.8k
    *ptr_mdct_buff++ = temp1;
70
92.8k
    temp4 = *ptr_input;
71
92.8k
    *ptr_mdct_buff++ = temp2;
72
92.8k
    *ptr_mdct_buff++ = temp3;
73
92.8k
    *ptr_mdct_buff++ = temp4;
74
92.8k
  } else {
75
262k
    for (i = 0; i < long_frame_len; i += 2) {
76
261k
      *ptr_mdct_buff++ = ptr_time_signal[i * ch_increment];
77
261k
      *ptr_mdct_buff++ = ptr_time_signal[(i + 1) * ch_increment];
78
261k
    }
79
1.08k
  }
80
93.9k
}
81
82
static VOID ia_eaacp_enc_inverse_transform_512(FLOAT32 *ptr_data, FLOAT32 *ptr_win_buf,
83
                                               const FLOAT32 *ptr_cos_sin_tbl,
84
0
                                               WORD8 *ptr_scratch) {
85
0
  WORD32 n = FRAME_LEN_512;
86
0
  WORD32 n_by_2 = n >> 1;
87
88
0
  ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
89
90
0
  ia_eaacp_enc_pre_twiddle_aac(ptr_win_buf, ptr_data, n, ptr_cos_sin_tbl);
91
92
0
  ia_enhaacplus_enc_complex_fft(ptr_win_buf, n_by_2, pstr_scratch);
93
94
0
  ia_enhaacplus_enc_post_twiddle(ptr_data, ptr_win_buf, ptr_cos_sin_tbl, n);
95
0
}
96
97
567k
static VOID ixheaace_pre_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_sine_window) {
98
567k
  WORD32 i;
99
567k
  FLOAT32 wre, wim, re1, re2, im1, im2;
100
101
55.9M
  for (i = 0; i < m / 4; i++) {
102
55.3M
    re1 = ptr_x[2 * i];
103
55.3M
    im2 = ptr_x[2 * i + 1];
104
55.3M
    re2 = ptr_x[m - 2 - 2 * i];
105
55.3M
    im1 = ptr_x[m - 1 - 2 * i];
106
107
55.3M
    wim = ptr_sine_window[i * 2];
108
55.3M
    wre = ptr_sine_window[m - 1 - 2 * i];
109
110
55.3M
    ptr_x[2 * i] = im1 * wim + re1 * wre;
111
112
55.3M
    ptr_x[2 * i + 1] = im1 * wre - re1 * wim;
113
114
55.3M
    wim = ptr_sine_window[m - 2 - 2 * i];
115
55.3M
    wre = ptr_sine_window[2 * i + 1];
116
117
55.3M
    ptr_x[m - 2 - 2 * i] = im2 * wim + re2 * wre;
118
119
55.3M
    ptr_x[m - 1 - 2 * i] = im2 * wre - re2 * wim;
120
55.3M
  }
121
567k
}
122
123
static VOID ia_enhaacplus_enc_tranform_mac4(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
124
                                            FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
125
                                            FLOAT32 *ptr_buf3, FLOAT32 *ptr_buf4, UWORD32 len,
126
187k
                                            WORD32 increment) {
127
187k
  WORD32 i;
128
129
187k
  if (increment > 0) {
130
5.73M
    for (i = len >> 2; i > 0; i--) {
131
5.63M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
132
5.63M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
133
5.63M
      *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
134
5.63M
      ptr_op++;
135
136
5.63M
      *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
137
5.63M
      *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
138
5.63M
      *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
139
5.63M
      ptr_op++;
140
141
5.63M
      *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
142
5.63M
      *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
143
5.63M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
144
5.63M
      ptr_op++;
145
146
5.63M
      *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
147
5.63M
      *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
148
5.63M
      *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
149
5.63M
      ptr_op++;
150
5.63M
      ptr_win += 16;
151
5.63M
    }
152
93.9k
  } else {
153
2.91M
    for (i = len >> 2; i > 0; i--) {
154
2.81M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
155
2.81M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
156
2.81M
      *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
157
2.81M
      ptr_op--;
158
159
2.81M
      *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
160
2.81M
      *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
161
2.81M
      *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
162
2.81M
      ptr_op--;
163
164
2.81M
      *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
165
2.81M
      *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
166
2.81M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
167
2.81M
      ptr_op--;
168
169
2.81M
      *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
170
2.81M
      *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
171
2.81M
      *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
172
2.81M
      ptr_op--;
173
2.81M
      ptr_win += 16;
174
2.81M
    }
175
93.9k
  }
176
187k
}
177
178
static VOID ia_enhaacplus_enc_tranform_mac3(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
179
                                            FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
180
93.9k
                                            FLOAT32 *ptr_buf3, UWORD32 len, WORD32 increment) {
181
93.9k
  WORD32 i;
182
183
93.9k
  if (increment > 0) {
184
0
    for (i = len >> 2; i > 0; i--) {
185
0
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
186
0
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
187
0
      ptr_op++;
188
189
0
      *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
190
0
      *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
191
0
      ptr_op++;
192
193
0
      *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
194
0
      *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
195
0
      ptr_op++;
196
197
0
      *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
198
0
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
199
0
      ptr_op++;
200
0
      ptr_win += 12;
201
0
    }
202
93.9k
  } else {
203
2.91M
    for (i = len >> 2; i > 0; i--) {
204
2.81M
      *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
205
2.81M
      *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
206
2.81M
      ptr_op--;
207
208
2.81M
      *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
209
2.81M
      *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
210
2.81M
      ptr_op--;
211
212
2.81M
      *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
213
2.81M
      *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
214
2.81M
      ptr_op--;
215
216
2.81M
      *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
217
2.81M
      *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
218
2.81M
      ptr_op--;
219
2.81M
      ptr_win += 12;
220
2.81M
    }
221
93.9k
  }
222
93.9k
}
223
224
VOID ia_enhaacplus_enc_transform_real(FLOAT32 *ptr_mdct_delay_buffer,
225
                                      const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
226
                                      FLOAT32 *ptr_real_out, ixheaace_mdct_tables *pstr_mdct_tab,
227
                                      FLOAT32 *ptr_shared_buffer1, WORD8 *ptr_shared_buffer5,
228
93.9k
                                      WORD32 long_frame_len) {
229
93.9k
  WORD32 n, n1;
230
93.9k
  FLOAT32 *ptr_windowed_buf = ptr_shared_buffer1;
231
93.9k
  const FLOAT32 *ptr_ws1;
232
93.9k
  WORD32 i, len = long_frame_len;
233
93.9k
  FLOAT32 *ptr_real_in;
234
93.9k
  FLOAT32 *ptr_data1, *ptr_data2, *ptr_data3, *ptr_data4;
235
93.9k
  FLOAT32 *ptr_op1;
236
237
93.9k
  ptr_real_in = ptr_mdct_delay_buffer;
238
239
93.9k
  n = long_frame_len << 1;
240
93.9k
  n1 = long_frame_len >> 1;
241
242
93.9k
  ptr_ws1 =
243
93.9k
      (long_frame_len == FRAME_LEN_512) ? pstr_mdct_tab->win_512_ld : pstr_mdct_tab->win_480_ld;
244
245
93.9k
  ptr_op1 = ptr_real_out;
246
93.9k
  ptr_data1 = &ptr_real_in[n1];
247
93.9k
  ptr_data2 = &ptr_real_in[n + n1];
248
93.9k
  ptr_data3 = &ptr_real_in[n1 - 1];
249
93.9k
  ptr_data4 = &ptr_real_in[n + n1 - 1];
250
251
93.9k
  ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
252
93.9k
                                  n1, 1);
253
93.9k
  ptr_ws1 += ((SIZE_T)n1 << 2);
254
255
90.2M
  for (i = 0; i < long_frame_len << 1; i++) {
256
90.1M
    ptr_mdct_delay_buffer[i] = ptr_mdct_delay_buffer[long_frame_len + i];
257
90.1M
  }
258
93.9k
  ia_enhaacplus_enc_shift_mdct_delay_buffer(&ptr_mdct_delay_buffer[2 * long_frame_len],
259
93.9k
                                            ptr_time_signal, ch_increment, long_frame_len);
260
261
93.9k
  ptr_op1 = &ptr_real_out[long_frame_len - 1];
262
93.9k
  ptr_data1 = &ptr_real_in[n + len - n1];
263
93.9k
  ptr_data2 = &ptr_real_in[len - n1];
264
93.9k
  ptr_data3 = &ptr_real_in[len - n1 - 1];
265
93.9k
  ptr_data4 = &ptr_real_in[n + len - n1 - 1];
266
267
93.9k
  ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
268
93.9k
                                  (n1 >> 1), -1);
269
93.9k
  ptr_op1 -= (n1 >> 1);
270
93.9k
  ptr_ws1 += ((SIZE_T)n1 << 1);
271
93.9k
  ptr_data2 += (n1 >> 1);
272
93.9k
  ptr_data3 -= (n1 >> 1);
273
93.9k
  ptr_data4 -= (n1 >> 1);
274
93.9k
  ia_enhaacplus_enc_tranform_mac3(ptr_op1, ptr_ws1, ptr_data2, ptr_data3, ptr_data4, (n1 >> 1),
275
93.9k
                                  -1);
276
277
93.9k
  if (long_frame_len == FRAME_LEN_480) {
278
93.9k
    ia_aac_ld_enc_mdct_480(ptr_real_out, ptr_windowed_buf, 1, pstr_mdct_tab);
279
93.9k
  } else {
280
0
    ia_eaacp_enc_inverse_transform_512(ptr_real_out, ptr_windowed_buf,
281
0
                                       pstr_mdct_tab->cosine_array_1024, ptr_shared_buffer5);
282
0
  }
283
93.9k
}
284
285
static VOID ia_eaacp_enc_pre_twiddle_compute(FLOAT32 *ptr_in1, FLOAT32 *ptr_in2, FLOAT32 *ptr_x,
286
93.9k
                                             const FLOAT32 *ptr_cos_sin, WORD n_by_4) {
287
93.9k
  WORD32 i;
288
93.9k
  FLOAT32 temp_r, temp_i;
289
93.9k
  FLOAT32 temp_r1, temp_i1;
290
93.9k
  FLOAT32 *ptr_x1 = ptr_x + (SIZE_T)((n_by_4 << 2) - 1);
291
93.9k
  FLOAT32 c, c1, s, s1;
292
293
11.3M
  for (i = 0; i < n_by_4; i++) {
294
11.2M
    c = *ptr_cos_sin++;
295
11.2M
    s = *ptr_cos_sin++;
296
11.2M
    s1 = *ptr_cos_sin++;
297
11.2M
    c1 = *ptr_cos_sin++;
298
299
11.2M
    temp_r = *ptr_in1++;
300
11.2M
    temp_i1 = *ptr_in1++;
301
11.2M
    temp_i = *ptr_in2--;
302
11.2M
    temp_r1 = *ptr_in2--;
303
11.2M
    *ptr_x = ((temp_r * c) + (temp_i * s));
304
11.2M
    ptr_x++;
305
306
11.2M
    *ptr_x = ((temp_i * c) - (temp_r * s));
307
11.2M
    ptr_x++;
308
309
11.2M
    *ptr_x1 = ((temp_i1 * c1) - (temp_r1 * s1));
310
11.2M
    ptr_x1--;
311
312
11.2M
    *ptr_x1 = ((temp_r1 * c1) + (temp_i1 * s1));
313
11.2M
    ptr_x1--;
314
11.2M
  }
315
93.9k
}
316
317
VOID ia_enhaacplus_enc_post_twiddle(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
318
93.9k
                                    const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
319
93.9k
  WORD i;
320
93.9k
  FLOAT32 c, c1, s, s1;
321
93.9k
  FLOAT32 tmp_var;
322
93.9k
  FLOAT32 tempr, tempr1, tempi, tempi1;
323
93.9k
  FLOAT32 *ptr_out1 = ptr_out + m - 1;
324
93.9k
  FLOAT32 *ptr_x1 = ptr_x + m - 1;
325
326
11.3M
  for (i = 0; i < (m >> 2); i++) {
327
11.2M
    c = *ptr_cos_sin_tbl++;
328
11.2M
    s = *ptr_cos_sin_tbl++;
329
11.2M
    s1 = *ptr_cos_sin_tbl++;
330
11.2M
    c1 = *ptr_cos_sin_tbl++;
331
11.2M
    tempr = *ptr_x++;
332
11.2M
    tempi = *ptr_x++;
333
11.2M
    tempi1 = *ptr_x1--;
334
11.2M
    tempr1 = *ptr_x1--;
335
336
11.2M
    tmp_var = ((tempr * c) + (tempi * s));
337
11.2M
    *ptr_out++ = tmp_var;
338
339
11.2M
    tmp_var = ((tempr * s) - (tempi * c));
340
11.2M
    *ptr_out1-- = tmp_var;
341
342
11.2M
    tmp_var = ((tempr1 * c1) + (tempi1 * s1));
343
11.2M
    *ptr_out1-- = tmp_var;
344
345
11.2M
    tmp_var = ((tempr1 * s1) - (tempi1 * c1));
346
11.2M
    *ptr_out++ = tmp_var;
347
11.2M
  }
348
93.9k
}
349
350
VOID ia_eaacp_enc_pre_twiddle_aac(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
351
93.9k
                                  const FLOAT32 *ptr_cos_array) {
352
93.9k
  WORD n_by_4;
353
93.9k
  FLOAT32 *ptr_in1, *ptr_in2;
354
355
93.9k
  n_by_4 = n >> 2;
356
357
93.9k
  ptr_in1 = ptr_data;
358
93.9k
  ptr_in2 = ptr_data + n - 1;
359
360
93.9k
  ia_eaacp_enc_pre_twiddle_compute(ptr_in1, ptr_in2, ptr_x, ptr_cos_array, n_by_4);
361
93.9k
}
362
363
42.9M
static PLATFORM_INLINE WORD8 ia_enhaacplus_enc_calc_norm(WORD32 a) {
364
42.9M
  WORD8 norm_val;
365
366
42.9M
  if (a == 0) {
367
0
    norm_val = 31;
368
42.9M
  } else {
369
42.9M
    if (a == (WORD32)0xffffffffL) {
370
0
      norm_val = 31;
371
42.9M
    } else {
372
42.9M
      if (a < 0) {
373
0
        a = ~a;
374
0
      }
375
1.11G
      for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
376
1.07G
        a <<= 1;
377
1.07G
      }
378
42.9M
    }
379
42.9M
  }
380
381
42.9M
  return norm_val;
382
42.9M
}
383
384
static PLATFORM_INLINE VOID ia_enhaacplus_enc_complex_3point_fft(FLOAT32 *ptr_in,
385
0
                                                                 FLOAT32 *ptr_out) {
386
0
  FLOAT32 add_r, sub_r;
387
0
  FLOAT32 add_i, sub_i;
388
0
  FLOAT32 x_01_r, x_01_i, temp;
389
0
  FLOAT32 p1, p2, p3, p4;
390
0
  FLOAT64 sin_mu = 0.866025403784439f;
391
392
0
  x_01_r = ptr_in[0] + ptr_in[2];
393
0
  x_01_i = ptr_in[1] + ptr_in[3];
394
395
0
  add_r = ptr_in[2] + ptr_in[4];
396
0
  add_i = ptr_in[3] + ptr_in[5];
397
398
0
  sub_r = ptr_in[2] - ptr_in[4];
399
0
  sub_i = ptr_in[3] - ptr_in[5];
400
401
0
  p1 = add_r / (FLOAT32)2.0f;
402
0
  p4 = add_i / (FLOAT32)2.0f;
403
0
  p2 = (FLOAT32)((FLOAT64)sub_i * sin_mu);
404
0
  p3 = (FLOAT32)((FLOAT64)sub_r * sin_mu);
405
406
0
  temp = ptr_in[0] - p1;
407
408
0
  ptr_out[0] = x_01_r + ptr_in[4];
409
0
  ptr_out[1] = x_01_i + ptr_in[5];
410
0
  ptr_out[2] = temp + p2;
411
0
  ptr_out[3] = (ptr_in[1] - p3) - p4;
412
0
  ptr_out[4] = temp - p2;
413
0
  ptr_out[5] = (ptr_in[1] + p3) - p4;
414
0
}
415
416
VOID ia_enhaacplus_enc_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength,
417
21.4M
                                      FLOAT32 *ptr_scratch_fft_p2_y) {
418
21.4M
  WORD32 i, j, k, n_stages, h2;
419
21.4M
  FLOAT32 x0_r, x0_i, x1_r, x1_i, x2_r, x2_i, x3_r, x3_i;
420
21.4M
  WORD32 del, nodespacing, in_loop_cnt;
421
21.4M
  WORD32 not_power_4;
422
21.4M
  WORD32 dig_rev_shift;
423
21.4M
  FLOAT32 *ptr_p2_y = ptr_scratch_fft_p2_y;
424
21.4M
  WORD32 mpass = nlength;
425
21.4M
  WORD32 npoints = nlength;
426
21.4M
  FLOAT32 *ptr_y = ptr_p2_y;
427
21.4M
  const FLOAT64 *ptr_w;
428
21.4M
  FLOAT32 *ptr_inp;
429
21.4M
  FLOAT32 tmk;
430
21.4M
  const FLOAT64 *ptr_twiddles;
431
21.4M
  FLOAT32 *ptr_data;
432
21.4M
  FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
433
21.4M
  WORD32 sec_loop_cnt;
434
21.4M
  FLOAT32 tmp;
435
436
21.4M
  memset(ptr_y, 0, nlength * 2 * sizeof(*ptr_y));
437
438
21.4M
  dig_rev_shift = ia_enhaacplus_enc_calc_norm(mpass) + 1 - 16;
439
21.4M
  n_stages = 30 - ia_enhaacplus_enc_calc_norm(mpass);
440
21.4M
  not_power_4 = n_stages & 1;
441
442
21.4M
  n_stages = n_stages >> 1;
443
444
21.4M
  ptr_w = ia_enhaacplus_enc_twiddle_table_fft_32x32;
445
446
21.4M
  dig_rev_shift = MAX(dig_rev_shift, 0);
447
448
216M
  for (i = 0; i < npoints; i += 4) {
449
194M
    ptr_inp = ptr_x;
450
194M
    DIG_REV_NEW(i, dig_rev_shift, h2);
451
194M
    if (not_power_4) {
452
182M
      h2 += 1;
453
182M
      h2 &= ~1;
454
182M
    }
455
194M
    ptr_inp += (h2);
456
457
194M
    x0_r = *ptr_inp;
458
194M
    x0_i = *(ptr_inp + 1);
459
194M
    ptr_inp += (npoints >> 1);
460
461
194M
    x1_r = *ptr_inp;
462
194M
    x1_i = *(ptr_inp + 1);
463
194M
    ptr_inp += (npoints >> 1);
464
465
194M
    x2_r = *ptr_inp;
466
194M
    x2_i = *(ptr_inp + 1);
467
194M
    ptr_inp += (npoints >> 1);
468
469
194M
    x3_r = *ptr_inp;
470
194M
    x3_i = *(ptr_inp + 1);
471
472
194M
    x0_r = x0_r + x2_r;
473
194M
    x0_i = x0_i + x2_i;
474
475
194M
    tmk = x0_r - x2_r;
476
194M
    x2_r = tmk - x2_r;
477
194M
    tmk = x0_i - x2_i;
478
194M
    x2_i = tmk - x2_i;
479
480
194M
    x1_r = x1_r + x3_r;
481
194M
    x1_i = x1_i + x3_i;
482
483
194M
    tmk = x1_r - x3_r;
484
194M
    x3_r = tmk - x3_r;
485
194M
    tmk = x1_i - x3_i;
486
194M
    x3_i = tmk - x3_i;
487
488
194M
    x0_r = x0_r + x1_r;
489
194M
    x0_i = x0_i + x1_i;
490
491
194M
    tmk = x0_r - x1_r;
492
194M
    x1_r = tmk - x1_r;
493
194M
    tmk = x0_i - x1_i;
494
194M
    x1_i = tmk - x1_i;
495
496
194M
    x2_r = x2_r + x3_i;
497
194M
    x2_i = x2_i - x3_r;
498
499
194M
    tmk = x2_r - x3_i;
500
194M
    x3_i = tmk - x3_i;
501
194M
    tmk = x2_i + x3_r;
502
194M
    x3_r = tmk + x3_r;
503
504
194M
    *ptr_y++ = x0_r;
505
194M
    *ptr_y++ = x0_i;
506
194M
    *ptr_y++ = x2_r;
507
194M
    *ptr_y++ = x2_i;
508
194M
    *ptr_y++ = x1_r;
509
194M
    *ptr_y++ = x1_i;
510
194M
    *ptr_y++ = x3_i;
511
194M
    *ptr_y++ = x3_r;
512
194M
  }
513
21.4M
  ptr_y -= 2 * npoints;
514
21.4M
  del = 4;
515
21.4M
  nodespacing = 64;
516
21.4M
  in_loop_cnt = npoints >> 4;
517
43.7M
  for (i = n_stages - 1; i > 0; i--) {
518
22.2M
    ptr_twiddles = ptr_w;
519
22.2M
    ptr_data = ptr_y;
520
73.0M
    for (k = in_loop_cnt; k != 0; k--) {
521
50.8M
      x0_r = (*ptr_data);
522
50.8M
      x0_i = (*(ptr_data + 1));
523
50.8M
      ptr_data += ((SIZE_T)del << 1);
524
525
50.8M
      x1_r = (*ptr_data);
526
50.8M
      x1_i = (*(ptr_data + 1));
527
50.8M
      ptr_data += ((SIZE_T)del << 1);
528
529
50.8M
      x2_r = (*ptr_data);
530
50.8M
      x2_i = (*(ptr_data + 1));
531
50.8M
      ptr_data += ((SIZE_T)del << 1);
532
533
50.8M
      x3_r = (*ptr_data);
534
50.8M
      x3_i = (*(ptr_data + 1));
535
50.8M
      ptr_data -= 3 * (del << 1);
536
537
50.8M
      x0_r = x0_r + x2_r;
538
50.8M
      x0_i = x0_i + x2_i;
539
50.8M
      x2_r = x0_r - (x2_r * 2);
540
50.8M
      x2_i = x0_i - (x2_i * 2);
541
50.8M
      x1_r = x1_r + x3_r;
542
50.8M
      x1_i = x1_i + x3_i;
543
50.8M
      x3_r = x1_r - (x3_r * 2);
544
50.8M
      x3_i = x1_i - (x3_i * 2);
545
546
50.8M
      x0_r = x0_r + x1_r;
547
50.8M
      x0_i = x0_i + x1_i;
548
50.8M
      x1_r = x0_r - (x1_r * 2);
549
50.8M
      x1_i = x0_i - (x1_i * 2);
550
50.8M
      x2_r = x2_r + x3_i;
551
50.8M
      x2_i = x2_i - x3_r;
552
50.8M
      x3_i = x2_r - (x3_i * 2);
553
50.8M
      x3_r = x2_i + (x3_r * 2);
554
555
50.8M
      *ptr_data = x0_r;
556
50.8M
      *(ptr_data + 1) = x0_i;
557
50.8M
      ptr_data += ((SIZE_T)del << 1);
558
559
50.8M
      *ptr_data = x2_r;
560
50.8M
      *(ptr_data + 1) = x2_i;
561
50.8M
      ptr_data += ((SIZE_T)del << 1);
562
563
50.8M
      *ptr_data = x1_r;
564
50.8M
      *(ptr_data + 1) = x1_i;
565
50.8M
      ptr_data += ((SIZE_T)del << 1);
566
567
50.8M
      *ptr_data = x3_i;
568
50.8M
      *(ptr_data + 1) = x3_r;
569
50.8M
      ptr_data += ((SIZE_T)del << 1);
570
50.8M
    }
571
22.2M
    ptr_data = ptr_y + 2;
572
573
22.2M
    sec_loop_cnt = (nodespacing * del);
574
22.2M
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
575
22.2M
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
576
22.2M
                   (sec_loop_cnt / 256);
577
578
51.3M
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
579
29.0M
      w_1 = *(ptr_twiddles + j);
580
29.0M
      w_4 = *(ptr_twiddles + j + 257);
581
29.0M
      w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
582
29.0M
      w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
583
29.0M
      w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1));
584
29.0M
      w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 257);
585
586
93.7M
      for (k = in_loop_cnt; k != 0; k--) {
587
64.6M
        ptr_data += ((SIZE_T)del << 1);
588
589
64.6M
        x1_r = *ptr_data;
590
64.6M
        x1_i = *(ptr_data + 1);
591
64.6M
        ptr_data += ((SIZE_T)del << 1);
592
593
64.6M
        x2_r = *ptr_data;
594
64.6M
        x2_i = *(ptr_data + 1);
595
64.6M
        ptr_data += ((SIZE_T)del << 1);
596
597
64.6M
        x3_r = *ptr_data;
598
64.6M
        x3_i = *(ptr_data + 1);
599
64.6M
        ptr_data -= 3 * (del << 1);
600
601
64.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
602
64.6M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
603
64.6M
        x1_r = tmp;
604
605
64.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
606
64.6M
        x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
607
64.6M
        x2_r = tmp;
608
609
64.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_3) - ixheaace_dmult((FLOAT64)x3_i, w_6));
610
64.6M
        x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
611
64.6M
        x3_r = tmp;
612
613
64.6M
        x0_r = (*ptr_data);
614
64.6M
        x0_i = (*(ptr_data + 1));
615
616
64.6M
        x0_r = x0_r + (x2_r);
617
64.6M
        x0_i = x0_i + (x2_i);
618
64.6M
        x2_r = x0_r - (x2_r * 2);
619
64.6M
        x2_i = x0_i - (x2_i * 2);
620
64.6M
        x1_r = x1_r + x3_r;
621
64.6M
        x1_i = x1_i + x3_i;
622
64.6M
        x3_r = x1_r - (x3_r * 2);
623
64.6M
        x3_i = x1_i - (x3_i * 2);
624
625
64.6M
        x0_r = x0_r + (x1_r);
626
64.6M
        x0_i = x0_i + (x1_i);
627
64.6M
        x1_r = x0_r - (x1_r * 2);
628
64.6M
        x1_i = x0_i - (x1_i * 2);
629
64.6M
        x2_r = x2_r + (x3_i);
630
64.6M
        x2_i = x2_i - (x3_r);
631
64.6M
        x3_i = x2_r - (x3_i * 2);
632
64.6M
        x3_r = x2_i + (x3_r * 2);
633
634
64.6M
        *ptr_data = x0_r;
635
64.6M
        *(ptr_data + 1) = x0_i;
636
64.6M
        ptr_data += ((SIZE_T)del << 1);
637
638
64.6M
        *ptr_data = x2_r;
639
64.6M
        *(ptr_data + 1) = x2_i;
640
64.6M
        ptr_data += ((SIZE_T)del << 1);
641
642
64.6M
        *ptr_data = x1_r;
643
64.6M
        *(ptr_data + 1) = x1_i;
644
64.6M
        ptr_data += ((SIZE_T)del << 1);
645
646
64.6M
        *ptr_data = x3_i;
647
64.6M
        *(ptr_data + 1) = x3_r;
648
64.6M
        ptr_data += ((SIZE_T)del << 1);
649
64.6M
      }
650
29.0M
      ptr_data -= 2 * npoints;
651
29.0M
      ptr_data += 2;
652
29.0M
    }
653
47.9M
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
654
25.6M
      w_1 = *(ptr_twiddles + j);
655
25.6M
      w_4 = *(ptr_twiddles + j + 257);
656
25.6M
      w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
657
25.6M
      w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
658
25.6M
      w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1) - 256);
659
25.6M
      w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 1);
660
661
83.4M
      for (k = in_loop_cnt; k != 0; k--) {
662
57.7M
        ptr_data += ((SIZE_T)del << 1);
663
664
57.7M
        x1_r = *ptr_data;
665
57.7M
        x1_i = *(ptr_data + 1);
666
57.7M
        ptr_data += ((SIZE_T)del << 1);
667
668
57.7M
        x2_r = *ptr_data;
669
57.7M
        x2_i = *(ptr_data + 1);
670
57.7M
        ptr_data += ((SIZE_T)del << 1);
671
672
57.7M
        x3_r = *ptr_data;
673
57.7M
        x3_i = *(ptr_data + 1);
674
57.7M
        ptr_data -= 3 * (del << 1);
675
676
57.7M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
677
57.7M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
678
57.7M
        x1_r = tmp;
679
680
57.7M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
681
57.7M
        x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
682
57.7M
        x2_r = tmp;
683
684
57.7M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
685
57.7M
        x3_i =
686
57.7M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
687
57.7M
        x3_r = tmp;
688
689
57.7M
        x0_r = (*ptr_data);
690
57.7M
        x0_i = (*(ptr_data + 1));
691
692
57.7M
        x0_r = x0_r + (x2_r);
693
57.7M
        x0_i = x0_i + (x2_i);
694
57.7M
        x2_r = x0_r - (x2_r * 2);
695
57.7M
        x2_i = x0_i - (x2_i * 2);
696
57.7M
        x1_r = x1_r + x3_r;
697
57.7M
        x1_i = x1_i + x3_i;
698
57.7M
        x3_r = x1_r - (x3_r * 2);
699
57.7M
        x3_i = x1_i - (x3_i * 2);
700
701
57.7M
        x0_r = x0_r + (x1_r);
702
57.7M
        x0_i = x0_i + (x1_i);
703
57.7M
        x1_r = x0_r - (x1_r * 2);
704
57.7M
        x1_i = x0_i - (x1_i * 2);
705
57.7M
        x2_r = x2_r + (x3_i);
706
57.7M
        x2_i = x2_i - (x3_r);
707
57.7M
        x3_i = x2_r - (x3_i * 2);
708
57.7M
        x3_r = x2_i + (x3_r * 2);
709
710
57.7M
        *ptr_data = x0_r;
711
57.7M
        *(ptr_data + 1) = x0_i;
712
57.7M
        ptr_data += ((SIZE_T)del << 1);
713
714
57.7M
        *ptr_data = x2_r;
715
57.7M
        *(ptr_data + 1) = x2_i;
716
57.7M
        ptr_data += ((SIZE_T)del << 1);
717
718
57.7M
        *ptr_data = x1_r;
719
57.7M
        *(ptr_data + 1) = x1_i;
720
57.7M
        ptr_data += ((SIZE_T)del << 1);
721
722
57.7M
        *ptr_data = x3_i;
723
57.7M
        *(ptr_data + 1) = x3_r;
724
57.7M
        ptr_data += ((SIZE_T)del << 1);
725
57.7M
      }
726
25.6M
      ptr_data -= 2 * npoints;
727
25.6M
      ptr_data += 2;
728
25.6M
    }
729
25.6M
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
730
3.40M
      w_1 = *(ptr_twiddles + j);
731
3.40M
      w_4 = *(ptr_twiddles + j + 257);
732
3.40M
      w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
733
3.40M
      w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
734
3.40M
      w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 256));
735
3.40M
      w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) + 1));
736
737
10.3M
      for (k = in_loop_cnt; k != 0; k--) {
738
6.93M
        ptr_data += ((SIZE_T)del << 1);
739
740
6.93M
        x1_r = *ptr_data;
741
6.93M
        x1_i = *(ptr_data + 1);
742
6.93M
        ptr_data += ((SIZE_T)del << 1);
743
744
6.93M
        x2_r = *ptr_data;
745
6.93M
        x2_i = *(ptr_data + 1);
746
6.93M
        ptr_data += ((SIZE_T)del << 1);
747
748
6.93M
        x3_r = *ptr_data;
749
6.93M
        x3_i = *(ptr_data + 1);
750
6.93M
        ptr_data -= 3 * (del << 1);
751
752
6.93M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
753
6.93M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1_r, w_4), x1_i, w_1);
754
6.93M
        x1_r = tmp;
755
756
6.93M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
757
6.93M
        x2_i = (FLOAT32)(-ixheaace_dmult(x2_r, w_2) + ixheaace_dmult(x2_i, w_5));
758
6.93M
        x2_r = tmp;
759
760
6.93M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
761
6.93M
        x3_i =
762
6.93M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
763
6.93M
        x3_r = tmp;
764
765
6.93M
        x0_r = (*ptr_data);
766
6.93M
        x0_i = (*(ptr_data + 1));
767
768
6.93M
        x0_r = x0_r + (x2_r);
769
6.93M
        x0_i = x0_i + (x2_i);
770
6.93M
        x2_r = x0_r - (x2_r * 2);
771
6.93M
        x2_i = x0_i - (x2_i * 2);
772
6.93M
        x1_r = x1_r + x3_r;
773
6.93M
        x1_i = x1_i + x3_i;
774
6.93M
        x3_r = x1_r - (x3_r * 2);
775
6.93M
        x3_i = x1_i - (x3_i * 2);
776
777
6.93M
        x0_r = x0_r + (x1_r);
778
6.93M
        x0_i = x0_i + (x1_i);
779
6.93M
        x1_r = x0_r - (x1_r * 2);
780
6.93M
        x1_i = x0_i - (x1_i * 2);
781
6.93M
        x2_r = x2_r + (x3_i);
782
6.93M
        x2_i = x2_i - (x3_r);
783
6.93M
        x3_i = x2_r - (x3_i * 2);
784
6.93M
        x3_r = x2_i + (x3_r * 2);
785
786
6.93M
        *ptr_data = x0_r;
787
6.93M
        *(ptr_data + 1) = x0_i;
788
6.93M
        ptr_data += ((SIZE_T)del << 1);
789
790
6.93M
        *ptr_data = x2_r;
791
6.93M
        *(ptr_data + 1) = x2_i;
792
6.93M
        ptr_data += ((SIZE_T)del << 1);
793
794
6.93M
        *ptr_data = x1_r;
795
6.93M
        *(ptr_data + 1) = x1_i;
796
6.93M
        ptr_data += ((SIZE_T)del << 1);
797
798
6.93M
        *ptr_data = x3_i;
799
6.93M
        *(ptr_data + 1) = x3_r;
800
6.93M
        ptr_data += ((SIZE_T)del << 1);
801
6.93M
      }
802
3.40M
      ptr_data -= 2 * npoints;
803
3.40M
      ptr_data += 2;
804
3.40M
    }
805
51.3M
    for (; j < nodespacing * del; j += nodespacing) {
806
29.0M
      w_1 = *(ptr_twiddles + j);
807
29.0M
      w_4 = *(ptr_twiddles + j + 257);
808
29.0M
      w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
809
29.0M
      w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
810
29.0M
      w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512));
811
29.0M
      w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512 + 257));
812
813
93.7M
      for (k = in_loop_cnt; k != 0; k--) {
814
64.6M
        ptr_data += ((SIZE_T)del << 1);
815
816
64.6M
        x1_r = *ptr_data;
817
64.6M
        x1_i = *(ptr_data + 1);
818
64.6M
        ptr_data += ((SIZE_T)del << 1);
819
820
64.6M
        x2_r = *ptr_data;
821
64.6M
        x2_i = *(ptr_data + 1);
822
64.6M
        ptr_data += ((SIZE_T)del << 1);
823
824
64.6M
        x3_r = *ptr_data;
825
64.6M
        x3_i = *(ptr_data + 1);
826
64.6M
        ptr_data -= 3 * (del << 1);
827
828
64.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
829
64.6M
        x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
830
64.6M
        x1_r = tmp;
831
832
64.6M
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
833
64.6M
        x2_i =
834
64.6M
            (FLOAT32)(-ixheaace_dmult((FLOAT64)x2_r, w_2) + ixheaace_dmult((FLOAT64)x2_i, w_5));
835
64.6M
        x2_r = tmp;
836
837
64.6M
        tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
838
64.6M
        x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
839
64.6M
        x3_r = tmp;
840
841
64.6M
        x0_r = (*ptr_data);
842
64.6M
        x0_i = (*(ptr_data + 1));
843
844
64.6M
        x0_r = x0_r + (x2_r);
845
64.6M
        x0_i = x0_i + (x2_i);
846
64.6M
        x2_r = x0_r - (x2_r * 2);
847
64.6M
        x2_i = x0_i - (x2_i * 2);
848
64.6M
        x1_r = x1_r + x3_r;
849
64.6M
        x1_i = x1_i - x3_i;
850
64.6M
        x3_r = x1_r - (x3_r * 2);
851
64.6M
        x3_i = x1_i + (x3_i * 2);
852
853
64.6M
        x0_r = x0_r + (x1_r);
854
64.6M
        x0_i = x0_i + (x1_i);
855
64.6M
        x1_r = x0_r - (x1_r * 2);
856
64.6M
        x1_i = x0_i - (x1_i * 2);
857
64.6M
        x2_r = x2_r + (x3_i);
858
64.6M
        x2_i = x2_i - (x3_r);
859
64.6M
        x3_i = x2_r - (x3_i * 2);
860
64.6M
        x3_r = x2_i + (x3_r * 2);
861
862
64.6M
        *ptr_data = x0_r;
863
64.6M
        *(ptr_data + 1) = x0_i;
864
64.6M
        ptr_data += ((SIZE_T)del << 1);
865
866
64.6M
        *ptr_data = x2_r;
867
64.6M
        *(ptr_data + 1) = x2_i;
868
64.6M
        ptr_data += ((SIZE_T)del << 1);
869
870
64.6M
        *ptr_data = x1_r;
871
64.6M
        *(ptr_data + 1) = x1_i;
872
64.6M
        ptr_data += ((SIZE_T)del << 1);
873
874
64.6M
        *ptr_data = x3_i;
875
64.6M
        *(ptr_data + 1) = x3_r;
876
64.6M
        ptr_data += ((SIZE_T)del << 1);
877
64.6M
      }
878
29.0M
      ptr_data -= 2 * npoints;
879
29.0M
      ptr_data += 2;
880
29.0M
    }
881
22.2M
    nodespacing >>= 2;
882
22.2M
    del <<= 2;
883
22.2M
    in_loop_cnt >>= 2;
884
22.2M
  }
885
21.4M
  if (not_power_4) {
886
21.0M
    ptr_twiddles = ptr_w;
887
21.0M
    nodespacing <<= 1;
888
889
203M
    for (j = del / 2; j != 0; j--) {
890
182M
      w_1 = *ptr_twiddles;
891
182M
      w_4 = *(ptr_twiddles + 257);
892
182M
      ptr_twiddles += nodespacing;
893
894
182M
      x0_r = *ptr_y;
895
182M
      x0_i = *(ptr_y + 1);
896
182M
      ptr_y += ((SIZE_T)del << 1);
897
898
182M
      x1_r = *ptr_y;
899
182M
      x1_i = *(ptr_y + 1);
900
901
182M
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
902
182M
      x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
903
182M
      x1_r = tmp;
904
905
182M
      *ptr_y = (x0_r) - (x1_r);
906
182M
      *(ptr_y + 1) = (x0_i) - (x1_i);
907
182M
      ptr_y -= ((SIZE_T)del << 1);
908
909
182M
      *ptr_y = (x0_r) + (x1_r);
910
182M
      *(ptr_y + 1) = (x0_i) + (x1_i);
911
182M
      ptr_y += 2;
912
182M
    }
913
21.0M
    ptr_twiddles = ptr_w;
914
203M
    for (j = del / 2; j != 0; j--) {
915
182M
      w_1 = *ptr_twiddles;
916
182M
      w_4 = *(ptr_twiddles + 257);
917
182M
      ptr_twiddles += nodespacing;
918
919
182M
      x0_r = *ptr_y;
920
182M
      x0_i = *(ptr_y + 1);
921
182M
      ptr_y += ((SIZE_T)del << 1);
922
923
182M
      x1_r = *ptr_y;
924
182M
      x1_i = *(ptr_y + 1);
925
926
182M
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_4) +
927
182M
                      ixheaace_dmult((FLOAT64)x1_i, w_1)) /*/2*/;
928
182M
      x1_i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1_r, w_1) +
929
182M
                       ixheaace_dmult((FLOAT64)x1_i, w_4)) /*/2*/;
930
182M
      x1_r = tmp;
931
932
182M
      *ptr_y = (x0_r) - (x1_r);
933
182M
      *(ptr_y + 1) = (x0_i) - (x1_i);
934
182M
      ptr_y -= ((SIZE_T)del << 1);
935
936
182M
      *ptr_y = (x0_r) + (x1_r);
937
182M
      *(ptr_y + 1) = (x0_i) + (x1_i);
938
182M
      ptr_y += 2;
939
182M
    }
940
21.0M
  }
941
942
801M
  for (i = 0; i < nlength; i++) {
943
779M
    *(ptr_x + 2 * i) = ptr_p2_y[2 * i];
944
779M
    *(ptr_x + 2 * i + 1) = ptr_p2_y[2 * i + 1];
945
779M
  }
946
21.4M
}
947
948
static VOID ia_enhaacplus_enc_complex_fft_p3(FLOAT32 *ptr_data, WORD32 nlength,
949
0
                                             ixheaace_scratch_mem *pstr_scratch) {
950
0
  WORD32 i, j;
951
0
  FLOAT32 *ptr_data_3 = pstr_scratch->p_fft_p3_data_3;
952
0
  FLOAT32 *ptr_p3_y = pstr_scratch->p_fft_p3_y;
953
0
  WORD32 cnfac;
954
0
  WORD32 mpass = nlength;
955
0
  FLOAT32 *ptr_x = ptr_data;
956
0
  FLOAT32 *ptr_y = ptr_p3_y;
957
0
  cnfac = 0;
958
0
  const FLOAT64 *ptr_w1_r, *ptr_w1_i;
959
0
  FLOAT32 tmp;
960
0
  ptr_w1_r = ia_enhaacplus_enc_twiddle_table_3pr;
961
0
  ptr_w1_i = ia_enhaacplus_enc_twiddle_table_3pi;
962
963
0
  while (mpass % 3 == 0) {
964
0
    mpass /= 3;
965
0
    cnfac++;
966
0
  }
967
968
0
  for (i = 0; i < 3 * cnfac; i++) {
969
0
    for (j = 0; j < mpass; j++) {
970
0
      ptr_data_3[2 * j] = ptr_data[3 * (2 * j) + (2 * i)];
971
0
      ptr_data_3[2 * j + 1] = ptr_data[3 * (2 * j) + 1 + (2 * i)];
972
0
    }
973
0
    ia_enhaacplus_enc_complex_fft_p2(ptr_data_3, mpass, pstr_scratch->p_fft_p2_y);
974
975
0
    for (j = 0; j < mpass; j++) {
976
0
      ptr_data[3 * (2 * j) + (2 * i)] = ptr_data_3[2 * j];
977
0
      ptr_data[3 * (2 * j) + 1 + (2 * i)] = ptr_data_3[2 * j + 1];
978
0
    }
979
0
  }
980
981
0
  {
982
0
    for (i = 0; i < nlength; i += 3) {
983
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_r) -
984
0
                      (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_i));
985
0
      ptr_data[2 * i + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_i) +
986
0
                                      (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_r));
987
0
      ptr_data[2 * i] = tmp;
988
989
0
      ptr_w1_r++;
990
0
      ptr_w1_i++;
991
992
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_r) -
993
0
                      (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_i));
994
0
      ptr_data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_i) +
995
0
                                            (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_r));
996
0
      ptr_data[2 * (i + 1)] = tmp;
997
998
0
      ptr_w1_r++;
999
0
      ptr_w1_i++;
1000
1001
0
      tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_r) -
1002
0
                      (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_i));
1003
0
      ptr_data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_i) +
1004
0
                                            (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_r));
1005
0
      ptr_data[2 * (i + 2)] = tmp;
1006
1007
0
      ptr_w1_r += 3 * (128 / mpass - 1) + 1;
1008
0
      ptr_w1_i += 3 * (128 / mpass - 1) + 1;
1009
0
    }
1010
0
  }
1011
1012
0
  for (i = 0; i < mpass; i++) {
1013
0
    ia_enhaacplus_enc_complex_3point_fft(ptr_x, ptr_y);
1014
1015
0
    ptr_x = ptr_x + 6;
1016
0
    ptr_y = ptr_y + 6;
1017
0
  }
1018
1019
0
  for (i = 0; i < mpass; i++) {
1020
0
    ptr_data[2 * i] = ptr_p3_y[6 * i];
1021
0
    ptr_data[2 * i + 1] = ptr_p3_y[6 * i + 1];
1022
0
  }
1023
1024
0
  for (i = 0; i < mpass; i++) {
1025
0
    ptr_data[2 * (i + mpass)] = ptr_p3_y[6 * i + 2];
1026
0
    ptr_data[2 * (i + mpass) + 1] = ptr_p3_y[6 * i + 3];
1027
0
  }
1028
1029
0
  for (i = 0; i < mpass; i++) {
1030
0
    ptr_data[2 * (i + 2 * mpass)] = ptr_p3_y[6 * i + 4];
1031
0
    ptr_data[2 * (i + 2 * mpass) + 1] = ptr_p3_y[6 * i + 5];
1032
0
  }
1033
0
}
1034
1035
VOID ia_enhaacplus_enc_complex_fft(FLOAT32 *ptr_data, WORD32 len,
1036
16.3M
                                   ixheaace_scratch_mem *pstr_scratch) {
1037
16.3M
  if (len & (len - 1)) {
1038
0
    ia_enhaacplus_enc_complex_fft_p3(ptr_data, len, pstr_scratch);
1039
16.3M
  } else {
1040
16.3M
    ia_enhaacplus_enc_complex_fft_p2(ptr_data, len, pstr_scratch->p_fft_p2_y);
1041
16.3M
  }
1042
16.3M
}
1043
1044
static VOID ixheaace_post_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_trig_data,
1045
567k
                               WORD32 step, WORD32 trig_data_size) {
1046
567k
  WORD32 i;
1047
567k
  FLOAT32 w_re, w_im, re1, re2, im1, im2;
1048
567k
  const FLOAT32 *ptr_sin = ptr_trig_data;
1049
567k
  const FLOAT32 *ptr_cos = ptr_trig_data + trig_data_size;
1050
1051
567k
  w_im = *ptr_sin;
1052
567k
  w_re = *ptr_cos;
1053
1054
55.9M
  for (i = 0; i < m / 4; i++) {
1055
55.3M
    re1 = ptr_x[2 * i];
1056
55.3M
    im1 = ptr_x[2 * i + 1];
1057
55.3M
    re2 = ptr_x[m - 2 - 2 * i];
1058
55.3M
    im2 = ptr_x[m - 1 - 2 * i];
1059
1060
55.3M
    ptr_x[2 * i] = (re1 * w_re + im1 * w_im);
1061
1062
55.3M
    ptr_x[m - 1 - 2 * i] = (re1 * w_im - im1 * w_re);
1063
1064
55.3M
    ptr_sin += step;
1065
55.3M
    ptr_cos -= step;
1066
1067
55.3M
    w_im = *ptr_sin;
1068
55.3M
    w_re = *ptr_cos;
1069
1070
55.3M
    ptr_x[m - 2 - 2 * i] = (re2 * w_im + im2 * w_re);
1071
1072
55.3M
    ptr_x[2 * i + 1] = (re2 * w_re - im2 * w_im);
1073
55.3M
  }
1074
567k
}
1075
1076
static VOID ixheaace_cplx_mult_twid(FLOAT32 *ptr_re, FLOAT32 *ptr_im, FLOAT32 a, FLOAT32 b,
1077
41.8M
                                    FLOAT32 twid_table, FLOAT32 twid_table_h) {
1078
41.8M
  *ptr_re = (a * twid_table) - (b * twid_table_h);
1079
41.8M
  *ptr_im = (a * twid_table_h) + (b * twid_table);
1080
41.8M
}
1081
1082
1.94M
static VOID ixheaace_cfft_15_twiddle(FLOAT32 *ptr_inp) {
1083
1.94M
  const FLOAT32 *ptr_tw_flt = &ixheaace_mix_rad_twid_tbl[0];
1084
1.94M
  const FLOAT32 *ptr_tw_flt_h = &ixheaace_mix_rad_twid_tbl_h[0];
1085
1.94M
  FLOAT32 accu1, accu2;
1086
1.94M
  WORD32 i, j;
1087
1.94M
  ptr_inp += 12;
1088
1089
5.82M
  for (j = 0; j < 2; j++) {
1090
19.4M
    for (i = 0; i < 4; i++) {
1091
15.5M
      ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1092
15.5M
                              ptr_tw_flt[i], ptr_tw_flt_h[i]);
1093
15.5M
      ptr_inp[2 * i + 0] = accu1;
1094
15.5M
      ptr_inp[2 * i + 1] = accu2;
1095
15.5M
    }
1096
3.88M
    ptr_inp += 10;
1097
3.88M
    ptr_tw_flt += 4;
1098
3.88M
    ptr_tw_flt_h += 4;
1099
3.88M
  }
1100
1.94M
}
1101
1102
1.94M
static VOID ixheaace_cfft_15_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, FLOAT32 *ptr_fft3_out) {
1103
1.94M
  WORD32 i, idx;
1104
1.94M
  FLOAT32 *ptr_buf1, *ptr_buf2, *ptr_buf3;
1105
1.94M
  FLOAT32 add_r, sub_r;
1106
1.94M
  FLOAT32 add_i, sub_i;
1107
1.94M
  FLOAT32 x_01_r, x_01_i, temp;
1108
1.94M
  FLOAT32 p1, p2, p3, p4;
1109
1110
1.94M
  FLOAT32 sin_mu_flt = 0.866027832f;
1111
1.94M
  FLOAT32 c51_flt = 0.951049805f;
1112
1.94M
  FLOAT32 c52_flt = -0.76940918f;
1113
1.94M
  FLOAT32 c53_flt = -0.36328125f;
1114
1.94M
  FLOAT32 c54_flt = 0.559020996f;
1115
1.94M
  FLOAT32 c55_flt = -0.625f;
1116
1117
1.94M
  FLOAT32 r1, r2, r3, r4;
1118
1.94M
  FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1119
1.94M
  FLOAT32 *ptr_out_fft3 = ptr_fft3_out;
1120
1121
1.94M
  FLOAT32 xr_0, xr_1, xr_2;
1122
1.94M
  FLOAT32 xi_0, xi_1, xi_2;
1123
1124
1.94M
  ptr_buf2 = ptr_fft3_out;
1125
1.94M
  ptr_buf1 = ptr_buf3 = ptr_fft3_out;
1126
1127
7.76M
  for (i = 0; i < FFT3; i++) {
1128
5.82M
    *ptr_buf1++ = ptr_inp[0 + 64 * i];
1129
5.82M
    *ptr_buf1++ = ptr_inp[1 + 64 * i];
1130
1131
5.82M
    *ptr_buf1++ = ptr_inp[192 + 64 * i];
1132
5.82M
    *ptr_buf1++ = ptr_inp[193 + 64 * i];
1133
1134
5.82M
    *ptr_buf1++ = ptr_inp[384 + 64 * i];
1135
5.82M
    *ptr_buf1++ = ptr_inp[385 + 64 * i];
1136
1137
5.82M
    *ptr_buf1++ = ptr_inp[576 + 64 * i];
1138
5.82M
    *ptr_buf1++ = ptr_inp[577 + 64 * i];
1139
1140
5.82M
    *ptr_buf1++ = ptr_inp[768 + 64 * i];
1141
5.82M
    *ptr_buf1++ = ptr_inp[769 + 64 * i];
1142
1143
5.82M
    r1 = ptr_buf3[2] + ptr_buf3[8];
1144
5.82M
    r4 = ptr_buf3[2] - ptr_buf3[8];
1145
5.82M
    r3 = ptr_buf3[4] + ptr_buf3[6];
1146
5.82M
    r2 = ptr_buf3[4] - ptr_buf3[6];
1147
5.82M
    t = ((r1 - r3) * c54_flt);
1148
1149
5.82M
    r1 = r1 + r3;
1150
1151
5.82M
    temp1 = ptr_buf3[0] + r1;
1152
1153
5.82M
    r1 = temp1 + ((r1 * c55_flt) * 2);
1154
1155
5.82M
    r3 = r1 - t;
1156
5.82M
    r1 = r1 + t;
1157
1158
5.82M
    t = ((r4 + r2) * c51_flt);
1159
5.82M
    r4 = t + ((r4 * c52_flt) * 2);
1160
5.82M
    r2 = t + (r2 * c53_flt);
1161
1162
5.82M
    s1 = ptr_buf3[3] + ptr_buf3[9];
1163
5.82M
    s4 = ptr_buf3[3] - ptr_buf3[9];
1164
5.82M
    s3 = ptr_buf3[5] + ptr_buf3[7];
1165
5.82M
    s2 = ptr_buf3[5] - ptr_buf3[7];
1166
1167
5.82M
    t = ((s1 - s3) * c54_flt);
1168
1169
5.82M
    s1 = s1 + s3;
1170
1171
5.82M
    temp2 = ptr_buf3[1] + s1;
1172
1173
5.82M
    s1 = temp2 + (((s1 * c55_flt)) * 2);
1174
1175
5.82M
    s3 = s1 - t;
1176
5.82M
    s1 = s1 + t;
1177
1178
5.82M
    t = ((s4 + s2) * c51_flt);
1179
5.82M
    s4 = t + (((s4 * c52_flt)) * 2);
1180
5.82M
    s2 = t + ((s2 * c53_flt));
1181
1182
5.82M
    *ptr_buf2++ = temp1;
1183
5.82M
    *ptr_buf2++ = temp2;
1184
5.82M
    *ptr_buf2++ = r1 + s2;
1185
5.82M
    *ptr_buf2++ = s1 - r2;
1186
5.82M
    *ptr_buf2++ = r3 - s4;
1187
5.82M
    *ptr_buf2++ = s3 + r4;
1188
5.82M
    *ptr_buf2++ = r3 + s4;
1189
5.82M
    *ptr_buf2++ = s3 - r4;
1190
5.82M
    *ptr_buf2++ = r1 - s2;
1191
5.82M
    *ptr_buf2++ = s1 + r2;
1192
5.82M
    ptr_buf3 = ptr_buf1;
1193
5.82M
  }
1194
1195
1.94M
  idx = 0;
1196
1.94M
  ixheaace_cfft_15_twiddle(ptr_out_fft3);
1197
1198
11.6M
  for (i = 0; i < FFT5; i++) {
1199
9.71M
    xr_0 = ptr_out_fft3[0];
1200
9.71M
    xi_0 = ptr_out_fft3[1];
1201
1202
9.71M
    xr_1 = ptr_out_fft3[10];
1203
9.71M
    xi_1 = ptr_out_fft3[11];
1204
1205
9.71M
    xr_2 = ptr_out_fft3[20];
1206
9.71M
    xi_2 = ptr_out_fft3[21];
1207
1208
9.71M
    x_01_r = (xr_0 + xr_1);
1209
9.71M
    x_01_i = (xi_0 + xi_1);
1210
1211
9.71M
    add_r = (xr_1 + xr_2);
1212
9.71M
    add_i = (xi_1 + xi_2);
1213
1214
9.71M
    sub_r = (xr_1 - xr_2);
1215
9.71M
    sub_i = (xi_1 - xi_2);
1216
1217
9.71M
    p1 = add_r / 2;
1218
1219
9.71M
    p2 = (sub_i * sin_mu_flt);
1220
9.71M
    p3 = (sub_r * sin_mu_flt);
1221
1222
9.71M
    p4 = add_i / 2;
1223
1224
9.71M
    temp = (xr_0 - p1);
1225
9.71M
    temp1 = (xi_0 + p3);
1226
9.71M
    temp2 = (xi_0 - p3);
1227
1228
9.71M
    ptr_op[idx] = (x_01_r + xr_2);
1229
9.71M
    ptr_op[idx + 1] = (x_01_i + xi_2);
1230
1231
9.71M
    idx = idx + 320;
1232
9.71M
    ptr_op[idx] = (temp + p2);
1233
9.71M
    ptr_op[idx + 1] = (temp2 - p4);
1234
1235
9.71M
    idx = idx + 320;
1236
9.71M
    ptr_op[idx] = (temp - p2);
1237
9.71M
    ptr_op[idx + 1] = (temp1 - p4);
1238
9.71M
    ptr_out_fft3 += 2;
1239
9.71M
    idx = idx - 576;
1240
9.71M
  }
1241
1.94M
}
1242
1243
static VOID ixheaace_cfft_twiddle_mult(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1,
1244
                                       WORD32 dim2, const FLOAT32 *ptr_tw_flt,
1245
60.7k
                                       const FLOAT32 *ptr_tw_h_flt) {
1246
60.7k
  FLOAT32 accu1, accu2;
1247
60.7k
  WORD32 i, j;
1248
60.7k
  WORD32 step_val = (dim2 - 1) << 1;
1249
2.00M
  for (i = 0; i < dim2; i++) {
1250
1.94M
    ptr_op[0] = ptr_inp[0];
1251
1.94M
    ptr_op[1] = ptr_inp[1];
1252
1.94M
    ptr_op += 2;
1253
1.94M
    ptr_inp += 2;
1254
1.94M
  }
1255
1256
910k
  for (j = 0; j < (dim1 - 1); j++) {
1257
849k
    ptr_op[0] = ptr_inp[0];
1258
849k
    ptr_op[1] = ptr_inp[1];
1259
849k
    ptr_inp += 2;
1260
849k
    ptr_op += 2;
1261
27.1M
    for (i = 0; i < (dim2 - 1); i++) {
1262
26.3M
      ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1263
26.3M
                              ptr_tw_flt[i], ptr_tw_h_flt[i]);
1264
26.3M
      ptr_op[2 * i + 0] = accu1;
1265
26.3M
      ptr_op[2 * i + 1] = accu2;
1266
26.3M
    }
1267
849k
    ptr_inp += step_val;
1268
849k
    ptr_op += step_val;
1269
849k
    ptr_tw_flt += (dim2 - 1);
1270
849k
    ptr_tw_h_flt += (dim2 - 1);
1271
849k
  }
1272
60.7k
}
1273
1274
910k
static VOID ixheaace_cfft_32_480(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1275
910k
  WORD32 i, l1, l2, h2;
1276
910k
  FLOAT32 xh0_0, xh1_0, xl0_0, xl1_0;
1277
910k
  FLOAT32 xh0_1, xh1_1, xl0_1, xl1_1;
1278
910k
  FLOAT32 x_0, x_1, x_2, x_3;
1279
910k
  FLOAT32 x_4, x_5, x_6, x_7;
1280
910k
  FLOAT32 *ptr_x;
1281
910k
  FLOAT32 *ptr_y;
1282
910k
  FLOAT32 interm_y[FFT32X2];
1283
910k
  FLOAT32 n00, n10, n20, n30, n01, n11, n21, n31;
1284
1285
910k
  FLOAT32 inp_0qi, inp_0qr;
1286
910k
  FLOAT32 inp_1qi, inp_1qr;
1287
910k
  FLOAT32 inp_2qi, inp_2qr;
1288
910k
  FLOAT32 inp_3qi, inp_3qr;
1289
910k
  FLOAT32 mul_0qi, mul_0qr;
1290
910k
  FLOAT32 mul_1qi, mul_1qr;
1291
910k
  FLOAT32 mul_2qi, mul_2qr;
1292
910k
  FLOAT32 mul_3qi, mul_3qr;
1293
910k
  FLOAT32 sum_0qi, sum_0qr;
1294
910k
  FLOAT32 sum_1qi, sum_1qr;
1295
910k
  FLOAT32 sum_2qi, sum_2qr;
1296
910k
  FLOAT32 sum_3qi, sum_3qr;
1297
910k
  WORD32 idx1 = 0, idx2 = FFT15 * FFT32;
1298
910k
  FLOAT32 mul_i, mul_r;
1299
1300
910k
  ptr_x = ptr_in;
1301
1302
  // This computes first and second stage butterflies. So, 4-point FFT is done.
1303
8.19M
  for (i = 0; i < 8; i++) {
1304
7.28M
    x_0 = ptr_x[0];
1305
7.28M
    x_1 = ptr_x[1];
1306
7.28M
    x_2 = ptr_x[16];
1307
7.28M
    x_3 = ptr_x[16 + 1];
1308
7.28M
    x_4 = ptr_x[32];
1309
7.28M
    x_5 = ptr_x[32 + 1];
1310
7.28M
    x_6 = ptr_x[48];
1311
7.28M
    x_7 = ptr_x[48 + 1];
1312
1313
7.28M
    xh0_0 = x_0 + x_4;
1314
7.28M
    xh1_0 = x_1 + x_5;
1315
7.28M
    xl0_0 = x_0 - x_4;
1316
7.28M
    xl1_0 = x_1 - x_5;
1317
7.28M
    xh0_1 = x_2 + x_6;
1318
7.28M
    xh1_1 = x_3 + x_7;
1319
7.28M
    xl0_1 = x_2 - x_6;
1320
7.28M
    xl1_1 = x_3 - x_7;
1321
1322
7.28M
    n00 = xh0_0 + xh0_1;
1323
7.28M
    n01 = xh1_0 + xh1_1;
1324
7.28M
    n10 = xl0_0 + xl1_1;
1325
7.28M
    n11 = xl1_0 - xl0_1;
1326
7.28M
    n20 = xh0_0 - xh0_1;
1327
7.28M
    n21 = xh1_0 - xh1_1;
1328
7.28M
    n30 = xl0_0 - xl1_1;
1329
7.28M
    n31 = xl1_0 + xl0_1;
1330
1331
7.28M
    ptr_x[0] = n00;
1332
7.28M
    ptr_x[1] = n01;
1333
7.28M
    ptr_x[16] = n10;
1334
7.28M
    ptr_x[16 + 1] = n11;
1335
7.28M
    ptr_x[32] = n20;
1336
7.28M
    ptr_x[32 + 1] = n21;
1337
7.28M
    ptr_x[48] = n30;
1338
7.28M
    ptr_x[48 + 1] = n31;
1339
1340
7.28M
    ptr_x += 2;
1341
7.28M
  }
1342
1343
  // This computes third and fourth stage butterflies. So, next 4-point FFT is done.
1344
910k
  {
1345
910k
    h2 = 16 >> 1;
1346
910k
    l1 = 16;
1347
910k
    l2 = 16 + (16 >> 1);
1348
1349
910k
    ptr_x = ptr_in;
1350
910k
    ptr_y = &interm_y[0];
1351
1352
    /* Butter fly summation in 2 steps */
1353
910k
    inp_0qr = ptr_x[0];
1354
910k
    inp_0qi = ptr_x[1];
1355
910k
    inp_1qr = ptr_x[4];
1356
910k
    inp_1qi = ptr_x[5];
1357
910k
    inp_2qr = ptr_x[8];
1358
910k
    inp_2qi = ptr_x[9];
1359
910k
    inp_3qr = ptr_x[12];
1360
910k
    inp_3qi = ptr_x[13];
1361
1362
910k
    mul_0qr = inp_0qr;
1363
910k
    mul_0qi = inp_0qi;
1364
910k
    mul_1qr = inp_1qr;
1365
910k
    mul_1qi = inp_1qi;
1366
910k
    mul_2qr = inp_2qr;
1367
910k
    mul_2qi = inp_2qi;
1368
910k
    mul_3qr = inp_3qr;
1369
910k
    mul_3qi = inp_3qi;
1370
1371
910k
    sum_0qr = mul_0qr + mul_2qr;
1372
910k
    sum_0qi = mul_0qi + mul_2qi;
1373
910k
    sum_1qr = mul_0qr - mul_2qr;
1374
910k
    sum_1qi = mul_0qi - mul_2qi;
1375
910k
    sum_2qr = mul_1qr + mul_3qr;
1376
910k
    sum_2qi = mul_1qi + mul_3qi;
1377
910k
    sum_3qr = mul_1qr - mul_3qr;
1378
910k
    sum_3qi = mul_1qi - mul_3qi;
1379
1380
910k
    ptr_y[0] = sum_0qr + sum_2qr;
1381
910k
    ptr_y[1] = sum_0qi + sum_2qi;
1382
910k
    ptr_y[h2] = sum_1qr + sum_3qi;
1383
910k
    ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1384
910k
    ptr_y[l1] = sum_0qr - sum_2qr;
1385
910k
    ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1386
910k
    ptr_y[l2] = sum_1qr - sum_3qi;
1387
910k
    ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1388
1389
910k
    ptr_y += 2;
1390
910k
    ptr_x += 16;
1391
1392
    /* 2nd butter fly */
1393
1394
910k
    inp_0qr = ptr_x[0];
1395
910k
    inp_0qi = ptr_x[1];
1396
910k
    inp_1qr = ptr_x[4];
1397
910k
    inp_1qi = ptr_x[5];
1398
910k
    inp_2qr = ptr_x[8];
1399
910k
    inp_2qi = ptr_x[9];
1400
910k
    inp_3qr = ptr_x[12];
1401
910k
    inp_3qi = ptr_x[13];
1402
1403
910k
    mul_0qr = inp_0qr;
1404
910k
    mul_0qi = inp_0qi;
1405
1406
910k
    mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1407
910k
    mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1408
1409
910k
    mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1410
910k
    mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1411
1412
910k
    mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1413
910k
    mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1414
1415
910k
    sum_0qr = mul_0qr + (mul_2qr * 2);
1416
910k
    sum_0qi = mul_0qi + (mul_2qi * 2);
1417
910k
    sum_1qr = mul_0qr - (mul_2qr * 2);
1418
910k
    sum_1qi = mul_0qi - (mul_2qi * 2);
1419
1420
910k
    sum_2qr = mul_1qr + mul_3qr;
1421
910k
    sum_2qi = mul_1qi + mul_3qi;
1422
910k
    sum_3qr = mul_1qr - mul_3qr;
1423
910k
    sum_3qi = mul_1qi - mul_3qi;
1424
1425
910k
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1426
910k
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1427
910k
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1428
910k
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1429
910k
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1430
910k
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1431
910k
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1432
910k
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1433
1434
910k
    ptr_y += 2;
1435
910k
    ptr_x += 16;
1436
1437
    /* 3rd butter fly */
1438
1439
910k
    inp_0qr = ptr_x[0];
1440
910k
    inp_0qi = ptr_x[1];
1441
910k
    inp_1qr = ptr_x[4];
1442
910k
    inp_1qi = ptr_x[5];
1443
910k
    inp_2qr = ptr_x[8];
1444
910k
    inp_2qi = ptr_x[9];
1445
910k
    inp_3qr = ptr_x[12];
1446
910k
    inp_3qi = ptr_x[13];
1447
1448
910k
    mul_0qr = inp_0qr;
1449
910k
    mul_0qi = inp_0qi;
1450
1451
910k
    mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1452
910k
    mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1453
1454
910k
    mul_2qr = inp_2qi;
1455
910k
    mul_2qi = inp_2qr;
1456
1457
910k
    mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1458
910k
    mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1459
1460
910k
    sum_0qr = mul_0qr + mul_2qr;
1461
910k
    sum_0qi = mul_0qi - mul_2qi;
1462
910k
    sum_1qr = mul_0qr - mul_2qr;
1463
910k
    sum_1qi = mul_0qi + mul_2qi;
1464
910k
    sum_2qr = mul_1qr + mul_3qr;
1465
910k
    sum_2qi = mul_1qi + mul_3qi;
1466
910k
    sum_3qr = mul_1qr - mul_3qr;
1467
910k
    sum_3qi = mul_1qi - mul_3qi;
1468
1469
910k
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1470
910k
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1471
910k
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1472
910k
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1473
910k
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1474
910k
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1475
910k
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1476
910k
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1477
1478
910k
    ptr_y += 2;
1479
910k
    ptr_x += 16;
1480
1481
    /* 4th butter fly */
1482
1483
910k
    inp_0qr = ptr_x[0];
1484
910k
    inp_0qi = ptr_x[1];
1485
910k
    inp_1qr = ptr_x[4];
1486
910k
    inp_1qi = ptr_x[5];
1487
910k
    inp_2qr = ptr_x[8];
1488
910k
    inp_2qi = ptr_x[9];
1489
910k
    inp_3qr = ptr_x[12];
1490
910k
    inp_3qi = ptr_x[13];
1491
1492
910k
    mul_0qr = inp_0qr;
1493
910k
    mul_0qi = inp_0qi;
1494
1495
910k
    mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1496
910k
    mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1497
1498
910k
    mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1499
910k
    mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1500
1501
910k
    mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1502
910k
    mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1503
1504
910k
    sum_0qr = mul_0qr + (mul_2qr * 2);
1505
910k
    sum_0qi = mul_0qi + (mul_2qi * 2);
1506
910k
    sum_1qr = mul_0qr - (mul_2qr * 2);
1507
910k
    sum_1qi = mul_0qi - (mul_2qi * 2);
1508
1509
910k
    sum_2qr = mul_1qr + mul_3qr;
1510
910k
    sum_2qi = mul_1qi + mul_3qi;
1511
910k
    sum_3qr = mul_1qr - mul_3qr;
1512
910k
    sum_3qi = mul_1qi - mul_3qi;
1513
1514
910k
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1515
910k
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1516
910k
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1517
910k
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1518
910k
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1519
910k
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1520
910k
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1521
910k
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1522
1523
910k
    ptr_x = ptr_in;
1524
910k
    ptr_y = &interm_y[32];
1525
1526
    /* Butter fly summation in 2 steps */
1527
910k
    inp_0qr = ptr_x[2];
1528
910k
    inp_0qi = ptr_x[3];
1529
910k
    inp_1qr = ptr_x[6];
1530
910k
    inp_1qi = ptr_x[7];
1531
910k
    inp_2qr = ptr_x[10];
1532
910k
    inp_2qi = ptr_x[11];
1533
910k
    inp_3qr = ptr_x[14];
1534
910k
    inp_3qi = ptr_x[15];
1535
1536
910k
    mul_0qr = inp_0qr;
1537
910k
    mul_0qi = inp_0qi;
1538
910k
    mul_1qr = inp_1qr;
1539
910k
    mul_1qi = inp_1qi;
1540
910k
    mul_2qr = inp_2qr;
1541
910k
    mul_2qi = inp_2qi;
1542
910k
    mul_3qr = inp_3qr;
1543
910k
    mul_3qi = inp_3qi;
1544
1545
910k
    sum_0qr = mul_0qr + mul_2qr;
1546
910k
    sum_0qi = mul_0qi + mul_2qi;
1547
910k
    sum_1qr = mul_0qr - mul_2qr;
1548
910k
    sum_1qi = mul_0qi - mul_2qi;
1549
910k
    sum_2qr = mul_1qr + mul_3qr;
1550
910k
    sum_2qi = mul_1qi + mul_3qi;
1551
910k
    sum_3qr = mul_1qr - mul_3qr;
1552
910k
    sum_3qi = mul_1qi - mul_3qi;
1553
1554
910k
    ptr_y[0] = sum_0qr + sum_2qr;
1555
910k
    ptr_y[1] = sum_0qi + sum_2qi;
1556
910k
    ptr_y[h2] = sum_1qr + sum_3qi;
1557
910k
    ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1558
910k
    ptr_y[l1] = sum_0qr - sum_2qr;
1559
910k
    ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1560
910k
    ptr_y[l2] = sum_1qr - sum_3qi;
1561
910k
    ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1562
1563
910k
    ptr_y += 2;
1564
910k
    ptr_x += 16;
1565
1566
    /* 2nd butter fly */
1567
1568
910k
    inp_0qr = ptr_x[2];
1569
910k
    inp_0qi = ptr_x[3];
1570
910k
    inp_1qr = ptr_x[6];
1571
910k
    inp_1qi = ptr_x[7];
1572
910k
    inp_2qr = ptr_x[10];
1573
910k
    inp_2qi = ptr_x[11];
1574
910k
    inp_3qr = ptr_x[14];
1575
910k
    inp_3qi = ptr_x[15];
1576
1577
910k
    mul_0qr = inp_0qr;
1578
910k
    mul_0qi = inp_0qi;
1579
1580
910k
    mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1581
910k
    mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1582
1583
910k
    mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1584
910k
    mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1585
1586
910k
    mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1587
910k
    mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1588
1589
910k
    sum_0qr = mul_0qr + (mul_2qr * 2);
1590
910k
    sum_0qi = mul_0qi + (mul_2qi * 2);
1591
910k
    sum_1qr = mul_0qr - (mul_2qr * 2);
1592
910k
    sum_1qi = mul_0qi - (mul_2qi * 2);
1593
1594
910k
    sum_2qr = mul_1qr + mul_3qr;
1595
910k
    sum_2qi = mul_1qi + mul_3qi;
1596
910k
    sum_3qr = mul_1qr - mul_3qr;
1597
910k
    sum_3qi = mul_1qi - mul_3qi;
1598
1599
910k
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1600
910k
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1601
910k
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1602
910k
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1603
910k
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1604
910k
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1605
910k
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1606
910k
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1607
1608
910k
    ptr_y += 2;
1609
910k
    ptr_x += 16;
1610
1611
    /* 3rd butter fly */
1612
1613
910k
    inp_0qr = ptr_x[2];
1614
910k
    inp_0qi = ptr_x[3];
1615
910k
    inp_1qr = ptr_x[6];
1616
910k
    inp_1qi = ptr_x[7];
1617
910k
    inp_2qr = ptr_x[10];
1618
910k
    inp_2qi = ptr_x[11];
1619
910k
    inp_3qr = ptr_x[14];
1620
910k
    inp_3qi = ptr_x[15];
1621
1622
910k
    mul_0qr = inp_0qr;
1623
910k
    mul_0qi = inp_0qi;
1624
1625
910k
    mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1626
910k
    mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1627
1628
910k
    mul_2qr = inp_2qi;
1629
910k
    mul_2qi = inp_2qr;
1630
1631
910k
    mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1632
910k
    mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1633
1634
910k
    sum_0qr = mul_0qr + mul_2qr;
1635
910k
    sum_0qi = mul_0qi - mul_2qi;
1636
910k
    sum_1qr = mul_0qr - mul_2qr;
1637
910k
    sum_1qi = mul_0qi + mul_2qi;
1638
910k
    sum_2qr = mul_1qr + mul_3qr;
1639
910k
    sum_2qi = mul_1qi + mul_3qi;
1640
910k
    sum_3qr = mul_1qr - mul_3qr;
1641
910k
    sum_3qi = mul_1qi - mul_3qi;
1642
1643
910k
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1644
910k
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1645
910k
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1646
910k
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1647
910k
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1648
910k
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1649
910k
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1650
910k
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1651
1652
910k
    ptr_y += 2;
1653
910k
    ptr_x += 16;
1654
1655
    /* 4th butter fly */
1656
1657
910k
    inp_0qr = ptr_x[2];
1658
910k
    inp_0qi = ptr_x[3];
1659
910k
    inp_1qr = ptr_x[6];
1660
910k
    inp_1qi = ptr_x[7];
1661
910k
    inp_2qr = ptr_x[10];
1662
910k
    inp_2qi = ptr_x[11];
1663
910k
    inp_3qr = ptr_x[14];
1664
910k
    inp_3qi = ptr_x[15];
1665
1666
910k
    mul_0qr = inp_0qr;
1667
910k
    mul_0qi = inp_0qi;
1668
1669
910k
    mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1670
910k
    mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1671
1672
910k
    mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1673
910k
    mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1674
1675
910k
    mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1676
910k
    mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1677
1678
910k
    sum_0qr = mul_0qr + (mul_2qr * 2);
1679
910k
    sum_0qi = mul_0qi + (mul_2qi * 2);
1680
910k
    sum_1qr = mul_0qr - (mul_2qr * 2);
1681
910k
    sum_1qi = mul_0qi - (mul_2qi * 2);
1682
1683
910k
    sum_2qr = mul_1qr + mul_3qr;
1684
910k
    sum_2qi = mul_1qi + mul_3qi;
1685
910k
    sum_3qr = mul_1qr - mul_3qr;
1686
910k
    sum_3qi = mul_1qi - mul_3qi;
1687
1688
910k
    ptr_y[0] = sum_0qr + (sum_2qr * 2);
1689
910k
    ptr_y[1] = sum_0qi + (sum_2qi * 2);
1690
910k
    ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1691
910k
    ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1692
910k
    ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1693
910k
    ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1694
910k
    ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1695
910k
    ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1696
910k
  }
1697
1698
  // Last stage of 32 point FFT
1699
910k
  {
1700
910k
    ptr_y = ptr_out;
1701
910k
    ptr_y[idx1] = interm_y[0] + interm_y[32];
1702
910k
    ptr_y[idx1 + 1] = interm_y[1] + interm_y[33];
1703
910k
    ptr_y[idx2] = interm_y[0] - interm_y[32];
1704
910k
    ptr_y[idx2 + 1] = interm_y[1] - interm_y[33];
1705
910k
    idx1 += FFT15X2;
1706
910k
    idx2 += FFT15X2;
1707
14.5M
    for (i = 1; i < FFT16; i++) {
1708
13.6M
      mul_r = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]) -
1709
13.6M
              (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]);
1710
13.6M
      mul_i = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]) +
1711
13.6M
              (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]);
1712
1713
13.6M
      mul_r = mul_r / 2;
1714
13.6M
      mul_i = mul_i / 2;
1715
13.6M
      ptr_y[idx1] = interm_y[2 * i + 0] + (mul_r * 2);
1716
13.6M
      ptr_y[idx1 + 1] = interm_y[2 * i + 1] + (mul_i * 2);
1717
13.6M
      ptr_y[idx2] = interm_y[2 * i + 0] - (mul_r * 2);
1718
13.6M
      ptr_y[idx2 + 1] = interm_y[2 * i + 1] - (mul_i * 2);
1719
13.6M
      idx1 += FFT15X2;
1720
13.6M
      idx2 += FFT15X2;
1721
13.6M
    }
1722
910k
  }
1723
910k
}
1724
1725
static VOID ixheaace_dec_rearrange_short_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out, WORD32 N,
1726
4.46M
                                             const WORD16 *ptr_re_arr_tab) {
1727
4.46M
  WORD32 n, i = 0;
1728
1729
111M
  for (n = 0; n < N; n++) {
1730
107M
    WORD32 idx = ptr_re_arr_tab[n] << 1;
1731
107M
    ptr_out[i++] = ptr_in[idx];
1732
107M
    ptr_out[i++] = ptr_in[idx + 1];
1733
107M
  }
1734
4.46M
}
1735
1736
3.56M
static VOID ixheaace_fft_5_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1737
3.56M
  FLOAT32 C51 = 0.951056516f;
1738
3.56M
  FLOAT32 C52 = -0.769420885f;
1739
3.56M
  FLOAT32 C53 = -0.363271264f;
1740
3.56M
  FLOAT32 C54 = 0.559016994f;
1741
3.56M
  FLOAT32 C55 = -0.625f;
1742
1743
3.56M
  FLOAT32 r1, r2, r3, r4;
1744
3.56M
  FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1745
1746
3.56M
  r1 = (ptr_in[2] + ptr_in[8]);
1747
3.56M
  r4 = (ptr_in[2] - ptr_in[8]);
1748
3.56M
  r3 = (ptr_in[4] + ptr_in[6]);
1749
3.56M
  r2 = (ptr_in[4] - ptr_in[6]);
1750
1751
3.56M
  t = ((r1 - r3) * C54);
1752
3.56M
  r1 = (r1 + r3);
1753
1754
3.56M
  temp1 = (ptr_in[0] + r1);
1755
3.56M
  r1 = (temp1 + (((r1 * C55)) * 2));
1756
1757
3.56M
  r3 = (r1 - t);
1758
3.56M
  r1 = (r1 + t);
1759
1760
3.56M
  t = ((r4 + r2) * C51);
1761
3.56M
  r4 = (t + ((r4 * C52) * 2));
1762
3.56M
  r2 = (t + (r2 * C53));
1763
1764
3.56M
  s1 = (ptr_in[3] + ptr_in[9]);
1765
3.56M
  s4 = (ptr_in[3] - ptr_in[9]);
1766
3.56M
  s3 = (ptr_in[5] + ptr_in[7]);
1767
3.56M
  s2 = (ptr_in[5] - ptr_in[7]);
1768
1769
3.56M
  t = ((s1 - s3) * C54);
1770
3.56M
  s1 = (s1 + s3);
1771
1772
3.56M
  temp2 = (ptr_in[1] + s1);
1773
1774
3.56M
  s1 = (temp2 + (((s1 * C55)) * 2));
1775
1776
3.56M
  s3 = (s1 - t);
1777
3.56M
  s1 = (s1 + t);
1778
1779
3.56M
  t = ((s4 + s2) * C51);
1780
3.56M
  s4 = (t + (((s4 * C52)) * 2));
1781
3.56M
  s2 = (t + ((s2 * C53)));
1782
1783
3.56M
  ptr_out[0] = temp1;
1784
3.56M
  ptr_out[1] = temp2;
1785
3.56M
  ptr_out[2] = (r1 + s2);
1786
3.56M
  ptr_out[3] = (s1 - r2);
1787
3.56M
  ptr_out[4] = (r3 - s4);
1788
3.56M
  ptr_out[5] = (s3 + r4);
1789
3.56M
  ptr_out[6] = (r3 + s4);
1790
3.56M
  ptr_out[7] = (s3 - r4);
1791
3.56M
  ptr_out[8] = (r1 - s2);
1792
3.56M
  ptr_out[9] = (s1 + r2);
1793
3.56M
}
1794
1795
5.94M
static VOID ixheaace_fft_3_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1796
5.94M
  FLOAT32 add_r, sub_r;
1797
5.94M
  FLOAT32 add_i, sub_i;
1798
5.94M
  FLOAT32 x_01_r, x_01_i, temp;
1799
1800
5.94M
  FLOAT32 p1, p2, p3, p4;
1801
5.94M
  FLOAT32 sinmu = 0.866025404f;
1802
1803
5.94M
  x_01_r = (ptr_in[0] + ptr_in[2]);
1804
5.94M
  x_01_i = (ptr_in[1] + ptr_in[3]);
1805
1806
5.94M
  add_r = (ptr_in[2] + ptr_in[4]);
1807
5.94M
  add_i = (ptr_in[3] + ptr_in[5]);
1808
1809
5.94M
  sub_r = (ptr_in[2] - ptr_in[4]);
1810
5.94M
  sub_i = (ptr_in[3] - ptr_in[5]);
1811
1812
5.94M
  p1 = add_r / 2;
1813
5.94M
  p2 = (sub_i * sinmu);
1814
5.94M
  p3 = (sub_r * sinmu);
1815
5.94M
  p4 = add_i / 2;
1816
1817
5.94M
  temp = (ptr_in[0] - p1);
1818
1819
5.94M
  ptr_out[0] = (x_01_r + ptr_in[4]);
1820
5.94M
  ptr_out[1] = (x_01_i + ptr_in[5]);
1821
5.94M
  ptr_out[2] = (temp + p2);
1822
5.94M
  ptr_out[3] = ((ptr_in[1] - p3) - p4);
1823
5.94M
  ptr_out[4] = (temp - p2);
1824
5.94M
  ptr_out[5] = ((ptr_in[1] + p3) - p4);
1825
5.94M
}
1826
1827
static VOID ixheaace_pre_twiddle_120(FLOAT32 *ptr_in, FLOAT32 *ptr_data, WORD32 n,
1828
297k
                                     const FLOAT32 *ptr_cos_sin_tbl) {
1829
297k
  WORD npoints_4, i;
1830
297k
  FLOAT32 tempr, tempi, temp;
1831
297k
  FLOAT32 c, c1, s, s1;
1832
297k
  FLOAT32 *ptr_in1, *ptr_in2;
1833
297k
  FLOAT32 *ptr_x = ptr_in + (n - 1);
1834
1835
297k
  npoints_4 = n >> 2;
1836
1837
297k
  ptr_in1 = ptr_data;
1838
297k
  ptr_in2 = ptr_data + n - 1;
1839
1840
9.21M
  for (i = 0; i < npoints_4; i++) {
1841
8.92M
    c = *ptr_cos_sin_tbl++;
1842
8.92M
    s = *ptr_cos_sin_tbl++;
1843
1844
8.92M
    tempr = *ptr_in1++;
1845
8.92M
    tempi = *ptr_in2--;
1846
1847
8.92M
    temp = -((tempr * c) + (tempi * s));
1848
8.92M
    *ptr_in++ = temp;
1849
1850
8.92M
    temp = -((tempi * c) - (tempr * s));
1851
8.92M
    *ptr_in++ = temp;
1852
1853
8.92M
    c1 = *ptr_cos_sin_tbl++;
1854
8.92M
    s1 = *ptr_cos_sin_tbl++;
1855
1856
8.92M
    tempi = *ptr_in1++;
1857
8.92M
    tempr = *ptr_in2--;
1858
1859
8.92M
    temp = -((tempi * c1) - (tempr * s1));
1860
8.92M
    *ptr_x-- = temp;
1861
1862
8.92M
    temp = -((tempr * c1) + (tempi * s1));
1863
8.92M
    *ptr_x-- = temp;
1864
8.92M
  }
1865
297k
}
1866
1867
static VOID ixheaace_post_twiddle_120(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
1868
297k
                                      const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
1869
297k
  WORD i;
1870
297k
  FLOAT32 c, c1, s, s1;
1871
297k
  FLOAT32 tempr, tempi, temp;
1872
297k
  FLOAT32 *ptr_in2 = ptr_x + (m - 1);
1873
297k
  FLOAT32 *ptr_in1 = ptr_x;
1874
297k
  FLOAT32 *ptr_x1 = ptr_out;
1875
297k
  FLOAT32 *ptr_x2 = ptr_out + (m - 1);
1876
1877
9.21M
  for (i = 0; i < m; i += 4) {
1878
8.92M
    c = *ptr_cos_sin_tbl++;
1879
8.92M
    s = *ptr_cos_sin_tbl++;
1880
8.92M
    c1 = *ptr_cos_sin_tbl++;
1881
8.92M
    s1 = *ptr_cos_sin_tbl++;
1882
1883
8.92M
    tempr = *ptr_in1++;
1884
8.92M
    tempi = *ptr_in1++;
1885
1886
8.92M
    temp = -((tempr * s) - (tempi * c));
1887
8.92M
    *ptr_x2-- = temp;
1888
1889
8.92M
    temp = -((tempr * c) + (tempi * s));
1890
8.92M
    *ptr_x1++ = temp;
1891
1892
8.92M
    tempi = *ptr_in2--;
1893
8.92M
    tempr = *ptr_in2--;
1894
1895
8.92M
    temp = -((tempr * s1) - (tempi * c1));
1896
8.92M
    *ptr_x1++ = temp;
1897
1898
8.92M
    temp = -((tempr * c1) + (tempi * s1));
1899
8.92M
    *ptr_x2-- = temp;
1900
8.92M
  }
1901
297k
}
1902
1903
1.18M
static VOID ixheaace_fft_960_15(FLOAT32 *ptr_in_flt, FLOAT32 *ptr_out_flt) {
1904
1.18M
  WORD32 i;
1905
1.18M
  FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1906
1.18M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_5);
1907
1908
1.18M
  ptr_buf1_flt = ptr_out_flt;
1909
1.18M
  ptr_buf2_flt = ptr_in_flt;
1910
4.75M
  for (i = 0; i < FFT3; i++) {
1911
3.56M
    ixheaace_fft_5_flt(ptr_buf1_flt, ptr_buf2_flt);
1912
1913
3.56M
    ptr_buf1_flt += (FFT5 * 2);
1914
3.56M
    ptr_buf2_flt += (FFT5 * 2);
1915
3.56M
  }
1916
1917
1.18M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_3);
1918
1.18M
  ptr_buf1_flt = ptr_out_flt;
1919
1.18M
  ptr_buf2_flt = ptr_in_flt;
1920
7.13M
  for (i = 0; i < FFT5; i++) {
1921
5.94M
    ixheaace_fft_3_flt(ptr_buf1_flt, ptr_buf2_flt);
1922
1923
5.94M
    ptr_buf1_flt += (FFT3 * 2);
1924
5.94M
    ptr_buf2_flt += (FFT3 * 2);
1925
5.94M
  }
1926
1927
1.18M
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_sml);
1928
1.18M
}
1929
1930
297k
static VOID ixheaace_fft_120(WORD32 npoints, FLOAT32 *ptr_x_flt, FLOAT32 *ptr_y_flt) {
1931
297k
  WORD32 i;
1932
297k
  FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1933
297k
  FLOAT32 *ptr_in_flt, *ptr_out_flt;
1934
1935
297k
  ptr_in_flt = ptr_x_flt;
1936
297k
  ptr_out_flt = ptr_y_flt;
1937
297k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_4);
1938
1939
297k
  ptr_buf1_flt = ptr_out_flt;
1940
297k
  ptr_buf2_flt = ptr_in_flt;
1941
1942
4.75M
  for (i = 0; i < FFT15; i++) {
1943
4.46M
    {
1944
4.46M
      FLOAT32 x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7;
1945
4.46M
      FLOAT32 *y0, *y1, *y2, *y3;
1946
4.46M
      FLOAT32 *x0;
1947
4.46M
      FLOAT32 xh0_0, xh1_0, xh0_1, xh1_1, xl0_0, xl1_0, xl0_1, xl1_1;
1948
4.46M
      WORD32 h2;
1949
4.46M
      FLOAT32 n00, n01, n10, n11, n20, n21, n30, n31;
1950
1951
4.46M
      ptr_x_flt = ptr_buf1_flt;
1952
4.46M
      ptr_y_flt = ptr_buf2_flt;
1953
4.46M
      npoints = 4;
1954
4.46M
      h2 = 0;
1955
1956
4.46M
      y0 = ptr_y_flt;
1957
4.46M
      y2 = ptr_y_flt + (WORD32)npoints;
1958
4.46M
      x0 = ptr_x_flt;
1959
4.46M
      y1 = y0 + (WORD32)(npoints >> 1);
1960
4.46M
      y3 = y2 + (WORD32)(npoints >> 1);
1961
1962
4.46M
      x_0 = x0[0];
1963
4.46M
      x_1 = x0[1];
1964
4.46M
      x_2 = x0[2];
1965
4.46M
      x_3 = x0[3];
1966
4.46M
      x_4 = x0[4];
1967
4.46M
      x_5 = x0[5];
1968
4.46M
      x_6 = x0[6];
1969
4.46M
      x_7 = x0[7];
1970
1971
4.46M
      xh0_0 = x_0 + x_4;
1972
4.46M
      xh1_0 = x_1 + x_5;
1973
4.46M
      xl0_0 = x_0 - x_4;
1974
4.46M
      xl1_0 = x_1 - x_5;
1975
4.46M
      xh0_1 = x_2 + x_6;
1976
4.46M
      xh1_1 = x_3 + x_7;
1977
4.46M
      xl0_1 = x_2 - x_6;
1978
4.46M
      xl1_1 = x_3 - x_7;
1979
1980
4.46M
      n00 = xh0_0 + xh0_1;
1981
4.46M
      n01 = xh1_0 + xh1_1;
1982
4.46M
      n10 = xl0_0 + xl1_1;
1983
4.46M
      n11 = xl1_0 - xl0_1;
1984
4.46M
      n20 = xh0_0 - xh0_1;
1985
4.46M
      n21 = xh1_0 - xh1_1;
1986
4.46M
      n30 = xl0_0 - xl1_1;
1987
4.46M
      n31 = xl1_0 + xl0_1;
1988
1989
4.46M
      y0[2 * h2] = n00;
1990
4.46M
      y0[2 * h2 + 1] = n01;
1991
4.46M
      y1[2 * h2] = n10;
1992
4.46M
      y1[2 * h2 + 1] = n11;
1993
4.46M
      y2[2 * h2] = n20;
1994
4.46M
      y2[2 * h2 + 1] = n21;
1995
4.46M
      y3[2 * h2] = n30;
1996
4.46M
      y3[2 * h2 + 1] = n31;
1997
4.46M
    }
1998
1999
4.46M
    ptr_buf1_flt += (FFT4 * 2);
2000
4.46M
    ptr_buf2_flt += (FFT4 * 2);
2001
4.46M
  }
2002
2003
297k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_15_4);
2004
2005
297k
  ptr_buf1_flt = ptr_out_flt;
2006
297k
  ptr_buf2_flt = ptr_in_flt;
2007
1.48M
  for (i = 0; i < FFT4; i++) {
2008
1.18M
    ixheaace_fft_960_15(ptr_buf1_flt, ptr_buf2_flt);
2009
1.18M
    ptr_buf1_flt += (FFT15 * 2);
2010
1.18M
    ptr_buf2_flt += (FFT15 * 2);
2011
1.18M
  }
2012
2013
297k
  ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_120);
2014
297k
}
2015
2016
60.7k
static VOID ixheaace_cfft_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op) {
2017
60.7k
  WORD32 i;
2018
60.7k
  FLOAT32 *ptr_buf1, *ptr_buf2;
2019
60.7k
  FLOAT32 fft5_out[FFT15X2] = {0};
2020
2021
60.7k
  ptr_buf1 = ptr_inp;
2022
60.7k
  ptr_buf2 = ptr_op;
2023
2024
2.00M
  for (i = 0; i < FFT32; i++) {
2025
1.94M
    ixheaace_cfft_15_480(ptr_buf1, ptr_buf2, &fft5_out[0]);
2026
1.94M
    ptr_buf1 += 2;
2027
1.94M
    ptr_buf2 += 2;
2028
1.94M
  }
2029
2030
60.7k
  ixheaace_cfft_twiddle_mult(ptr_op, ptr_inp, FFT15, FFT32, ixheaace_fft_mix_rad_twid_tbl_480,
2031
60.7k
                             ixheaace_fft_mix_rad_twid_h_tbl_480);
2032
2033
60.7k
  ptr_buf1 = ptr_inp;
2034
60.7k
  ptr_buf2 = ptr_op;
2035
2036
971k
  for (i = 0; i < FFT15; i++) {
2037
910k
    ixheaace_cfft_32_480(ptr_buf1, ptr_buf2);
2038
910k
    ptr_buf1 += (FFT32X2);
2039
910k
    ptr_buf2 += 2;
2040
910k
  }
2041
60.7k
}
2042
2043
static VOID ixheaace_pre_twiddle_960(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
2044
60.7k
                                     const FLOAT32 *ptr_cos_sin_tbl) {
2045
60.7k
  WORD npoints_4, i;
2046
60.7k
  FLOAT32 tempr, tempi, temp;
2047
60.7k
  FLOAT32 c, c1, s, s1;
2048
60.7k
  FLOAT32 *ptr_in_1, *ptr_in_2;
2049
60.7k
  FLOAT32 *ptr_x_1 = ptr_x + (n - 1);
2050
2051
60.7k
  npoints_4 = n >> 2;
2052
2053
60.7k
  ptr_in_1 = ptr_data;
2054
60.7k
  ptr_in_2 = ptr_data + n - 1;
2055
2056
14.6M
  for (i = 0; i < npoints_4; i++) {
2057
14.5M
    c = *ptr_cos_sin_tbl++;
2058
14.5M
    s = *ptr_cos_sin_tbl++;
2059
2060
14.5M
    tempr = *ptr_in_1++;
2061
14.5M
    tempi = *ptr_in_2--;
2062
2063
14.5M
    temp = -((tempr * c) + (tempi * s));
2064
14.5M
    *ptr_x++ = temp;
2065
2066
14.5M
    temp = -((tempi * c) - (tempr * s));
2067
14.5M
    *ptr_x++ = temp;
2068
2069
14.5M
    c1 = *ptr_cos_sin_tbl++;
2070
14.5M
    s1 = *ptr_cos_sin_tbl++;
2071
2072
14.5M
    tempi = *ptr_in_1++;
2073
14.5M
    tempr = *ptr_in_2--;
2074
2075
14.5M
    temp = -((tempi * c1) - (tempr * s1));
2076
14.5M
    *ptr_x_1-- = temp;
2077
2078
14.5M
    temp = -((tempr * c1) + (tempi * s1));
2079
14.5M
    *ptr_x_1-- = temp;
2080
14.5M
  }
2081
60.7k
}
2082
2083
static VOID ixheaace_post_twiddle_960(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
2084
60.7k
                                      const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
2085
60.7k
  WORD i;
2086
60.7k
  FLOAT32 c, c1, s, s1;
2087
60.7k
  FLOAT32 tempr, tempi, temp;
2088
60.7k
  FLOAT32 *ptr_in2 = ptr_x + (m - 1);
2089
60.7k
  FLOAT32 *ptr_in1 = ptr_x;
2090
60.7k
  FLOAT32 *ptr_x1 = ptr_out;
2091
60.7k
  FLOAT32 *ptr_x2 = ptr_out + (m - 1);
2092
2093
14.6M
  for (i = 0; i < m; i += 4) {
2094
14.5M
    c = *ptr_cos_sin_tbl++;
2095
14.5M
    s = *ptr_cos_sin_tbl++;
2096
14.5M
    c1 = *ptr_cos_sin_tbl++;
2097
14.5M
    s1 = *ptr_cos_sin_tbl++;
2098
2099
14.5M
    tempr = *ptr_in1++;
2100
14.5M
    tempi = *ptr_in1++;
2101
2102
14.5M
    temp = -((tempr * s) - (tempi * c));
2103
14.5M
    *ptr_x2-- = temp;
2104
2105
14.5M
    temp = -((tempr * c) + (tempi * s));
2106
14.5M
    *ptr_x1++ = temp;
2107
2108
14.5M
    tempi = *ptr_in2--;
2109
14.5M
    tempr = *ptr_in2--;
2110
2111
14.5M
    temp = -((tempr * s1) - (tempi * c1));
2112
14.5M
    *ptr_x1++ = temp;
2113
2114
14.5M
    temp = -((tempr * c1) + (tempi * s1));
2115
14.5M
    *ptr_x2-- = temp;
2116
14.5M
  }
2117
60.7k
}
2118
2119
60.7k
static VOID ixheaace_mdct_960(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2120
60.7k
  FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2121
60.7k
  FLOAT32 const_mult_fac = 3.142857143f;
2122
60.7k
  FLOAT32 *ptr_data = ptr_input_flt;
2123
60.7k
  WORD32 k;
2124
2125
60.7k
  memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * FRAME_LEN_960);
2126
60.7k
  ixheaace_pre_twiddle_960(ptr_input_flt, ptr_scratch_flt, FRAME_LEN_960, cos_sin_table_flt);
2127
2128
60.7k
  ixheaace_cfft_480(ptr_input_flt, ptr_scratch_flt);
2129
2130
60.7k
  ixheaace_post_twiddle_960(ptr_input_flt, ptr_scratch_flt, cos_sin_table_flt, FRAME_LEN_960);
2131
2132
29.1M
  for (k = FRAME_LEN_960 - 1; k >= 0; k -= 2) {
2133
29.1M
    *ptr_data = (*ptr_data * const_mult_fac);
2134
29.1M
    ptr_data++;
2135
29.1M
    *ptr_data = (*ptr_data * const_mult_fac);
2136
29.1M
    ptr_data++;
2137
29.1M
  }
2138
60.7k
}
2139
2140
297k
static VOID ixheaace_mdct_120(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2141
297k
  WORD32 n, k;
2142
297k
  WORD32 n_by_2;
2143
297k
  FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2144
297k
  FLOAT32 const_mltfac = 3.142857143f;
2145
297k
  FLOAT32 *ptr_data = ptr_input_flt;
2146
297k
  n = 120;
2147
297k
  n_by_2 = n >> 1;
2148
297k
  memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * n);
2149
2150
297k
  ixheaace_pre_twiddle_120(ptr_input_flt, ptr_scratch_flt, n, ixheaace_cosine_array_240);
2151
2152
297k
  ixheaace_fft_120(n_by_2, ptr_input_flt, ptr_scratch_flt);
2153
2154
297k
  ixheaace_post_twiddle_120(ptr_input_flt, ptr_scratch_flt, ixheaace_cosine_array_240, n);
2155
2156
18.1M
  for (k = n - 1; k >= 0; k -= 2) {
2157
17.8M
    *ptr_data = (*ptr_data * const_mltfac);
2158
17.8M
    ptr_data++;
2159
17.8M
    *ptr_data = (*ptr_data * const_mltfac);
2160
17.8M
    ptr_data++;
2161
17.8M
  }
2162
297k
}
2163
2164
static VOID ixheaace_mdct(FLOAT32 *ptr_dct_data, const FLOAT32 *ptr_trig_data,
2165
                          const FLOAT32 *ptr_sine_window, WORD32 n, WORD32 ld_n,
2166
567k
                          WORD8 *ptr_scratch) {
2167
567k
  ixheaace_pre_mdct(ptr_dct_data, n, ptr_sine_window);
2168
2169
567k
  ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
2170
567k
  ia_enhaacplus_enc_complex_fft(ptr_dct_data, n / 2, pstr_scratch);
2171
2172
567k
  ixheaace_post_mdct(ptr_dct_data, n, ptr_trig_data,
2173
567k
                     1 << (LD_FFT_TWIDDLE_TABLE_SIZE - (ld_n - 1)), FFT_TWIDDLE_TABLE_SIZE);
2174
567k
}
2175
2176
static VOID ixheaace_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
2177
                                             const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2178
283k
                                             WORD32 frame_len) {
2179
283k
  WORD32 i;
2180
283k
  WORD32 blk_switch_offset = frame_len;
2181
283k
  switch (frame_len) {
2182
162k
    case FRAME_LEN_1024:
2183
162k
      blk_switch_offset = BLK_SWITCH_OFFSET_LC_128;
2184
162k
      memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2185
162k
              (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2186
162k
      break;
2187
2188
97.8k
    case FRAME_LEN_960:
2189
97.8k
      blk_switch_offset = BLK_SWITCH_OFFSET_LC_120;
2190
97.8k
      memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2191
97.8k
              (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2192
97.8k
      break;
2193
2194
23.1k
    case FRAME_LEN_512:
2195
23.1k
    case FRAME_LEN_480:
2196
23.1k
      blk_switch_offset = frame_len;
2197
23.1k
      break;
2198
283k
  }
2199
2200
272M
  for (i = 0; i < frame_len; i++) {
2201
272M
    ptr_mdct_delay_buffer[blk_switch_offset - frame_len + i] = ptr_time_signal[i * ch_increment];
2202
272M
  }
2203
283k
}
2204
2205
VOID ixheaace_transform_real_lc_ld(FLOAT32 *ptr_mdct_delay_buffer, const FLOAT32 *ptr_time_signal,
2206
                                   WORD32 ch_increment, FLOAT32 *ptr_real_out, WORD32 block_type,
2207
283k
                                   WORD32 frame_len, WORD8 *ptr_scratch) {
2208
283k
  WORD32 i, w;
2209
283k
  FLOAT32 ws1, ws2;
2210
283k
  FLOAT32 *ptr_dct_in;
2211
283k
  WORD32 frame_len_short = FRAME_LEN_SHORT_128;
2212
283k
  WORD32 ls_trans = LS_TRANS_128;
2213
283k
  WORD32 trans_offset = TRANSFORM_OFFSET_SHORT_128;
2214
283k
  const FLOAT32 *ptr_window;
2215
283k
  if (frame_len == FRAME_LEN_960) {
2216
97.8k
    ls_trans = LS_TRANS_120;
2217
97.8k
    trans_offset = TRANSFORM_OFFSET_SHORT_120;
2218
97.8k
    frame_len_short = FRAME_LEN_SHORT_120;
2219
97.8k
  }
2220
283k
  switch (block_type) {
2221
154k
    case LONG_WINDOW:
2222
154k
      ptr_dct_in = ptr_real_out;
2223
154k
      ptr_window = &long_window_KBD[0];
2224
154k
      switch (frame_len) {
2225
88.9k
        case FRAME_LEN_1024:
2226
88.9k
          ptr_window = &long_window_KBD[0];
2227
88.9k
          break;
2228
2229
42.2k
        case FRAME_LEN_960:
2230
42.2k
          ptr_window = &long_window_sine_960[0];
2231
42.2k
          break;
2232
2233
23.1k
        case FRAME_LEN_512:
2234
23.1k
          ptr_window = &long_window_sine_ld[0];
2235
23.1k
          break;
2236
2237
0
        case FRAME_LEN_480:
2238
0
          ptr_window = &long_window_sine_ld_480[0];
2239
0
          break;
2240
154k
      }
2241
71.9M
      for (i = 0; i < frame_len / 2; i++) {
2242
71.7M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2243
2244
71.7M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2245
2246
71.7M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2247
71.7M
      }
2248
2249
154k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2250
154k
                                       frame_len);
2251
2252
71.9M
      for (i = 0; i < frame_len / 2; i++) {
2253
71.7M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2254
2255
71.7M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2256
2257
71.7M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2258
71.7M
      }
2259
154k
      switch (frame_len) {
2260
88.9k
        case FRAME_LEN_1024:
2261
88.9k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10,
2262
88.9k
                        ptr_scratch);
2263
88.9k
          break;
2264
2265
42.2k
        case FRAME_LEN_960:
2266
42.2k
          ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2267
42.2k
          break;
2268
2269
23.1k
        case FRAME_LEN_512:
2270
23.1k
        case FRAME_LEN_480:
2271
23.1k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, ptr_window, frame_len, 9, ptr_scratch);
2272
23.1k
          break;
2273
154k
      }
2274
154k
      break;
2275
2276
154k
    case START_WINDOW:
2277
26.4k
      ptr_dct_in = ptr_real_out;
2278
26.4k
      ptr_window = &long_window_KBD[0];
2279
26.4k
      switch (frame_len) {
2280
16.7k
        case FRAME_LEN_1024:
2281
16.7k
          ptr_window = &long_window_KBD[0];
2282
16.7k
          break;
2283
2284
9.71k
        case FRAME_LEN_960:
2285
9.71k
          ptr_window = &long_window_sine_960[0];
2286
9.71k
          break;
2287
26.4k
      }
2288
13.2M
      for (i = 0; i < frame_len / 2; i++) {
2289
13.2M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2290
2291
13.2M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2292
2293
13.2M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2294
13.2M
      }
2295
2296
26.4k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2297
26.4k
                                       frame_len);
2298
2299
26.4k
      if (frame_len == FRAME_LEN_1024) {
2300
16.7k
        ptr_window = &short_window_sine[0];
2301
16.7k
      } else if (frame_len == FRAME_LEN_960) {
2302
9.71k
        ptr_window = &short_window_sine_120[0];
2303
9.71k
      }
2304
11.5M
      for (i = 0; i < ls_trans; i++) {
2305
11.5M
        ws1 = ptr_mdct_delay_buffer[i];
2306
11.5M
        ws2 = 0.0f;
2307
2308
11.5M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2309
11.5M
      }
2310
2311
1.67M
      for (i = 0; i < frame_len_short / 2; i++) {
2312
1.65M
        ws1 = ptr_mdct_delay_buffer[i + ls_trans] * ptr_window[frame_len_short - i - 1];
2313
2314
1.65M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1 - ls_trans)] * ptr_window[i];
2315
2316
1.65M
        ptr_dct_in[frame_len / 2 - i - 1 - ls_trans] = -(ws1 + ws2);
2317
1.65M
      }
2318
26.4k
      if (frame_len == FRAME_LEN_960) {
2319
9.71k
        ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2320
16.7k
      } else {
2321
16.7k
        ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2322
16.7k
      }
2323
2324
26.4k
      break;
2325
2326
23.4k
    case STOP_WINDOW:
2327
23.4k
      ptr_window = &long_window_KBD[0];
2328
23.4k
      ptr_dct_in = ptr_real_out;
2329
23.4k
      if (frame_len == FRAME_LEN_1024) {
2330
14.8k
        ptr_window = &short_window_sine[0];
2331
14.8k
      } else if (frame_len == FRAME_LEN_960) {
2332
8.69k
        ptr_window = &short_window_sine_120[0];
2333
8.69k
      }
2334
10.3M
      for (i = 0; i < ls_trans; i++) {
2335
10.2M
        ws1 = 0.0f;
2336
10.2M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)];
2337
10.2M
        ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2338
10.2M
      }
2339
2340
1.49M
      for (i = 0; i < frame_len_short / 2; i++) {
2341
1.46M
        ws1 = ptr_mdct_delay_buffer[(i + ls_trans)] * ptr_window[i];
2342
2343
1.46M
        ws2 = ptr_mdct_delay_buffer[(frame_len - ls_trans - i - 1)] *
2344
1.46M
              ptr_window[frame_len_short - i - 1];
2345
2346
1.46M
        ptr_dct_in[frame_len / 2 + i + ls_trans] = ws1 - ws2;
2347
1.46M
      }
2348
2349
23.4k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2350
23.4k
                                       frame_len);
2351
2352
23.4k
      if (frame_len == FRAME_LEN_1024) {
2353
14.8k
        ptr_window = &long_window_KBD[0];
2354
14.8k
      } else if (frame_len == FRAME_LEN_960) {
2355
8.69k
        ptr_window = &long_window_sine_960[0];
2356
8.69k
      }
2357
11.7M
      for (i = 0; i < frame_len / 2; i++) {
2358
11.7M
        ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2359
2360
11.7M
        ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2361
2362
11.7M
        ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2363
11.7M
      }
2364
2365
23.4k
      if (frame_len == FRAME_LEN_960) {
2366
8.69k
        ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2367
14.8k
      } else {
2368
14.8k
        ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2369
14.8k
      }
2370
2371
23.4k
      break;
2372
2373
79.7k
    case SHORT_WINDOW:
2374
79.7k
      ptr_window = &short_window_sine[0];
2375
79.7k
      if (frame_len == FRAME_LEN_1024) {
2376
42.5k
        ptr_window = &short_window_sine[0];
2377
42.5k
      } else if (frame_len == FRAME_LEN_960) {
2378
37.1k
        ptr_window = &short_window_sine_120[0];
2379
37.1k
      }
2380
717k
      for (w = 0; w < TRANS_FAC; w++) {
2381
637k
        ptr_dct_in = ptr_real_out + w * frame_len_short;
2382
2383
40.2M
        for (i = 0; i < frame_len_short / 2; i++) {
2384
39.6M
          ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + i] * ptr_window[i];
2385
2386
39.6M
          ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short - i -
2387
39.6M
                                      1] *
2388
39.6M
                ptr_window[frame_len_short - i - 1];
2389
2390
39.6M
          ptr_dct_in[frame_len_short / 2 + i] = ws1 - ws2;
2391
2392
39.6M
          ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short + i] *
2393
39.6M
                ptr_window[frame_len_short - i - 1];
2394
2395
39.6M
          ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short * 2 -
2396
39.6M
                                      i - 1] *
2397
39.6M
                ptr_window[i];
2398
2399
39.6M
          ptr_dct_in[frame_len_short / 2 - i - 1] = -(ws1 + ws2);
2400
39.6M
        }
2401
637k
        if (frame_len == FRAME_LEN_960) {
2402
297k
          ixheaace_mdct_120(ptr_dct_in, ptr_scratch);
2403
340k
        } else {
2404
340k
          ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, short_window_sine, frame_len_short, 7,
2405
340k
                        ptr_scratch);
2406
340k
        }
2407
637k
      }
2408
2409
79.7k
      ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2410
79.7k
                                       frame_len);
2411
79.7k
      break;
2412
283k
  }
2413
283k
}
2414
2415
VOID ia_enhaacplus_enc_transform_real_eld(FLOAT32 *ptr_mdct_delay_buffer,
2416
                                          const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2417
                                          FLOAT32 *ptr_real_out, WORD8 *ptr_shared_buffer5,
2418
83.6k
                                          WORD32 frame_len) {
2419
83.6k
  WORD32 i, loop_len;
2420
83.6k
  FLOAT32 w1, w2;
2421
83.6k
  FLOAT32 *ptr_curr_data, *ptr_prev1_data, *ptr_prev2_data, *ptr_prev3_data;
2422
83.6k
  const FLOAT32 *ptr_win0, *ptr_win1, *ptr_win2, *ptr_win3;
2423
2424
83.6k
  loop_len = frame_len / 4;
2425
2426
83.6k
  ptr_curr_data = &ptr_mdct_delay_buffer[3 * frame_len];
2427
83.6k
  ptr_prev1_data = &ptr_mdct_delay_buffer[2 * frame_len];
2428
83.6k
  ptr_prev2_data = &ptr_mdct_delay_buffer[frame_len];
2429
83.6k
  ptr_prev3_data = &ptr_mdct_delay_buffer[0];
2430
2431
83.6k
  ptr_win0 = &low_delay_window_eld[0];
2432
83.6k
  ptr_win1 = &low_delay_window_eld[frame_len];
2433
83.6k
  ptr_win2 = &low_delay_window_eld[2 * frame_len];
2434
83.6k
  ptr_win3 = &low_delay_window_eld[3 * frame_len];
2435
2436
83.6k
  memmove(&ptr_mdct_delay_buffer[0], &ptr_mdct_delay_buffer[frame_len],
2437
83.6k
          (3 * frame_len) * sizeof(ptr_mdct_delay_buffer[0]));
2438
2439
42.9M
  for (i = 0; i < frame_len; i++) {
2440
42.8M
    ptr_curr_data[i] = ptr_time_signal[i * ch_increment];
2441
42.8M
  }
2442
2443
10.7M
  for (i = 0; i < loop_len; i++) {
2444
10.7M
    w1 = ptr_prev3_data[(frame_len / 2) + loop_len + i] * ptr_win3[(frame_len / 2) - 1 - i];
2445
10.7M
    w1 += ptr_prev3_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win3[(frame_len / 2) + i];
2446
2447
10.7M
    w2 = (-ptr_prev1_data[(frame_len / 2) + loop_len + i] * ptr_win1[(frame_len / 2) - 1 - i]);
2448
10.7M
    w2 += (-ptr_prev1_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win1[(frame_len / 2) + i]);
2449
2450
10.7M
    ptr_real_out[i] = w1 + w2;
2451
10.7M
  }
2452
2453
10.7M
  for (i = 0; i < loop_len; i++) {
2454
10.7M
    w1 = (-ptr_prev2_data[(frame_len / 2) + loop_len + i] * ptr_win2[(frame_len / 2) - 1 - i]);
2455
10.7M
    w1 += ptr_prev2_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win2[(frame_len / 2) + i];
2456
2457
10.7M
    w2 = ptr_curr_data[(frame_len / 2) + loop_len + i] * ptr_win0[(frame_len / 2) - 1 - i];
2458
10.7M
    w2 += (-ptr_curr_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win0[(frame_len / 2) + i]);
2459
2460
10.7M
    ptr_real_out[frame_len - 1 - i] = w1 + w2;
2461
10.7M
  }
2462
2463
10.7M
  for (i = 0; i < loop_len; i++) {
2464
10.7M
    w1 = ptr_prev2_data[loop_len - 1 - i] * ptr_win3[i];
2465
10.7M
    w1 += ptr_prev3_data[loop_len + i] * ptr_win3[frame_len - 1 - i];
2466
2467
10.7M
    w2 = (-ptr_curr_data[loop_len - 1 - i] * ptr_win1[i]);
2468
10.7M
    w2 += (-ptr_prev1_data[loop_len + i] * ptr_win1[frame_len - 1 - i]);
2469
2470
10.7M
    ptr_real_out[(frame_len / 2) - 1 - i] = w1 + w2;
2471
10.7M
  }
2472
2473
10.7M
  for (i = 0; i < loop_len; i++) {
2474
10.7M
    w1 = -(ptr_prev1_data[loop_len - 1 - i] * ptr_win2[i]);
2475
10.7M
    w1 += ptr_prev2_data[loop_len + i] * ptr_win2[frame_len - 1 - i];
2476
2477
    /* First 128 coeffcients are zeros in the window table so they are not used in the code here*/
2478
10.7M
    w2 = (-ptr_curr_data[loop_len + i] * ptr_win0[frame_len - 1 - i]);
2479
2480
10.7M
    ptr_real_out[(frame_len / 2) + i] = w1 + w2;
2481
10.7M
  }
2482
2483
83.6k
  ixheaace_mdct(ptr_real_out, fft_twiddle_tab, long_window_sine_ld, frame_len, 9,
2484
83.6k
                ptr_shared_buffer5);
2485
83.6k
}