/src/libxaac/encoder/ixheaace_fft.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * * |
3 | | * Copyright (C) 2023 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | #include <string.h> |
22 | | |
23 | | #include "ixheaac_type_def.h" |
24 | | #include "ixheaac_constants.h" |
25 | | #include "ixheaace_psy_const.h" |
26 | | #include "ixheaace_tns.h" |
27 | | #include "ixheaace_tns_params.h" |
28 | | #include "ixheaace_rom.h" |
29 | | #include "ixheaace_common_rom.h" |
30 | | #include "ixheaace_bitbuffer.h" |
31 | | #include "ixheaace_aac_constants.h" |
32 | | #include "ixheaace_fft.h" |
33 | | #include "ixheaac_basic_ops32.h" |
34 | | #include "ixheaac_basic_ops40.h" |
35 | | #include "ixheaac_basic_ops.h" |
36 | | #include "iusace_basic_ops_flt.h" |
37 | | |
38 | | static VOID ia_enhaacplus_enc_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer, |
39 | | const FLOAT32 *ptr_time_signal, |
40 | | WORD32 ch_increment, |
41 | 93.9k | WORD32 long_frame_len) { |
42 | 93.9k | WORD32 i; |
43 | 93.9k | FLOAT32 *ptr_mdct_buff = ptr_mdct_delay_buffer; |
44 | 93.9k | if (ch_increment == 2) { |
45 | 92.8k | const FLOAT32 *ptr_input = ptr_time_signal; |
46 | 92.8k | FLOAT32 temp1, temp2, temp3, temp4; |
47 | 92.8k | temp1 = *ptr_input++; |
48 | 92.8k | ptr_input++; |
49 | 92.8k | temp2 = *ptr_input++; |
50 | 92.8k | ptr_input++; |
51 | 92.8k | temp3 = *ptr_input++; |
52 | 92.8k | ptr_input++; |
53 | 11.1M | for (i = ((long_frame_len >> 2) - 2); i >= 0; i--) { |
54 | 11.0M | *ptr_mdct_buff++ = temp1; |
55 | 11.0M | temp4 = *ptr_input++; |
56 | 11.0M | ptr_input++; |
57 | | |
58 | 11.0M | *ptr_mdct_buff++ = temp2; |
59 | 11.0M | *ptr_mdct_buff++ = temp3; |
60 | 11.0M | *ptr_mdct_buff++ = temp4; |
61 | | |
62 | 11.0M | temp1 = *ptr_input++; |
63 | 11.0M | ptr_input++; |
64 | 11.0M | temp2 = *ptr_input++; |
65 | 11.0M | ptr_input++; |
66 | 11.0M | temp3 = *ptr_input++; |
67 | 11.0M | ptr_input++; |
68 | 11.0M | } |
69 | 92.8k | *ptr_mdct_buff++ = temp1; |
70 | 92.8k | temp4 = *ptr_input; |
71 | 92.8k | *ptr_mdct_buff++ = temp2; |
72 | 92.8k | *ptr_mdct_buff++ = temp3; |
73 | 92.8k | *ptr_mdct_buff++ = temp4; |
74 | 92.8k | } else { |
75 | 262k | for (i = 0; i < long_frame_len; i += 2) { |
76 | 261k | *ptr_mdct_buff++ = ptr_time_signal[i * ch_increment]; |
77 | 261k | *ptr_mdct_buff++ = ptr_time_signal[(i + 1) * ch_increment]; |
78 | 261k | } |
79 | 1.08k | } |
80 | 93.9k | } |
81 | | |
82 | | static VOID ia_eaacp_enc_inverse_transform_512(FLOAT32 *ptr_data, FLOAT32 *ptr_win_buf, |
83 | | const FLOAT32 *ptr_cos_sin_tbl, |
84 | 0 | WORD8 *ptr_scratch) { |
85 | 0 | WORD32 n = FRAME_LEN_512; |
86 | 0 | WORD32 n_by_2 = n >> 1; |
87 | |
|
88 | 0 | ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch; |
89 | |
|
90 | 0 | ia_eaacp_enc_pre_twiddle_aac(ptr_win_buf, ptr_data, n, ptr_cos_sin_tbl); |
91 | |
|
92 | 0 | ia_enhaacplus_enc_complex_fft(ptr_win_buf, n_by_2, pstr_scratch); |
93 | |
|
94 | 0 | ia_enhaacplus_enc_post_twiddle(ptr_data, ptr_win_buf, ptr_cos_sin_tbl, n); |
95 | 0 | } |
96 | | |
97 | 567k | static VOID ixheaace_pre_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_sine_window) { |
98 | 567k | WORD32 i; |
99 | 567k | FLOAT32 wre, wim, re1, re2, im1, im2; |
100 | | |
101 | 55.9M | for (i = 0; i < m / 4; i++) { |
102 | 55.3M | re1 = ptr_x[2 * i]; |
103 | 55.3M | im2 = ptr_x[2 * i + 1]; |
104 | 55.3M | re2 = ptr_x[m - 2 - 2 * i]; |
105 | 55.3M | im1 = ptr_x[m - 1 - 2 * i]; |
106 | | |
107 | 55.3M | wim = ptr_sine_window[i * 2]; |
108 | 55.3M | wre = ptr_sine_window[m - 1 - 2 * i]; |
109 | | |
110 | 55.3M | ptr_x[2 * i] = im1 * wim + re1 * wre; |
111 | | |
112 | 55.3M | ptr_x[2 * i + 1] = im1 * wre - re1 * wim; |
113 | | |
114 | 55.3M | wim = ptr_sine_window[m - 2 - 2 * i]; |
115 | 55.3M | wre = ptr_sine_window[2 * i + 1]; |
116 | | |
117 | 55.3M | ptr_x[m - 2 - 2 * i] = im2 * wim + re2 * wre; |
118 | | |
119 | 55.3M | ptr_x[m - 1 - 2 * i] = im2 * wre - re2 * wim; |
120 | 55.3M | } |
121 | 567k | } |
122 | | |
123 | | static VOID ia_enhaacplus_enc_tranform_mac4(FLOAT32 *ptr_op, const FLOAT32 *ptr_win, |
124 | | FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2, |
125 | | FLOAT32 *ptr_buf3, FLOAT32 *ptr_buf4, UWORD32 len, |
126 | 187k | WORD32 increment) { |
127 | 187k | WORD32 i; |
128 | | |
129 | 187k | if (increment > 0) { |
130 | 5.73M | for (i = len >> 2; i > 0; i--) { |
131 | 5.63M | *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++))); |
132 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--))); |
133 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--))); |
134 | 5.63M | ptr_op++; |
135 | | |
136 | 5.63M | *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++))); |
137 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--))); |
138 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--))); |
139 | 5.63M | ptr_op++; |
140 | | |
141 | 5.63M | *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++))); |
142 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--))); |
143 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--))); |
144 | 5.63M | ptr_op++; |
145 | | |
146 | 5.63M | *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++))); |
147 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--))); |
148 | 5.63M | *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--))); |
149 | 5.63M | ptr_op++; |
150 | 5.63M | ptr_win += 16; |
151 | 5.63M | } |
152 | 93.9k | } else { |
153 | 2.91M | for (i = len >> 2; i > 0; i--) { |
154 | 2.81M | *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++))); |
155 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--))); |
156 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--))); |
157 | 2.81M | ptr_op--; |
158 | | |
159 | 2.81M | *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++))); |
160 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--))); |
161 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--))); |
162 | 2.81M | ptr_op--; |
163 | | |
164 | 2.81M | *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++))); |
165 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--))); |
166 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--))); |
167 | 2.81M | ptr_op--; |
168 | | |
169 | 2.81M | *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++))); |
170 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--))); |
171 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--))); |
172 | 2.81M | ptr_op--; |
173 | 2.81M | ptr_win += 16; |
174 | 2.81M | } |
175 | 93.9k | } |
176 | 187k | } |
177 | | |
178 | | static VOID ia_enhaacplus_enc_tranform_mac3(FLOAT32 *ptr_op, const FLOAT32 *ptr_win, |
179 | | FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2, |
180 | 93.9k | FLOAT32 *ptr_buf3, UWORD32 len, WORD32 increment) { |
181 | 93.9k | WORD32 i; |
182 | | |
183 | 93.9k | if (increment > 0) { |
184 | 0 | for (i = len >> 2; i > 0; i--) { |
185 | 0 | *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--))); |
186 | 0 | *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--))); |
187 | 0 | ptr_op++; |
188 | |
|
189 | 0 | *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--))); |
190 | 0 | *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--))); |
191 | 0 | ptr_op++; |
192 | |
|
193 | 0 | *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--))); |
194 | 0 | *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--))); |
195 | 0 | ptr_op++; |
196 | |
|
197 | 0 | *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--))); |
198 | 0 | *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--))); |
199 | 0 | ptr_op++; |
200 | 0 | ptr_win += 12; |
201 | 0 | } |
202 | 93.9k | } else { |
203 | 2.91M | for (i = len >> 2; i > 0; i--) { |
204 | 2.81M | *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--))); |
205 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--))); |
206 | 2.81M | ptr_op--; |
207 | | |
208 | 2.81M | *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--))); |
209 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--))); |
210 | 2.81M | ptr_op--; |
211 | | |
212 | 2.81M | *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--))); |
213 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--))); |
214 | 2.81M | ptr_op--; |
215 | | |
216 | 2.81M | *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--))); |
217 | 2.81M | *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--))); |
218 | 2.81M | ptr_op--; |
219 | 2.81M | ptr_win += 12; |
220 | 2.81M | } |
221 | 93.9k | } |
222 | 93.9k | } |
223 | | |
224 | | VOID ia_enhaacplus_enc_transform_real(FLOAT32 *ptr_mdct_delay_buffer, |
225 | | const FLOAT32 *ptr_time_signal, WORD32 ch_increment, |
226 | | FLOAT32 *ptr_real_out, ixheaace_mdct_tables *pstr_mdct_tab, |
227 | | FLOAT32 *ptr_shared_buffer1, WORD8 *ptr_shared_buffer5, |
228 | 93.9k | WORD32 long_frame_len) { |
229 | 93.9k | WORD32 n, n1; |
230 | 93.9k | FLOAT32 *ptr_windowed_buf = ptr_shared_buffer1; |
231 | 93.9k | const FLOAT32 *ptr_ws1; |
232 | 93.9k | WORD32 i, len = long_frame_len; |
233 | 93.9k | FLOAT32 *ptr_real_in; |
234 | 93.9k | FLOAT32 *ptr_data1, *ptr_data2, *ptr_data3, *ptr_data4; |
235 | 93.9k | FLOAT32 *ptr_op1; |
236 | | |
237 | 93.9k | ptr_real_in = ptr_mdct_delay_buffer; |
238 | | |
239 | 93.9k | n = long_frame_len << 1; |
240 | 93.9k | n1 = long_frame_len >> 1; |
241 | | |
242 | 93.9k | ptr_ws1 = |
243 | 93.9k | (long_frame_len == FRAME_LEN_512) ? pstr_mdct_tab->win_512_ld : pstr_mdct_tab->win_480_ld; |
244 | | |
245 | 93.9k | ptr_op1 = ptr_real_out; |
246 | 93.9k | ptr_data1 = &ptr_real_in[n1]; |
247 | 93.9k | ptr_data2 = &ptr_real_in[n + n1]; |
248 | 93.9k | ptr_data3 = &ptr_real_in[n1 - 1]; |
249 | 93.9k | ptr_data4 = &ptr_real_in[n + n1 - 1]; |
250 | | |
251 | 93.9k | ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4, |
252 | 93.9k | n1, 1); |
253 | 93.9k | ptr_ws1 += ((SIZE_T)n1 << 2); |
254 | | |
255 | 90.2M | for (i = 0; i < long_frame_len << 1; i++) { |
256 | 90.1M | ptr_mdct_delay_buffer[i] = ptr_mdct_delay_buffer[long_frame_len + i]; |
257 | 90.1M | } |
258 | 93.9k | ia_enhaacplus_enc_shift_mdct_delay_buffer(&ptr_mdct_delay_buffer[2 * long_frame_len], |
259 | 93.9k | ptr_time_signal, ch_increment, long_frame_len); |
260 | | |
261 | 93.9k | ptr_op1 = &ptr_real_out[long_frame_len - 1]; |
262 | 93.9k | ptr_data1 = &ptr_real_in[n + len - n1]; |
263 | 93.9k | ptr_data2 = &ptr_real_in[len - n1]; |
264 | 93.9k | ptr_data3 = &ptr_real_in[len - n1 - 1]; |
265 | 93.9k | ptr_data4 = &ptr_real_in[n + len - n1 - 1]; |
266 | | |
267 | 93.9k | ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4, |
268 | 93.9k | (n1 >> 1), -1); |
269 | 93.9k | ptr_op1 -= (n1 >> 1); |
270 | 93.9k | ptr_ws1 += ((SIZE_T)n1 << 1); |
271 | 93.9k | ptr_data2 += (n1 >> 1); |
272 | 93.9k | ptr_data3 -= (n1 >> 1); |
273 | 93.9k | ptr_data4 -= (n1 >> 1); |
274 | 93.9k | ia_enhaacplus_enc_tranform_mac3(ptr_op1, ptr_ws1, ptr_data2, ptr_data3, ptr_data4, (n1 >> 1), |
275 | 93.9k | -1); |
276 | | |
277 | 93.9k | if (long_frame_len == FRAME_LEN_480) { |
278 | 93.9k | ia_aac_ld_enc_mdct_480(ptr_real_out, ptr_windowed_buf, 1, pstr_mdct_tab); |
279 | 93.9k | } else { |
280 | 0 | ia_eaacp_enc_inverse_transform_512(ptr_real_out, ptr_windowed_buf, |
281 | 0 | pstr_mdct_tab->cosine_array_1024, ptr_shared_buffer5); |
282 | 0 | } |
283 | 93.9k | } |
284 | | |
285 | | static VOID ia_eaacp_enc_pre_twiddle_compute(FLOAT32 *ptr_in1, FLOAT32 *ptr_in2, FLOAT32 *ptr_x, |
286 | 93.9k | const FLOAT32 *ptr_cos_sin, WORD n_by_4) { |
287 | 93.9k | WORD32 i; |
288 | 93.9k | FLOAT32 temp_r, temp_i; |
289 | 93.9k | FLOAT32 temp_r1, temp_i1; |
290 | 93.9k | FLOAT32 *ptr_x1 = ptr_x + (SIZE_T)((n_by_4 << 2) - 1); |
291 | 93.9k | FLOAT32 c, c1, s, s1; |
292 | | |
293 | 11.3M | for (i = 0; i < n_by_4; i++) { |
294 | 11.2M | c = *ptr_cos_sin++; |
295 | 11.2M | s = *ptr_cos_sin++; |
296 | 11.2M | s1 = *ptr_cos_sin++; |
297 | 11.2M | c1 = *ptr_cos_sin++; |
298 | | |
299 | 11.2M | temp_r = *ptr_in1++; |
300 | 11.2M | temp_i1 = *ptr_in1++; |
301 | 11.2M | temp_i = *ptr_in2--; |
302 | 11.2M | temp_r1 = *ptr_in2--; |
303 | 11.2M | *ptr_x = ((temp_r * c) + (temp_i * s)); |
304 | 11.2M | ptr_x++; |
305 | | |
306 | 11.2M | *ptr_x = ((temp_i * c) - (temp_r * s)); |
307 | 11.2M | ptr_x++; |
308 | | |
309 | 11.2M | *ptr_x1 = ((temp_i1 * c1) - (temp_r1 * s1)); |
310 | 11.2M | ptr_x1--; |
311 | | |
312 | 11.2M | *ptr_x1 = ((temp_r1 * c1) + (temp_i1 * s1)); |
313 | 11.2M | ptr_x1--; |
314 | 11.2M | } |
315 | 93.9k | } |
316 | | |
317 | | VOID ia_enhaacplus_enc_post_twiddle(FLOAT32 *ptr_out, FLOAT32 *ptr_x, |
318 | 93.9k | const FLOAT32 *ptr_cos_sin_tbl, WORD m) { |
319 | 93.9k | WORD i; |
320 | 93.9k | FLOAT32 c, c1, s, s1; |
321 | 93.9k | FLOAT32 tmp_var; |
322 | 93.9k | FLOAT32 tempr, tempr1, tempi, tempi1; |
323 | 93.9k | FLOAT32 *ptr_out1 = ptr_out + m - 1; |
324 | 93.9k | FLOAT32 *ptr_x1 = ptr_x + m - 1; |
325 | | |
326 | 11.3M | for (i = 0; i < (m >> 2); i++) { |
327 | 11.2M | c = *ptr_cos_sin_tbl++; |
328 | 11.2M | s = *ptr_cos_sin_tbl++; |
329 | 11.2M | s1 = *ptr_cos_sin_tbl++; |
330 | 11.2M | c1 = *ptr_cos_sin_tbl++; |
331 | 11.2M | tempr = *ptr_x++; |
332 | 11.2M | tempi = *ptr_x++; |
333 | 11.2M | tempi1 = *ptr_x1--; |
334 | 11.2M | tempr1 = *ptr_x1--; |
335 | | |
336 | 11.2M | tmp_var = ((tempr * c) + (tempi * s)); |
337 | 11.2M | *ptr_out++ = tmp_var; |
338 | | |
339 | 11.2M | tmp_var = ((tempr * s) - (tempi * c)); |
340 | 11.2M | *ptr_out1-- = tmp_var; |
341 | | |
342 | 11.2M | tmp_var = ((tempr1 * c1) + (tempi1 * s1)); |
343 | 11.2M | *ptr_out1-- = tmp_var; |
344 | | |
345 | 11.2M | tmp_var = ((tempr1 * s1) - (tempi1 * c1)); |
346 | 11.2M | *ptr_out++ = tmp_var; |
347 | 11.2M | } |
348 | 93.9k | } |
349 | | |
350 | | VOID ia_eaacp_enc_pre_twiddle_aac(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n, |
351 | 93.9k | const FLOAT32 *ptr_cos_array) { |
352 | 93.9k | WORD n_by_4; |
353 | 93.9k | FLOAT32 *ptr_in1, *ptr_in2; |
354 | | |
355 | 93.9k | n_by_4 = n >> 2; |
356 | | |
357 | 93.9k | ptr_in1 = ptr_data; |
358 | 93.9k | ptr_in2 = ptr_data + n - 1; |
359 | | |
360 | 93.9k | ia_eaacp_enc_pre_twiddle_compute(ptr_in1, ptr_in2, ptr_x, ptr_cos_array, n_by_4); |
361 | 93.9k | } |
362 | | |
363 | 42.9M | static PLATFORM_INLINE WORD8 ia_enhaacplus_enc_calc_norm(WORD32 a) { |
364 | 42.9M | WORD8 norm_val; |
365 | | |
366 | 42.9M | if (a == 0) { |
367 | 0 | norm_val = 31; |
368 | 42.9M | } else { |
369 | 42.9M | if (a == (WORD32)0xffffffffL) { |
370 | 0 | norm_val = 31; |
371 | 42.9M | } else { |
372 | 42.9M | if (a < 0) { |
373 | 0 | a = ~a; |
374 | 0 | } |
375 | 1.11G | for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) { |
376 | 1.07G | a <<= 1; |
377 | 1.07G | } |
378 | 42.9M | } |
379 | 42.9M | } |
380 | | |
381 | 42.9M | return norm_val; |
382 | 42.9M | } |
383 | | |
384 | | static PLATFORM_INLINE VOID ia_enhaacplus_enc_complex_3point_fft(FLOAT32 *ptr_in, |
385 | 0 | FLOAT32 *ptr_out) { |
386 | 0 | FLOAT32 add_r, sub_r; |
387 | 0 | FLOAT32 add_i, sub_i; |
388 | 0 | FLOAT32 x_01_r, x_01_i, temp; |
389 | 0 | FLOAT32 p1, p2, p3, p4; |
390 | 0 | FLOAT64 sin_mu = 0.866025403784439f; |
391 | |
|
392 | 0 | x_01_r = ptr_in[0] + ptr_in[2]; |
393 | 0 | x_01_i = ptr_in[1] + ptr_in[3]; |
394 | |
|
395 | 0 | add_r = ptr_in[2] + ptr_in[4]; |
396 | 0 | add_i = ptr_in[3] + ptr_in[5]; |
397 | |
|
398 | 0 | sub_r = ptr_in[2] - ptr_in[4]; |
399 | 0 | sub_i = ptr_in[3] - ptr_in[5]; |
400 | |
|
401 | 0 | p1 = add_r / (FLOAT32)2.0f; |
402 | 0 | p4 = add_i / (FLOAT32)2.0f; |
403 | 0 | p2 = (FLOAT32)((FLOAT64)sub_i * sin_mu); |
404 | 0 | p3 = (FLOAT32)((FLOAT64)sub_r * sin_mu); |
405 | |
|
406 | 0 | temp = ptr_in[0] - p1; |
407 | |
|
408 | 0 | ptr_out[0] = x_01_r + ptr_in[4]; |
409 | 0 | ptr_out[1] = x_01_i + ptr_in[5]; |
410 | 0 | ptr_out[2] = temp + p2; |
411 | 0 | ptr_out[3] = (ptr_in[1] - p3) - p4; |
412 | 0 | ptr_out[4] = temp - p2; |
413 | 0 | ptr_out[5] = (ptr_in[1] + p3) - p4; |
414 | 0 | } |
415 | | |
416 | | VOID ia_enhaacplus_enc_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength, |
417 | 21.4M | FLOAT32 *ptr_scratch_fft_p2_y) { |
418 | 21.4M | WORD32 i, j, k, n_stages, h2; |
419 | 21.4M | FLOAT32 x0_r, x0_i, x1_r, x1_i, x2_r, x2_i, x3_r, x3_i; |
420 | 21.4M | WORD32 del, nodespacing, in_loop_cnt; |
421 | 21.4M | WORD32 not_power_4; |
422 | 21.4M | WORD32 dig_rev_shift; |
423 | 21.4M | FLOAT32 *ptr_p2_y = ptr_scratch_fft_p2_y; |
424 | 21.4M | WORD32 mpass = nlength; |
425 | 21.4M | WORD32 npoints = nlength; |
426 | 21.4M | FLOAT32 *ptr_y = ptr_p2_y; |
427 | 21.4M | const FLOAT64 *ptr_w; |
428 | 21.4M | FLOAT32 *ptr_inp; |
429 | 21.4M | FLOAT32 tmk; |
430 | 21.4M | const FLOAT64 *ptr_twiddles; |
431 | 21.4M | FLOAT32 *ptr_data; |
432 | 21.4M | FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6; |
433 | 21.4M | WORD32 sec_loop_cnt; |
434 | 21.4M | FLOAT32 tmp; |
435 | | |
436 | 21.4M | memset(ptr_y, 0, nlength * 2 * sizeof(*ptr_y)); |
437 | | |
438 | 21.4M | dig_rev_shift = ia_enhaacplus_enc_calc_norm(mpass) + 1 - 16; |
439 | 21.4M | n_stages = 30 - ia_enhaacplus_enc_calc_norm(mpass); |
440 | 21.4M | not_power_4 = n_stages & 1; |
441 | | |
442 | 21.4M | n_stages = n_stages >> 1; |
443 | | |
444 | 21.4M | ptr_w = ia_enhaacplus_enc_twiddle_table_fft_32x32; |
445 | | |
446 | 21.4M | dig_rev_shift = MAX(dig_rev_shift, 0); |
447 | | |
448 | 216M | for (i = 0; i < npoints; i += 4) { |
449 | 194M | ptr_inp = ptr_x; |
450 | 194M | DIG_REV_NEW(i, dig_rev_shift, h2); |
451 | 194M | if (not_power_4) { |
452 | 182M | h2 += 1; |
453 | 182M | h2 &= ~1; |
454 | 182M | } |
455 | 194M | ptr_inp += (h2); |
456 | | |
457 | 194M | x0_r = *ptr_inp; |
458 | 194M | x0_i = *(ptr_inp + 1); |
459 | 194M | ptr_inp += (npoints >> 1); |
460 | | |
461 | 194M | x1_r = *ptr_inp; |
462 | 194M | x1_i = *(ptr_inp + 1); |
463 | 194M | ptr_inp += (npoints >> 1); |
464 | | |
465 | 194M | x2_r = *ptr_inp; |
466 | 194M | x2_i = *(ptr_inp + 1); |
467 | 194M | ptr_inp += (npoints >> 1); |
468 | | |
469 | 194M | x3_r = *ptr_inp; |
470 | 194M | x3_i = *(ptr_inp + 1); |
471 | | |
472 | 194M | x0_r = x0_r + x2_r; |
473 | 194M | x0_i = x0_i + x2_i; |
474 | | |
475 | 194M | tmk = x0_r - x2_r; |
476 | 194M | x2_r = tmk - x2_r; |
477 | 194M | tmk = x0_i - x2_i; |
478 | 194M | x2_i = tmk - x2_i; |
479 | | |
480 | 194M | x1_r = x1_r + x3_r; |
481 | 194M | x1_i = x1_i + x3_i; |
482 | | |
483 | 194M | tmk = x1_r - x3_r; |
484 | 194M | x3_r = tmk - x3_r; |
485 | 194M | tmk = x1_i - x3_i; |
486 | 194M | x3_i = tmk - x3_i; |
487 | | |
488 | 194M | x0_r = x0_r + x1_r; |
489 | 194M | x0_i = x0_i + x1_i; |
490 | | |
491 | 194M | tmk = x0_r - x1_r; |
492 | 194M | x1_r = tmk - x1_r; |
493 | 194M | tmk = x0_i - x1_i; |
494 | 194M | x1_i = tmk - x1_i; |
495 | | |
496 | 194M | x2_r = x2_r + x3_i; |
497 | 194M | x2_i = x2_i - x3_r; |
498 | | |
499 | 194M | tmk = x2_r - x3_i; |
500 | 194M | x3_i = tmk - x3_i; |
501 | 194M | tmk = x2_i + x3_r; |
502 | 194M | x3_r = tmk + x3_r; |
503 | | |
504 | 194M | *ptr_y++ = x0_r; |
505 | 194M | *ptr_y++ = x0_i; |
506 | 194M | *ptr_y++ = x2_r; |
507 | 194M | *ptr_y++ = x2_i; |
508 | 194M | *ptr_y++ = x1_r; |
509 | 194M | *ptr_y++ = x1_i; |
510 | 194M | *ptr_y++ = x3_i; |
511 | 194M | *ptr_y++ = x3_r; |
512 | 194M | } |
513 | 21.4M | ptr_y -= 2 * npoints; |
514 | 21.4M | del = 4; |
515 | 21.4M | nodespacing = 64; |
516 | 21.4M | in_loop_cnt = npoints >> 4; |
517 | 43.7M | for (i = n_stages - 1; i > 0; i--) { |
518 | 22.2M | ptr_twiddles = ptr_w; |
519 | 22.2M | ptr_data = ptr_y; |
520 | 73.0M | for (k = in_loop_cnt; k != 0; k--) { |
521 | 50.8M | x0_r = (*ptr_data); |
522 | 50.8M | x0_i = (*(ptr_data + 1)); |
523 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
524 | | |
525 | 50.8M | x1_r = (*ptr_data); |
526 | 50.8M | x1_i = (*(ptr_data + 1)); |
527 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
528 | | |
529 | 50.8M | x2_r = (*ptr_data); |
530 | 50.8M | x2_i = (*(ptr_data + 1)); |
531 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
532 | | |
533 | 50.8M | x3_r = (*ptr_data); |
534 | 50.8M | x3_i = (*(ptr_data + 1)); |
535 | 50.8M | ptr_data -= 3 * (del << 1); |
536 | | |
537 | 50.8M | x0_r = x0_r + x2_r; |
538 | 50.8M | x0_i = x0_i + x2_i; |
539 | 50.8M | x2_r = x0_r - (x2_r * 2); |
540 | 50.8M | x2_i = x0_i - (x2_i * 2); |
541 | 50.8M | x1_r = x1_r + x3_r; |
542 | 50.8M | x1_i = x1_i + x3_i; |
543 | 50.8M | x3_r = x1_r - (x3_r * 2); |
544 | 50.8M | x3_i = x1_i - (x3_i * 2); |
545 | | |
546 | 50.8M | x0_r = x0_r + x1_r; |
547 | 50.8M | x0_i = x0_i + x1_i; |
548 | 50.8M | x1_r = x0_r - (x1_r * 2); |
549 | 50.8M | x1_i = x0_i - (x1_i * 2); |
550 | 50.8M | x2_r = x2_r + x3_i; |
551 | 50.8M | x2_i = x2_i - x3_r; |
552 | 50.8M | x3_i = x2_r - (x3_i * 2); |
553 | 50.8M | x3_r = x2_i + (x3_r * 2); |
554 | | |
555 | 50.8M | *ptr_data = x0_r; |
556 | 50.8M | *(ptr_data + 1) = x0_i; |
557 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
558 | | |
559 | 50.8M | *ptr_data = x2_r; |
560 | 50.8M | *(ptr_data + 1) = x2_i; |
561 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
562 | | |
563 | 50.8M | *ptr_data = x1_r; |
564 | 50.8M | *(ptr_data + 1) = x1_i; |
565 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
566 | | |
567 | 50.8M | *ptr_data = x3_i; |
568 | 50.8M | *(ptr_data + 1) = x3_r; |
569 | 50.8M | ptr_data += ((SIZE_T)del << 1); |
570 | 50.8M | } |
571 | 22.2M | ptr_data = ptr_y + 2; |
572 | | |
573 | 22.2M | sec_loop_cnt = (nodespacing * del); |
574 | 22.2M | sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + |
575 | 22.2M | (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - |
576 | 22.2M | (sec_loop_cnt / 256); |
577 | | |
578 | 51.3M | for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { |
579 | 29.0M | w_1 = *(ptr_twiddles + j); |
580 | 29.0M | w_4 = *(ptr_twiddles + j + 257); |
581 | 29.0M | w_2 = *(ptr_twiddles + ((SIZE_T)j << 1)); |
582 | 29.0M | w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257); |
583 | 29.0M | w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1)); |
584 | 29.0M | w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 257); |
585 | | |
586 | 93.7M | for (k = in_loop_cnt; k != 0; k--) { |
587 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
588 | | |
589 | 64.6M | x1_r = *ptr_data; |
590 | 64.6M | x1_i = *(ptr_data + 1); |
591 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
592 | | |
593 | 64.6M | x2_r = *ptr_data; |
594 | 64.6M | x2_i = *(ptr_data + 1); |
595 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
596 | | |
597 | 64.6M | x3_r = *ptr_data; |
598 | 64.6M | x3_i = *(ptr_data + 1); |
599 | 64.6M | ptr_data -= 3 * (del << 1); |
600 | | |
601 | 64.6M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4)); |
602 | 64.6M | x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1); |
603 | 64.6M | x1_r = tmp; |
604 | | |
605 | 64.6M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5)); |
606 | 64.6M | x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2); |
607 | 64.6M | x2_r = tmp; |
608 | | |
609 | 64.6M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_3) - ixheaace_dmult((FLOAT64)x3_i, w_6)); |
610 | 64.6M | x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3); |
611 | 64.6M | x3_r = tmp; |
612 | | |
613 | 64.6M | x0_r = (*ptr_data); |
614 | 64.6M | x0_i = (*(ptr_data + 1)); |
615 | | |
616 | 64.6M | x0_r = x0_r + (x2_r); |
617 | 64.6M | x0_i = x0_i + (x2_i); |
618 | 64.6M | x2_r = x0_r - (x2_r * 2); |
619 | 64.6M | x2_i = x0_i - (x2_i * 2); |
620 | 64.6M | x1_r = x1_r + x3_r; |
621 | 64.6M | x1_i = x1_i + x3_i; |
622 | 64.6M | x3_r = x1_r - (x3_r * 2); |
623 | 64.6M | x3_i = x1_i - (x3_i * 2); |
624 | | |
625 | 64.6M | x0_r = x0_r + (x1_r); |
626 | 64.6M | x0_i = x0_i + (x1_i); |
627 | 64.6M | x1_r = x0_r - (x1_r * 2); |
628 | 64.6M | x1_i = x0_i - (x1_i * 2); |
629 | 64.6M | x2_r = x2_r + (x3_i); |
630 | 64.6M | x2_i = x2_i - (x3_r); |
631 | 64.6M | x3_i = x2_r - (x3_i * 2); |
632 | 64.6M | x3_r = x2_i + (x3_r * 2); |
633 | | |
634 | 64.6M | *ptr_data = x0_r; |
635 | 64.6M | *(ptr_data + 1) = x0_i; |
636 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
637 | | |
638 | 64.6M | *ptr_data = x2_r; |
639 | 64.6M | *(ptr_data + 1) = x2_i; |
640 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
641 | | |
642 | 64.6M | *ptr_data = x1_r; |
643 | 64.6M | *(ptr_data + 1) = x1_i; |
644 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
645 | | |
646 | 64.6M | *ptr_data = x3_i; |
647 | 64.6M | *(ptr_data + 1) = x3_r; |
648 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
649 | 64.6M | } |
650 | 29.0M | ptr_data -= 2 * npoints; |
651 | 29.0M | ptr_data += 2; |
652 | 29.0M | } |
653 | 47.9M | for (; j <= (nodespacing * del) >> 1; j += nodespacing) { |
654 | 25.6M | w_1 = *(ptr_twiddles + j); |
655 | 25.6M | w_4 = *(ptr_twiddles + j + 257); |
656 | 25.6M | w_2 = *(ptr_twiddles + ((SIZE_T)j << 1)); |
657 | 25.6M | w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257); |
658 | 25.6M | w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1) - 256); |
659 | 25.6M | w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 1); |
660 | | |
661 | 83.4M | for (k = in_loop_cnt; k != 0; k--) { |
662 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
663 | | |
664 | 57.7M | x1_r = *ptr_data; |
665 | 57.7M | x1_i = *(ptr_data + 1); |
666 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
667 | | |
668 | 57.7M | x2_r = *ptr_data; |
669 | 57.7M | x2_i = *(ptr_data + 1); |
670 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
671 | | |
672 | 57.7M | x3_r = *ptr_data; |
673 | 57.7M | x3_i = *(ptr_data + 1); |
674 | 57.7M | ptr_data -= 3 * (del << 1); |
675 | | |
676 | 57.7M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4)); |
677 | 57.7M | x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1); |
678 | 57.7M | x1_r = tmp; |
679 | | |
680 | 57.7M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5)); |
681 | 57.7M | x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2); |
682 | 57.7M | x2_r = tmp; |
683 | | |
684 | 57.7M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3)); |
685 | 57.7M | x3_i = |
686 | 57.7M | (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6)); |
687 | 57.7M | x3_r = tmp; |
688 | | |
689 | 57.7M | x0_r = (*ptr_data); |
690 | 57.7M | x0_i = (*(ptr_data + 1)); |
691 | | |
692 | 57.7M | x0_r = x0_r + (x2_r); |
693 | 57.7M | x0_i = x0_i + (x2_i); |
694 | 57.7M | x2_r = x0_r - (x2_r * 2); |
695 | 57.7M | x2_i = x0_i - (x2_i * 2); |
696 | 57.7M | x1_r = x1_r + x3_r; |
697 | 57.7M | x1_i = x1_i + x3_i; |
698 | 57.7M | x3_r = x1_r - (x3_r * 2); |
699 | 57.7M | x3_i = x1_i - (x3_i * 2); |
700 | | |
701 | 57.7M | x0_r = x0_r + (x1_r); |
702 | 57.7M | x0_i = x0_i + (x1_i); |
703 | 57.7M | x1_r = x0_r - (x1_r * 2); |
704 | 57.7M | x1_i = x0_i - (x1_i * 2); |
705 | 57.7M | x2_r = x2_r + (x3_i); |
706 | 57.7M | x2_i = x2_i - (x3_r); |
707 | 57.7M | x3_i = x2_r - (x3_i * 2); |
708 | 57.7M | x3_r = x2_i + (x3_r * 2); |
709 | | |
710 | 57.7M | *ptr_data = x0_r; |
711 | 57.7M | *(ptr_data + 1) = x0_i; |
712 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
713 | | |
714 | 57.7M | *ptr_data = x2_r; |
715 | 57.7M | *(ptr_data + 1) = x2_i; |
716 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
717 | | |
718 | 57.7M | *ptr_data = x1_r; |
719 | 57.7M | *(ptr_data + 1) = x1_i; |
720 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
721 | | |
722 | 57.7M | *ptr_data = x3_i; |
723 | 57.7M | *(ptr_data + 1) = x3_r; |
724 | 57.7M | ptr_data += ((SIZE_T)del << 1); |
725 | 57.7M | } |
726 | 25.6M | ptr_data -= 2 * npoints; |
727 | 25.6M | ptr_data += 2; |
728 | 25.6M | } |
729 | 25.6M | for (; j <= sec_loop_cnt * 2; j += nodespacing) { |
730 | 3.40M | w_1 = *(ptr_twiddles + j); |
731 | 3.40M | w_4 = *(ptr_twiddles + j + 257); |
732 | 3.40M | w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256)); |
733 | 3.40M | w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1)); |
734 | 3.40M | w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 256)); |
735 | 3.40M | w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) + 1)); |
736 | | |
737 | 10.3M | for (k = in_loop_cnt; k != 0; k--) { |
738 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
739 | | |
740 | 6.93M | x1_r = *ptr_data; |
741 | 6.93M | x1_i = *(ptr_data + 1); |
742 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
743 | | |
744 | 6.93M | x2_r = *ptr_data; |
745 | 6.93M | x2_i = *(ptr_data + 1); |
746 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
747 | | |
748 | 6.93M | x3_r = *ptr_data; |
749 | 6.93M | x3_i = *(ptr_data + 1); |
750 | 6.93M | ptr_data -= 3 * (del << 1); |
751 | | |
752 | 6.93M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4)); |
753 | 6.93M | x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1_r, w_4), x1_i, w_1); |
754 | 6.93M | x1_r = tmp; |
755 | | |
756 | 6.93M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2)); |
757 | 6.93M | x2_i = (FLOAT32)(-ixheaace_dmult(x2_r, w_2) + ixheaace_dmult(x2_i, w_5)); |
758 | 6.93M | x2_r = tmp; |
759 | | |
760 | 6.93M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3)); |
761 | 6.93M | x3_i = |
762 | 6.93M | (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6)); |
763 | 6.93M | x3_r = tmp; |
764 | | |
765 | 6.93M | x0_r = (*ptr_data); |
766 | 6.93M | x0_i = (*(ptr_data + 1)); |
767 | | |
768 | 6.93M | x0_r = x0_r + (x2_r); |
769 | 6.93M | x0_i = x0_i + (x2_i); |
770 | 6.93M | x2_r = x0_r - (x2_r * 2); |
771 | 6.93M | x2_i = x0_i - (x2_i * 2); |
772 | 6.93M | x1_r = x1_r + x3_r; |
773 | 6.93M | x1_i = x1_i + x3_i; |
774 | 6.93M | x3_r = x1_r - (x3_r * 2); |
775 | 6.93M | x3_i = x1_i - (x3_i * 2); |
776 | | |
777 | 6.93M | x0_r = x0_r + (x1_r); |
778 | 6.93M | x0_i = x0_i + (x1_i); |
779 | 6.93M | x1_r = x0_r - (x1_r * 2); |
780 | 6.93M | x1_i = x0_i - (x1_i * 2); |
781 | 6.93M | x2_r = x2_r + (x3_i); |
782 | 6.93M | x2_i = x2_i - (x3_r); |
783 | 6.93M | x3_i = x2_r - (x3_i * 2); |
784 | 6.93M | x3_r = x2_i + (x3_r * 2); |
785 | | |
786 | 6.93M | *ptr_data = x0_r; |
787 | 6.93M | *(ptr_data + 1) = x0_i; |
788 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
789 | | |
790 | 6.93M | *ptr_data = x2_r; |
791 | 6.93M | *(ptr_data + 1) = x2_i; |
792 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
793 | | |
794 | 6.93M | *ptr_data = x1_r; |
795 | 6.93M | *(ptr_data + 1) = x1_i; |
796 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
797 | | |
798 | 6.93M | *ptr_data = x3_i; |
799 | 6.93M | *(ptr_data + 1) = x3_r; |
800 | 6.93M | ptr_data += ((SIZE_T)del << 1); |
801 | 6.93M | } |
802 | 3.40M | ptr_data -= 2 * npoints; |
803 | 3.40M | ptr_data += 2; |
804 | 3.40M | } |
805 | 51.3M | for (; j < nodespacing * del; j += nodespacing) { |
806 | 29.0M | w_1 = *(ptr_twiddles + j); |
807 | 29.0M | w_4 = *(ptr_twiddles + j + 257); |
808 | 29.0M | w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256)); |
809 | 29.0M | w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1)); |
810 | 29.0M | w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512)); |
811 | 29.0M | w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512 + 257)); |
812 | | |
813 | 93.7M | for (k = in_loop_cnt; k != 0; k--) { |
814 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
815 | | |
816 | 64.6M | x1_r = *ptr_data; |
817 | 64.6M | x1_i = *(ptr_data + 1); |
818 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
819 | | |
820 | 64.6M | x2_r = *ptr_data; |
821 | 64.6M | x2_i = *(ptr_data + 1); |
822 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
823 | | |
824 | 64.6M | x3_r = *ptr_data; |
825 | 64.6M | x3_i = *(ptr_data + 1); |
826 | 64.6M | ptr_data -= 3 * (del << 1); |
827 | | |
828 | 64.6M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4)); |
829 | 64.6M | x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1); |
830 | 64.6M | x1_r = tmp; |
831 | | |
832 | 64.6M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2)); |
833 | 64.6M | x2_i = |
834 | 64.6M | (FLOAT32)(-ixheaace_dmult((FLOAT64)x2_r, w_2) + ixheaace_dmult((FLOAT64)x2_i, w_5)); |
835 | 64.6M | x2_r = tmp; |
836 | | |
837 | 64.6M | tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6)); |
838 | 64.6M | x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3); |
839 | 64.6M | x3_r = tmp; |
840 | | |
841 | 64.6M | x0_r = (*ptr_data); |
842 | 64.6M | x0_i = (*(ptr_data + 1)); |
843 | | |
844 | 64.6M | x0_r = x0_r + (x2_r); |
845 | 64.6M | x0_i = x0_i + (x2_i); |
846 | 64.6M | x2_r = x0_r - (x2_r * 2); |
847 | 64.6M | x2_i = x0_i - (x2_i * 2); |
848 | 64.6M | x1_r = x1_r + x3_r; |
849 | 64.6M | x1_i = x1_i - x3_i; |
850 | 64.6M | x3_r = x1_r - (x3_r * 2); |
851 | 64.6M | x3_i = x1_i + (x3_i * 2); |
852 | | |
853 | 64.6M | x0_r = x0_r + (x1_r); |
854 | 64.6M | x0_i = x0_i + (x1_i); |
855 | 64.6M | x1_r = x0_r - (x1_r * 2); |
856 | 64.6M | x1_i = x0_i - (x1_i * 2); |
857 | 64.6M | x2_r = x2_r + (x3_i); |
858 | 64.6M | x2_i = x2_i - (x3_r); |
859 | 64.6M | x3_i = x2_r - (x3_i * 2); |
860 | 64.6M | x3_r = x2_i + (x3_r * 2); |
861 | | |
862 | 64.6M | *ptr_data = x0_r; |
863 | 64.6M | *(ptr_data + 1) = x0_i; |
864 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
865 | | |
866 | 64.6M | *ptr_data = x2_r; |
867 | 64.6M | *(ptr_data + 1) = x2_i; |
868 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
869 | | |
870 | 64.6M | *ptr_data = x1_r; |
871 | 64.6M | *(ptr_data + 1) = x1_i; |
872 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
873 | | |
874 | 64.6M | *ptr_data = x3_i; |
875 | 64.6M | *(ptr_data + 1) = x3_r; |
876 | 64.6M | ptr_data += ((SIZE_T)del << 1); |
877 | 64.6M | } |
878 | 29.0M | ptr_data -= 2 * npoints; |
879 | 29.0M | ptr_data += 2; |
880 | 29.0M | } |
881 | 22.2M | nodespacing >>= 2; |
882 | 22.2M | del <<= 2; |
883 | 22.2M | in_loop_cnt >>= 2; |
884 | 22.2M | } |
885 | 21.4M | if (not_power_4) { |
886 | 21.0M | ptr_twiddles = ptr_w; |
887 | 21.0M | nodespacing <<= 1; |
888 | | |
889 | 203M | for (j = del / 2; j != 0; j--) { |
890 | 182M | w_1 = *ptr_twiddles; |
891 | 182M | w_4 = *(ptr_twiddles + 257); |
892 | 182M | ptr_twiddles += nodespacing; |
893 | | |
894 | 182M | x0_r = *ptr_y; |
895 | 182M | x0_i = *(ptr_y + 1); |
896 | 182M | ptr_y += ((SIZE_T)del << 1); |
897 | | |
898 | 182M | x1_r = *ptr_y; |
899 | 182M | x1_i = *(ptr_y + 1); |
900 | | |
901 | 182M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4)); |
902 | 182M | x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1); |
903 | 182M | x1_r = tmp; |
904 | | |
905 | 182M | *ptr_y = (x0_r) - (x1_r); |
906 | 182M | *(ptr_y + 1) = (x0_i) - (x1_i); |
907 | 182M | ptr_y -= ((SIZE_T)del << 1); |
908 | | |
909 | 182M | *ptr_y = (x0_r) + (x1_r); |
910 | 182M | *(ptr_y + 1) = (x0_i) + (x1_i); |
911 | 182M | ptr_y += 2; |
912 | 182M | } |
913 | 21.0M | ptr_twiddles = ptr_w; |
914 | 203M | for (j = del / 2; j != 0; j--) { |
915 | 182M | w_1 = *ptr_twiddles; |
916 | 182M | w_4 = *(ptr_twiddles + 257); |
917 | 182M | ptr_twiddles += nodespacing; |
918 | | |
919 | 182M | x0_r = *ptr_y; |
920 | 182M | x0_i = *(ptr_y + 1); |
921 | 182M | ptr_y += ((SIZE_T)del << 1); |
922 | | |
923 | 182M | x1_r = *ptr_y; |
924 | 182M | x1_i = *(ptr_y + 1); |
925 | | |
926 | 182M | tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_4) + |
927 | 182M | ixheaace_dmult((FLOAT64)x1_i, w_1)) /*/2*/; |
928 | 182M | x1_i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1_r, w_1) + |
929 | 182M | ixheaace_dmult((FLOAT64)x1_i, w_4)) /*/2*/; |
930 | 182M | x1_r = tmp; |
931 | | |
932 | 182M | *ptr_y = (x0_r) - (x1_r); |
933 | 182M | *(ptr_y + 1) = (x0_i) - (x1_i); |
934 | 182M | ptr_y -= ((SIZE_T)del << 1); |
935 | | |
936 | 182M | *ptr_y = (x0_r) + (x1_r); |
937 | 182M | *(ptr_y + 1) = (x0_i) + (x1_i); |
938 | 182M | ptr_y += 2; |
939 | 182M | } |
940 | 21.0M | } |
941 | | |
942 | 801M | for (i = 0; i < nlength; i++) { |
943 | 779M | *(ptr_x + 2 * i) = ptr_p2_y[2 * i]; |
944 | 779M | *(ptr_x + 2 * i + 1) = ptr_p2_y[2 * i + 1]; |
945 | 779M | } |
946 | 21.4M | } |
947 | | |
948 | | static VOID ia_enhaacplus_enc_complex_fft_p3(FLOAT32 *ptr_data, WORD32 nlength, |
949 | 0 | ixheaace_scratch_mem *pstr_scratch) { |
950 | 0 | WORD32 i, j; |
951 | 0 | FLOAT32 *ptr_data_3 = pstr_scratch->p_fft_p3_data_3; |
952 | 0 | FLOAT32 *ptr_p3_y = pstr_scratch->p_fft_p3_y; |
953 | 0 | WORD32 cnfac; |
954 | 0 | WORD32 mpass = nlength; |
955 | 0 | FLOAT32 *ptr_x = ptr_data; |
956 | 0 | FLOAT32 *ptr_y = ptr_p3_y; |
957 | 0 | cnfac = 0; |
958 | 0 | const FLOAT64 *ptr_w1_r, *ptr_w1_i; |
959 | 0 | FLOAT32 tmp; |
960 | 0 | ptr_w1_r = ia_enhaacplus_enc_twiddle_table_3pr; |
961 | 0 | ptr_w1_i = ia_enhaacplus_enc_twiddle_table_3pi; |
962 | |
|
963 | 0 | while (mpass % 3 == 0) { |
964 | 0 | mpass /= 3; |
965 | 0 | cnfac++; |
966 | 0 | } |
967 | |
|
968 | 0 | for (i = 0; i < 3 * cnfac; i++) { |
969 | 0 | for (j = 0; j < mpass; j++) { |
970 | 0 | ptr_data_3[2 * j] = ptr_data[3 * (2 * j) + (2 * i)]; |
971 | 0 | ptr_data_3[2 * j + 1] = ptr_data[3 * (2 * j) + 1 + (2 * i)]; |
972 | 0 | } |
973 | 0 | ia_enhaacplus_enc_complex_fft_p2(ptr_data_3, mpass, pstr_scratch->p_fft_p2_y); |
974 | |
|
975 | 0 | for (j = 0; j < mpass; j++) { |
976 | 0 | ptr_data[3 * (2 * j) + (2 * i)] = ptr_data_3[2 * j]; |
977 | 0 | ptr_data[3 * (2 * j) + 1 + (2 * i)] = ptr_data_3[2 * j + 1]; |
978 | 0 | } |
979 | 0 | } |
980 | |
|
981 | 0 | { |
982 | 0 | for (i = 0; i < nlength; i += 3) { |
983 | 0 | tmp = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_r) - |
984 | 0 | (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_i)); |
985 | 0 | ptr_data[2 * i + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_i) + |
986 | 0 | (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_r)); |
987 | 0 | ptr_data[2 * i] = tmp; |
988 | |
|
989 | 0 | ptr_w1_r++; |
990 | 0 | ptr_w1_i++; |
991 | |
|
992 | 0 | tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_r) - |
993 | 0 | (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_i)); |
994 | 0 | ptr_data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_i) + |
995 | 0 | (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_r)); |
996 | 0 | ptr_data[2 * (i + 1)] = tmp; |
997 | |
|
998 | 0 | ptr_w1_r++; |
999 | 0 | ptr_w1_i++; |
1000 | |
|
1001 | 0 | tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_r) - |
1002 | 0 | (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_i)); |
1003 | 0 | ptr_data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_i) + |
1004 | 0 | (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_r)); |
1005 | 0 | ptr_data[2 * (i + 2)] = tmp; |
1006 | |
|
1007 | 0 | ptr_w1_r += 3 * (128 / mpass - 1) + 1; |
1008 | 0 | ptr_w1_i += 3 * (128 / mpass - 1) + 1; |
1009 | 0 | } |
1010 | 0 | } |
1011 | |
|
1012 | 0 | for (i = 0; i < mpass; i++) { |
1013 | 0 | ia_enhaacplus_enc_complex_3point_fft(ptr_x, ptr_y); |
1014 | |
|
1015 | 0 | ptr_x = ptr_x + 6; |
1016 | 0 | ptr_y = ptr_y + 6; |
1017 | 0 | } |
1018 | |
|
1019 | 0 | for (i = 0; i < mpass; i++) { |
1020 | 0 | ptr_data[2 * i] = ptr_p3_y[6 * i]; |
1021 | 0 | ptr_data[2 * i + 1] = ptr_p3_y[6 * i + 1]; |
1022 | 0 | } |
1023 | |
|
1024 | 0 | for (i = 0; i < mpass; i++) { |
1025 | 0 | ptr_data[2 * (i + mpass)] = ptr_p3_y[6 * i + 2]; |
1026 | 0 | ptr_data[2 * (i + mpass) + 1] = ptr_p3_y[6 * i + 3]; |
1027 | 0 | } |
1028 | |
|
1029 | 0 | for (i = 0; i < mpass; i++) { |
1030 | 0 | ptr_data[2 * (i + 2 * mpass)] = ptr_p3_y[6 * i + 4]; |
1031 | 0 | ptr_data[2 * (i + 2 * mpass) + 1] = ptr_p3_y[6 * i + 5]; |
1032 | 0 | } |
1033 | 0 | } |
1034 | | |
1035 | | VOID ia_enhaacplus_enc_complex_fft(FLOAT32 *ptr_data, WORD32 len, |
1036 | 16.3M | ixheaace_scratch_mem *pstr_scratch) { |
1037 | 16.3M | if (len & (len - 1)) { |
1038 | 0 | ia_enhaacplus_enc_complex_fft_p3(ptr_data, len, pstr_scratch); |
1039 | 16.3M | } else { |
1040 | 16.3M | ia_enhaacplus_enc_complex_fft_p2(ptr_data, len, pstr_scratch->p_fft_p2_y); |
1041 | 16.3M | } |
1042 | 16.3M | } |
1043 | | |
1044 | | static VOID ixheaace_post_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_trig_data, |
1045 | 567k | WORD32 step, WORD32 trig_data_size) { |
1046 | 567k | WORD32 i; |
1047 | 567k | FLOAT32 w_re, w_im, re1, re2, im1, im2; |
1048 | 567k | const FLOAT32 *ptr_sin = ptr_trig_data; |
1049 | 567k | const FLOAT32 *ptr_cos = ptr_trig_data + trig_data_size; |
1050 | | |
1051 | 567k | w_im = *ptr_sin; |
1052 | 567k | w_re = *ptr_cos; |
1053 | | |
1054 | 55.9M | for (i = 0; i < m / 4; i++) { |
1055 | 55.3M | re1 = ptr_x[2 * i]; |
1056 | 55.3M | im1 = ptr_x[2 * i + 1]; |
1057 | 55.3M | re2 = ptr_x[m - 2 - 2 * i]; |
1058 | 55.3M | im2 = ptr_x[m - 1 - 2 * i]; |
1059 | | |
1060 | 55.3M | ptr_x[2 * i] = (re1 * w_re + im1 * w_im); |
1061 | | |
1062 | 55.3M | ptr_x[m - 1 - 2 * i] = (re1 * w_im - im1 * w_re); |
1063 | | |
1064 | 55.3M | ptr_sin += step; |
1065 | 55.3M | ptr_cos -= step; |
1066 | | |
1067 | 55.3M | w_im = *ptr_sin; |
1068 | 55.3M | w_re = *ptr_cos; |
1069 | | |
1070 | 55.3M | ptr_x[m - 2 - 2 * i] = (re2 * w_im + im2 * w_re); |
1071 | | |
1072 | 55.3M | ptr_x[2 * i + 1] = (re2 * w_re - im2 * w_im); |
1073 | 55.3M | } |
1074 | 567k | } |
1075 | | |
1076 | | static VOID ixheaace_cplx_mult_twid(FLOAT32 *ptr_re, FLOAT32 *ptr_im, FLOAT32 a, FLOAT32 b, |
1077 | 41.8M | FLOAT32 twid_table, FLOAT32 twid_table_h) { |
1078 | 41.8M | *ptr_re = (a * twid_table) - (b * twid_table_h); |
1079 | 41.8M | *ptr_im = (a * twid_table_h) + (b * twid_table); |
1080 | 41.8M | } |
1081 | | |
1082 | 1.94M | static VOID ixheaace_cfft_15_twiddle(FLOAT32 *ptr_inp) { |
1083 | 1.94M | const FLOAT32 *ptr_tw_flt = &ixheaace_mix_rad_twid_tbl[0]; |
1084 | 1.94M | const FLOAT32 *ptr_tw_flt_h = &ixheaace_mix_rad_twid_tbl_h[0]; |
1085 | 1.94M | FLOAT32 accu1, accu2; |
1086 | 1.94M | WORD32 i, j; |
1087 | 1.94M | ptr_inp += 12; |
1088 | | |
1089 | 5.82M | for (j = 0; j < 2; j++) { |
1090 | 19.4M | for (i = 0; i < 4; i++) { |
1091 | 15.5M | ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1], |
1092 | 15.5M | ptr_tw_flt[i], ptr_tw_flt_h[i]); |
1093 | 15.5M | ptr_inp[2 * i + 0] = accu1; |
1094 | 15.5M | ptr_inp[2 * i + 1] = accu2; |
1095 | 15.5M | } |
1096 | 3.88M | ptr_inp += 10; |
1097 | 3.88M | ptr_tw_flt += 4; |
1098 | 3.88M | ptr_tw_flt_h += 4; |
1099 | 3.88M | } |
1100 | 1.94M | } |
1101 | | |
1102 | 1.94M | static VOID ixheaace_cfft_15_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, FLOAT32 *ptr_fft3_out) { |
1103 | 1.94M | WORD32 i, idx; |
1104 | 1.94M | FLOAT32 *ptr_buf1, *ptr_buf2, *ptr_buf3; |
1105 | 1.94M | FLOAT32 add_r, sub_r; |
1106 | 1.94M | FLOAT32 add_i, sub_i; |
1107 | 1.94M | FLOAT32 x_01_r, x_01_i, temp; |
1108 | 1.94M | FLOAT32 p1, p2, p3, p4; |
1109 | | |
1110 | 1.94M | FLOAT32 sin_mu_flt = 0.866027832f; |
1111 | 1.94M | FLOAT32 c51_flt = 0.951049805f; |
1112 | 1.94M | FLOAT32 c52_flt = -0.76940918f; |
1113 | 1.94M | FLOAT32 c53_flt = -0.36328125f; |
1114 | 1.94M | FLOAT32 c54_flt = 0.559020996f; |
1115 | 1.94M | FLOAT32 c55_flt = -0.625f; |
1116 | | |
1117 | 1.94M | FLOAT32 r1, r2, r3, r4; |
1118 | 1.94M | FLOAT32 s1, s2, s3, s4, t, temp1, temp2; |
1119 | 1.94M | FLOAT32 *ptr_out_fft3 = ptr_fft3_out; |
1120 | | |
1121 | 1.94M | FLOAT32 xr_0, xr_1, xr_2; |
1122 | 1.94M | FLOAT32 xi_0, xi_1, xi_2; |
1123 | | |
1124 | 1.94M | ptr_buf2 = ptr_fft3_out; |
1125 | 1.94M | ptr_buf1 = ptr_buf3 = ptr_fft3_out; |
1126 | | |
1127 | 7.76M | for (i = 0; i < FFT3; i++) { |
1128 | 5.82M | *ptr_buf1++ = ptr_inp[0 + 64 * i]; |
1129 | 5.82M | *ptr_buf1++ = ptr_inp[1 + 64 * i]; |
1130 | | |
1131 | 5.82M | *ptr_buf1++ = ptr_inp[192 + 64 * i]; |
1132 | 5.82M | *ptr_buf1++ = ptr_inp[193 + 64 * i]; |
1133 | | |
1134 | 5.82M | *ptr_buf1++ = ptr_inp[384 + 64 * i]; |
1135 | 5.82M | *ptr_buf1++ = ptr_inp[385 + 64 * i]; |
1136 | | |
1137 | 5.82M | *ptr_buf1++ = ptr_inp[576 + 64 * i]; |
1138 | 5.82M | *ptr_buf1++ = ptr_inp[577 + 64 * i]; |
1139 | | |
1140 | 5.82M | *ptr_buf1++ = ptr_inp[768 + 64 * i]; |
1141 | 5.82M | *ptr_buf1++ = ptr_inp[769 + 64 * i]; |
1142 | | |
1143 | 5.82M | r1 = ptr_buf3[2] + ptr_buf3[8]; |
1144 | 5.82M | r4 = ptr_buf3[2] - ptr_buf3[8]; |
1145 | 5.82M | r3 = ptr_buf3[4] + ptr_buf3[6]; |
1146 | 5.82M | r2 = ptr_buf3[4] - ptr_buf3[6]; |
1147 | 5.82M | t = ((r1 - r3) * c54_flt); |
1148 | | |
1149 | 5.82M | r1 = r1 + r3; |
1150 | | |
1151 | 5.82M | temp1 = ptr_buf3[0] + r1; |
1152 | | |
1153 | 5.82M | r1 = temp1 + ((r1 * c55_flt) * 2); |
1154 | | |
1155 | 5.82M | r3 = r1 - t; |
1156 | 5.82M | r1 = r1 + t; |
1157 | | |
1158 | 5.82M | t = ((r4 + r2) * c51_flt); |
1159 | 5.82M | r4 = t + ((r4 * c52_flt) * 2); |
1160 | 5.82M | r2 = t + (r2 * c53_flt); |
1161 | | |
1162 | 5.82M | s1 = ptr_buf3[3] + ptr_buf3[9]; |
1163 | 5.82M | s4 = ptr_buf3[3] - ptr_buf3[9]; |
1164 | 5.82M | s3 = ptr_buf3[5] + ptr_buf3[7]; |
1165 | 5.82M | s2 = ptr_buf3[5] - ptr_buf3[7]; |
1166 | | |
1167 | 5.82M | t = ((s1 - s3) * c54_flt); |
1168 | | |
1169 | 5.82M | s1 = s1 + s3; |
1170 | | |
1171 | 5.82M | temp2 = ptr_buf3[1] + s1; |
1172 | | |
1173 | 5.82M | s1 = temp2 + (((s1 * c55_flt)) * 2); |
1174 | | |
1175 | 5.82M | s3 = s1 - t; |
1176 | 5.82M | s1 = s1 + t; |
1177 | | |
1178 | 5.82M | t = ((s4 + s2) * c51_flt); |
1179 | 5.82M | s4 = t + (((s4 * c52_flt)) * 2); |
1180 | 5.82M | s2 = t + ((s2 * c53_flt)); |
1181 | | |
1182 | 5.82M | *ptr_buf2++ = temp1; |
1183 | 5.82M | *ptr_buf2++ = temp2; |
1184 | 5.82M | *ptr_buf2++ = r1 + s2; |
1185 | 5.82M | *ptr_buf2++ = s1 - r2; |
1186 | 5.82M | *ptr_buf2++ = r3 - s4; |
1187 | 5.82M | *ptr_buf2++ = s3 + r4; |
1188 | 5.82M | *ptr_buf2++ = r3 + s4; |
1189 | 5.82M | *ptr_buf2++ = s3 - r4; |
1190 | 5.82M | *ptr_buf2++ = r1 - s2; |
1191 | 5.82M | *ptr_buf2++ = s1 + r2; |
1192 | 5.82M | ptr_buf3 = ptr_buf1; |
1193 | 5.82M | } |
1194 | | |
1195 | 1.94M | idx = 0; |
1196 | 1.94M | ixheaace_cfft_15_twiddle(ptr_out_fft3); |
1197 | | |
1198 | 11.6M | for (i = 0; i < FFT5; i++) { |
1199 | 9.71M | xr_0 = ptr_out_fft3[0]; |
1200 | 9.71M | xi_0 = ptr_out_fft3[1]; |
1201 | | |
1202 | 9.71M | xr_1 = ptr_out_fft3[10]; |
1203 | 9.71M | xi_1 = ptr_out_fft3[11]; |
1204 | | |
1205 | 9.71M | xr_2 = ptr_out_fft3[20]; |
1206 | 9.71M | xi_2 = ptr_out_fft3[21]; |
1207 | | |
1208 | 9.71M | x_01_r = (xr_0 + xr_1); |
1209 | 9.71M | x_01_i = (xi_0 + xi_1); |
1210 | | |
1211 | 9.71M | add_r = (xr_1 + xr_2); |
1212 | 9.71M | add_i = (xi_1 + xi_2); |
1213 | | |
1214 | 9.71M | sub_r = (xr_1 - xr_2); |
1215 | 9.71M | sub_i = (xi_1 - xi_2); |
1216 | | |
1217 | 9.71M | p1 = add_r / 2; |
1218 | | |
1219 | 9.71M | p2 = (sub_i * sin_mu_flt); |
1220 | 9.71M | p3 = (sub_r * sin_mu_flt); |
1221 | | |
1222 | 9.71M | p4 = add_i / 2; |
1223 | | |
1224 | 9.71M | temp = (xr_0 - p1); |
1225 | 9.71M | temp1 = (xi_0 + p3); |
1226 | 9.71M | temp2 = (xi_0 - p3); |
1227 | | |
1228 | 9.71M | ptr_op[idx] = (x_01_r + xr_2); |
1229 | 9.71M | ptr_op[idx + 1] = (x_01_i + xi_2); |
1230 | | |
1231 | 9.71M | idx = idx + 320; |
1232 | 9.71M | ptr_op[idx] = (temp + p2); |
1233 | 9.71M | ptr_op[idx + 1] = (temp2 - p4); |
1234 | | |
1235 | 9.71M | idx = idx + 320; |
1236 | 9.71M | ptr_op[idx] = (temp - p2); |
1237 | 9.71M | ptr_op[idx + 1] = (temp1 - p4); |
1238 | 9.71M | ptr_out_fft3 += 2; |
1239 | 9.71M | idx = idx - 576; |
1240 | 9.71M | } |
1241 | 1.94M | } |
1242 | | |
1243 | | static VOID ixheaace_cfft_twiddle_mult(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1, |
1244 | | WORD32 dim2, const FLOAT32 *ptr_tw_flt, |
1245 | 60.7k | const FLOAT32 *ptr_tw_h_flt) { |
1246 | 60.7k | FLOAT32 accu1, accu2; |
1247 | 60.7k | WORD32 i, j; |
1248 | 60.7k | WORD32 step_val = (dim2 - 1) << 1; |
1249 | 2.00M | for (i = 0; i < dim2; i++) { |
1250 | 1.94M | ptr_op[0] = ptr_inp[0]; |
1251 | 1.94M | ptr_op[1] = ptr_inp[1]; |
1252 | 1.94M | ptr_op += 2; |
1253 | 1.94M | ptr_inp += 2; |
1254 | 1.94M | } |
1255 | | |
1256 | 910k | for (j = 0; j < (dim1 - 1); j++) { |
1257 | 849k | ptr_op[0] = ptr_inp[0]; |
1258 | 849k | ptr_op[1] = ptr_inp[1]; |
1259 | 849k | ptr_inp += 2; |
1260 | 849k | ptr_op += 2; |
1261 | 27.1M | for (i = 0; i < (dim2 - 1); i++) { |
1262 | 26.3M | ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1], |
1263 | 26.3M | ptr_tw_flt[i], ptr_tw_h_flt[i]); |
1264 | 26.3M | ptr_op[2 * i + 0] = accu1; |
1265 | 26.3M | ptr_op[2 * i + 1] = accu2; |
1266 | 26.3M | } |
1267 | 849k | ptr_inp += step_val; |
1268 | 849k | ptr_op += step_val; |
1269 | 849k | ptr_tw_flt += (dim2 - 1); |
1270 | 849k | ptr_tw_h_flt += (dim2 - 1); |
1271 | 849k | } |
1272 | 60.7k | } |
1273 | | |
1274 | 910k | static VOID ixheaace_cfft_32_480(FLOAT32 *ptr_in, FLOAT32 *ptr_out) { |
1275 | 910k | WORD32 i, l1, l2, h2; |
1276 | 910k | FLOAT32 xh0_0, xh1_0, xl0_0, xl1_0; |
1277 | 910k | FLOAT32 xh0_1, xh1_1, xl0_1, xl1_1; |
1278 | 910k | FLOAT32 x_0, x_1, x_2, x_3; |
1279 | 910k | FLOAT32 x_4, x_5, x_6, x_7; |
1280 | 910k | FLOAT32 *ptr_x; |
1281 | 910k | FLOAT32 *ptr_y; |
1282 | 910k | FLOAT32 interm_y[FFT32X2]; |
1283 | 910k | FLOAT32 n00, n10, n20, n30, n01, n11, n21, n31; |
1284 | | |
1285 | 910k | FLOAT32 inp_0qi, inp_0qr; |
1286 | 910k | FLOAT32 inp_1qi, inp_1qr; |
1287 | 910k | FLOAT32 inp_2qi, inp_2qr; |
1288 | 910k | FLOAT32 inp_3qi, inp_3qr; |
1289 | 910k | FLOAT32 mul_0qi, mul_0qr; |
1290 | 910k | FLOAT32 mul_1qi, mul_1qr; |
1291 | 910k | FLOAT32 mul_2qi, mul_2qr; |
1292 | 910k | FLOAT32 mul_3qi, mul_3qr; |
1293 | 910k | FLOAT32 sum_0qi, sum_0qr; |
1294 | 910k | FLOAT32 sum_1qi, sum_1qr; |
1295 | 910k | FLOAT32 sum_2qi, sum_2qr; |
1296 | 910k | FLOAT32 sum_3qi, sum_3qr; |
1297 | 910k | WORD32 idx1 = 0, idx2 = FFT15 * FFT32; |
1298 | 910k | FLOAT32 mul_i, mul_r; |
1299 | | |
1300 | 910k | ptr_x = ptr_in; |
1301 | | |
1302 | | // This computes first and second stage butterflies. So, 4-point FFT is done. |
1303 | 8.19M | for (i = 0; i < 8; i++) { |
1304 | 7.28M | x_0 = ptr_x[0]; |
1305 | 7.28M | x_1 = ptr_x[1]; |
1306 | 7.28M | x_2 = ptr_x[16]; |
1307 | 7.28M | x_3 = ptr_x[16 + 1]; |
1308 | 7.28M | x_4 = ptr_x[32]; |
1309 | 7.28M | x_5 = ptr_x[32 + 1]; |
1310 | 7.28M | x_6 = ptr_x[48]; |
1311 | 7.28M | x_7 = ptr_x[48 + 1]; |
1312 | | |
1313 | 7.28M | xh0_0 = x_0 + x_4; |
1314 | 7.28M | xh1_0 = x_1 + x_5; |
1315 | 7.28M | xl0_0 = x_0 - x_4; |
1316 | 7.28M | xl1_0 = x_1 - x_5; |
1317 | 7.28M | xh0_1 = x_2 + x_6; |
1318 | 7.28M | xh1_1 = x_3 + x_7; |
1319 | 7.28M | xl0_1 = x_2 - x_6; |
1320 | 7.28M | xl1_1 = x_3 - x_7; |
1321 | | |
1322 | 7.28M | n00 = xh0_0 + xh0_1; |
1323 | 7.28M | n01 = xh1_0 + xh1_1; |
1324 | 7.28M | n10 = xl0_0 + xl1_1; |
1325 | 7.28M | n11 = xl1_0 - xl0_1; |
1326 | 7.28M | n20 = xh0_0 - xh0_1; |
1327 | 7.28M | n21 = xh1_0 - xh1_1; |
1328 | 7.28M | n30 = xl0_0 - xl1_1; |
1329 | 7.28M | n31 = xl1_0 + xl0_1; |
1330 | | |
1331 | 7.28M | ptr_x[0] = n00; |
1332 | 7.28M | ptr_x[1] = n01; |
1333 | 7.28M | ptr_x[16] = n10; |
1334 | 7.28M | ptr_x[16 + 1] = n11; |
1335 | 7.28M | ptr_x[32] = n20; |
1336 | 7.28M | ptr_x[32 + 1] = n21; |
1337 | 7.28M | ptr_x[48] = n30; |
1338 | 7.28M | ptr_x[48 + 1] = n31; |
1339 | | |
1340 | 7.28M | ptr_x += 2; |
1341 | 7.28M | } |
1342 | | |
1343 | | // This computes third and fourth stage butterflies. So, next 4-point FFT is done. |
1344 | 910k | { |
1345 | 910k | h2 = 16 >> 1; |
1346 | 910k | l1 = 16; |
1347 | 910k | l2 = 16 + (16 >> 1); |
1348 | | |
1349 | 910k | ptr_x = ptr_in; |
1350 | 910k | ptr_y = &interm_y[0]; |
1351 | | |
1352 | | /* Butter fly summation in 2 steps */ |
1353 | 910k | inp_0qr = ptr_x[0]; |
1354 | 910k | inp_0qi = ptr_x[1]; |
1355 | 910k | inp_1qr = ptr_x[4]; |
1356 | 910k | inp_1qi = ptr_x[5]; |
1357 | 910k | inp_2qr = ptr_x[8]; |
1358 | 910k | inp_2qi = ptr_x[9]; |
1359 | 910k | inp_3qr = ptr_x[12]; |
1360 | 910k | inp_3qi = ptr_x[13]; |
1361 | | |
1362 | 910k | mul_0qr = inp_0qr; |
1363 | 910k | mul_0qi = inp_0qi; |
1364 | 910k | mul_1qr = inp_1qr; |
1365 | 910k | mul_1qi = inp_1qi; |
1366 | 910k | mul_2qr = inp_2qr; |
1367 | 910k | mul_2qi = inp_2qi; |
1368 | 910k | mul_3qr = inp_3qr; |
1369 | 910k | mul_3qi = inp_3qi; |
1370 | | |
1371 | 910k | sum_0qr = mul_0qr + mul_2qr; |
1372 | 910k | sum_0qi = mul_0qi + mul_2qi; |
1373 | 910k | sum_1qr = mul_0qr - mul_2qr; |
1374 | 910k | sum_1qi = mul_0qi - mul_2qi; |
1375 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1376 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1377 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1378 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1379 | | |
1380 | 910k | ptr_y[0] = sum_0qr + sum_2qr; |
1381 | 910k | ptr_y[1] = sum_0qi + sum_2qi; |
1382 | 910k | ptr_y[h2] = sum_1qr + sum_3qi; |
1383 | 910k | ptr_y[h2 + 1] = sum_1qi - sum_3qr; |
1384 | 910k | ptr_y[l1] = sum_0qr - sum_2qr; |
1385 | 910k | ptr_y[l1 + 1] = sum_0qi - sum_2qi; |
1386 | 910k | ptr_y[l2] = sum_1qr - sum_3qi; |
1387 | 910k | ptr_y[l2 + 1] = sum_1qi + sum_3qr; |
1388 | | |
1389 | 910k | ptr_y += 2; |
1390 | 910k | ptr_x += 16; |
1391 | | |
1392 | | /* 2nd butter fly */ |
1393 | | |
1394 | 910k | inp_0qr = ptr_x[0]; |
1395 | 910k | inp_0qi = ptr_x[1]; |
1396 | 910k | inp_1qr = ptr_x[4]; |
1397 | 910k | inp_1qi = ptr_x[5]; |
1398 | 910k | inp_2qr = ptr_x[8]; |
1399 | 910k | inp_2qi = ptr_x[9]; |
1400 | 910k | inp_3qr = ptr_x[12]; |
1401 | 910k | inp_3qi = ptr_x[13]; |
1402 | | |
1403 | 910k | mul_0qr = inp_0qr; |
1404 | 910k | mul_0qi = inp_0qi; |
1405 | | |
1406 | 910k | mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f); |
1407 | 910k | mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f); |
1408 | | |
1409 | 910k | mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f); |
1410 | 910k | mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f); |
1411 | | |
1412 | 910k | mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f); |
1413 | 910k | mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f); |
1414 | | |
1415 | 910k | sum_0qr = mul_0qr + (mul_2qr * 2); |
1416 | 910k | sum_0qi = mul_0qi + (mul_2qi * 2); |
1417 | 910k | sum_1qr = mul_0qr - (mul_2qr * 2); |
1418 | 910k | sum_1qi = mul_0qi - (mul_2qi * 2); |
1419 | | |
1420 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1421 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1422 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1423 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1424 | | |
1425 | 910k | ptr_y[0] = sum_0qr + (sum_2qr * 2); |
1426 | 910k | ptr_y[1] = sum_0qi + (sum_2qi * 2); |
1427 | 910k | ptr_y[h2] = sum_1qr + (sum_3qi * 2); |
1428 | 910k | ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2); |
1429 | 910k | ptr_y[l1] = sum_0qr - (sum_2qr * 2); |
1430 | 910k | ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2); |
1431 | 910k | ptr_y[l2] = sum_1qr - (sum_3qi * 2); |
1432 | 910k | ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2); |
1433 | | |
1434 | 910k | ptr_y += 2; |
1435 | 910k | ptr_x += 16; |
1436 | | |
1437 | | /* 3rd butter fly */ |
1438 | | |
1439 | 910k | inp_0qr = ptr_x[0]; |
1440 | 910k | inp_0qi = ptr_x[1]; |
1441 | 910k | inp_1qr = ptr_x[4]; |
1442 | 910k | inp_1qi = ptr_x[5]; |
1443 | 910k | inp_2qr = ptr_x[8]; |
1444 | 910k | inp_2qi = ptr_x[9]; |
1445 | 910k | inp_3qr = ptr_x[12]; |
1446 | 910k | inp_3qi = ptr_x[13]; |
1447 | | |
1448 | 910k | mul_0qr = inp_0qr; |
1449 | 910k | mul_0qi = inp_0qi; |
1450 | | |
1451 | 910k | mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f); |
1452 | 910k | mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f); |
1453 | | |
1454 | 910k | mul_2qr = inp_2qi; |
1455 | 910k | mul_2qi = inp_2qr; |
1456 | | |
1457 | 910k | mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f); |
1458 | 910k | mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f); |
1459 | | |
1460 | 910k | sum_0qr = mul_0qr + mul_2qr; |
1461 | 910k | sum_0qi = mul_0qi - mul_2qi; |
1462 | 910k | sum_1qr = mul_0qr - mul_2qr; |
1463 | 910k | sum_1qi = mul_0qi + mul_2qi; |
1464 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1465 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1466 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1467 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1468 | | |
1469 | 910k | ptr_y[0] = sum_0qr + (sum_2qr * 2); |
1470 | 910k | ptr_y[1] = sum_0qi + (sum_2qi * 2); |
1471 | 910k | ptr_y[h2] = sum_1qr + (sum_3qi * 2); |
1472 | 910k | ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2); |
1473 | 910k | ptr_y[l1] = sum_0qr - (sum_2qr * 2); |
1474 | 910k | ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2); |
1475 | 910k | ptr_y[l2] = sum_1qr - (sum_3qi * 2); |
1476 | 910k | ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2); |
1477 | | |
1478 | 910k | ptr_y += 2; |
1479 | 910k | ptr_x += 16; |
1480 | | |
1481 | | /* 4th butter fly */ |
1482 | | |
1483 | 910k | inp_0qr = ptr_x[0]; |
1484 | 910k | inp_0qi = ptr_x[1]; |
1485 | 910k | inp_1qr = ptr_x[4]; |
1486 | 910k | inp_1qi = ptr_x[5]; |
1487 | 910k | inp_2qr = ptr_x[8]; |
1488 | 910k | inp_2qi = ptr_x[9]; |
1489 | 910k | inp_3qr = ptr_x[12]; |
1490 | 910k | inp_3qi = ptr_x[13]; |
1491 | | |
1492 | 910k | mul_0qr = inp_0qr; |
1493 | 910k | mul_0qi = inp_0qi; |
1494 | | |
1495 | 910k | mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f); |
1496 | 910k | mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f); |
1497 | | |
1498 | 910k | mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f); |
1499 | 910k | mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f); |
1500 | | |
1501 | 910k | mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f); |
1502 | 910k | mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f); |
1503 | | |
1504 | 910k | sum_0qr = mul_0qr + (mul_2qr * 2); |
1505 | 910k | sum_0qi = mul_0qi + (mul_2qi * 2); |
1506 | 910k | sum_1qr = mul_0qr - (mul_2qr * 2); |
1507 | 910k | sum_1qi = mul_0qi - (mul_2qi * 2); |
1508 | | |
1509 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1510 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1511 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1512 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1513 | | |
1514 | 910k | ptr_y[0] = sum_0qr + (sum_2qr * 2); |
1515 | 910k | ptr_y[1] = sum_0qi + (sum_2qi * 2); |
1516 | 910k | ptr_y[h2] = sum_1qr + (sum_3qi * 2); |
1517 | 910k | ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2); |
1518 | 910k | ptr_y[l1] = sum_0qr - (sum_2qr * 2); |
1519 | 910k | ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2); |
1520 | 910k | ptr_y[l2] = sum_1qr - (sum_3qi * 2); |
1521 | 910k | ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2); |
1522 | | |
1523 | 910k | ptr_x = ptr_in; |
1524 | 910k | ptr_y = &interm_y[32]; |
1525 | | |
1526 | | /* Butter fly summation in 2 steps */ |
1527 | 910k | inp_0qr = ptr_x[2]; |
1528 | 910k | inp_0qi = ptr_x[3]; |
1529 | 910k | inp_1qr = ptr_x[6]; |
1530 | 910k | inp_1qi = ptr_x[7]; |
1531 | 910k | inp_2qr = ptr_x[10]; |
1532 | 910k | inp_2qi = ptr_x[11]; |
1533 | 910k | inp_3qr = ptr_x[14]; |
1534 | 910k | inp_3qi = ptr_x[15]; |
1535 | | |
1536 | 910k | mul_0qr = inp_0qr; |
1537 | 910k | mul_0qi = inp_0qi; |
1538 | 910k | mul_1qr = inp_1qr; |
1539 | 910k | mul_1qi = inp_1qi; |
1540 | 910k | mul_2qr = inp_2qr; |
1541 | 910k | mul_2qi = inp_2qi; |
1542 | 910k | mul_3qr = inp_3qr; |
1543 | 910k | mul_3qi = inp_3qi; |
1544 | | |
1545 | 910k | sum_0qr = mul_0qr + mul_2qr; |
1546 | 910k | sum_0qi = mul_0qi + mul_2qi; |
1547 | 910k | sum_1qr = mul_0qr - mul_2qr; |
1548 | 910k | sum_1qi = mul_0qi - mul_2qi; |
1549 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1550 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1551 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1552 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1553 | | |
1554 | 910k | ptr_y[0] = sum_0qr + sum_2qr; |
1555 | 910k | ptr_y[1] = sum_0qi + sum_2qi; |
1556 | 910k | ptr_y[h2] = sum_1qr + sum_3qi; |
1557 | 910k | ptr_y[h2 + 1] = sum_1qi - sum_3qr; |
1558 | 910k | ptr_y[l1] = sum_0qr - sum_2qr; |
1559 | 910k | ptr_y[l1 + 1] = sum_0qi - sum_2qi; |
1560 | 910k | ptr_y[l2] = sum_1qr - sum_3qi; |
1561 | 910k | ptr_y[l2 + 1] = sum_1qi + sum_3qr; |
1562 | | |
1563 | 910k | ptr_y += 2; |
1564 | 910k | ptr_x += 16; |
1565 | | |
1566 | | /* 2nd butter fly */ |
1567 | | |
1568 | 910k | inp_0qr = ptr_x[2]; |
1569 | 910k | inp_0qi = ptr_x[3]; |
1570 | 910k | inp_1qr = ptr_x[6]; |
1571 | 910k | inp_1qi = ptr_x[7]; |
1572 | 910k | inp_2qr = ptr_x[10]; |
1573 | 910k | inp_2qi = ptr_x[11]; |
1574 | 910k | inp_3qr = ptr_x[14]; |
1575 | 910k | inp_3qi = ptr_x[15]; |
1576 | | |
1577 | 910k | mul_0qr = inp_0qr; |
1578 | 910k | mul_0qi = inp_0qi; |
1579 | | |
1580 | 910k | mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f); |
1581 | 910k | mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f); |
1582 | | |
1583 | 910k | mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f); |
1584 | 910k | mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f); |
1585 | | |
1586 | 910k | mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f); |
1587 | 910k | mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f); |
1588 | | |
1589 | 910k | sum_0qr = mul_0qr + (mul_2qr * 2); |
1590 | 910k | sum_0qi = mul_0qi + (mul_2qi * 2); |
1591 | 910k | sum_1qr = mul_0qr - (mul_2qr * 2); |
1592 | 910k | sum_1qi = mul_0qi - (mul_2qi * 2); |
1593 | | |
1594 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1595 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1596 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1597 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1598 | | |
1599 | 910k | ptr_y[0] = sum_0qr + (sum_2qr * 2); |
1600 | 910k | ptr_y[1] = sum_0qi + (sum_2qi * 2); |
1601 | 910k | ptr_y[h2] = sum_1qr + (sum_3qi * 2); |
1602 | 910k | ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2); |
1603 | 910k | ptr_y[l1] = sum_0qr - (sum_2qr * 2); |
1604 | 910k | ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2); |
1605 | 910k | ptr_y[l2] = sum_1qr - (sum_3qi * 2); |
1606 | 910k | ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2); |
1607 | | |
1608 | 910k | ptr_y += 2; |
1609 | 910k | ptr_x += 16; |
1610 | | |
1611 | | /* 3rd butter fly */ |
1612 | | |
1613 | 910k | inp_0qr = ptr_x[2]; |
1614 | 910k | inp_0qi = ptr_x[3]; |
1615 | 910k | inp_1qr = ptr_x[6]; |
1616 | 910k | inp_1qi = ptr_x[7]; |
1617 | 910k | inp_2qr = ptr_x[10]; |
1618 | 910k | inp_2qi = ptr_x[11]; |
1619 | 910k | inp_3qr = ptr_x[14]; |
1620 | 910k | inp_3qi = ptr_x[15]; |
1621 | | |
1622 | 910k | mul_0qr = inp_0qr; |
1623 | 910k | mul_0qi = inp_0qi; |
1624 | | |
1625 | 910k | mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f); |
1626 | 910k | mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f); |
1627 | | |
1628 | 910k | mul_2qr = inp_2qi; |
1629 | 910k | mul_2qi = inp_2qr; |
1630 | | |
1631 | 910k | mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f); |
1632 | 910k | mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f); |
1633 | | |
1634 | 910k | sum_0qr = mul_0qr + mul_2qr; |
1635 | 910k | sum_0qi = mul_0qi - mul_2qi; |
1636 | 910k | sum_1qr = mul_0qr - mul_2qr; |
1637 | 910k | sum_1qi = mul_0qi + mul_2qi; |
1638 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1639 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1640 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1641 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1642 | | |
1643 | 910k | ptr_y[0] = sum_0qr + (sum_2qr * 2); |
1644 | 910k | ptr_y[1] = sum_0qi + (sum_2qi * 2); |
1645 | 910k | ptr_y[h2] = sum_1qr + (sum_3qi * 2); |
1646 | 910k | ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2); |
1647 | 910k | ptr_y[l1] = sum_0qr - (sum_2qr * 2); |
1648 | 910k | ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2); |
1649 | 910k | ptr_y[l2] = sum_1qr - (sum_3qi * 2); |
1650 | 910k | ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2); |
1651 | | |
1652 | 910k | ptr_y += 2; |
1653 | 910k | ptr_x += 16; |
1654 | | |
1655 | | /* 4th butterfly */ |
1656 | | |
1657 | 910k | inp_0qr = ptr_x[2]; |
1658 | 910k | inp_0qi = ptr_x[3]; |
1659 | 910k | inp_1qr = ptr_x[6]; |
1660 | 910k | inp_1qi = ptr_x[7]; |
1661 | 910k | inp_2qr = ptr_x[10]; |
1662 | 910k | inp_2qi = ptr_x[11]; |
1663 | 910k | inp_3qr = ptr_x[14]; |
1664 | 910k | inp_3qi = ptr_x[15]; |
1665 | | |
1666 | 910k | mul_0qr = inp_0qr; |
1667 | 910k | mul_0qi = inp_0qi; |
1668 | | |
1669 | 910k | mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f); |
1670 | 910k | mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f); |
1671 | | |
1672 | 910k | mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f); |
1673 | 910k | mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f); |
1674 | | |
1675 | 910k | mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f); |
1676 | 910k | mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f); |
1677 | | |
1678 | 910k | sum_0qr = mul_0qr + (mul_2qr * 2); |
1679 | 910k | sum_0qi = mul_0qi + (mul_2qi * 2); |
1680 | 910k | sum_1qr = mul_0qr - (mul_2qr * 2); |
1681 | 910k | sum_1qi = mul_0qi - (mul_2qi * 2); |
1682 | | |
1683 | 910k | sum_2qr = mul_1qr + mul_3qr; |
1684 | 910k | sum_2qi = mul_1qi + mul_3qi; |
1685 | 910k | sum_3qr = mul_1qr - mul_3qr; |
1686 | 910k | sum_3qi = mul_1qi - mul_3qi; |
1687 | | |
1688 | 910k | ptr_y[0] = sum_0qr + (sum_2qr * 2); |
1689 | 910k | ptr_y[1] = sum_0qi + (sum_2qi * 2); |
1690 | 910k | ptr_y[h2] = sum_1qr + (sum_3qi * 2); |
1691 | 910k | ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2); |
1692 | 910k | ptr_y[l1] = sum_0qr - (sum_2qr * 2); |
1693 | 910k | ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2); |
1694 | 910k | ptr_y[l2] = sum_1qr - (sum_3qi * 2); |
1695 | 910k | ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2); |
1696 | 910k | } |
1697 | | |
1698 | | // Last stage of 32 point FFT |
1699 | 910k | { |
1700 | 910k | ptr_y = ptr_out; |
1701 | 910k | ptr_y[idx1] = interm_y[0] + interm_y[32]; |
1702 | 910k | ptr_y[idx1 + 1] = interm_y[1] + interm_y[33]; |
1703 | 910k | ptr_y[idx2] = interm_y[0] - interm_y[32]; |
1704 | 910k | ptr_y[idx2 + 1] = interm_y[1] - interm_y[33]; |
1705 | 910k | idx1 += FFT15X2; |
1706 | 910k | idx2 += FFT15X2; |
1707 | 14.5M | for (i = 1; i < FFT16; i++) { |
1708 | 13.6M | mul_r = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]) - |
1709 | 13.6M | (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]); |
1710 | 13.6M | mul_i = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]) + |
1711 | 13.6M | (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]); |
1712 | | |
1713 | 13.6M | mul_r = mul_r / 2; |
1714 | 13.6M | mul_i = mul_i / 2; |
1715 | 13.6M | ptr_y[idx1] = interm_y[2 * i + 0] + (mul_r * 2); |
1716 | 13.6M | ptr_y[idx1 + 1] = interm_y[2 * i + 1] + (mul_i * 2); |
1717 | 13.6M | ptr_y[idx2] = interm_y[2 * i + 0] - (mul_r * 2); |
1718 | 13.6M | ptr_y[idx2 + 1] = interm_y[2 * i + 1] - (mul_i * 2); |
1719 | 13.6M | idx1 += FFT15X2; |
1720 | 13.6M | idx2 += FFT15X2; |
1721 | 13.6M | } |
1722 | 910k | } |
1723 | 910k | } |
1724 | | |
1725 | | static VOID ixheaace_dec_rearrange_short_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out, WORD32 N, |
1726 | 4.46M | const WORD16 *ptr_re_arr_tab) { |
1727 | 4.46M | WORD32 n, i = 0; |
1728 | | |
1729 | 111M | for (n = 0; n < N; n++) { |
1730 | 107M | WORD32 idx = ptr_re_arr_tab[n] << 1; |
1731 | 107M | ptr_out[i++] = ptr_in[idx]; |
1732 | 107M | ptr_out[i++] = ptr_in[idx + 1]; |
1733 | 107M | } |
1734 | 4.46M | } |
1735 | | |
1736 | 3.56M | static VOID ixheaace_fft_5_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) { |
1737 | 3.56M | FLOAT32 C51 = 0.951056516f; |
1738 | 3.56M | FLOAT32 C52 = -0.769420885f; |
1739 | 3.56M | FLOAT32 C53 = -0.363271264f; |
1740 | 3.56M | FLOAT32 C54 = 0.559016994f; |
1741 | 3.56M | FLOAT32 C55 = -0.625f; |
1742 | | |
1743 | 3.56M | FLOAT32 r1, r2, r3, r4; |
1744 | 3.56M | FLOAT32 s1, s2, s3, s4, t, temp1, temp2; |
1745 | | |
1746 | 3.56M | r1 = (ptr_in[2] + ptr_in[8]); |
1747 | 3.56M | r4 = (ptr_in[2] - ptr_in[8]); |
1748 | 3.56M | r3 = (ptr_in[4] + ptr_in[6]); |
1749 | 3.56M | r2 = (ptr_in[4] - ptr_in[6]); |
1750 | | |
1751 | 3.56M | t = ((r1 - r3) * C54); |
1752 | 3.56M | r1 = (r1 + r3); |
1753 | | |
1754 | 3.56M | temp1 = (ptr_in[0] + r1); |
1755 | 3.56M | r1 = (temp1 + (((r1 * C55)) * 2)); |
1756 | | |
1757 | 3.56M | r3 = (r1 - t); |
1758 | 3.56M | r1 = (r1 + t); |
1759 | | |
1760 | 3.56M | t = ((r4 + r2) * C51); |
1761 | 3.56M | r4 = (t + ((r4 * C52) * 2)); |
1762 | 3.56M | r2 = (t + (r2 * C53)); |
1763 | | |
1764 | 3.56M | s1 = (ptr_in[3] + ptr_in[9]); |
1765 | 3.56M | s4 = (ptr_in[3] - ptr_in[9]); |
1766 | 3.56M | s3 = (ptr_in[5] + ptr_in[7]); |
1767 | 3.56M | s2 = (ptr_in[5] - ptr_in[7]); |
1768 | | |
1769 | 3.56M | t = ((s1 - s3) * C54); |
1770 | 3.56M | s1 = (s1 + s3); |
1771 | | |
1772 | 3.56M | temp2 = (ptr_in[1] + s1); |
1773 | | |
1774 | 3.56M | s1 = (temp2 + (((s1 * C55)) * 2)); |
1775 | | |
1776 | 3.56M | s3 = (s1 - t); |
1777 | 3.56M | s1 = (s1 + t); |
1778 | | |
1779 | 3.56M | t = ((s4 + s2) * C51); |
1780 | 3.56M | s4 = (t + (((s4 * C52)) * 2)); |
1781 | 3.56M | s2 = (t + ((s2 * C53))); |
1782 | | |
1783 | 3.56M | ptr_out[0] = temp1; |
1784 | 3.56M | ptr_out[1] = temp2; |
1785 | 3.56M | ptr_out[2] = (r1 + s2); |
1786 | 3.56M | ptr_out[3] = (s1 - r2); |
1787 | 3.56M | ptr_out[4] = (r3 - s4); |
1788 | 3.56M | ptr_out[5] = (s3 + r4); |
1789 | 3.56M | ptr_out[6] = (r3 + s4); |
1790 | 3.56M | ptr_out[7] = (s3 - r4); |
1791 | 3.56M | ptr_out[8] = (r1 - s2); |
1792 | 3.56M | ptr_out[9] = (s1 + r2); |
1793 | 3.56M | } |
1794 | | |
1795 | 5.94M | static VOID ixheaace_fft_3_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) { |
1796 | 5.94M | FLOAT32 add_r, sub_r; |
1797 | 5.94M | FLOAT32 add_i, sub_i; |
1798 | 5.94M | FLOAT32 x_01_r, x_01_i, temp; |
1799 | | |
1800 | 5.94M | FLOAT32 p1, p2, p3, p4; |
1801 | 5.94M | FLOAT32 sinmu = 0.866025404f; |
1802 | | |
1803 | 5.94M | x_01_r = (ptr_in[0] + ptr_in[2]); |
1804 | 5.94M | x_01_i = (ptr_in[1] + ptr_in[3]); |
1805 | | |
1806 | 5.94M | add_r = (ptr_in[2] + ptr_in[4]); |
1807 | 5.94M | add_i = (ptr_in[3] + ptr_in[5]); |
1808 | | |
1809 | 5.94M | sub_r = (ptr_in[2] - ptr_in[4]); |
1810 | 5.94M | sub_i = (ptr_in[3] - ptr_in[5]); |
1811 | | |
1812 | 5.94M | p1 = add_r / 2; |
1813 | 5.94M | p2 = (sub_i * sinmu); |
1814 | 5.94M | p3 = (sub_r * sinmu); |
1815 | 5.94M | p4 = add_i / 2; |
1816 | | |
1817 | 5.94M | temp = (ptr_in[0] - p1); |
1818 | | |
1819 | 5.94M | ptr_out[0] = (x_01_r + ptr_in[4]); |
1820 | 5.94M | ptr_out[1] = (x_01_i + ptr_in[5]); |
1821 | 5.94M | ptr_out[2] = (temp + p2); |
1822 | 5.94M | ptr_out[3] = ((ptr_in[1] - p3) - p4); |
1823 | 5.94M | ptr_out[4] = (temp - p2); |
1824 | 5.94M | ptr_out[5] = ((ptr_in[1] + p3) - p4); |
1825 | 5.94M | } |
1826 | | |
1827 | | static VOID ixheaace_pre_twiddle_120(FLOAT32 *ptr_in, FLOAT32 *ptr_data, WORD32 n, |
1828 | 297k | const FLOAT32 *ptr_cos_sin_tbl) { |
1829 | 297k | WORD npoints_4, i; |
1830 | 297k | FLOAT32 tempr, tempi, temp; |
1831 | 297k | FLOAT32 c, c1, s, s1; |
1832 | 297k | FLOAT32 *ptr_in1, *ptr_in2; |
1833 | 297k | FLOAT32 *ptr_x = ptr_in + (n - 1); |
1834 | | |
1835 | 297k | npoints_4 = n >> 2; |
1836 | | |
1837 | 297k | ptr_in1 = ptr_data; |
1838 | 297k | ptr_in2 = ptr_data + n - 1; |
1839 | | |
1840 | 9.21M | for (i = 0; i < npoints_4; i++) { |
1841 | 8.92M | c = *ptr_cos_sin_tbl++; |
1842 | 8.92M | s = *ptr_cos_sin_tbl++; |
1843 | | |
1844 | 8.92M | tempr = *ptr_in1++; |
1845 | 8.92M | tempi = *ptr_in2--; |
1846 | | |
1847 | 8.92M | temp = -((tempr * c) + (tempi * s)); |
1848 | 8.92M | *ptr_in++ = temp; |
1849 | | |
1850 | 8.92M | temp = -((tempi * c) - (tempr * s)); |
1851 | 8.92M | *ptr_in++ = temp; |
1852 | | |
1853 | 8.92M | c1 = *ptr_cos_sin_tbl++; |
1854 | 8.92M | s1 = *ptr_cos_sin_tbl++; |
1855 | | |
1856 | 8.92M | tempi = *ptr_in1++; |
1857 | 8.92M | tempr = *ptr_in2--; |
1858 | | |
1859 | 8.92M | temp = -((tempi * c1) - (tempr * s1)); |
1860 | 8.92M | *ptr_x-- = temp; |
1861 | | |
1862 | 8.92M | temp = -((tempr * c1) + (tempi * s1)); |
1863 | 8.92M | *ptr_x-- = temp; |
1864 | 8.92M | } |
1865 | 297k | } |
1866 | | |
1867 | | static VOID ixheaace_post_twiddle_120(FLOAT32 *ptr_out, FLOAT32 *ptr_x, |
1868 | 297k | const FLOAT32 *ptr_cos_sin_tbl, WORD m) { |
1869 | 297k | WORD i; |
1870 | 297k | FLOAT32 c, c1, s, s1; |
1871 | 297k | FLOAT32 tempr, tempi, temp; |
1872 | 297k | FLOAT32 *ptr_in2 = ptr_x + (m - 1); |
1873 | 297k | FLOAT32 *ptr_in1 = ptr_x; |
1874 | 297k | FLOAT32 *ptr_x1 = ptr_out; |
1875 | 297k | FLOAT32 *ptr_x2 = ptr_out + (m - 1); |
1876 | | |
1877 | 9.21M | for (i = 0; i < m; i += 4) { |
1878 | 8.92M | c = *ptr_cos_sin_tbl++; |
1879 | 8.92M | s = *ptr_cos_sin_tbl++; |
1880 | 8.92M | c1 = *ptr_cos_sin_tbl++; |
1881 | 8.92M | s1 = *ptr_cos_sin_tbl++; |
1882 | | |
1883 | 8.92M | tempr = *ptr_in1++; |
1884 | 8.92M | tempi = *ptr_in1++; |
1885 | | |
1886 | 8.92M | temp = -((tempr * s) - (tempi * c)); |
1887 | 8.92M | *ptr_x2-- = temp; |
1888 | | |
1889 | 8.92M | temp = -((tempr * c) + (tempi * s)); |
1890 | 8.92M | *ptr_x1++ = temp; |
1891 | | |
1892 | 8.92M | tempi = *ptr_in2--; |
1893 | 8.92M | tempr = *ptr_in2--; |
1894 | | |
1895 | 8.92M | temp = -((tempr * s1) - (tempi * c1)); |
1896 | 8.92M | *ptr_x1++ = temp; |
1897 | | |
1898 | 8.92M | temp = -((tempr * c1) + (tempi * s1)); |
1899 | 8.92M | *ptr_x2-- = temp; |
1900 | 8.92M | } |
1901 | 297k | } |
1902 | | |
1903 | 1.18M | static VOID ixheaace_fft_960_15(FLOAT32 *ptr_in_flt, FLOAT32 *ptr_out_flt) { |
1904 | 1.18M | WORD32 i; |
1905 | 1.18M | FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt; |
1906 | 1.18M | ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_5); |
1907 | | |
1908 | 1.18M | ptr_buf1_flt = ptr_out_flt; |
1909 | 1.18M | ptr_buf2_flt = ptr_in_flt; |
1910 | 4.75M | for (i = 0; i < FFT3; i++) { |
1911 | 3.56M | ixheaace_fft_5_flt(ptr_buf1_flt, ptr_buf2_flt); |
1912 | | |
1913 | 3.56M | ptr_buf1_flt += (FFT5 * 2); |
1914 | 3.56M | ptr_buf2_flt += (FFT5 * 2); |
1915 | 3.56M | } |
1916 | | |
1917 | 1.18M | ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_3); |
1918 | 1.18M | ptr_buf1_flt = ptr_out_flt; |
1919 | 1.18M | ptr_buf2_flt = ptr_in_flt; |
1920 | 7.13M | for (i = 0; i < FFT5; i++) { |
1921 | 5.94M | ixheaace_fft_3_flt(ptr_buf1_flt, ptr_buf2_flt); |
1922 | | |
1923 | 5.94M | ptr_buf1_flt += (FFT3 * 2); |
1924 | 5.94M | ptr_buf2_flt += (FFT3 * 2); |
1925 | 5.94M | } |
1926 | | |
1927 | 1.18M | ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_sml); |
1928 | 1.18M | } |
1929 | | |
1930 | 297k | static VOID ixheaace_fft_120(WORD32 npoints, FLOAT32 *ptr_x_flt, FLOAT32 *ptr_y_flt) { |
1931 | 297k | WORD32 i; |
1932 | 297k | FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt; |
1933 | 297k | FLOAT32 *ptr_in_flt, *ptr_out_flt; |
1934 | | |
1935 | 297k | ptr_in_flt = ptr_x_flt; |
1936 | 297k | ptr_out_flt = ptr_y_flt; |
1937 | 297k | ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_4); |
1938 | | |
1939 | 297k | ptr_buf1_flt = ptr_out_flt; |
1940 | 297k | ptr_buf2_flt = ptr_in_flt; |
1941 | | |
1942 | 4.75M | for (i = 0; i < FFT15; i++) { |
1943 | 4.46M | { |
1944 | 4.46M | FLOAT32 x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7; |
1945 | 4.46M | FLOAT32 *y0, *y1, *y2, *y3; |
1946 | 4.46M | FLOAT32 *x0; |
1947 | 4.46M | FLOAT32 xh0_0, xh1_0, xh0_1, xh1_1, xl0_0, xl1_0, xl0_1, xl1_1; |
1948 | 4.46M | WORD32 h2; |
1949 | 4.46M | FLOAT32 n00, n01, n10, n11, n20, n21, n30, n31; |
1950 | | |
1951 | 4.46M | ptr_x_flt = ptr_buf1_flt; |
1952 | 4.46M | ptr_y_flt = ptr_buf2_flt; |
1953 | 4.46M | npoints = 4; |
1954 | 4.46M | h2 = 0; |
1955 | | |
1956 | 4.46M | y0 = ptr_y_flt; |
1957 | 4.46M | y2 = ptr_y_flt + (WORD32)npoints; |
1958 | 4.46M | x0 = ptr_x_flt; |
1959 | 4.46M | y1 = y0 + (WORD32)(npoints >> 1); |
1960 | 4.46M | y3 = y2 + (WORD32)(npoints >> 1); |
1961 | | |
1962 | 4.46M | x_0 = x0[0]; |
1963 | 4.46M | x_1 = x0[1]; |
1964 | 4.46M | x_2 = x0[2]; |
1965 | 4.46M | x_3 = x0[3]; |
1966 | 4.46M | x_4 = x0[4]; |
1967 | 4.46M | x_5 = x0[5]; |
1968 | 4.46M | x_6 = x0[6]; |
1969 | 4.46M | x_7 = x0[7]; |
1970 | | |
1971 | 4.46M | xh0_0 = x_0 + x_4; |
1972 | 4.46M | xh1_0 = x_1 + x_5; |
1973 | 4.46M | xl0_0 = x_0 - x_4; |
1974 | 4.46M | xl1_0 = x_1 - x_5; |
1975 | 4.46M | xh0_1 = x_2 + x_6; |
1976 | 4.46M | xh1_1 = x_3 + x_7; |
1977 | 4.46M | xl0_1 = x_2 - x_6; |
1978 | 4.46M | xl1_1 = x_3 - x_7; |
1979 | | |
1980 | 4.46M | n00 = xh0_0 + xh0_1; |
1981 | 4.46M | n01 = xh1_0 + xh1_1; |
1982 | 4.46M | n10 = xl0_0 + xl1_1; |
1983 | 4.46M | n11 = xl1_0 - xl0_1; |
1984 | 4.46M | n20 = xh0_0 - xh0_1; |
1985 | 4.46M | n21 = xh1_0 - xh1_1; |
1986 | 4.46M | n30 = xl0_0 - xl1_1; |
1987 | 4.46M | n31 = xl1_0 + xl0_1; |
1988 | | |
1989 | 4.46M | y0[2 * h2] = n00; |
1990 | 4.46M | y0[2 * h2 + 1] = n01; |
1991 | 4.46M | y1[2 * h2] = n10; |
1992 | 4.46M | y1[2 * h2 + 1] = n11; |
1993 | 4.46M | y2[2 * h2] = n20; |
1994 | 4.46M | y2[2 * h2 + 1] = n21; |
1995 | 4.46M | y3[2 * h2] = n30; |
1996 | 4.46M | y3[2 * h2 + 1] = n31; |
1997 | 4.46M | } |
1998 | | |
1999 | 4.46M | ptr_buf1_flt += (FFT4 * 2); |
2000 | 4.46M | ptr_buf2_flt += (FFT4 * 2); |
2001 | 4.46M | } |
2002 | | |
2003 | 297k | ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_15_4); |
2004 | | |
2005 | 297k | ptr_buf1_flt = ptr_out_flt; |
2006 | 297k | ptr_buf2_flt = ptr_in_flt; |
2007 | 1.48M | for (i = 0; i < FFT4; i++) { |
2008 | 1.18M | ixheaace_fft_960_15(ptr_buf1_flt, ptr_buf2_flt); |
2009 | 1.18M | ptr_buf1_flt += (FFT15 * 2); |
2010 | 1.18M | ptr_buf2_flt += (FFT15 * 2); |
2011 | 1.18M | } |
2012 | | |
2013 | 297k | ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_120); |
2014 | 297k | } |
2015 | | |
2016 | 60.7k | static VOID ixheaace_cfft_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op) { |
2017 | 60.7k | WORD32 i; |
2018 | 60.7k | FLOAT32 *ptr_buf1, *ptr_buf2; |
2019 | 60.7k | FLOAT32 fft5_out[FFT15X2] = {0}; |
2020 | | |
2021 | 60.7k | ptr_buf1 = ptr_inp; |
2022 | 60.7k | ptr_buf2 = ptr_op; |
2023 | | |
2024 | 2.00M | for (i = 0; i < FFT32; i++) { |
2025 | 1.94M | ixheaace_cfft_15_480(ptr_buf1, ptr_buf2, &fft5_out[0]); |
2026 | 1.94M | ptr_buf1 += 2; |
2027 | 1.94M | ptr_buf2 += 2; |
2028 | 1.94M | } |
2029 | | |
2030 | 60.7k | ixheaace_cfft_twiddle_mult(ptr_op, ptr_inp, FFT15, FFT32, ixheaace_fft_mix_rad_twid_tbl_480, |
2031 | 60.7k | ixheaace_fft_mix_rad_twid_h_tbl_480); |
2032 | | |
2033 | 60.7k | ptr_buf1 = ptr_inp; |
2034 | 60.7k | ptr_buf2 = ptr_op; |
2035 | | |
2036 | 971k | for (i = 0; i < FFT15; i++) { |
2037 | 910k | ixheaace_cfft_32_480(ptr_buf1, ptr_buf2); |
2038 | 910k | ptr_buf1 += (FFT32X2); |
2039 | 910k | ptr_buf2 += 2; |
2040 | 910k | } |
2041 | 60.7k | } |
2042 | | |
2043 | | static VOID ixheaace_pre_twiddle_960(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n, |
2044 | 60.7k | const FLOAT32 *ptr_cos_sin_tbl) { |
2045 | 60.7k | WORD npoints_4, i; |
2046 | 60.7k | FLOAT32 tempr, tempi, temp; |
2047 | 60.7k | FLOAT32 c, c1, s, s1; |
2048 | 60.7k | FLOAT32 *ptr_in_1, *ptr_in_2; |
2049 | 60.7k | FLOAT32 *ptr_x_1 = ptr_x + (n - 1); |
2050 | | |
2051 | 60.7k | npoints_4 = n >> 2; |
2052 | | |
2053 | 60.7k | ptr_in_1 = ptr_data; |
2054 | 60.7k | ptr_in_2 = ptr_data + n - 1; |
2055 | | |
2056 | 14.6M | for (i = 0; i < npoints_4; i++) { |
2057 | 14.5M | c = *ptr_cos_sin_tbl++; |
2058 | 14.5M | s = *ptr_cos_sin_tbl++; |
2059 | | |
2060 | 14.5M | tempr = *ptr_in_1++; |
2061 | 14.5M | tempi = *ptr_in_2--; |
2062 | | |
2063 | 14.5M | temp = -((tempr * c) + (tempi * s)); |
2064 | 14.5M | *ptr_x++ = temp; |
2065 | | |
2066 | 14.5M | temp = -((tempi * c) - (tempr * s)); |
2067 | 14.5M | *ptr_x++ = temp; |
2068 | | |
2069 | 14.5M | c1 = *ptr_cos_sin_tbl++; |
2070 | 14.5M | s1 = *ptr_cos_sin_tbl++; |
2071 | | |
2072 | 14.5M | tempi = *ptr_in_1++; |
2073 | 14.5M | tempr = *ptr_in_2--; |
2074 | | |
2075 | 14.5M | temp = -((tempi * c1) - (tempr * s1)); |
2076 | 14.5M | *ptr_x_1-- = temp; |
2077 | | |
2078 | 14.5M | temp = -((tempr * c1) + (tempi * s1)); |
2079 | 14.5M | *ptr_x_1-- = temp; |
2080 | 14.5M | } |
2081 | 60.7k | } |
2082 | | |
2083 | | static VOID ixheaace_post_twiddle_960(FLOAT32 *ptr_out, FLOAT32 *ptr_x, |
2084 | 60.7k | const FLOAT32 *ptr_cos_sin_tbl, WORD m) { |
2085 | 60.7k | WORD i; |
2086 | 60.7k | FLOAT32 c, c1, s, s1; |
2087 | 60.7k | FLOAT32 tempr, tempi, temp; |
2088 | 60.7k | FLOAT32 *ptr_in2 = ptr_x + (m - 1); |
2089 | 60.7k | FLOAT32 *ptr_in1 = ptr_x; |
2090 | 60.7k | FLOAT32 *ptr_x1 = ptr_out; |
2091 | 60.7k | FLOAT32 *ptr_x2 = ptr_out + (m - 1); |
2092 | | |
2093 | 14.6M | for (i = 0; i < m; i += 4) { |
2094 | 14.5M | c = *ptr_cos_sin_tbl++; |
2095 | 14.5M | s = *ptr_cos_sin_tbl++; |
2096 | 14.5M | c1 = *ptr_cos_sin_tbl++; |
2097 | 14.5M | s1 = *ptr_cos_sin_tbl++; |
2098 | | |
2099 | 14.5M | tempr = *ptr_in1++; |
2100 | 14.5M | tempi = *ptr_in1++; |
2101 | | |
2102 | 14.5M | temp = -((tempr * s) - (tempi * c)); |
2103 | 14.5M | *ptr_x2-- = temp; |
2104 | | |
2105 | 14.5M | temp = -((tempr * c) + (tempi * s)); |
2106 | 14.5M | *ptr_x1++ = temp; |
2107 | | |
2108 | 14.5M | tempi = *ptr_in2--; |
2109 | 14.5M | tempr = *ptr_in2--; |
2110 | | |
2111 | 14.5M | temp = -((tempr * s1) - (tempi * c1)); |
2112 | 14.5M | *ptr_x1++ = temp; |
2113 | | |
2114 | 14.5M | temp = -((tempr * c1) + (tempi * s1)); |
2115 | 14.5M | *ptr_x2-- = temp; |
2116 | 14.5M | } |
2117 | 60.7k | } |
2118 | | |
2119 | 60.7k | static VOID ixheaace_mdct_960(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) { |
2120 | 60.7k | FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch; |
2121 | 60.7k | FLOAT32 const_mult_fac = 3.142857143f; |
2122 | 60.7k | FLOAT32 *ptr_data = ptr_input_flt; |
2123 | 60.7k | WORD32 k; |
2124 | | |
2125 | 60.7k | memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * FRAME_LEN_960); |
2126 | 60.7k | ixheaace_pre_twiddle_960(ptr_input_flt, ptr_scratch_flt, FRAME_LEN_960, cos_sin_table_flt); |
2127 | | |
2128 | 60.7k | ixheaace_cfft_480(ptr_input_flt, ptr_scratch_flt); |
2129 | | |
2130 | 60.7k | ixheaace_post_twiddle_960(ptr_input_flt, ptr_scratch_flt, cos_sin_table_flt, FRAME_LEN_960); |
2131 | | |
2132 | 29.1M | for (k = FRAME_LEN_960 - 1; k >= 0; k -= 2) { |
2133 | 29.1M | *ptr_data = (*ptr_data * const_mult_fac); |
2134 | 29.1M | ptr_data++; |
2135 | 29.1M | *ptr_data = (*ptr_data * const_mult_fac); |
2136 | 29.1M | ptr_data++; |
2137 | 29.1M | } |
2138 | 60.7k | } |
2139 | | |
2140 | 297k | static VOID ixheaace_mdct_120(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) { |
2141 | 297k | WORD32 n, k; |
2142 | 297k | WORD32 n_by_2; |
2143 | 297k | FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch; |
2144 | 297k | FLOAT32 const_mltfac = 3.142857143f; |
2145 | 297k | FLOAT32 *ptr_data = ptr_input_flt; |
2146 | 297k | n = 120; |
2147 | 297k | n_by_2 = n >> 1; |
2148 | 297k | memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * n); |
2149 | | |
2150 | 297k | ixheaace_pre_twiddle_120(ptr_input_flt, ptr_scratch_flt, n, ixheaace_cosine_array_240); |
2151 | | |
2152 | 297k | ixheaace_fft_120(n_by_2, ptr_input_flt, ptr_scratch_flt); |
2153 | | |
2154 | 297k | ixheaace_post_twiddle_120(ptr_input_flt, ptr_scratch_flt, ixheaace_cosine_array_240, n); |
2155 | | |
2156 | 18.1M | for (k = n - 1; k >= 0; k -= 2) { |
2157 | 17.8M | *ptr_data = (*ptr_data * const_mltfac); |
2158 | 17.8M | ptr_data++; |
2159 | 17.8M | *ptr_data = (*ptr_data * const_mltfac); |
2160 | 17.8M | ptr_data++; |
2161 | 17.8M | } |
2162 | 297k | } |
2163 | | |
2164 | | static VOID ixheaace_mdct(FLOAT32 *ptr_dct_data, const FLOAT32 *ptr_trig_data, |
2165 | | const FLOAT32 *ptr_sine_window, WORD32 n, WORD32 ld_n, |
2166 | 567k | WORD8 *ptr_scratch) { |
2167 | 567k | ixheaace_pre_mdct(ptr_dct_data, n, ptr_sine_window); |
2168 | | |
2169 | 567k | ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch; |
2170 | 567k | ia_enhaacplus_enc_complex_fft(ptr_dct_data, n / 2, pstr_scratch); |
2171 | | |
2172 | 567k | ixheaace_post_mdct(ptr_dct_data, n, ptr_trig_data, |
2173 | 567k | 1 << (LD_FFT_TWIDDLE_TABLE_SIZE - (ld_n - 1)), FFT_TWIDDLE_TABLE_SIZE); |
2174 | 567k | } |
2175 | | |
2176 | | static VOID ixheaace_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer, |
2177 | | const FLOAT32 *ptr_time_signal, WORD32 ch_increment, |
2178 | 283k | WORD32 frame_len) { |
2179 | 283k | WORD32 i; |
2180 | 283k | WORD32 blk_switch_offset = frame_len; |
2181 | 283k | switch (frame_len) { |
2182 | 162k | case FRAME_LEN_1024: |
2183 | 162k | blk_switch_offset = BLK_SWITCH_OFFSET_LC_128; |
2184 | 162k | memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len, |
2185 | 162k | (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer)); |
2186 | 162k | break; |
2187 | | |
2188 | 97.8k | case FRAME_LEN_960: |
2189 | 97.8k | blk_switch_offset = BLK_SWITCH_OFFSET_LC_120; |
2190 | 97.8k | memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len, |
2191 | 97.8k | (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer)); |
2192 | 97.8k | break; |
2193 | | |
2194 | 23.1k | case FRAME_LEN_512: |
2195 | 23.1k | case FRAME_LEN_480: |
2196 | 23.1k | blk_switch_offset = frame_len; |
2197 | 23.1k | break; |
2198 | 283k | } |
2199 | | |
2200 | 272M | for (i = 0; i < frame_len; i++) { |
2201 | 272M | ptr_mdct_delay_buffer[blk_switch_offset - frame_len + i] = ptr_time_signal[i * ch_increment]; |
2202 | 272M | } |
2203 | 283k | } |
2204 | | |
2205 | | VOID ixheaace_transform_real_lc_ld(FLOAT32 *ptr_mdct_delay_buffer, const FLOAT32 *ptr_time_signal, |
2206 | | WORD32 ch_increment, FLOAT32 *ptr_real_out, WORD32 block_type, |
2207 | 283k | WORD32 frame_len, WORD8 *ptr_scratch) { |
2208 | 283k | WORD32 i, w; |
2209 | 283k | FLOAT32 ws1, ws2; |
2210 | 283k | FLOAT32 *ptr_dct_in; |
2211 | 283k | WORD32 frame_len_short = FRAME_LEN_SHORT_128; |
2212 | 283k | WORD32 ls_trans = LS_TRANS_128; |
2213 | 283k | WORD32 trans_offset = TRANSFORM_OFFSET_SHORT_128; |
2214 | 283k | const FLOAT32 *ptr_window; |
2215 | 283k | if (frame_len == FRAME_LEN_960) { |
2216 | 97.8k | ls_trans = LS_TRANS_120; |
2217 | 97.8k | trans_offset = TRANSFORM_OFFSET_SHORT_120; |
2218 | 97.8k | frame_len_short = FRAME_LEN_SHORT_120; |
2219 | 97.8k | } |
2220 | 283k | switch (block_type) { |
2221 | 154k | case LONG_WINDOW: |
2222 | 154k | ptr_dct_in = ptr_real_out; |
2223 | 154k | ptr_window = &long_window_KBD[0]; |
2224 | 154k | switch (frame_len) { |
2225 | 88.9k | case FRAME_LEN_1024: |
2226 | 88.9k | ptr_window = &long_window_KBD[0]; |
2227 | 88.9k | break; |
2228 | | |
2229 | 42.2k | case FRAME_LEN_960: |
2230 | 42.2k | ptr_window = &long_window_sine_960[0]; |
2231 | 42.2k | break; |
2232 | | |
2233 | 23.1k | case FRAME_LEN_512: |
2234 | 23.1k | ptr_window = &long_window_sine_ld[0]; |
2235 | 23.1k | break; |
2236 | | |
2237 | 0 | case FRAME_LEN_480: |
2238 | 0 | ptr_window = &long_window_sine_ld_480[0]; |
2239 | 0 | break; |
2240 | 154k | } |
2241 | 71.9M | for (i = 0; i < frame_len / 2; i++) { |
2242 | 71.7M | ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i]; |
2243 | | |
2244 | 71.7M | ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1]; |
2245 | | |
2246 | 71.7M | ptr_dct_in[frame_len / 2 + i] = ws1 - ws2; |
2247 | 71.7M | } |
2248 | | |
2249 | 154k | ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment, |
2250 | 154k | frame_len); |
2251 | | |
2252 | 71.9M | for (i = 0; i < frame_len / 2; i++) { |
2253 | 71.7M | ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1]; |
2254 | | |
2255 | 71.7M | ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i]; |
2256 | | |
2257 | 71.7M | ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2); |
2258 | 71.7M | } |
2259 | 154k | switch (frame_len) { |
2260 | 88.9k | case FRAME_LEN_1024: |
2261 | 88.9k | ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, |
2262 | 88.9k | ptr_scratch); |
2263 | 88.9k | break; |
2264 | | |
2265 | 42.2k | case FRAME_LEN_960: |
2266 | 42.2k | ixheaace_mdct_960(ptr_dct_in, ptr_scratch); |
2267 | 42.2k | break; |
2268 | | |
2269 | 23.1k | case FRAME_LEN_512: |
2270 | 23.1k | case FRAME_LEN_480: |
2271 | 23.1k | ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, ptr_window, frame_len, 9, ptr_scratch); |
2272 | 23.1k | break; |
2273 | 154k | } |
2274 | 154k | break; |
2275 | | |
2276 | 154k | case START_WINDOW: |
2277 | 26.4k | ptr_dct_in = ptr_real_out; |
2278 | 26.4k | ptr_window = &long_window_KBD[0]; |
2279 | 26.4k | switch (frame_len) { |
2280 | 16.7k | case FRAME_LEN_1024: |
2281 | 16.7k | ptr_window = &long_window_KBD[0]; |
2282 | 16.7k | break; |
2283 | | |
2284 | 9.71k | case FRAME_LEN_960: |
2285 | 9.71k | ptr_window = &long_window_sine_960[0]; |
2286 | 9.71k | break; |
2287 | 26.4k | } |
2288 | 13.2M | for (i = 0; i < frame_len / 2; i++) { |
2289 | 13.2M | ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i]; |
2290 | | |
2291 | 13.2M | ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1]; |
2292 | | |
2293 | 13.2M | ptr_dct_in[frame_len / 2 + i] = ws1 - ws2; |
2294 | 13.2M | } |
2295 | | |
2296 | 26.4k | ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment, |
2297 | 26.4k | frame_len); |
2298 | | |
2299 | 26.4k | if (frame_len == FRAME_LEN_1024) { |
2300 | 16.7k | ptr_window = &short_window_sine[0]; |
2301 | 16.7k | } else if (frame_len == FRAME_LEN_960) { |
2302 | 9.71k | ptr_window = &short_window_sine_120[0]; |
2303 | 9.71k | } |
2304 | 11.5M | for (i = 0; i < ls_trans; i++) { |
2305 | 11.5M | ws1 = ptr_mdct_delay_buffer[i]; |
2306 | 11.5M | ws2 = 0.0f; |
2307 | | |
2308 | 11.5M | ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2); |
2309 | 11.5M | } |
2310 | | |
2311 | 1.67M | for (i = 0; i < frame_len_short / 2; i++) { |
2312 | 1.65M | ws1 = ptr_mdct_delay_buffer[i + ls_trans] * ptr_window[frame_len_short - i - 1]; |
2313 | | |
2314 | 1.65M | ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1 - ls_trans)] * ptr_window[i]; |
2315 | | |
2316 | 1.65M | ptr_dct_in[frame_len / 2 - i - 1 - ls_trans] = -(ws1 + ws2); |
2317 | 1.65M | } |
2318 | 26.4k | if (frame_len == FRAME_LEN_960) { |
2319 | 9.71k | ixheaace_mdct_960(ptr_dct_in, ptr_scratch); |
2320 | 16.7k | } else { |
2321 | 16.7k | ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch); |
2322 | 16.7k | } |
2323 | | |
2324 | 26.4k | break; |
2325 | | |
2326 | 23.4k | case STOP_WINDOW: |
2327 | 23.4k | ptr_window = &long_window_KBD[0]; |
2328 | 23.4k | ptr_dct_in = ptr_real_out; |
2329 | 23.4k | if (frame_len == FRAME_LEN_1024) { |
2330 | 14.8k | ptr_window = &short_window_sine[0]; |
2331 | 14.8k | } else if (frame_len == FRAME_LEN_960) { |
2332 | 8.69k | ptr_window = &short_window_sine_120[0]; |
2333 | 8.69k | } |
2334 | 10.3M | for (i = 0; i < ls_trans; i++) { |
2335 | 10.2M | ws1 = 0.0f; |
2336 | 10.2M | ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)]; |
2337 | 10.2M | ptr_dct_in[frame_len / 2 + i] = ws1 - ws2; |
2338 | 10.2M | } |
2339 | | |
2340 | 1.49M | for (i = 0; i < frame_len_short / 2; i++) { |
2341 | 1.46M | ws1 = ptr_mdct_delay_buffer[(i + ls_trans)] * ptr_window[i]; |
2342 | | |
2343 | 1.46M | ws2 = ptr_mdct_delay_buffer[(frame_len - ls_trans - i - 1)] * |
2344 | 1.46M | ptr_window[frame_len_short - i - 1]; |
2345 | | |
2346 | 1.46M | ptr_dct_in[frame_len / 2 + i + ls_trans] = ws1 - ws2; |
2347 | 1.46M | } |
2348 | | |
2349 | 23.4k | ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment, |
2350 | 23.4k | frame_len); |
2351 | | |
2352 | 23.4k | if (frame_len == FRAME_LEN_1024) { |
2353 | 14.8k | ptr_window = &long_window_KBD[0]; |
2354 | 14.8k | } else if (frame_len == FRAME_LEN_960) { |
2355 | 8.69k | ptr_window = &long_window_sine_960[0]; |
2356 | 8.69k | } |
2357 | 11.7M | for (i = 0; i < frame_len / 2; i++) { |
2358 | 11.7M | ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1]; |
2359 | | |
2360 | 11.7M | ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i]; |
2361 | | |
2362 | 11.7M | ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2); |
2363 | 11.7M | } |
2364 | | |
2365 | 23.4k | if (frame_len == FRAME_LEN_960) { |
2366 | 8.69k | ixheaace_mdct_960(ptr_dct_in, ptr_scratch); |
2367 | 14.8k | } else { |
2368 | 14.8k | ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch); |
2369 | 14.8k | } |
2370 | | |
2371 | 23.4k | break; |
2372 | | |
2373 | 79.7k | case SHORT_WINDOW: |
2374 | 79.7k | ptr_window = &short_window_sine[0]; |
2375 | 79.7k | if (frame_len == FRAME_LEN_1024) { |
2376 | 42.5k | ptr_window = &short_window_sine[0]; |
2377 | 42.5k | } else if (frame_len == FRAME_LEN_960) { |
2378 | 37.1k | ptr_window = &short_window_sine_120[0]; |
2379 | 37.1k | } |
2380 | 717k | for (w = 0; w < TRANS_FAC; w++) { |
2381 | 637k | ptr_dct_in = ptr_real_out + w * frame_len_short; |
2382 | | |
2383 | 40.2M | for (i = 0; i < frame_len_short / 2; i++) { |
2384 | 39.6M | ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + i] * ptr_window[i]; |
2385 | | |
2386 | 39.6M | ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short - i - |
2387 | 39.6M | 1] * |
2388 | 39.6M | ptr_window[frame_len_short - i - 1]; |
2389 | | |
2390 | 39.6M | ptr_dct_in[frame_len_short / 2 + i] = ws1 - ws2; |
2391 | | |
2392 | 39.6M | ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short + i] * |
2393 | 39.6M | ptr_window[frame_len_short - i - 1]; |
2394 | | |
2395 | 39.6M | ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short * 2 - |
2396 | 39.6M | i - 1] * |
2397 | 39.6M | ptr_window[i]; |
2398 | | |
2399 | 39.6M | ptr_dct_in[frame_len_short / 2 - i - 1] = -(ws1 + ws2); |
2400 | 39.6M | } |
2401 | 637k | if (frame_len == FRAME_LEN_960) { |
2402 | 297k | ixheaace_mdct_120(ptr_dct_in, ptr_scratch); |
2403 | 340k | } else { |
2404 | 340k | ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, short_window_sine, frame_len_short, 7, |
2405 | 340k | ptr_scratch); |
2406 | 340k | } |
2407 | 637k | } |
2408 | | |
2409 | 79.7k | ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment, |
2410 | 79.7k | frame_len); |
2411 | 79.7k | break; |
2412 | 283k | } |
2413 | 283k | } |
2414 | | |
2415 | | VOID ia_enhaacplus_enc_transform_real_eld(FLOAT32 *ptr_mdct_delay_buffer, |
2416 | | const FLOAT32 *ptr_time_signal, WORD32 ch_increment, |
2417 | | FLOAT32 *ptr_real_out, WORD8 *ptr_shared_buffer5, |
2418 | 83.6k | WORD32 frame_len) { |
2419 | 83.6k | WORD32 i, loop_len; |
2420 | 83.6k | FLOAT32 w1, w2; |
2421 | 83.6k | FLOAT32 *ptr_curr_data, *ptr_prev1_data, *ptr_prev2_data, *ptr_prev3_data; |
2422 | 83.6k | const FLOAT32 *ptr_win0, *ptr_win1, *ptr_win2, *ptr_win3; |
2423 | | |
2424 | 83.6k | loop_len = frame_len / 4; |
2425 | | |
2426 | 83.6k | ptr_curr_data = &ptr_mdct_delay_buffer[3 * frame_len]; |
2427 | 83.6k | ptr_prev1_data = &ptr_mdct_delay_buffer[2 * frame_len]; |
2428 | 83.6k | ptr_prev2_data = &ptr_mdct_delay_buffer[frame_len]; |
2429 | 83.6k | ptr_prev3_data = &ptr_mdct_delay_buffer[0]; |
2430 | | |
2431 | 83.6k | ptr_win0 = &low_delay_window_eld[0]; |
2432 | 83.6k | ptr_win1 = &low_delay_window_eld[frame_len]; |
2433 | 83.6k | ptr_win2 = &low_delay_window_eld[2 * frame_len]; |
2434 | 83.6k | ptr_win3 = &low_delay_window_eld[3 * frame_len]; |
2435 | | |
2436 | 83.6k | memmove(&ptr_mdct_delay_buffer[0], &ptr_mdct_delay_buffer[frame_len], |
2437 | 83.6k | (3 * frame_len) * sizeof(ptr_mdct_delay_buffer[0])); |
2438 | | |
2439 | 42.9M | for (i = 0; i < frame_len; i++) { |
2440 | 42.8M | ptr_curr_data[i] = ptr_time_signal[i * ch_increment]; |
2441 | 42.8M | } |
2442 | | |
2443 | 10.7M | for (i = 0; i < loop_len; i++) { |
2444 | 10.7M | w1 = ptr_prev3_data[(frame_len / 2) + loop_len + i] * ptr_win3[(frame_len / 2) - 1 - i]; |
2445 | 10.7M | w1 += ptr_prev3_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win3[(frame_len / 2) + i]; |
2446 | | |
2447 | 10.7M | w2 = (-ptr_prev1_data[(frame_len / 2) + loop_len + i] * ptr_win1[(frame_len / 2) - 1 - i]); |
2448 | 10.7M | w2 += (-ptr_prev1_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win1[(frame_len / 2) + i]); |
2449 | | |
2450 | 10.7M | ptr_real_out[i] = w1 + w2; |
2451 | 10.7M | } |
2452 | | |
2453 | 10.7M | for (i = 0; i < loop_len; i++) { |
2454 | 10.7M | w1 = (-ptr_prev2_data[(frame_len / 2) + loop_len + i] * ptr_win2[(frame_len / 2) - 1 - i]); |
2455 | 10.7M | w1 += ptr_prev2_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win2[(frame_len / 2) + i]; |
2456 | | |
2457 | 10.7M | w2 = ptr_curr_data[(frame_len / 2) + loop_len + i] * ptr_win0[(frame_len / 2) - 1 - i]; |
2458 | 10.7M | w2 += (-ptr_curr_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win0[(frame_len / 2) + i]); |
2459 | | |
2460 | 10.7M | ptr_real_out[frame_len - 1 - i] = w1 + w2; |
2461 | 10.7M | } |
2462 | | |
2463 | 10.7M | for (i = 0; i < loop_len; i++) { |
2464 | 10.7M | w1 = ptr_prev2_data[loop_len - 1 - i] * ptr_win3[i]; |
2465 | 10.7M | w1 += ptr_prev3_data[loop_len + i] * ptr_win3[frame_len - 1 - i]; |
2466 | | |
2467 | 10.7M | w2 = (-ptr_curr_data[loop_len - 1 - i] * ptr_win1[i]); |
2468 | 10.7M | w2 += (-ptr_prev1_data[loop_len + i] * ptr_win1[frame_len - 1 - i]); |
2469 | | |
2470 | 10.7M | ptr_real_out[(frame_len / 2) - 1 - i] = w1 + w2; |
2471 | 10.7M | } |
2472 | | |
2473 | 10.7M | for (i = 0; i < loop_len; i++) { |
2474 | 10.7M | w1 = -(ptr_prev1_data[loop_len - 1 - i] * ptr_win2[i]); |
2475 | 10.7M | w1 += ptr_prev2_data[loop_len + i] * ptr_win2[frame_len - 1 - i]; |
2476 | | |
2477 | | /* First 128 coeffcients are zeros in the window table so they are not used in the code here*/ |
2478 | 10.7M | w2 = (-ptr_curr_data[loop_len + i] * ptr_win0[frame_len - 1 - i]); |
2479 | | |
2480 | 10.7M | ptr_real_out[(frame_len / 2) + i] = w1 + w2; |
2481 | 10.7M | } |
2482 | | |
2483 | 83.6k | ixheaace_mdct(ptr_real_out, fft_twiddle_tab, long_window_sine_ld, frame_len, 9, |
2484 | 83.6k | ptr_shared_buffer5); |
2485 | 83.6k | } |