/src/libxaac/decoder/ixheaacd_mps_poly_filt.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | #include <stdlib.h> |
21 | | #include "ixheaac_type_def.h" |
22 | | #include "ixheaac_constants.h" |
23 | | #include "ixheaacd_bitbuffer.h" |
24 | | #include "ixheaacd_interface.h" |
25 | | #include "ixheaacd_common_rom.h" |
26 | | #include "ixheaacd_sbrdecsettings.h" |
27 | | #include "ixheaacd_sbr_scale.h" |
28 | | #include "ixheaacd_env_extr_part.h" |
29 | | #include "ixheaacd_sbr_rom.h" |
30 | | #include "ixheaacd_hybrid.h" |
31 | | #include "ixheaacd_ps_dec.h" |
32 | | #include "ixheaacd_config.h" |
33 | | #include "ixheaacd_qmf_dec.h" |
34 | | #include "ixheaacd_mps_polyphase.h" |
35 | | #include "ixheaacd_mps_struct_def.h" |
36 | | #include "ixheaacd_mps_res_rom.h" |
37 | | #include "ixheaacd_mps_aac_struct.h" |
38 | | #include "ixheaacd_mps_dec.h" |
39 | | #include "ixheaacd_function_selector.h" |
40 | | |
41 | | extern const FLOAT32 |
42 | | ixheaacd_mps_polyphase_filter_coeff[10 * MAX_NUM_QMF_BANDS_SAC / 2]; /* filter table chosen by ixheaacd_mps_synt_calc when ldmps_present_flag is 0 */ |
43 | | extern const FLOAT32 ixheaacd_mps_post_twid[30]; /* consumed two entries per loop pass by ixheaacd_mps_synth_post_twidle */ |
44 | | extern const FLOAT32 ixheaacd_mps_pre_twid[64]; /* consumed four entries per loop pass by ixheaacd_mps_synth_pre_twidle */ |
45 | | |
46 | | extern const FLOAT32 ixheaacd_ldmps_polyphase_filter_coeff[1280]; /* filter table chosen by ixheaacd_mps_synt_calc when ldmps_present_flag is set */ |
47 | | |
48 | | extern const FLOAT32 ixheaacd_ldmps_pre_twid[32]; /* table handed to ixheaacd_mps_synt_pre_twiddle_dec on the 32-band path */ |
49 | | extern const FLOAT32 ixheaacd_mps_post_re_32[64]; /* table_re argument of ixheaacd_mps_synt_post_fft_twiddle_dec on the 32-band path */ |
50 | | extern const FLOAT32 ixheaacd_mps_post_im_32[64]; /* table_im argument of ixheaacd_mps_synt_post_fft_twiddle_dec on the 32-band path */ |
51 | | |
52 | | |
53 | 4.92k | VOID ixheaacd_mps_synt_init(FLOAT32 state[POLY_PHASE_SYNTH_SIZE]) { |
54 | 4.92k | memset(state, 0, sizeof(FLOAT32) * POLY_PHASE_SYNTH_SIZE); |
55 | 4.92k | } |
56 | | |
57 | | VOID ixheaacd_mps_synt_post_fft_twiddle_dec(WORD32 resolution, FLOAT32 *fin_re, |
58 | | FLOAT32 *fin_im, |
59 | | const FLOAT32 *table_re, |
60 | | const FLOAT32 *table_im, |
61 | 22.6k | FLOAT32 *state) { |
62 | 22.6k | WORD32 l; |
63 | 1.46M | for (l = 0; l < 2 * resolution; l++) { |
64 | 1.44M | state[2 * resolution - l - 1] = |
65 | 1.44M | ((fin_re[l] * table_re[l]) + (fin_im[l] * table_im[l])); |
66 | 1.44M | } |
67 | 22.6k | } |
68 | | |
69 | | VOID ixheaacd_mps_synt_out_calc_dec(WORD32 resolution, FLOAT32 *out, |
70 | | FLOAT32 *state, |
71 | 5.28M | const FLOAT32 *filter_coeff) { |
72 | 5.28M | WORD32 l, k; |
73 | 5.28M | FLOAT32 *out1, *out2, *state1, *state2; |
74 | 5.28M | out1 = out; |
75 | 5.28M | out2 = out + resolution; |
76 | 5.28M | state1 = state; |
77 | 5.28M | state2 = state + (3 * resolution); |
78 | | |
79 | 31.7M | for (k = 0; k < 5; k++) { |
80 | 1.71G | for (l = 0; l < resolution; l++) { |
81 | 1.69G | *out1++ = (*state1++) * (*filter_coeff++); |
82 | 1.69G | *out2++ = (*state2++) * (*filter_coeff++); |
83 | 1.69G | } |
84 | 26.4M | out1 += resolution; |
85 | 26.4M | out2 += resolution; |
86 | 26.4M | state1 += (3 * resolution); |
87 | 26.4M | state2 += (3 * resolution); |
88 | 26.4M | } |
89 | 5.28M | } |
90 | | |
91 | | VOID ixheaacd_mps_synt_out_calc_dec_ldmps(WORD32 resolution, FLOAT32 *out, |
92 | 4.82k | FLOAT32 *state, const FLOAT32 *filter_coeff) { |
93 | 4.82k | WORD32 l, k; |
94 | 4.82k | FLOAT32 *out1, *out2, *state1, *state2; |
95 | 4.82k | const FLOAT32 *filter1, *filter2; |
96 | 4.82k | filter1 = filter_coeff; |
97 | 4.82k | filter2 = filter_coeff + resolution; |
98 | 4.82k | out1 = out; |
99 | 4.82k | out2 = out + resolution; |
100 | 4.82k | state1 = state; |
101 | 4.82k | state2 = state + (3 * resolution); |
102 | | |
103 | 28.9k | for (k = 0; k < 5; k++) { |
104 | 1.56M | for (l = 0; l < resolution; l++) { |
105 | 1.54M | *out1++ = (*state1++) * (*filter1++); |
106 | 1.54M | *out2++ = (*state2++) * (*filter2++); |
107 | 1.54M | } |
108 | 24.1k | filter1 += resolution; |
109 | 24.1k | filter2 += resolution; |
110 | 24.1k | out1 += resolution; |
111 | 24.1k | out2 += resolution; |
112 | 24.1k | state1 += (3 * resolution); |
113 | 24.1k | state2 += (3 * resolution); |
114 | 24.1k | } |
115 | 4.82k | } |
116 | | |
117 | | VOID ixheaacd_mps_synt_out_calc_dec_ldmps_32(WORD32 resolution, FLOAT32 *out, |
118 | 22.6k | FLOAT32 *state, const FLOAT32 *filter_coeff) { |
119 | 22.6k | WORD32 l, k; |
120 | 22.6k | FLOAT32 *out1, *out2, *state1, *state2; |
121 | 22.6k | const FLOAT32 *filter1, *filter2; |
122 | 22.6k | filter1 = filter_coeff; |
123 | 22.6k | filter2 = filter_coeff + 2 * resolution; |
124 | 22.6k | out1 = out; |
125 | 22.6k | out2 = out + resolution; |
126 | 22.6k | state1 = state; |
127 | 22.6k | state2 = state + (3 * resolution); |
128 | | |
129 | 135k | for (k = 0; k < 5; k++) { |
130 | 3.73M | for (l = 0; l < resolution; l++) { |
131 | 3.61M | *out1++ = ((*state1++) * (filter1[2*l] + filter1[2*l+1])/2); |
132 | 3.61M | *out2++ = ((*state2++) * (filter2[2 * l] + filter2[2 * l + 1])/2); |
133 | 3.61M | } |
134 | 113k | filter1 += 4 * resolution; |
135 | 113k | filter2 += 4 * resolution; |
136 | 113k | out1 += resolution; |
137 | 113k | out2 += resolution; |
138 | 113k | state1 += (3 * resolution); |
139 | 113k | state2 += (3 * resolution); |
140 | 113k | } |
141 | 22.6k | } |
142 | | |
143 | | VOID ixheaacd_mps_synth_pre_twidle(FLOAT32 *out_re, FLOAT32 *out_im, |
144 | 5.29M | FLOAT32 *c_in, WORD32 len) { |
145 | 5.29M | WORD32 i; |
146 | 5.29M | FLOAT32 *c_s = c_in; |
147 | 5.29M | FLOAT32 *p_re_s = out_re; |
148 | 5.29M | FLOAT32 *p_im_s = out_im; |
149 | 5.29M | FLOAT32 *c_e = c_in + (len << 1) - 1; |
150 | 5.29M | FLOAT32 *p_im_e = out_im + len - 1; |
151 | 5.29M | FLOAT32 *p_re_e = out_re + len - 1; |
152 | 5.29M | const FLOAT32 *prtw = ixheaacd_mps_pre_twid; |
153 | | |
154 | 89.9M | for (i = 0; i < len; i += 4) { |
155 | 84.6M | *p_re_s = ((*c_s++) * (*prtw)); |
156 | 84.6M | p_re_s++; |
157 | 84.6M | *p_im_s = -((*c_s--) * (*prtw)); |
158 | 84.6M | p_im_s++; |
159 | 84.6M | *p_im_s = ((*c_e--) * (*prtw)); |
160 | 84.6M | p_im_s--; |
161 | 84.6M | *p_re_s = ((*c_e++) * (*prtw++)); |
162 | 84.6M | p_re_s--; |
163 | 84.6M | *p_im_s += ((*c_e--) * (*prtw)); |
164 | 84.6M | p_im_s++; |
165 | 84.6M | *p_re_s += ((*c_e--) * (*prtw)); |
166 | 84.6M | p_re_s++; |
167 | 84.6M | *p_re_s -= ((*c_s++) * (*prtw)); |
168 | 84.6M | p_re_s++; |
169 | 84.6M | *p_im_s += ((*c_s++) * (*prtw++)); |
170 | 84.6M | p_im_s++; |
171 | 84.6M | *p_im_e = ((*c_e--) * (*prtw)); |
172 | 84.6M | p_im_e--; |
173 | 84.6M | *p_re_e = -((*c_e++) * (*prtw)); |
174 | 84.6M | p_re_e--; |
175 | 84.6M | *p_re_e = ((*c_s++) * (*prtw)); |
176 | 84.6M | p_re_e++; |
177 | 84.6M | *p_im_e = ((*c_s--) * (*prtw++)); |
178 | 84.6M | p_im_e++; |
179 | 84.6M | *p_re_e += ((*c_s++) * (*prtw)); |
180 | 84.6M | p_re_e--; |
181 | 84.6M | *p_im_e += ((*c_s++) * (*prtw)); |
182 | 84.6M | p_im_e--; |
183 | 84.6M | *p_im_e -= ((*c_e--) * (*prtw)); |
184 | 84.6M | p_im_e--; |
185 | 84.6M | *p_re_e += ((*c_e--) * (*prtw++)); |
186 | 84.6M | p_re_e--; |
187 | 84.6M | } |
188 | 5.29M | } |
189 | | |
190 | | VOID ixheaacd_mps_synth_post_twidle(FLOAT32 *state, FLOAT32 *out_re, |
191 | 5.29M | FLOAT32 *out_im, WORD32 len) { |
192 | 5.29M | WORD32 i; |
193 | 5.29M | { |
194 | 5.29M | FLOAT32 x_0, x_1, x_2, x_3; |
195 | 5.29M | FLOAT32 *p_re_e, *p_im_e; |
196 | 5.29M | const FLOAT32 *potw = ixheaacd_mps_post_twid; |
197 | 5.29M | FLOAT32 *p_re_s = out_re; |
198 | 5.29M | FLOAT32 *p_im_s = out_im; |
199 | | |
200 | 5.29M | p_re_e = p_re_s + (len - 2); |
201 | 5.29M | p_im_e = p_im_s + (len - 2); |
202 | 5.29M | x_0 = *p_re_e; |
203 | 5.29M | x_1 = *(p_re_e + 1); |
204 | 5.29M | x_2 = *p_im_e; |
205 | 5.29M | x_3 = *(p_im_e + 1); |
206 | | |
207 | 5.29M | *(p_re_e + 1) = -*(p_re_s + 1); |
208 | 5.29M | *(p_im_e + 1) = -*p_im_s; |
209 | 5.29M | *p_im_s = *(p_im_s + 1); |
210 | | |
211 | 84.6M | for (i = 5; i < len; i += 4) { |
212 | 79.3M | FLOAT32 twdr = *potw++; |
213 | 79.3M | FLOAT32 twdi = *potw++; |
214 | 79.3M | FLOAT32 tmp; |
215 | | |
216 | 79.3M | *p_re_e = (x_0 * twdi); |
217 | 79.3M | *p_re_e += (x_1 * twdr); |
218 | 79.3M | p_re_e--; |
219 | 79.3M | p_re_s++; |
220 | 79.3M | *p_re_s = (x_0 * twdr); |
221 | 79.3M | *p_re_s -= (x_1 * twdi); |
222 | 79.3M | p_re_s++; |
223 | 79.3M | x_1 = *p_re_e--; |
224 | 79.3M | x_0 = *p_re_e++; |
225 | 79.3M | *p_re_e = (*p_re_s++ * twdi); |
226 | 79.3M | *p_re_e += -(*p_re_s * twdr); |
227 | 79.3M | p_re_e--; |
228 | 79.3M | tmp = (*p_re_s-- * twdi); |
229 | 79.3M | *p_re_s = tmp + (*p_re_s * twdr); |
230 | | |
231 | 79.3M | *p_im_e = -(x_2 * twdr); |
232 | 79.3M | *p_im_e += (x_3 * twdi); |
233 | 79.3M | p_im_e--; |
234 | 79.3M | p_im_s++; |
235 | 79.3M | *p_im_s = -(x_2 * twdi); |
236 | 79.3M | *p_im_s -= (x_3 * twdr); |
237 | 79.3M | p_im_s++; |
238 | 79.3M | x_3 = *p_im_e--; |
239 | 79.3M | x_2 = *p_im_e++; |
240 | 79.3M | *p_im_e = -(*p_im_s++ * twdr); |
241 | 79.3M | *p_im_e -= (*p_im_s * twdi); |
242 | 79.3M | p_im_e--; |
243 | 79.3M | tmp = (*p_im_s-- * twdr); |
244 | 79.3M | *p_im_s = tmp - (*p_im_s * twdi); |
245 | 79.3M | } |
246 | | |
247 | 5.29M | *p_re_e = 0.7071067f * (x_1 + x_0); |
248 | 5.29M | *p_im_e = 0.7071067f * (x_3 - x_2); |
249 | 5.29M | *(p_re_s + 1) = -0.7071067f * (x_1 - x_0); |
250 | 5.29M | *(p_im_s + 1) = -0.7071067f * (x_3 + x_2); |
251 | 5.29M | } |
252 | | |
253 | 344M | for (i = 0; i < len; i++) { |
254 | 338M | state[i] = out_im[i] - out_re[i]; |
255 | 338M | state[len + i] = out_im[len - i - 1] + out_re[len - i - 1]; |
256 | 338M | state[len - i - 1] = out_im[len - i - 1] - out_re[len - i - 1]; |
257 | 338M | state[2 * len - i - 1] = out_im[i] + out_re[i]; |
258 | 338M | } |
259 | 5.29M | } |
260 | | |
261 | | VOID ixheaacd_mps_synt_pre_twiddle_dec(FLOAT32 *ptr_in, const FLOAT32 *table, |
262 | | FLOAT32 *fin_re, FLOAT32 *fin_im, |
263 | 22.6k | WORD32 resolution) { |
264 | 22.6k | WORD32 k; |
265 | 22.6k | FLOAT32 *c_s = ptr_in; |
266 | 22.6k | FLOAT32 *p_re_s = fin_re; |
267 | 22.6k | FLOAT32 *p_im_s = fin_im; |
268 | 22.6k | FLOAT32 *c_e = ptr_in + (resolution << 1) - 1; |
269 | 22.6k | FLOAT32 *p_im_e = fin_im + resolution - 1; |
270 | 22.6k | FLOAT32 *p_re_e = fin_re + resolution - 1; |
271 | | |
272 | 384k | for (k = 0; k < resolution; k += 2) { |
273 | 361k | *p_re_s = (*c_s++) * (*table); |
274 | 361k | *p_im_s = (*c_s) * (*table); |
275 | | |
276 | 361k | *p_re_e = (*c_e--) * (*table); |
277 | 361k | *p_im_e = -(*c_e) * (*table++); |
278 | | |
279 | 361k | *p_re_s += (*c_s--) * (*table); |
280 | 361k | *p_im_s += -(*c_s++) * (*table); |
281 | 361k | p_re_s++; |
282 | 361k | p_im_s++; |
283 | 361k | c_s++; |
284 | | |
285 | 361k | *p_re_e += (*c_e++) * (*table); |
286 | 361k | *p_im_e += (*c_e--) * (*table++); |
287 | 361k | p_re_e--; |
288 | 361k | p_im_e--; |
289 | 361k | c_e--; |
290 | 361k | } |
291 | 22.6k | } |
292 | | |
293 | 78.0k | VOID ixheaacd_mps_synt_calc(ia_mps_dec_state_struct *self) { |
294 | 78.0k | WORD32 k, l, ts, ch; |
295 | 78.0k | FLOAT32 *state, *tmp_state, *out; |
296 | 78.0k | const FLOAT32 *filt_coeff; |
297 | 78.0k | FLOAT32 *tmp_buf = self->tmp_buf; |
298 | 78.0k | FLOAT32 fin_re[64] = {0}; |
299 | 78.0k | FLOAT32 fin_im[64] = {0}; |
300 | | |
301 | 78.0k | WORD32 resolution = self->resolution; |
302 | 78.0k | WORD32 m_resolution = resolution >> 1; |
303 | 78.0k | const FLOAT32 *ixheaacd_mps_post_re, *ixheaacd_mps_post_im; |
304 | 78.0k | VOID(*ixheaacd_mps_synt_out_calc_pointer) |
305 | 78.0k | (WORD32 resolution, FLOAT32 *out, FLOAT32 *state, const FLOAT32 *filter_coeff); |
306 | | |
307 | 78.0k | if (self->ldmps_config.ldmps_present_flag) |
308 | 1.82k | { |
309 | 1.82k | ixheaacd_mps_synt_out_calc_pointer = &ixheaacd_mps_synt_out_calc_dec_ldmps; |
310 | 1.82k | filt_coeff = ixheaacd_ldmps_polyphase_filter_coeff; |
311 | 1.82k | } |
312 | 76.2k | else |
313 | 76.2k | { |
314 | 76.2k | ixheaacd_mps_synt_out_calc_pointer = ixheaacd_mps_synt_out_calc; |
315 | 76.2k | filt_coeff = ixheaacd_mps_polyphase_filter_coeff; |
316 | 76.2k | } |
317 | 78.0k | if (self->qmf_band_count == 32) |
318 | 1.71k | { |
319 | 5.15k | for (ch = 0; ch < self->out_ch_count; ch++) { |
320 | 3.43k | tmp_state = self->qmf_filt_state[ch]; |
321 | 3.43k | state = &tmp_buf[self->time_slots * 2 * resolution]; |
322 | 3.43k | memcpy(state, tmp_state, sizeof(FLOAT32) * 18 * resolution); |
323 | 3.43k | out = &tmp_buf[74 * MAX_NUM_QMF_BANDS_SAC]; |
324 | | |
325 | 3.43k | ixheaacd_mps_post_re = ixheaacd_mps_post_re_32; |
326 | 3.43k | ixheaacd_mps_post_im = ixheaacd_mps_post_im_32; |
327 | | |
328 | 26.0k | for (ts = 0; ts < self->time_slots; ts++) { |
329 | | |
330 | 22.6k | state -= (2 * resolution); |
331 | | |
332 | 22.6k | ixheaacd_mps_synt_pre_twiddle_dec(&self->qmf_out_dir[ch][ts][0].re, |
333 | 22.6k | ixheaacd_ldmps_pre_twid, fin_re, fin_im, resolution); |
334 | | |
335 | 746k | for (k = resolution; k < 2 * resolution; k++) |
336 | 723k | { |
337 | 723k | fin_re[k] = 0; |
338 | 723k | fin_im[k] = 0; |
339 | 723k | } |
340 | | |
341 | 22.6k | ixheaacd_mps_complex_fft(fin_re, fin_im, 2 * resolution); |
342 | | |
343 | 22.6k | ixheaacd_mps_synt_post_fft_twiddle_dec(resolution, fin_re, fin_im, |
344 | 22.6k | ixheaacd_mps_post_re, |
345 | 22.6k | ixheaacd_mps_post_im, state); |
346 | | |
347 | 22.6k | ixheaacd_mps_synt_out_calc_dec_ldmps_32(resolution, out, state, filt_coeff); |
348 | | |
349 | 746k | for (k = 0; k < resolution; k++) { |
350 | 723k | FLOAT32 acc = out[k]; |
351 | 7.23M | for (l = 1; l < 10; l++) { |
352 | 6.51M | acc += out[resolution * l + k]; |
353 | 6.51M | } |
354 | 723k | self->output_buffer[ch][self->qmf_band_count * ts + k] = acc; |
355 | 723k | } |
356 | 22.6k | } |
357 | 3.43k | memcpy(tmp_state, state, sizeof(FLOAT32) * 18 * resolution); |
358 | 3.43k | } |
359 | 1.71k | } |
360 | 76.3k | else |
361 | 76.3k | { |
362 | 229k | for (ch = 0; ch < self->out_ch_count; ch++) { |
363 | 152k | tmp_state = self->qmf_filt_state[ch]; |
364 | 152k | state = &tmp_buf[self->time_slots * 2 * resolution]; |
365 | 152k | memcpy(state, tmp_state, sizeof(FLOAT32) * 18 * resolution); |
366 | 152k | out = &tmp_buf[74 * MAX_NUM_QMF_BANDS_SAC]; |
367 | | |
368 | 5.44M | for (ts = 0; ts < self->time_slots; ts++) { |
369 | | |
370 | 5.29M | state -= (2 * resolution); |
371 | | |
372 | 5.29M | ixheaacd_mps_synth_pre_twidle( |
373 | 5.29M | fin_re, fin_im, &self->qmf_out_dir[ch][ts][0].re, resolution); |
374 | | |
375 | 5.29M | ixheaacd_mps_synth_calc_fft(fin_re, fin_im, m_resolution); |
376 | | |
377 | 5.29M | ixheaacd_mps_synth_post_twidle(state, fin_re, fin_im, resolution); |
378 | 5.29M | (*ixheaacd_mps_synt_out_calc_pointer)(resolution, out, state, filt_coeff); |
379 | | |
380 | 344M | for (k = 0; k < resolution; k++) { |
381 | 338M | FLOAT32 acc = out[k]; |
382 | 3.38G | for (l = 1; l < 10; l++) { |
383 | 3.04G | acc += out[resolution * l + k]; |
384 | 3.04G | } |
385 | 338M | self->output_buffer[ch][self->qmf_band_count * ts + k] = acc; |
386 | 338M | } |
387 | 5.29M | } |
388 | 152k | memcpy(tmp_state, state, sizeof(FLOAT32) * 18 * resolution); |
389 | 152k | } |
390 | 76.3k | } |
391 | 78.0k | } |