/src/libxaac/encoder/ixheaace_signal_classifier.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * * |
3 | | * Copyright (C) 2023 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | #include <string.h> |
21 | | #include <math.h> |
22 | | #include "iusace_type_def.h" |
23 | | #include "iusace_cnst.h" |
24 | | |
25 | | #include "iusace_fd_quant.h" |
26 | | #include "iusace_bitbuffer.h" |
27 | | #include "impd_drc_common_enc.h" |
28 | | #include "impd_drc_uni_drc.h" |
29 | | #include "impd_drc_api.h" |
30 | | #include "impd_drc_uni_drc_eq.h" |
31 | | #include "impd_drc_uni_drc_filter_bank.h" |
32 | | #include "impd_drc_gain_enc.h" |
33 | | #include "impd_drc_struct_def.h" |
34 | | |
35 | | #include "ixheaace_memory_standards.h" |
36 | | #include "iusace_tns_usac.h" |
37 | | #include "iusace_psy_mod.h" |
38 | | #include "iusace_config.h" |
39 | | #include "iusace_signal_classifier.h" |
40 | | #include "iusace_fft.h" |
41 | | #include "iusace_block_switch_const.h" |
42 | | #include "iusace_block_switch_struct_def.h" |
43 | | #include "iusace_cnst.h" |
44 | | #include "iusace_ms.h" |
45 | | #include "ixheaace_adjust_threshold_data.h" |
46 | | #include "iusace_fd_qc_util.h" |
47 | | #include "ixheaace_sbr_header.h" |
48 | | #include "ixheaace_config.h" |
49 | | #include "ixheaace_asc_write.h" |
50 | | #include "iusace_main.h" |
51 | | |
52 | 104k | static VOID iusace_calc_pds(FLOAT32 *ptr_input, WORD32 ccfl) { |
53 | 104k | WORD32 i; |
54 | 104k | FLOAT64 max_pow, delta; |
55 | 104k | FLOAT64 log_ccfl_base_10 = (ccfl == 1024) ? LOG_1024_BASE_10 : LOG_768_BASE_10; |
56 | | |
57 | 104k | max_pow = MAX( |
58 | 104k | 10 * (log10(ptr_input[0] * ptr_input[0] + ptr_input[1] * ptr_input[1]) - log_ccfl_base_10) + |
59 | 104k | 10e-15, |
60 | 104k | MIN_POW); |
61 | | |
62 | 48.5M | for (i = 1; i<ccfl>> 1; i++) { |
63 | | /* removed the sqrt along with clubbing the for loops */ |
64 | 48.4M | ptr_input[2 * i] = (FLOAT32)MAX(10 * (log10(ptr_input[2 * i] * ptr_input[2 * i] + |
65 | 48.4M | ptr_input[2 * i + 1] * ptr_input[2 * i + 1]) - |
66 | 48.4M | log_ccfl_base_10) + |
67 | 48.4M | 10e-15, |
68 | 48.4M | MIN_POW); |
69 | | |
70 | 48.4M | max_pow = MAX(max_pow, ptr_input[2 * i]); |
71 | 48.4M | } |
72 | | |
73 | | /* Normalized to reference sound pressure level 96 dB */ |
74 | 104k | delta = 96 - max_pow; |
75 | | |
76 | 48.7M | for (i = 0; i<ccfl>> 1; i++) { |
77 | 48.5M | ptr_input[2 * i] = ptr_input[2 * i] + (FLOAT32)delta; |
78 | 48.5M | } |
79 | 104k | return; |
80 | 104k | } |
81 | | |
82 | | static VOID iusace_find_tonal(FLOAT32 *ptr_input, WORD32 *ptr_tonal_flag, FLOAT32 *ptr_scratch, |
83 | 104k | WORD32 ccfl) { |
84 | 104k | WORD32 i, j; |
85 | 104k | WORD32 is_tonal; |
86 | 104k | FLOAT64 tonal_spl; |
87 | 104k | FLOAT64 absolute_threshold_xm; |
88 | | |
89 | 48.7M | for (i = 0; i<ccfl>> 1; i++) { |
90 | 48.5M | ptr_scratch[i] = ptr_input[2 * i]; |
91 | 48.5M | } |
92 | | |
93 | 104k | if (ccfl == FRAME_LEN_LONG) { |
94 | 34.1M | for (i = 0; i <= 511; i++) { |
95 | 34.0M | ptr_tonal_flag[i] = 0; |
96 | 34.0M | } |
97 | | |
98 | 33.1M | for (i = 2; i < 500; i++) { |
99 | 33.1M | if (ptr_scratch[i] > ptr_scratch[i - 1] && ptr_scratch[i] >= ptr_scratch[i + 1]) { |
100 | 8.70M | is_tonal = 1; |
101 | | |
102 | | /* Verify it meets the condition: ptr_scratch[i]-ptr_scratch[i+j]>=7 */ |
103 | | |
104 | 8.70M | if (1 < i && i < 62) { |
105 | 1.32M | for (j = -2; j <= -2; j++) { |
106 | 1.02M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
107 | 1.02M | if (is_tonal == 0) break; |
108 | 1.02M | } |
109 | 1.02M | if (is_tonal == 1) { |
110 | 415k | for (j = 2; j <= 2; j++) { |
111 | 295k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
112 | 295k | if (is_tonal == 0) break; |
113 | 295k | } |
114 | 295k | } |
115 | | |
116 | 1.02M | if (is_tonal == 1) { |
117 | 119k | ptr_tonal_flag[i] = 1; |
118 | 119k | } |
119 | 1.02M | } |
120 | | |
121 | 7.68M | else if (62 <= i && i < 126) { |
122 | 1.63M | for (j = -3; j <= -2; j++) { |
123 | 1.47M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
124 | 1.47M | if (is_tonal == 0) break; |
125 | 1.47M | } |
126 | 1.14M | if (is_tonal == 1) { |
127 | 285k | for (j = 2; j <= 3; j++) { |
128 | 233k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
129 | 233k | if (is_tonal == 0) break; |
130 | 233k | } |
131 | 156k | } |
132 | | |
133 | 1.14M | if (is_tonal == 1) { |
134 | 51.4k | ptr_tonal_flag[i] = 1; |
135 | 51.4k | } |
136 | 1.14M | } |
137 | | |
138 | 6.53M | else if (126 <= i && i < 254) { |
139 | 3.58M | for (j = -6; j <= -2; j++) { |
140 | 3.50M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
141 | 3.50M | if (is_tonal == 0) break; |
142 | 3.50M | } |
143 | 2.26M | if (is_tonal == 1) { |
144 | 218k | for (j = 2; j <= 6; j++) { |
145 | 201k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
146 | 201k | if (is_tonal == 0) break; |
147 | 201k | } |
148 | 74.5k | } |
149 | | |
150 | 2.26M | if (is_tonal == 1) { |
151 | 17.0k | ptr_tonal_flag[i] = 1; |
152 | 17.0k | } |
153 | 2.26M | } |
154 | | |
155 | 4.26M | else if (254 <= i && i < 500) { |
156 | 8.32M | for (j = -12; j <= -2; j++) { |
157 | 8.27M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
158 | 8.27M | if (is_tonal == 0) break; |
159 | 8.27M | } |
160 | 4.26M | if (is_tonal == 1) { |
161 | 258k | for (j = 2; j <= 12; j++) { |
162 | 245k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
163 | 245k | if (is_tonal == 0) break; |
164 | 245k | } |
165 | 52.1k | } |
166 | | |
167 | 4.26M | if (is_tonal == 1) { |
168 | 12.6k | ptr_tonal_flag[i] = 1; |
169 | 12.6k | } |
170 | 4.26M | } |
171 | 8.70M | } |
172 | 33.1M | } |
173 | | |
174 | 34.1M | for (i = 0; i <= 511; i++) { |
175 | 34.0M | if (ptr_tonal_flag[i] == 1) { |
176 | | /* compute the SPL of tonal */ |
177 | 201k | tonal_spl = |
178 | 201k | 10 * log10(pow(10, (ptr_scratch[i - 1] / 10)) + pow(10, (ptr_scratch[i] / 10)) + |
179 | 201k | pow(10, (ptr_scratch[i + 1] / 10))); |
180 | | |
181 | 201k | if (i >= 324) { |
182 | 8.52k | absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[i] + 20; |
183 | 192k | } else { |
184 | 192k | absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[i]; |
185 | 192k | } |
186 | 201k | if (tonal_spl < absolute_threshold_xm) { |
187 | 95.2k | ptr_tonal_flag[i] = 0; |
188 | 95.2k | } |
189 | 201k | } |
190 | 34.0M | } |
191 | 66.4k | } else // (ccfl == 768) |
192 | 37.8k | { |
193 | 14.5M | for (i = 0; i <= 383; i++) { |
194 | 14.5M | ptr_tonal_flag[i] = 0; |
195 | 14.5M | } |
196 | | |
197 | 14.1M | for (i = 2; i < 375; i++) { |
198 | 14.1M | if (ptr_scratch[i] > ptr_scratch[i - 1] && ptr_scratch[i] >= ptr_scratch[i + 1]) { |
199 | 2.46M | is_tonal = 1; |
200 | | |
201 | | /* Verify it meets the condition: ptr_scratch[i]-ptr_scratch[i+j]>=7 */ |
202 | | |
203 | 2.46M | if (1 < i && i < 47) { |
204 | 157k | for (j = -2; j <= -2; j++) { |
205 | 124k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
206 | 124k | if (is_tonal == 0) break; |
207 | 124k | } |
208 | 124k | if (is_tonal == 1) { |
209 | 53.0k | for (j = 2; j <= 2; j++) { |
210 | 32.5k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
211 | 32.5k | if (is_tonal == 0) break; |
212 | 32.5k | } |
213 | 32.5k | } |
214 | | |
215 | 124k | if (is_tonal == 1) { |
216 | 20.5k | ptr_tonal_flag[i] = 1; |
217 | 20.5k | } |
218 | 124k | } |
219 | | |
220 | 2.33M | else if (47 <= i && i < 95) { |
221 | 483k | for (j = -3; j <= -2; j++) { |
222 | 427k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
223 | 427k | if (is_tonal == 0) break; |
224 | 427k | } |
225 | 362k | if (is_tonal == 1) { |
226 | 147k | for (j = 2; j <= 3; j++) { |
227 | 102k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
228 | 102k | if (is_tonal == 0) break; |
229 | 102k | } |
230 | 56.3k | } |
231 | | |
232 | 362k | if (is_tonal == 1) { |
233 | 44.3k | ptr_tonal_flag[i] = 1; |
234 | 44.3k | } |
235 | 362k | } |
236 | | |
237 | 1.97M | else if (95 <= i && i < 194) { |
238 | 1.29M | for (j = -5; j <= -2; j++) { |
239 | 1.17M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
240 | 1.17M | if (is_tonal == 0) break; |
241 | 1.17M | } |
242 | 732k | if (is_tonal == 1) { |
243 | 540k | for (j = 2; j <= 5; j++) { |
244 | 438k | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
245 | 438k | if (is_tonal == 0) break; |
246 | 438k | } |
247 | 120k | } |
248 | | |
249 | 732k | if (is_tonal == 1) { |
250 | 102k | ptr_tonal_flag[i] = 1; |
251 | 102k | } |
252 | 732k | } |
253 | | |
254 | 1.24M | else if (191 <= i && i < 375) { |
255 | 2.81M | for (j = -9; j <= -2; j++) { |
256 | 2.67M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
257 | 2.67M | if (is_tonal == 0) break; |
258 | 2.67M | } |
259 | 1.24M | if (is_tonal == 1) { |
260 | 1.13M | for (j = 2; j <= 9; j++) { |
261 | 1.04M | is_tonal = is_tonal && ptr_scratch[i] - ptr_scratch[i + j] >= 7; |
262 | 1.04M | if (is_tonal == 0) break; |
263 | 1.04M | } |
264 | 147k | } |
265 | | |
266 | 1.24M | if (is_tonal == 1) { |
267 | 92.2k | ptr_tonal_flag[i] = 1; |
268 | 92.2k | } |
269 | 1.24M | } |
270 | 2.46M | } |
271 | 14.1M | } |
272 | | |
273 | 14.5M | for (i = 0; i <= 383; i++) { |
274 | 14.5M | if (ptr_tonal_flag[i] == 1) { |
275 | | /* compute the SPL of tonal */ |
276 | 259k | tonal_spl = |
277 | 259k | 10 * log10(pow(10, (ptr_scratch[i - 1] / 10)) + pow(10, (ptr_scratch[i] / 10)) + |
278 | 259k | pow(10, (ptr_scratch[i + 1] / 10))); |
279 | | |
280 | 259k | if (i >= 243) { |
281 | 72.5k | absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[i] + 20; |
282 | 186k | } else { |
283 | 186k | absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[i]; |
284 | 186k | } |
285 | 259k | if (tonal_spl < absolute_threshold_xm) { |
286 | 42.3k | ptr_tonal_flag[i] = 0; |
287 | 42.3k | } |
288 | 259k | } |
289 | 14.5M | } |
290 | 37.8k | } |
291 | 104k | return; |
292 | 104k | } |
293 | | |
294 | | static VOID iusace_tonal_analysis(ia_tonal_params_struct *pstr_ton_params, |
295 | 104k | iusace_scratch_mem *pstr_scratch, WORD32 ccfl) { |
296 | 104k | FLOAT32 *ptr_complex_fft = pstr_scratch->p_complex_fft; |
297 | 104k | WORD32 *ptr_tonal_flag = pstr_scratch->p_tonal_flag; |
298 | 104k | FLOAT32 *ptr_time_sig = pstr_ton_params->time_signal; |
299 | 104k | WORD32 framecnt_xm = pstr_ton_params->framecnt_xm; |
300 | 104k | WORD32 *ptr_n_tonal = pstr_ton_params->n_tonal; |
301 | 104k | WORD32 *ptr_n_tonal_low_frequency = pstr_ton_params->n_tonal_low_frequency; |
302 | 104k | FLOAT32 *ptr_n_tonal_low_frequency_ratio = pstr_ton_params->n_tonal_low_frequency_ratio; |
303 | 104k | FLOAT32 *ave_n_tonal = pstr_ton_params->ave_n_tonal; |
304 | 104k | FLOAT32 *ave_n_tonal_short = pstr_ton_params->ave_n_tonal_short; |
305 | 104k | WORD32 i; |
306 | 104k | WORD32 fft_size = ccfl; |
307 | | |
308 | 104k | WORD32 frame_length; |
309 | 104k | WORD32 n_tonal_total, n_tonal_low_frequency_total; |
310 | | |
311 | 97.3M | for (i = 0; i < ccfl; i++) { |
312 | 97.1M | ptr_complex_fft[2 * i] = (FLOAT32)( |
313 | 97.1M | ptr_time_sig[i] * ((ccfl == 1024) ? iusace_classify_arrays.hanning_window_1024[i] |
314 | 97.1M | : iusace_classify_arrays.hanning_window_768[i])); |
315 | 97.1M | ptr_complex_fft[2 * i + 1] = 0; |
316 | 97.1M | } |
317 | | |
318 | 104k | iusace_complex_fft(ptr_complex_fft, fft_size, pstr_scratch); |
319 | | |
320 | | /* compute power density spectrum */ |
321 | | /* re_fft contains the resulting pds */ |
322 | 104k | iusace_calc_pds(ptr_complex_fft, ccfl); |
323 | | |
324 | | /* detect tonal */ |
325 | 104k | iusace_find_tonal(ptr_complex_fft, ptr_tonal_flag, pstr_scratch->p_pow_spec, ccfl); |
326 | | |
327 | | /* update n_tonal, n_tonal_low_frequency */ |
328 | 10.4M | for (i = 0; i < 99; i++) { |
329 | 10.3M | ptr_n_tonal[i] = ptr_n_tonal[i + 1]; |
330 | 10.3M | ptr_n_tonal_low_frequency[i] = ptr_n_tonal_low_frequency[i + 1]; |
331 | 10.3M | } |
332 | 104k | ptr_n_tonal[99] = 0; |
333 | 48.7M | for (i = 0; i<ccfl>> 1; i++) { |
334 | 48.5M | ptr_n_tonal[99] += ptr_tonal_flag[i]; |
335 | 48.5M | } |
336 | 104k | ptr_n_tonal_low_frequency[99] = 0; |
337 | 16.8M | for (i = 0; i < INDEXOFLOWFREQUENCY; i++) { |
338 | 16.7M | ptr_n_tonal_low_frequency[99] += ptr_tonal_flag[i]; |
339 | 16.7M | } |
340 | | |
341 | | /* compute long-term AVE and the ratio of distribution in low-frequency domain */ |
342 | 104k | if (framecnt_xm < AVE_TONAL_LENGTH) { |
343 | 61.8k | frame_length = framecnt_xm; |
344 | 61.8k | } else { |
345 | 42.4k | frame_length = AVE_TONAL_LENGTH; |
346 | 42.4k | } |
347 | | |
348 | 104k | n_tonal_total = 0; |
349 | 104k | n_tonal_low_frequency_total = 0; |
350 | 6.67M | for (i = 0; i < frame_length; i++) { |
351 | 6.57M | n_tonal_total += ptr_n_tonal[99 - i]; |
352 | 6.57M | n_tonal_low_frequency_total += ptr_n_tonal_low_frequency[99 - i]; |
353 | 6.57M | } |
354 | | |
355 | 104k | *ave_n_tonal = (FLOAT32)n_tonal_total / frame_length; |
356 | | |
357 | 104k | if (n_tonal_total == 0) { |
358 | 21.1k | *ptr_n_tonal_low_frequency_ratio = 1; |
359 | 83.1k | } else { |
360 | 83.1k | *ptr_n_tonal_low_frequency_ratio = (FLOAT32)n_tonal_low_frequency_total / n_tonal_total; |
361 | 83.1k | } |
362 | | |
363 | | /* compute the short-term AVE */ |
364 | 104k | if (framecnt_xm < AVE_TONAL_LENGTH_SHORT) { |
365 | 10.6k | frame_length = framecnt_xm; |
366 | 93.7k | } else { |
367 | 93.7k | frame_length = AVE_TONAL_LENGTH_SHORT; |
368 | 93.7k | } |
369 | | |
370 | 104k | n_tonal_total = 0; |
371 | 1.09M | for (i = 0; i < frame_length; i++) { |
372 | 988k | n_tonal_total += ptr_n_tonal[99 - i]; |
373 | 988k | } |
374 | | |
375 | 104k | *ave_n_tonal_short = (FLOAT32)n_tonal_total / frame_length; |
376 | 104k | return; |
377 | 104k | } |
378 | | |
379 | | static VOID iusace_spectral_tilt_analysis(ia_spec_tilt_params_struct *ptr_spec_params, |
380 | 104k | WORD32 ccfl) { |
381 | 104k | FLOAT32 *ptr_time_signal = ptr_spec_params->time_signal; |
382 | 104k | WORD32 framecnt_xm = ptr_spec_params->framecnt_xm; |
383 | 104k | FLOAT32 *ptr_spec_tilt_buf = ptr_spec_params->spec_tilt_buf; |
384 | 104k | FLOAT32 *ptr_msd_spec_tilt = ptr_spec_params->msd_spec_tilt; |
385 | 104k | FLOAT32 *ptr_msd_spec_tilt_short = ptr_spec_params->msd_spec_tilt_short; |
386 | 104k | WORD32 i; |
387 | 104k | WORD32 frame_length; |
388 | | |
389 | 104k | FLOAT32 r0, r1; |
390 | 104k | FLOAT32 spec_tilt; |
391 | 104k | FLOAT32 ave_spec_tilt; |
392 | | |
393 | | /* compute spectral tilt */ |
394 | 104k | r0 = 0; |
395 | 104k | r1 = 0; |
396 | 97.1M | for (i = 0; i < ccfl - 1; i++) { |
397 | 97.0M | r0 += ptr_time_signal[i] * ptr_time_signal[i]; |
398 | 97.0M | r1 += ptr_time_signal[i] * ptr_time_signal[i + 1]; |
399 | 97.0M | } |
400 | 104k | r0 += ptr_time_signal[i] * ptr_time_signal[i]; |
401 | | |
402 | 104k | if (r0 == 0) { |
403 | 3.83k | spec_tilt = 1.0f; |
404 | 100k | } else { |
405 | 100k | spec_tilt = r1 / r0; |
406 | 100k | } |
407 | | |
408 | | /* update spec_tilt_buf */ |
409 | 10.4M | for (i = 0; i < 100 - 1; i++) { |
410 | 10.3M | ptr_spec_tilt_buf[i] = ptr_spec_tilt_buf[i + 1]; |
411 | 10.3M | } |
412 | 104k | ptr_spec_tilt_buf[99] = spec_tilt; |
413 | | |
414 | | /* compute the long-term mean square deviation of the spectral tilt */ |
415 | 104k | if (framecnt_xm < SPECTRAL_TILT_LENGTH) { |
416 | 55.4k | frame_length = framecnt_xm; |
417 | 55.4k | } else { |
418 | 48.9k | frame_length = SPECTRAL_TILT_LENGTH; |
419 | 48.9k | } |
420 | | |
421 | 104k | ave_spec_tilt = 0; |
422 | 5.76M | for (i = 0; i < frame_length; i++) { |
423 | 5.66M | ave_spec_tilt += ptr_spec_tilt_buf[99 - i]; |
424 | 5.66M | } |
425 | 104k | ave_spec_tilt /= frame_length; |
426 | | |
427 | 104k | *ptr_msd_spec_tilt = 0; |
428 | 5.76M | for (i = 0; i < frame_length; i++) { |
429 | 5.66M | *ptr_msd_spec_tilt += |
430 | 5.66M | (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt); |
431 | 5.66M | } |
432 | 104k | *ptr_msd_spec_tilt /= frame_length; |
433 | | |
434 | | /* compute the short-term mean square deviation of the spectral tilt */ |
435 | 104k | if (framecnt_xm < SPECTRAL_TILT_LENGTH_SHORT) { |
436 | 20.8k | frame_length = framecnt_xm; |
437 | 83.5k | } else { |
438 | 83.5k | frame_length = SPECTRAL_TILT_LENGTH_SHORT; |
439 | 83.5k | } |
440 | | |
441 | 104k | ave_spec_tilt = 0; |
442 | 1.97M | for (i = 0; i < frame_length; i++) { |
443 | 1.86M | ave_spec_tilt += ptr_spec_tilt_buf[99 - i]; |
444 | 1.86M | } |
445 | 104k | ave_spec_tilt /= frame_length; |
446 | | |
447 | 104k | *ptr_msd_spec_tilt_short = 0; |
448 | 1.97M | for (i = 0; i < frame_length; i++) { |
449 | 1.86M | *ptr_msd_spec_tilt_short += |
450 | 1.86M | (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt); |
451 | 1.86M | } |
452 | 104k | *ptr_msd_spec_tilt_short /= frame_length; |
453 | | |
454 | | /* compute the energy of current frame */ |
455 | 104k | if (r0 <= 1) { |
456 | 4.94k | ptr_spec_params->frame_energy = 0; |
457 | 99.4k | } else { |
458 | 99.4k | ptr_spec_params->frame_energy = (FLOAT32)(10 * log(r0) / log(10)); |
459 | 99.4k | } |
460 | 104k | return; |
461 | 104k | } |
462 | | |
463 | 104k | static WORD32 iusace_init_mode_decision(ia_mode_params_struct *pstr_mode_params) { |
464 | 104k | WORD32 i; |
465 | 104k | WORD32 framecnt = pstr_mode_params->framecnt; |
466 | 104k | WORD32 *framecnt_xm = pstr_mode_params->framecnt_xm; |
467 | 104k | WORD32 *flag_border = pstr_mode_params->flag_border; |
468 | 104k | FLOAT32 ave_n_tonal_short = pstr_mode_params->ave_n_tonal_short; |
469 | 104k | FLOAT32 ave_n_tonal = pstr_mode_params->ave_n_tonal; |
470 | 104k | FLOAT32 *ave_n_tonal_short_buf = pstr_mode_params->ave_n_tonal_short_buf; |
471 | 104k | FLOAT32 *ave_n_tonal_buf = pstr_mode_params->ave_n_tonal_buf; |
472 | 104k | FLOAT32 msd_spec_tilt = pstr_mode_params->msd_spec_tilt; |
473 | 104k | FLOAT32 msd_spec_tilt_short = pstr_mode_params->msd_spec_tilt_short; |
474 | 104k | FLOAT32 *msd_spec_tilt_buf = pstr_mode_params->msd_spec_tilt_buf; |
475 | 104k | FLOAT32 *msd_spec_tilt_short_buf = pstr_mode_params->msd_spec_tilt_short_buf; |
476 | 104k | FLOAT32 n_tonal_low_frequency_ratio = pstr_mode_params->n_tonal_low_frequency_ratio; |
477 | 104k | FLOAT32 frame_energy = pstr_mode_params->frame_energy; |
478 | 104k | WORD32 init_mode_decision_result = TBD; |
479 | 104k | WORD32 count_msd_st_monchhichi = 0; |
480 | 104k | WORD32 count_msd_st_speech_music = 0, count_msd_st_music_speech = 0; |
481 | 104k | WORD32 flag_ave_music_speech = 0; |
482 | 104k | WORD32 count_msd_st_music = 0; |
483 | 104k | WORD32 border_state = 0; |
484 | 104k | WORD32 count_quiet_mode = 0; |
485 | | |
486 | 104k | *flag_border = NO_BORDER; |
487 | | |
488 | | /* border decision according to spectral tilt */ |
489 | | |
490 | | /* update msd_spec_tilt_buf, msd_spec_tilt_short_buf */ |
491 | 521k | for (i = 0; i < 5 - 1; i++) { |
492 | 417k | msd_spec_tilt_buf[i] = msd_spec_tilt_buf[i + 1]; |
493 | 417k | msd_spec_tilt_short_buf[i] = msd_spec_tilt_short_buf[i + 1]; |
494 | 417k | } |
495 | 104k | msd_spec_tilt_buf[4] = msd_spec_tilt; |
496 | 104k | msd_spec_tilt_short_buf[4] = msd_spec_tilt_short; |
497 | | |
498 | | /* speech->music find strict border of speech->music */ |
499 | 104k | if ((msd_spec_tilt >= 0.014) && (msd_spec_tilt_short <= 0.000005)) { |
500 | 1.77k | count_msd_st_monchhichi++; |
501 | 102k | } else { |
502 | 102k | count_msd_st_monchhichi = 0; |
503 | 102k | } |
504 | 104k | if (((*flag_border != BORDER_SPEECH_MUSIC_DEFINITE) && |
505 | 104k | (*flag_border != BORDER_MUSIC_SPEECH_DEFINITE)) && |
506 | 104k | (border_state != BORDER_SPEECH_MUSIC_DEFINITE) && (count_msd_st_monchhichi >= 15) && |
507 | 0 | (*framecnt_xm >= 300)) { |
508 | 0 | *framecnt_xm = 10; |
509 | 0 | *flag_border = BORDER_SPEECH_MUSIC; |
510 | 0 | } |
511 | | |
512 | | /* find the relative loose border of speech->music */ |
513 | 104k | if ((msd_spec_tilt >= 0.0025) && (msd_spec_tilt_short <= 0.000003)) { |
514 | 3.04k | count_msd_st_speech_music++; |
515 | 101k | } else { |
516 | 101k | count_msd_st_speech_music = 0; |
517 | 101k | } |
518 | 104k | if (((*flag_border != BORDER_SPEECH_MUSIC_DEFINITE) && |
519 | 104k | (*flag_border != BORDER_MUSIC_SPEECH_DEFINITE)) && |
520 | 104k | (border_state != BORDER_SPEECH_MUSIC_DEFINITE) && (count_msd_st_speech_music >= 15) && |
521 | 0 | (*framecnt_xm >= 300)) { |
522 | 0 | *framecnt_xm = 10; |
523 | 0 | *flag_border = BORDER_SPEECH_MUSIC; |
524 | 0 | } |
525 | | |
526 | | /* music->speech */ |
527 | 104k | if ((msd_spec_tilt_buf[0] <= 0.0003) && (msd_spec_tilt_short_buf[0] <= 0.0002)) { |
528 | 56.3k | count_msd_st_music_speech++; |
529 | 56.3k | } |
530 | 104k | if (((*flag_border != BORDER_SPEECH_MUSIC_DEFINITE) && |
531 | 104k | (*flag_border != BORDER_MUSIC_SPEECH_DEFINITE)) && |
532 | 104k | (border_state != BORDER_MUSIC_SPEECH_DEFINITE) && (count_msd_st_music_speech >= 100) && |
533 | 0 | (msd_spec_tilt >= 0.0008) && (msd_spec_tilt_short >= 0.0025) && (*framecnt_xm >= 20)) { |
534 | 0 | *framecnt_xm = 10; |
535 | 0 | *flag_border = BORDER_MUSIC_SPEECH; |
536 | 0 | } |
537 | | |
538 | | /* border decision according to tonal |
539 | | * update ave_n_tonal_short_buf, ave_n_tonal_buf */ |
540 | 521k | for (i = 0; i < 5 - 1; i++) { |
541 | 417k | ave_n_tonal_short_buf[i] = ave_n_tonal_short_buf[i + 1]; |
542 | 417k | ave_n_tonal_buf[i] = ave_n_tonal_buf[i + 1]; |
543 | 417k | } |
544 | 104k | ave_n_tonal_short_buf[4] = ave_n_tonal_short; |
545 | 104k | ave_n_tonal_buf[4] = ave_n_tonal; |
546 | | |
547 | | /* music->speech */ |
548 | 104k | if ((ave_n_tonal_buf[0] >= 12) && (ave_n_tonal_buf[0] < 15) && |
549 | 6.60k | (ave_n_tonal_buf[0] - ave_n_tonal_short_buf[0] >= 5) && (*framecnt_xm >= 20) && |
550 | 142 | (ave_n_tonal_short - ave_n_tonal_short_buf[0] < 5)) { |
551 | 131 | *framecnt_xm = 10; |
552 | 131 | flag_ave_music_speech = 1; |
553 | 131 | *flag_border = BORDER_MUSIC_SPEECH_DEFINITE; |
554 | 131 | } |
555 | | |
556 | | /* update border decision according to energy */ |
557 | 104k | if (frame_energy <= 60) { |
558 | 5.14k | count_quiet_mode = 0; |
559 | 99.2k | } else { |
560 | 99.2k | count_quiet_mode++; |
561 | 99.2k | } |
562 | | |
563 | 104k | if ((*flag_border == BORDER_MUSIC_SPEECH) && (count_quiet_mode <= 5)) { |
564 | 0 | *flag_border = BORDER_MUSIC_SPEECH_DEFINITE; |
565 | 0 | *framecnt_xm = 10; |
566 | 0 | } |
567 | | |
568 | | /* MUSIC_DEFINITE and SPEECH_DEFINITE mode decision according to short-term characters */ |
569 | | |
570 | | /* ave_n_tonal_short */ |
571 | 104k | if ((init_mode_decision_result == TBD) && (ave_n_tonal_short >= 19)) { |
572 | 1.70k | init_mode_decision_result = MUSIC_DEFINITE; |
573 | 1.70k | } |
574 | 104k | if ((init_mode_decision_result == TBD) && (ave_n_tonal_short <= 1.5)) { |
575 | 52.0k | init_mode_decision_result = SPEECH_DEFINITE; |
576 | 52.0k | } |
577 | | |
578 | | /* msd_spec_tilt_short */ |
579 | 104k | if (msd_spec_tilt_short >= 0.02) { |
580 | 29.9k | init_mode_decision_result = SPEECH_DEFINITE; |
581 | 29.9k | } |
582 | 104k | if ((init_mode_decision_result == TBD) && (msd_spec_tilt_short <= 0.00000025) && |
583 | 12.1k | (framecnt >= 10)) { |
584 | 10.2k | init_mode_decision_result = MUSIC_DEFINITE; |
585 | 10.2k | } |
586 | | |
587 | | /* SPEECH mode decision */ |
588 | | |
589 | | /* flag_ave_music_speech??ave_n_tonal_short */ |
590 | 104k | if ((init_mode_decision_result == TBD) && (flag_ave_music_speech == 1)) { |
591 | 71 | if ((ave_n_tonal_short <= 12) && (*framecnt_xm <= 150)) { |
592 | 56 | init_mode_decision_result = SPEECH; |
593 | 56 | } |
594 | 71 | } |
595 | | |
596 | | /* MUSIC_DEFINITE and SPEECH_DEFINITE mode decision */ |
597 | | |
598 | | /* ave_n_tonal */ |
599 | 104k | if ((init_mode_decision_result == TBD) && (ave_n_tonal <= 3)) { |
600 | 10.4k | init_mode_decision_result = SPEECH_DEFINITE; |
601 | 10.4k | } |
602 | 104k | if ((init_mode_decision_result == TBD) && (ave_n_tonal >= 15)) { |
603 | 871 | init_mode_decision_result = MUSIC_DEFINITE; |
604 | 871 | } |
605 | | |
606 | | /** ave_n_tonal_short |
607 | | */ |
608 | 104k | if ((init_mode_decision_result == TBD) && (ave_n_tonal_short >= 17)) { |
609 | 1.22k | init_mode_decision_result = MUSIC_DEFINITE; |
610 | 1.22k | } |
611 | | |
612 | | /** msd_spec_tilt |
613 | | */ |
614 | 104k | if ((init_mode_decision_result == TBD) && (msd_spec_tilt >= 0.01)) { |
615 | 4.59k | init_mode_decision_result = SPEECH_DEFINITE; |
616 | 4.59k | } |
617 | 104k | if ((init_mode_decision_result == TBD) && (framecnt >= 10) && (msd_spec_tilt <= 0.00004)) { |
618 | 3.14k | init_mode_decision_result = MUSIC_DEFINITE; |
619 | 3.14k | } |
620 | | |
621 | | /** n_tonal_low_frequency_ratio |
622 | | */ |
623 | 104k | if ((init_mode_decision_result == TBD) && (n_tonal_low_frequency_ratio <= 0.91)) { |
624 | 3.16k | init_mode_decision_result = MUSIC_DEFINITE; |
625 | 3.16k | } |
626 | | |
627 | | /** MUSIC and SPEECH mode decision |
628 | | */ |
629 | | |
630 | | /** msd_spec_tilt |
631 | | */ |
632 | 104k | if ((init_mode_decision_result == TBD) && (msd_spec_tilt <= 0.0002) && (*framecnt_xm >= 15)) { |
633 | 217 | init_mode_decision_result = MUSIC; |
634 | 217 | } |
635 | | |
636 | | /** n_tonal_low_frequency_ratio |
637 | | */ |
638 | 104k | if ((init_mode_decision_result == TBD) && (n_tonal_low_frequency_ratio >= 0.95)) { |
639 | 942 | init_mode_decision_result = SPEECH; |
640 | 942 | } |
641 | 104k | if ((init_mode_decision_result == TBD) && (n_tonal_low_frequency_ratio <= 0.935)) { |
642 | 460 | init_mode_decision_result = MUSIC; |
643 | 460 | } |
644 | | |
645 | | /** the rest of the frame to SPEECH |
646 | | */ |
647 | 104k | if (init_mode_decision_result == TBD) { |
648 | 342 | init_mode_decision_result = SPEECH; |
649 | 342 | } |
650 | | |
651 | | /** MUSIC mode decision according to changes of the MSD of the spectral tilt |
652 | | */ |
653 | | |
654 | | /** compute the changes of the MSD of the spectral tilt |
655 | | */ |
656 | 104k | if ((msd_spec_tilt <= 0.007) && (init_mode_decision_result != SPEECH_DEFINITE)) { |
657 | 19.2k | if (init_mode_decision_result != SPEECH) { |
658 | 18.0k | count_msd_st_music++; |
659 | 18.0k | } |
660 | 85.1k | } else { |
661 | 85.1k | count_msd_st_music = 0; |
662 | 85.1k | } |
663 | | |
664 | 104k | if ((init_mode_decision_result != SPEECH_DEFINITE) && (count_msd_st_music >= 400) && |
665 | 0 | (border_state != BORDER_MUSIC_SPEECH_DEFINITE)) { |
666 | 0 | init_mode_decision_result = MUSIC; |
667 | 0 | } |
668 | | |
669 | | /** update border flag |
670 | | */ |
671 | | |
672 | 104k | if (*flag_border != NO_BORDER) { |
673 | 131 | border_state = *flag_border; |
674 | 131 | } |
675 | | |
676 | | /** update BORDER_SPEECH_MUSIC_DEFINITE |
677 | | */ |
678 | 104k | if (((border_state == BORDER_MUSIC_SPEECH) || (border_state == BORDER_MUSIC_SPEECH_DEFINITE)) && |
679 | 131 | (init_mode_decision_result == MUSIC_DEFINITE) && (*framecnt_xm >= 20)) { |
680 | 0 | *flag_border = BORDER_SPEECH_MUSIC_DEFINITE; |
681 | 0 | *framecnt_xm = 10; |
682 | 0 | border_state = *flag_border; |
683 | 0 | } |
684 | | |
685 | | /** update BORDER_MUSIC_SPEECH_DEFINITE |
686 | | */ |
687 | 104k | if (((border_state == BORDER_SPEECH_MUSIC) || (border_state == BORDER_SPEECH_MUSIC_DEFINITE)) && |
688 | 0 | (init_mode_decision_result == SPEECH_DEFINITE) && (*framecnt_xm >= 20)) { |
689 | 0 | *flag_border = BORDER_MUSIC_SPEECH_DEFINITE; |
690 | 0 | *framecnt_xm = 10; |
691 | 0 | } |
692 | | |
693 | 104k | return init_mode_decision_result; |
694 | 104k | } |
695 | | |
696 | 104k | static WORD32 iusace_smoothing_mode_decision(ia_smooth_params_struct *pstr_smooth_param) { |
697 | 104k | WORD32 *ptr_init_result_ahead = pstr_smooth_param->init_result_ahead; |
698 | 104k | WORD32 flag_border = pstr_smooth_param->flag_border; |
699 | 104k | WORD32 *ptr_flag_border_buf_behind = pstr_smooth_param->flag_border_buf_behind; |
700 | 104k | WORD32 *ptr_flag_border_buf_ahead = pstr_smooth_param->flag_border_buf_ahead; |
701 | 104k | FLOAT32 frame_energy = pstr_smooth_param->frame_energy; |
702 | 104k | FLOAT32 *ptr_frame_energy_buf_behind = pstr_smooth_param->frame_energy_buf_behind; |
703 | 104k | FLOAT32 *ptr_frame_energy_buf_ahead = pstr_smooth_param->frame_energy_buf_ahead; |
704 | 104k | WORD32 *ptr_smoothing_result_buf = pstr_smooth_param->smoothing_result_buf; |
705 | 104k | WORD32 *ptr_init_result_behind = pstr_smooth_param->init_result_behind; |
706 | 104k | WORD32 init_mode_decision_result = pstr_smooth_param->init_mode_decision_result; |
707 | 104k | WORD32 i; |
708 | | |
709 | 104k | WORD32 mode_decision_result; |
710 | | |
711 | 104k | WORD32 num_music, num_speech; |
712 | | |
713 | | /** update data array |
714 | | */ |
715 | | |
716 | | /** update init_result_behind, init_result_ahead |
717 | | */ |
718 | 10.4M | for (i = 0; i < 99; i++) { |
719 | 10.3M | ptr_init_result_behind[i] = ptr_init_result_behind[i + 1]; |
720 | 10.3M | } |
721 | 104k | ptr_init_result_behind[99] = ptr_init_result_ahead[0]; |
722 | | |
723 | 104k | ptr_init_result_ahead[NFRAMEAHEAD - 1] = init_mode_decision_result; |
724 | | |
725 | | /** update flag_border_buf_behind, flag_border_buf_ahead |
726 | | * update frame_energy_buf_behind, frame_energy_buf_ahead |
727 | | */ |
728 | | |
729 | 1.04M | for (i = 0; i < 9; i++) { |
730 | 939k | ptr_flag_border_buf_behind[i] = ptr_flag_border_buf_behind[i + 1]; |
731 | 939k | ptr_frame_energy_buf_behind[i] = ptr_frame_energy_buf_behind[i + 1]; |
732 | 939k | } |
733 | 104k | ptr_flag_border_buf_behind[9] = ptr_flag_border_buf_ahead[0]; |
734 | 104k | ptr_frame_energy_buf_behind[9] = ptr_frame_energy_buf_ahead[0]; |
735 | | |
736 | 104k | ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] = flag_border; |
737 | | |
738 | 104k | ptr_frame_energy_buf_ahead[NFRAMEAHEAD - 1] = frame_energy; |
739 | | |
740 | | /** smoothing according to past results |
741 | | */ |
742 | | |
743 | 104k | mode_decision_result = ptr_init_result_behind[99]; |
744 | | |
745 | | /** update smoothing_result_buf |
746 | | */ |
747 | 104k | if (ptr_flag_border_buf_behind[9] == NO_BORDER) { |
748 | 10.4M | for (i = 0; i < 99; i++) { |
749 | 10.3M | ptr_smoothing_result_buf[i] = ptr_smoothing_result_buf[i + 1]; |
750 | 10.3M | } |
751 | 104k | pstr_smooth_param->num_smoothing++; |
752 | 104k | } else { |
753 | 13.1k | for (i = 0; i < 99; i++) { |
754 | 12.9k | ptr_smoothing_result_buf[i] = TBD; |
755 | 12.9k | } |
756 | 131 | pstr_smooth_param->num_smoothing = 1; |
757 | 131 | } |
758 | 104k | ptr_smoothing_result_buf[99] = ptr_init_result_behind[99]; |
759 | | |
760 | 104k | if (pstr_smooth_param->num_smoothing >= SMOOTHING_LENGTH) { |
761 | 0 | num_music = 0; |
762 | 0 | num_speech = 0; |
763 | | |
764 | | /** smoothed result count |
765 | | */ |
766 | 0 | for (i = 0; i < SMOOTHING_LENGTH; i++) { |
767 | 0 | if ((ptr_smoothing_result_buf[100 - i] == SPEECH) || |
768 | 0 | (ptr_smoothing_result_buf[100 - i] == SPEECH_DEFINITE)) { |
769 | 0 | num_speech++; |
770 | 0 | } else { |
771 | 0 | num_music++; |
772 | 0 | } |
773 | 0 | } |
774 | | |
775 | | /** smoothing |
776 | | */ |
777 | 0 | if ((num_speech > num_music) && (init_mode_decision_result != MUSIC_DEFINITE)) { |
778 | 0 | mode_decision_result = SPEECH; |
779 | 0 | } |
780 | 0 | if ((num_music > num_speech) && (init_mode_decision_result != SPEECH_DEFINITE)) { |
781 | 0 | mode_decision_result = MUSIC; |
782 | 0 | } |
783 | 0 | } |
784 | | |
785 | | /** correct according to energies and ahead mode decision results |
786 | | */ |
787 | | |
788 | 104k | if ((mode_decision_result == MUSIC) && (ptr_frame_energy_buf_behind[9] <= 60)) { |
789 | 14 | for (i = 0; i < NFRAMEAHEAD; i++) { |
790 | 7 | if ((ptr_init_result_ahead[i] == SPEECH_DEFINITE) || (ptr_init_result_ahead[i] == SPEECH)) { |
791 | 4 | pstr_smooth_param->flag_speech_definite = 1; |
792 | 4 | } |
793 | 7 | } |
794 | 7 | } |
795 | 104k | if ((pstr_smooth_param->flag_speech_definite == 1) && (mode_decision_result == MUSIC)) { |
796 | 4 | mode_decision_result = SPEECH; |
797 | 104k | } else { |
798 | 104k | pstr_smooth_param->flag_speech_definite = 0; |
799 | 104k | } |
800 | | |
801 | | /** correct MUSIC mode |
802 | | */ |
803 | | |
804 | 104k | if (ptr_frame_energy_buf_behind[9] <= 65) { |
805 | 6.47k | pstr_smooth_param->count_small_energy = 0; |
806 | 97.9k | } else { |
807 | 97.9k | pstr_smooth_param->count_small_energy++; |
808 | 97.9k | } |
809 | 104k | if (((ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC) || |
810 | 104k | (ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC_DEFINITE)) && |
811 | 0 | (pstr_smooth_param->count_small_energy <= 30)) { |
812 | 0 | pstr_smooth_param->flag_music_definite = 1; |
813 | 0 | } |
814 | 104k | if ((pstr_smooth_param->flag_music_definite == 1) && |
815 | 0 | ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE))) { |
816 | 0 | mode_decision_result = MUSIC; |
817 | 104k | } else { |
818 | 104k | pstr_smooth_param->flag_music_definite = 0; |
819 | 104k | } |
820 | | |
821 | 104k | return mode_decision_result; |
822 | 104k | } |
823 | | |
824 | | static WORD32 iusace_classification_ccfl(ia_classification_struct *pstr_sig_class, |
825 | | FLOAT32 *ptr_time_signal, |
826 | 104k | iusace_scratch_mem *pstr_scratch, WORD32 ccfl) { |
827 | 104k | WORD32 i; |
828 | 104k | ia_tonal_params_struct pstr_ton_params; |
829 | 104k | ia_smooth_params_struct smooth_param; |
830 | 104k | ia_mode_params_struct pstr_mode_params; |
831 | 104k | ia_spec_tilt_params_struct ptr_spec_params; |
832 | | |
833 | 104k | ia_classification_buf_struct *pstr_buffers = &(pstr_sig_class->buffers); |
834 | 104k | pFLOAT32 spec_tilt_buf = pstr_sig_class->spec_tilt_buf; |
835 | 104k | pWORD32 n_tonal = pstr_sig_class->n_tonal; |
836 | 104k | pWORD32 n_tonal_low_frequency = pstr_sig_class->n_tonal_low_frequency; |
837 | 104k | pWORD32 framecnt_xm = &(pstr_sig_class->framecnt_xm); |
838 | 104k | pWORD32 framecnt = &(pstr_sig_class->framecnt); |
839 | 104k | pFLOAT32 ave_n_tonal_short_buf = pstr_sig_class->ave_n_tonal_short_buf; |
840 | 104k | pFLOAT32 ave_n_tonal_buf = pstr_sig_class->ave_n_tonal_buf; |
841 | 104k | pFLOAT32 msd_spec_tilt_buf = pstr_sig_class->msd_spec_tilt_buf; |
842 | 104k | pFLOAT32 msd_spec_tilt_short_buf = pstr_sig_class->msd_spec_tilt_short_buf; |
843 | | |
844 | 104k | FLOAT32 n_tonal_low_frequency_ratio; /* the ratio of distribution of the numbers */ |
845 | | /* of tonal in the low frequency domain */ |
846 | 104k | FLOAT32 ave_n_tonal, ave_n_tonal_short; /**< the number of tonal */ |
847 | 104k | FLOAT32 msd_spec_tilt; /* the long-term MSD of spectral tilt */ |
848 | 104k | FLOAT32 msd_spec_tilt_short; /* the short-term MSD of spectral tilt */ |
849 | | |
850 | 104k | WORD32 init_mode_decision_result; /* the initial mode decision */ |
851 | 104k | WORD32 flag_border = NO_BORDER; /* flag of current border */ |
852 | | |
853 | 104k | WORD32 mode_decision_result; /* final mode decision result */ |
854 | | |
855 | 104k | if (pstr_sig_class->init_flag == 0) { |
856 | | /* initialize */ |
857 | 1.31k | pstr_sig_class->init_flag = 1; |
858 | | |
859 | 7.86k | for (i = 0; i < 5; i++) { |
860 | 6.55k | n_tonal[i] = 0; |
861 | 6.55k | n_tonal_low_frequency[i] = 0; |
862 | 6.55k | spec_tilt_buf[i] = 0; |
863 | 6.55k | pstr_buffers->init_result_behind[i] = TBD; |
864 | 6.55k | pstr_buffers->smoothing_result_buf[i] = TBD; |
865 | | |
866 | 6.55k | ave_n_tonal_short_buf[i] = 0; |
867 | 6.55k | ave_n_tonal_buf[i] = 0; |
868 | 6.55k | msd_spec_tilt_buf[i] = 0; |
869 | 6.55k | msd_spec_tilt_short_buf[i] = 0; |
870 | | |
871 | 6.55k | pstr_buffers->frame_energy_buf_behind[i] = 0; |
872 | 6.55k | pstr_buffers->flag_border_buf_behind[i] = NO_BORDER; |
873 | 6.55k | } |
874 | 7.86k | for (; i < 10; i++) { |
875 | 6.55k | n_tonal[i] = 0; |
876 | 6.55k | n_tonal_low_frequency[i] = 0; |
877 | 6.55k | spec_tilt_buf[i] = 0; |
878 | 6.55k | pstr_buffers->init_result_behind[i] = TBD; |
879 | 6.55k | pstr_buffers->smoothing_result_buf[i] = TBD; |
880 | | |
881 | 6.55k | pstr_buffers->frame_energy_buf_behind[i] = 0; |
882 | 6.55k | pstr_buffers->flag_border_buf_behind[i] = NO_BORDER; |
883 | 6.55k | } |
884 | | |
885 | 119k | for (; i < 100; i++) { |
886 | 117k | n_tonal[i] = 0; |
887 | 117k | n_tonal_low_frequency[i] = 0; |
888 | 117k | spec_tilt_buf[i] = 0; |
889 | 117k | pstr_buffers->init_result_behind[i] = TBD; |
890 | 117k | pstr_buffers->smoothing_result_buf[i] = TBD; |
891 | 117k | } |
892 | 2.62k | for (i = 0; i < NFRAMEAHEAD; i++) { |
893 | 1.31k | pstr_buffers->frame_energy_buf_ahead[i] = 0; |
894 | 1.31k | pstr_buffers->flag_border_buf_ahead[i] = NO_BORDER; |
895 | 1.31k | pstr_buffers->init_result_ahead[i] = TBD; |
896 | 1.31k | } |
897 | 1.31k | } |
898 | | |
899 | 104k | *framecnt += 1; |
900 | 104k | *framecnt_xm += 1; |
901 | | |
902 | 104k | pstr_ton_params.time_signal = (FLOAT32 *)ptr_time_signal; |
903 | 104k | pstr_ton_params.framecnt_xm = *framecnt_xm; |
904 | 104k | pstr_ton_params.n_tonal = n_tonal; |
905 | 104k | pstr_ton_params.n_tonal_low_frequency = n_tonal_low_frequency; |
906 | 104k | pstr_ton_params.n_tonal_low_frequency_ratio = &n_tonal_low_frequency_ratio; |
907 | 104k | pstr_ton_params.ave_n_tonal = &ave_n_tonal; |
908 | 104k | pstr_ton_params.ave_n_tonal_short = &ave_n_tonal_short; |
909 | | /** analysis tonal |
910 | | */ |
911 | 104k | iusace_tonal_analysis(&pstr_ton_params, pstr_scratch, ccfl); |
912 | | |
913 | 104k | ptr_spec_params.time_signal = ptr_time_signal; |
914 | 104k | ptr_spec_params.framecnt_xm = *framecnt_xm; |
915 | 104k | ptr_spec_params.spec_tilt_buf = spec_tilt_buf; |
916 | 104k | ptr_spec_params.msd_spec_tilt = &msd_spec_tilt; |
917 | 104k | ptr_spec_params.msd_spec_tilt_short = &msd_spec_tilt_short; |
918 | | /** analysis spectral tilt |
919 | | */ |
920 | 104k | iusace_spectral_tilt_analysis(&ptr_spec_params, ccfl); |
921 | | |
922 | 104k | pstr_mode_params.framecnt = *framecnt; |
923 | 104k | pstr_mode_params.framecnt_xm = framecnt_xm; |
924 | 104k | pstr_mode_params.flag_border = &flag_border; |
925 | 104k | pstr_mode_params.ave_n_tonal_short = ave_n_tonal_short; |
926 | 104k | pstr_mode_params.ave_n_tonal = ave_n_tonal; |
927 | 104k | pstr_mode_params.ave_n_tonal_short_buf = ave_n_tonal_short_buf; |
928 | 104k | pstr_mode_params.ave_n_tonal_buf = ave_n_tonal_buf; |
929 | 104k | pstr_mode_params.msd_spec_tilt = msd_spec_tilt; |
930 | 104k | pstr_mode_params.msd_spec_tilt_short = msd_spec_tilt_short; |
931 | 104k | pstr_mode_params.msd_spec_tilt_buf = msd_spec_tilt_buf; |
932 | 104k | pstr_mode_params.msd_spec_tilt_short_buf = msd_spec_tilt_short_buf; |
933 | 104k | pstr_mode_params.n_tonal_low_frequency_ratio = n_tonal_low_frequency_ratio; |
934 | 104k | pstr_mode_params.frame_energy = ptr_spec_params.frame_energy; |
935 | | /** initial mode decision and boundary decisions |
936 | | */ |
937 | 104k | init_mode_decision_result = iusace_init_mode_decision(&pstr_mode_params); |
938 | | |
939 | 104k | smooth_param.flag_border_buf_behind = pstr_buffers->flag_border_buf_behind; |
940 | 104k | smooth_param.flag_border_buf_ahead = pstr_buffers->flag_border_buf_ahead; |
941 | 104k | smooth_param.frame_energy = ptr_spec_params.frame_energy; |
942 | 104k | smooth_param.frame_energy_buf_behind = pstr_buffers->frame_energy_buf_behind; |
943 | 104k | smooth_param.frame_energy_buf_ahead = pstr_buffers->frame_energy_buf_ahead; |
944 | 104k | smooth_param.smoothing_result_buf = pstr_buffers->smoothing_result_buf; |
945 | 104k | smooth_param.init_result_ahead = pstr_buffers->init_result_ahead; |
946 | 104k | smooth_param.flag_border = flag_border; |
947 | 104k | smooth_param.init_result_behind = pstr_buffers->init_result_behind; |
948 | 104k | smooth_param.init_mode_decision_result = init_mode_decision_result; |
949 | 104k | smooth_param.flag_speech_definite = 0; |
950 | 104k | smooth_param.count_small_energy = 0; |
951 | 104k | smooth_param.flag_music_definite = 0; |
952 | 104k | smooth_param.num_smoothing = 0; |
953 | | /* smoothing */ |
954 | 104k | mode_decision_result = iusace_smoothing_mode_decision(&smooth_param); |
955 | | |
956 | 104k | return mode_decision_result; |
957 | 104k | } |
958 | | |
959 | | VOID iusace_classification(ia_classification_struct *pstr_sig_class, |
960 | 104k | iusace_scratch_mem *pstr_scratch, WORD32 ccfl) { |
961 | 104k | WORD32 n_frames, n_class, avg_cls, nf; |
962 | 104k | WORD32 i; |
963 | 104k | FLOAT32 *ptr_time_signal = pstr_scratch->p_time_signal; |
964 | 104k | WORD32 mode_decision_result; |
965 | | |
966 | 104k | n_frames = pstr_sig_class->n_buffer_samples / ccfl; |
967 | | |
968 | 208k | for (nf = 0; nf < n_frames; nf++) { |
969 | 97.3M | for (i = 0; i < ccfl; i++) { |
970 | 97.1M | ptr_time_signal[i] = pstr_sig_class->input_samples[ccfl * nf + i]; |
971 | 97.1M | } |
972 | | |
973 | | /* classification of ccfl-frame */ |
974 | 104k | mode_decision_result = |
975 | 104k | iusace_classification_ccfl(pstr_sig_class, ptr_time_signal, pstr_scratch, ccfl); |
976 | | |
977 | | /* coding mode decision of 1024-frame */ |
978 | 104k | if ((mode_decision_result == MUSIC) || (mode_decision_result == MUSIC_DEFINITE)) { |
979 | 20.7k | pstr_sig_class->coding_mode = FD_MODE; |
980 | 83.6k | } else if ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE)) { |
981 | 82.3k | pstr_sig_class->coding_mode = TD_MODE; |
982 | 82.3k | } |
983 | | |
984 | 104k | pstr_sig_class->class_buf[pstr_sig_class->n_buf_class + nf] = pstr_sig_class->coding_mode; |
985 | 104k | pstr_sig_class->pre_mode = pstr_sig_class->coding_mode; |
986 | 104k | } |
987 | | |
988 | | /* merge ccfl-frame results */ |
989 | 104k | pstr_sig_class->n_buf_class += n_frames; |
990 | 104k | n_class = (pstr_sig_class->n_class_frames > pstr_sig_class->n_buf_class) |
991 | 104k | ? pstr_sig_class->n_buf_class |
992 | 104k | : pstr_sig_class->n_class_frames; |
993 | 104k | { |
994 | 104k | WORD32 min_cls, max_cls; |
995 | | |
996 | 104k | min_cls = max_cls = pstr_sig_class->class_buf[0]; |
997 | 104k | for (i = 1; i < n_class; i++) { |
998 | 0 | if (pstr_sig_class->class_buf[i] > max_cls) { |
999 | 0 | max_cls = pstr_sig_class->class_buf[i]; |
1000 | 0 | } else if (pstr_sig_class->class_buf[i] < min_cls) { |
1001 | 0 | min_cls = pstr_sig_class->class_buf[i]; |
1002 | 0 | } |
1003 | 0 | } |
1004 | | |
1005 | 104k | avg_cls = 0; |
1006 | 208k | for (i = 0; i < n_class; i++) { |
1007 | 104k | if (pstr_sig_class->class_buf[i] == max_cls) { |
1008 | 104k | avg_cls += 1; |
1009 | 104k | } |
1010 | 104k | if (pstr_sig_class->class_buf[i] == min_cls) { |
1011 | 104k | avg_cls += -1; |
1012 | 104k | } |
1013 | 104k | } |
1014 | | |
1015 | 104k | if (avg_cls > 0) { |
1016 | 0 | pstr_sig_class->coding_mode = max_cls; |
1017 | 104k | } else { |
1018 | 104k | pstr_sig_class->coding_mode = min_cls; |
1019 | 104k | } |
1020 | 104k | } |
1021 | | |
1022 | | /* shift, save pre_mode and unused class */ |
1023 | 104k | if (n_class > 0) { |
1024 | 104k | pstr_sig_class->pre_mode = pstr_sig_class->class_buf[n_class - 1]; |
1025 | 104k | } |
1026 | 104k | pstr_sig_class->n_buf_class -= n_class; |
1027 | 104k | pstr_sig_class->n_buffer_samples -= ccfl * n_frames; |
1028 | | |
1029 | 104k | WORD32 minimum = MIN(pstr_sig_class->n_buf_class, pstr_sig_class->n_buffer_samples); |
1030 | 104k | if (minimum == pstr_sig_class->n_buf_class) { |
1031 | 104k | for (i = 0; i < minimum; i++) { |
1032 | 0 | pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class]; |
1033 | 0 | pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames]; |
1034 | 0 | } |
1035 | | |
1036 | | /* shift, save unused samples */ |
1037 | 104k | for (; i < pstr_sig_class->n_buffer_samples; i++) { |
1038 | 0 | pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames]; |
1039 | 0 | } |
1040 | 104k | } else { |
1041 | 0 | for (i = 0; i < minimum; i++) { |
1042 | 0 | pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class]; |
1043 | 0 | pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames]; |
1044 | 0 | } |
1045 | | |
1046 | | /* shift, save unused samples */ |
1047 | 0 | for (; i < pstr_sig_class->n_buf_class; i++) { |
1048 | 0 | pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class]; |
1049 | 0 | } |
1050 | 0 | } |
1051 | 104k | } |
1052 | | |
1053 | 1.33k | VOID iusace_init_classification(ia_classification_struct *pstr_sig_class) { |
1054 | 1.33k | pstr_sig_class->pre_mode = FD_MODE; |
1055 | | |
1056 | 1.33k | pstr_sig_class->n_buffer_samples = 0; |
1057 | 1.33k | memset(pstr_sig_class->input_samples, 0, 3840 * 2 * sizeof(FLOAT32)); |
1058 | 1.33k | pstr_sig_class->n_class_frames = 2; |
1059 | 1.33k | pstr_sig_class->n_buf_class = 0; |
1060 | | |
1061 | 1.33k | pstr_sig_class->is_switch_mode = 1; |
1062 | | |
1063 | 1.33k | pstr_sig_class->framecnt = 0; |
1064 | 1.33k | pstr_sig_class->init_flag = 0; |
1065 | 1.33k | pstr_sig_class->framecnt_xm = 0; |
1066 | | |
1067 | 1.33k | memset(&pstr_sig_class->buffers, 0, sizeof(ia_classification_buf_struct)); |
1068 | 1.33k | memset(pstr_sig_class->spec_tilt_buf, 0, sizeof(FLOAT32) * 100); |
1069 | 1.33k | memset(pstr_sig_class->n_tonal, 0, sizeof(WORD32) * 100); |
1070 | 1.33k | memset(pstr_sig_class->n_tonal_low_frequency, 0, sizeof(WORD32) * 100); |
1071 | 1.33k | memset(pstr_sig_class->msd_spec_tilt_buf, 0, sizeof(FLOAT32) * 5); |
1072 | 1.33k | memset(pstr_sig_class->msd_spec_tilt_short_buf, 0, sizeof(FLOAT32) * 5); |
1073 | 1.33k | memset(pstr_sig_class->ave_n_tonal_short_buf, 0, sizeof(FLOAT32) * 5); |
1074 | 1.33k | memset(pstr_sig_class->ave_n_tonal_buf, 0, sizeof(FLOAT32) * 5); |
1075 | 1.33k | return; |
1076 | 1.33k | } |