Coverage Report

Created: 2026-01-17 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/encoder/ixheaace_signal_classifier.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2023 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
#include <string.h>
21
#include <math.h>
22
#include "iusace_type_def.h"
23
#include "iusace_cnst.h"
24
25
#include "iusace_fd_quant.h"
26
#include "iusace_bitbuffer.h"
27
#include "impd_drc_common_enc.h"
28
#include "impd_drc_uni_drc.h"
29
#include "impd_drc_api.h"
30
#include "impd_drc_uni_drc_eq.h"
31
#include "impd_drc_uni_drc_filter_bank.h"
32
#include "impd_drc_gain_enc.h"
33
#include "impd_drc_struct_def.h"
34
35
#include "ixheaace_memory_standards.h"
36
#include "iusace_tns_usac.h"
37
#include "iusace_psy_mod.h"
38
#include "iusace_config.h"
39
#include "iusace_signal_classifier.h"
40
#include "iusace_fft.h"
41
#include "iusace_block_switch_const.h"
42
#include "iusace_block_switch_struct_def.h"
43
#include "iusace_cnst.h"
44
#include "iusace_ms.h"
45
#include "ixheaace_adjust_threshold_data.h"
46
#include "iusace_fd_qc_util.h"
47
#include "ixheaace_sbr_header.h"
48
#include "ixheaace_config.h"
49
#include "ixheaace_asc_write.h"
50
#include "iusace_main.h"
51
52
124k
/**
 * Converts an interleaved complex spectrum into a normalised power density
 * spectrum, in place.
 *
 * ptr_input holds (re, im) pairs. For each of the first ccfl / 2 bins the level
 *   10 * (log10(re * re + im * im) - log_ccfl_base_10) + 10e-15
 * is floored at MIN_POW and written to the even (re) slot of the bin; the odd
 * (im) slots are left untouched. Afterwards every level is shifted by a common
 * offset so that the largest one becomes 96.
 *
 * log_ccfl_base_10 is LOG_1024_BASE_10 when ccfl == 1024 and LOG_768_BASE_10
 * for any other ccfl.
 */
static VOID iusace_calc_pds(FLOAT32 *ptr_input, WORD32 ccfl) {
  WORD32 i;
  WORD32 num_bins = ccfl >> 1;
  FLOAT64 level;
  FLOAT64 max_pow, delta;
  FLOAT64 log_ccfl_base_10 = (ccfl == 1024) ? LOG_1024_BASE_10 : LOG_768_BASE_10;

  /* Bin 0 seeds the running maximum. The level is computed once into a local
   * so that MAX() does not expand the log10() call twice. */
  level = 10 * (log10(ptr_input[0] * ptr_input[0] + ptr_input[1] * ptr_input[1]) -
                log_ccfl_base_10) +
          10e-15;
  max_pow = MAX(level, MIN_POW);

  /* Bin 0 has to be stored as well; otherwise ptr_input[0] would still hold the
   * raw FFT real part when the common offset is applied below. */
  ptr_input[0] = (FLOAT32)max_pow;

  for (i = 1; i < num_bins; i++) {
    /* removed the sqrt along with clubbing the for loops */
    level = 10 * (log10(ptr_input[2 * i] * ptr_input[2 * i] +
                        ptr_input[2 * i + 1] * ptr_input[2 * i + 1]) -
                  log_ccfl_base_10) +
            10e-15;
    ptr_input[2 * i] = (FLOAT32)MAX(level, MIN_POW);

    max_pow = MAX(max_pow, ptr_input[2 * i]);
  }

  /* Normalized to reference sound pressure level 96 dB */
  delta = 96 - max_pow;

  for (i = 0; i < num_bins; i++) {
    ptr_input[2 * i] = ptr_input[2 * i] + (FLOAT32)delta;
  }
  return;
}
81
82
/**
 * Returns the neighbourhood half-width used to validate a spectral peak at
 * position bin. is_long selects the band table used for ccfl == FRAME_LEN_LONG;
 * otherwise the table of the short (768) configuration is used.
 *
 * NOTE(review): in the short table the third band ends at 194 while the
 * original code opened the fourth band with "191 <= i". Because the bands are
 * tested in order, bins 191..193 fall into the third band; that effective
 * behaviour is kept here. Confirm whether 191 was the intended boundary.
 */
static WORD32 iusace_tonal_half_width(WORD32 bin, WORD32 is_long) {
  if (is_long) {
    if (bin < 62) return 2;
    if (bin < 126) return 3;
    if (bin < 254) return 6;
    return 12;
  }
  if (bin < 47) return 2;
  if (bin < 95) return 3;
  if (bin < 194) return 5;
  return 9;
}

/**
 * Returns 1 when ptr_spec[bin] exceeds every neighbour at distance
 * 2..half_width, on both sides, by at least 7; returns 0 otherwise.
 * The lower side is tested first, farthest neighbour first; the upper side is
 * only tested when the lower side passed, nearest neighbour first.
 */
static WORD32 iusace_peak_dominates(const FLOAT32 *ptr_spec, WORD32 bin, WORD32 half_width) {
  WORD32 dist;

  for (dist = half_width; dist >= 2; dist--) {
    if (!(ptr_spec[bin] - ptr_spec[bin - dist] >= 7)) {
      return 0;
    }
  }
  for (dist = 2; dist <= half_width; dist++) {
    if (!(ptr_spec[bin] - ptr_spec[bin + dist] >= 7)) {
      return 0;
    }
  }
  return 1;
}

/**
 * Marks tonal components of a power density spectrum.
 *
 * ptr_input      interleaved buffer; the level of bin k is read from
 *                ptr_input[2 * k] for k < ccfl / 2.
 * ptr_tonal_flag output, one flag per bin: 512 entries are written when
 *                ccfl == FRAME_LEN_LONG, 384 entries for any other ccfl.
 * ptr_scratch    receives the de-interleaved levels (ccfl / 2 values).
 * ccfl           frame length.
 *
 * A bin is flagged when it is a local maximum, dominates its band-dependent
 * neighbourhood by at least 7, and the combined level of the bin and its two
 * direct neighbours is not below the absolute threshold table entry (raised by
 * 20 from bin 324 / 243 onwards).
 */
static VOID iusace_find_tonal(FLOAT32 *ptr_input, WORD32 *ptr_tonal_flag, FLOAT32 *ptr_scratch,
                              WORD32 ccfl) {
  const WORD32 is_long = (ccfl == FRAME_LEN_LONG);
  const WORD32 num_flags = is_long ? 512 : 384;
  const WORD32 search_end = is_long ? 500 : 375;
  WORD32 bin;

  /* De-interleave the levels into the scratch buffer. */
  for (bin = 0; bin < (ccfl >> 1); bin++) {
    ptr_scratch[bin] = ptr_input[2 * bin];
  }

  for (bin = 0; bin < num_flags; bin++) {
    ptr_tonal_flag[bin] = 0;
  }

  /* Pass 1: local maxima that dominate their neighbourhood. */
  for (bin = 2; bin < search_end; bin++) {
    WORD32 half_width;

    if (!(ptr_scratch[bin] > ptr_scratch[bin - 1] && ptr_scratch[bin] >= ptr_scratch[bin + 1])) {
      continue;
    }

    /* Verify it meets the condition: ptr_scratch[bin] - ptr_scratch[bin + j] >= 7 */
    half_width = iusace_tonal_half_width(bin, is_long);
    if (iusace_peak_dominates(ptr_scratch, bin, half_width)) {
      ptr_tonal_flag[bin] = 1;
    }
  }

  /* Pass 2: drop candidates whose level stays below the absolute threshold.
   * Flags can only be set for bin >= 2, so bin - 1 is never negative here. */
  for (bin = 0; bin < num_flags; bin++) {
    FLOAT64 tonal_spl;
    FLOAT64 absolute_threshold_xm;

    if (ptr_tonal_flag[bin] != 1) {
      continue;
    }

    /* compute the SPL of tonal */
    tonal_spl = 10 * log10(pow(10, (ptr_scratch[bin - 1] / 10)) + pow(10, (ptr_scratch[bin] / 10)) +
                           pow(10, (ptr_scratch[bin + 1] / 10)));

    if (is_long) {
      absolute_threshold_xm = (bin >= 324)
                                  ? iusace_classify_arrays.absolute_threshold_1024[bin] + 20
                                  : iusace_classify_arrays.absolute_threshold_1024[bin];
    } else {
      absolute_threshold_xm = (bin >= 243)
                                  ? iusace_classify_arrays.absolute_threshold_768[bin] + 20
                                  : iusace_classify_arrays.absolute_threshold_768[bin];
    }

    if (tonal_spl < absolute_threshold_xm) {
      ptr_tonal_flag[bin] = 0;
    }
  }
  return;
}
293
294
static VOID iusace_tonal_analysis(ia_tonal_params_struct *pstr_ton_params,
295
124k
                                  iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
296
124k
  FLOAT32 *ptr_complex_fft = pstr_scratch->p_complex_fft;
297
124k
  WORD32 *ptr_tonal_flag = pstr_scratch->p_tonal_flag;
298
124k
  FLOAT32 *ptr_time_sig = pstr_ton_params->time_signal;
299
124k
  WORD32 framecnt_xm = pstr_ton_params->framecnt_xm;
300
124k
  WORD32 *ptr_n_tonal = pstr_ton_params->n_tonal;
301
124k
  WORD32 *ptr_n_tonal_low_frequency = pstr_ton_params->n_tonal_low_frequency;
302
124k
  FLOAT32 *ptr_n_tonal_low_frequency_ratio = pstr_ton_params->n_tonal_low_frequency_ratio;
303
124k
  FLOAT32 *ave_n_tonal = pstr_ton_params->ave_n_tonal;
304
124k
  FLOAT32 *ave_n_tonal_short = pstr_ton_params->ave_n_tonal_short;
305
124k
  WORD32 i;
306
124k
  WORD32 fft_size = ccfl;
307
308
124k
  WORD32 frame_length;
309
124k
  WORD32 n_tonal_total, n_tonal_low_frequency_total;
310
311
117M
  for (i = 0; i < ccfl; i++) {
312
117M
    ptr_complex_fft[2 * i] = (FLOAT32)(
313
117M
        ptr_time_sig[i] * ((ccfl == 1024) ? iusace_classify_arrays.hanning_window_1024[i]
314
117M
                                          : iusace_classify_arrays.hanning_window_768[i]));
315
117M
    ptr_complex_fft[2 * i + 1] = 0;
316
117M
  }
317
318
124k
  iusace_complex_fft(ptr_complex_fft, fft_size, pstr_scratch);
319
320
  /* compute power density spectrum */
321
  /* re_fft contains the resulting pds */
322
124k
  iusace_calc_pds(ptr_complex_fft, ccfl);
323
324
  /* detect tonal */
325
124k
  iusace_find_tonal(ptr_complex_fft, ptr_tonal_flag, pstr_scratch->p_pow_spec, ccfl);
326
327
  /* update n_tonal, n_tonal_low_frequency */
328
12.4M
  for (i = 0; i < 99; i++) {
329
12.3M
    ptr_n_tonal[i] = ptr_n_tonal[i + 1];
330
12.3M
    ptr_n_tonal_low_frequency[i] = ptr_n_tonal_low_frequency[i + 1];
331
12.3M
  }
332
124k
  ptr_n_tonal[99] = 0;
333
58.8M
  for (i = 0; i<ccfl>> 1; i++) {
334
58.6M
    ptr_n_tonal[99] += ptr_tonal_flag[i];
335
58.6M
  }
336
124k
  ptr_n_tonal_low_frequency[99] = 0;
337
20.1M
  for (i = 0; i < INDEXOFLOWFREQUENCY; i++) {
338
19.9M
    ptr_n_tonal_low_frequency[99] += ptr_tonal_flag[i];
339
19.9M
  }
340
341
  /* compute long-term AVE and the ratio of distribution in low-frequency domain */
342
124k
  if (framecnt_xm < AVE_TONAL_LENGTH) {
343
76.4k
    frame_length = framecnt_xm;
344
76.4k
  } else {
345
48.5k
    frame_length = AVE_TONAL_LENGTH;
346
48.5k
  }
347
348
124k
  n_tonal_total = 0;
349
124k
  n_tonal_low_frequency_total = 0;
350
7.90M
  for (i = 0; i < frame_length; i++) {
351
7.77M
    n_tonal_total += ptr_n_tonal[99 - i];
352
7.77M
    n_tonal_low_frequency_total += ptr_n_tonal_low_frequency[99 - i];
353
7.77M
  }
354
355
124k
  *ave_n_tonal = (FLOAT32)n_tonal_total / frame_length;
356
357
124k
  if (n_tonal_total == 0) {
358
28.1k
    *ptr_n_tonal_low_frequency_ratio = 1;
359
96.8k
  } else {
360
96.8k
    *ptr_n_tonal_low_frequency_ratio = (FLOAT32)n_tonal_low_frequency_total / n_tonal_total;
361
96.8k
  }
362
363
  /* compute the short-term AVE */
364
124k
  if (framecnt_xm < AVE_TONAL_LENGTH_SHORT) {
365
12.8k
    frame_length = framecnt_xm;
366
112k
  } else {
367
112k
    frame_length = AVE_TONAL_LENGTH_SHORT;
368
112k
  }
369
370
124k
  n_tonal_total = 0;
371
1.30M
  for (i = 0; i < frame_length; i++) {
372
1.18M
    n_tonal_total += ptr_n_tonal[99 - i];
373
1.18M
  }
374
375
124k
  *ave_n_tonal_short = (FLOAT32)n_tonal_total / frame_length;
376
124k
  return;
377
124k
}
378
379
static VOID iusace_spectral_tilt_analysis(ia_spec_tilt_params_struct *ptr_spec_params,
380
124k
                                          WORD32 ccfl) {
381
124k
  FLOAT32 *ptr_time_signal = ptr_spec_params->time_signal;
382
124k
  WORD32 framecnt_xm = ptr_spec_params->framecnt_xm;
383
124k
  FLOAT32 *ptr_spec_tilt_buf = ptr_spec_params->spec_tilt_buf;
384
124k
  FLOAT32 *ptr_msd_spec_tilt = ptr_spec_params->msd_spec_tilt;
385
124k
  FLOAT32 *ptr_msd_spec_tilt_short = ptr_spec_params->msd_spec_tilt_short;
386
124k
  WORD32 i;
387
124k
  WORD32 frame_length;
388
389
124k
  FLOAT32 r0, r1;
390
124k
  FLOAT32 spec_tilt;
391
124k
  FLOAT32 ave_spec_tilt;
392
393
  /* compute spectral tilt */
394
124k
  r0 = 0;
395
124k
  r1 = 0;
396
117M
  for (i = 0; i < ccfl - 1; i++) {
397
117M
    r0 += ptr_time_signal[i] * ptr_time_signal[i];
398
117M
    r1 += ptr_time_signal[i] * ptr_time_signal[i + 1];
399
117M
  }
400
124k
  r0 += ptr_time_signal[i] * ptr_time_signal[i];
401
402
124k
  if (r0 == 0) {
403
3.92k
    spec_tilt = 1.0f;
404
121k
  } else {
405
121k
    spec_tilt = r1 / r0;
406
121k
  }
407
408
  /* update spec_tilt_buf */
409
12.4M
  for (i = 0; i < 100 - 1; i++) {
410
12.3M
    ptr_spec_tilt_buf[i] = ptr_spec_tilt_buf[i + 1];
411
12.3M
  }
412
124k
  ptr_spec_tilt_buf[99] = spec_tilt;
413
414
  /* compute the long-term mean square deviation of the spectral tilt */
415
124k
  if (framecnt_xm < SPECTRAL_TILT_LENGTH) {
416
68.3k
    frame_length = framecnt_xm;
417
68.3k
  } else {
418
56.6k
    frame_length = SPECTRAL_TILT_LENGTH;
419
56.6k
  }
420
421
124k
  ave_spec_tilt = 0;
422
6.85M
  for (i = 0; i < frame_length; i++) {
423
6.73M
    ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
424
6.73M
  }
425
124k
  ave_spec_tilt /= frame_length;
426
427
124k
  *ptr_msd_spec_tilt = 0;
428
6.85M
  for (i = 0; i < frame_length; i++) {
429
6.73M
    *ptr_msd_spec_tilt +=
430
6.73M
        (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
431
6.73M
  }
432
124k
  *ptr_msd_spec_tilt /= frame_length;
433
434
  /* compute the short-term mean square deviation of the spectral tilt */
435
124k
  if (framecnt_xm < SPECTRAL_TILT_LENGTH_SHORT) {
436
25.0k
    frame_length = framecnt_xm;
437
99.8k
  } else {
438
99.8k
    frame_length = SPECTRAL_TILT_LENGTH_SHORT;
439
99.8k
  }
440
441
124k
  ave_spec_tilt = 0;
442
2.36M
  for (i = 0; i < frame_length; i++) {
443
2.23M
    ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
444
2.23M
  }
445
124k
  ave_spec_tilt /= frame_length;
446
447
124k
  *ptr_msd_spec_tilt_short = 0;
448
2.36M
  for (i = 0; i < frame_length; i++) {
449
2.23M
    *ptr_msd_spec_tilt_short +=
450
2.23M
        (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
451
2.23M
  }
452
124k
  *ptr_msd_spec_tilt_short /= frame_length;
453
454
  /* compute the energy of current frame */
455
124k
  if (r0 <= 1) {
456
5.17k
    ptr_spec_params->frame_energy = 0;
457
119k
  } else {
458
119k
    ptr_spec_params->frame_energy = (FLOAT32)(10 * log(r0) / log(10));
459
119k
  }
460
124k
  return;
461
124k
}
462
463
124k
/**
 * Derives the initial speech/music decision of the current frame.
 *
 * Reads the per-frame features from pstr_mode_params (tonal averages, spectral
 * tilt deviations, low-frequency tonal ratio, frame energy), pushes them into
 * the 5-entry feature histories, resets *flag_border to NO_BORDER and possibly
 * raises a border flag (which also resets *framecnt_xm to 10), and applies a
 * fixed sequence of threshold rules.
 *
 * Returns one of MUSIC_DEFINITE, SPEECH_DEFINITE, MUSIC or SPEECH; the rule
 * sequence never leaves the result at TBD.
 *
 * NOTE(review): the counters and border_state below are plain locals that start
 * at 0 on every call, so inside this function they can reach at most 1. The
 * rules that require a count of 15, 100 or 400 can therefore never fire as
 * written. They presumably were meant to persist across frames - confirm before
 * relying on those border rules.
 */
static WORD32 iusace_init_mode_decision(ia_mode_params_struct *pstr_mode_params) {
  /* Features of the current frame. */
  const WORD32 framecnt = pstr_mode_params->framecnt;
  const FLOAT32 ave_n_tonal = pstr_mode_params->ave_n_tonal;
  const FLOAT32 ave_n_tonal_short = pstr_mode_params->ave_n_tonal_short;
  const FLOAT32 msd_spec_tilt = pstr_mode_params->msd_spec_tilt;
  const FLOAT32 msd_spec_tilt_short = pstr_mode_params->msd_spec_tilt_short;
  const FLOAT32 n_tonal_low_frequency_ratio = pstr_mode_params->n_tonal_low_frequency_ratio;
  const FLOAT32 frame_energy = pstr_mode_params->frame_energy;

  /* Caller-owned state updated by this function. */
  WORD32 *framecnt_xm = pstr_mode_params->framecnt_xm;
  WORD32 *flag_border = pstr_mode_params->flag_border;
  FLOAT32 *tonal_short_hist = pstr_mode_params->ave_n_tonal_short_buf;
  FLOAT32 *tonal_long_hist = pstr_mode_params->ave_n_tonal_buf;
  FLOAT32 *tilt_long_hist = pstr_mode_params->msd_spec_tilt_buf;
  FLOAT32 *tilt_short_hist = pstr_mode_params->msd_spec_tilt_short_buf;

  WORD32 decision = TBD;
  WORD32 strict_sm_count;
  WORD32 loose_sm_count;
  WORD32 ms_count = 0;
  WORD32 tonal_ms_hit = 0;
  WORD32 music_run = 0;
  WORD32 border_state = 0;
  WORD32 loud_count;
  FLOAT32 oldest_tonal_long, oldest_tonal_short;

  *flag_border = NO_BORDER;

  /* ---- border decision according to spectral tilt ---- */

  /* update msd_spec_tilt_buf, msd_spec_tilt_short_buf (newest at index 4) */
  memmove(tilt_long_hist, tilt_long_hist + 1, (5 - 1) * sizeof(tilt_long_hist[0]));
  memmove(tilt_short_hist, tilt_short_hist + 1, (5 - 1) * sizeof(tilt_short_hist[0]));
  tilt_long_hist[4] = msd_spec_tilt;
  tilt_short_hist[4] = msd_spec_tilt_short;

  /* speech->music: strict border */
  strict_sm_count = ((msd_spec_tilt >= 0.014) && (msd_spec_tilt_short <= 0.000005)) ? 1 : 0;
  if ((strict_sm_count >= 15) && (*framecnt_xm >= 300)) {
    if ((*flag_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
        (*flag_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
        (border_state != BORDER_SPEECH_MUSIC_DEFINITE)) {
      *framecnt_xm = 10;
      *flag_border = BORDER_SPEECH_MUSIC;
    }
  }

  /* speech->music: relatively loose border */
  loose_sm_count = ((msd_spec_tilt >= 0.0025) && (msd_spec_tilt_short <= 0.000003)) ? 1 : 0;
  if ((loose_sm_count >= 15) && (*framecnt_xm >= 300)) {
    if ((*flag_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
        (*flag_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
        (border_state != BORDER_SPEECH_MUSIC_DEFINITE)) {
      *framecnt_xm = 10;
      *flag_border = BORDER_SPEECH_MUSIC;
    }
  }

  /* music->speech, judged on the oldest history entries */
  if ((tilt_long_hist[0] <= 0.0003) && (tilt_short_hist[0] <= 0.0002)) {
    ms_count++;
  }
  if ((ms_count >= 100) && (msd_spec_tilt >= 0.0008) && (msd_spec_tilt_short >= 0.0025) &&
      (*framecnt_xm >= 20)) {
    if ((*flag_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
        (*flag_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
        (border_state != BORDER_MUSIC_SPEECH_DEFINITE)) {
      *framecnt_xm = 10;
      *flag_border = BORDER_MUSIC_SPEECH;
    }
  }

  /* ---- border decision according to tonal ---- */

  /* update ave_n_tonal_short_buf, ave_n_tonal_buf (newest at index 4) */
  memmove(tonal_short_hist, tonal_short_hist + 1, (5 - 1) * sizeof(tonal_short_hist[0]));
  memmove(tonal_long_hist, tonal_long_hist + 1, (5 - 1) * sizeof(tonal_long_hist[0]));
  tonal_short_hist[4] = ave_n_tonal_short;
  tonal_long_hist[4] = ave_n_tonal;

  /* music->speech */
  oldest_tonal_long = tonal_long_hist[0];
  oldest_tonal_short = tonal_short_hist[0];
  if ((oldest_tonal_long >= 12) && (oldest_tonal_long < 15) &&
      (oldest_tonal_long - oldest_tonal_short >= 5) && (*framecnt_xm >= 20) &&
      (ave_n_tonal_short - oldest_tonal_short < 5)) {
    *framecnt_xm = 10;
    tonal_ms_hit = 1;
    *flag_border = BORDER_MUSIC_SPEECH_DEFINITE;
  }

  /* ---- update border decision according to energy ---- */
  loud_count = (frame_energy <= 60) ? 0 : 1;

  if ((*flag_border == BORDER_MUSIC_SPEECH) && (loud_count <= 5)) {
    *flag_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *framecnt_xm = 10;
  }

  /* ---- MUSIC_DEFINITE / SPEECH_DEFINITE from short-term characteristics ---- */

  /* ave_n_tonal_short (the two tests exclude each other) */
  if (decision == TBD) {
    if (ave_n_tonal_short >= 19) {
      decision = MUSIC_DEFINITE;
    } else if (ave_n_tonal_short <= 1.5) {
      decision = SPEECH_DEFINITE;
    }
  }

  /* msd_spec_tilt_short: a large short-term deviation overrides any earlier result */
  if (msd_spec_tilt_short >= 0.02) {
    decision = SPEECH_DEFINITE;
  }
  if ((decision == TBD) && (msd_spec_tilt_short <= 0.00000025) && (framecnt >= 10)) {
    decision = MUSIC_DEFINITE;
  }

  /* ---- SPEECH mode decision ---- */

  /* flag_ave_music_speech and ave_n_tonal_short */
  if ((decision == TBD) && (tonal_ms_hit == 1) && (ave_n_tonal_short <= 12) &&
      (*framecnt_xm <= 150)) {
    decision = SPEECH;
  }

  /* ---- MUSIC_DEFINITE / SPEECH_DEFINITE from long-term characteristics ---- */

  /* ave_n_tonal (the two tests exclude each other) */
  if (decision == TBD) {
    if (ave_n_tonal <= 3) {
      decision = SPEECH_DEFINITE;
    } else if (ave_n_tonal >= 15) {
      decision = MUSIC_DEFINITE;
    }
  }

  /* ave_n_tonal_short */
  if ((decision == TBD) && (ave_n_tonal_short >= 17)) {
    decision = MUSIC_DEFINITE;
  }

  /* msd_spec_tilt (the two tests exclude each other) */
  if (decision == TBD) {
    if (msd_spec_tilt >= 0.01) {
      decision = SPEECH_DEFINITE;
    } else if ((framecnt >= 10) && (msd_spec_tilt <= 0.00004)) {
      decision = MUSIC_DEFINITE;
    }
  }

  /* n_tonal_low_frequency_ratio */
  if ((decision == TBD) && (n_tonal_low_frequency_ratio <= 0.91)) {
    decision = MUSIC_DEFINITE;
  }

  /* ---- MUSIC and SPEECH mode decision ---- */

  /* msd_spec_tilt */
  if ((decision == TBD) && (msd_spec_tilt <= 0.0002) && (*framecnt_xm >= 15)) {
    decision = MUSIC;
  }

  /* n_tonal_low_frequency_ratio (the two tests exclude each other) */
  if (decision == TBD) {
    if (n_tonal_low_frequency_ratio >= 0.95) {
      decision = SPEECH;
    } else if (n_tonal_low_frequency_ratio <= 0.935) {
      decision = MUSIC;
    }
  }

  /* the rest of the frames go to SPEECH */
  if (decision == TBD) {
    decision = SPEECH;
  }

  /* ---- MUSIC decision according to changes of the MSD of the spectral tilt ---- */
  if ((msd_spec_tilt <= 0.007) && (decision != SPEECH_DEFINITE)) {
    if (decision != SPEECH) {
      music_run++;
    }
  } else {
    music_run = 0;
  }

  if ((music_run >= 400) && (decision != SPEECH_DEFINITE) &&
      (border_state != BORDER_MUSIC_SPEECH_DEFINITE)) {
    decision = MUSIC;
  }

  /* ---- update border flag ---- */
  if (*flag_border != NO_BORDER) {
    border_state = *flag_border;
  }

  /* update BORDER_SPEECH_MUSIC_DEFINITE */
  if (((border_state == BORDER_MUSIC_SPEECH) || (border_state == BORDER_MUSIC_SPEECH_DEFINITE)) &&
      (decision == MUSIC_DEFINITE) && (*framecnt_xm >= 20)) {
    *flag_border = BORDER_SPEECH_MUSIC_DEFINITE;
    *framecnt_xm = 10;
    border_state = *flag_border;
  }

  /* update BORDER_MUSIC_SPEECH_DEFINITE */
  if (((border_state == BORDER_SPEECH_MUSIC) || (border_state == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (decision == SPEECH_DEFINITE) && (*framecnt_xm >= 20)) {
    *flag_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *framecnt_xm = 10;
  }

  return decision;
}
695
696
124k
/**
 * Smooths the initial speech/music decision using past results, border flags
 * and frame energies.
 *
 * pstr_smooth_param supplies the current initial decision, border flag and
 * frame energy, the "behind"/"ahead" history buffers, the 100-entry smoothing
 * buffer (newest entry at index 99) and the persistent fields num_smoothing,
 * flag_speech_definite, flag_music_definite and count_small_energy, all of
 * which are updated here.
 *
 * Returns the smoothed mode decision for the frame that has just moved from
 * the "ahead" buffer into the "behind" buffer.
 */
static WORD32 iusace_smoothing_mode_decision(ia_smooth_params_struct *pstr_smooth_param) {
  WORD32 *ptr_init_result_ahead = pstr_smooth_param->init_result_ahead;
  WORD32 flag_border = pstr_smooth_param->flag_border;
  WORD32 *ptr_flag_border_buf_behind = pstr_smooth_param->flag_border_buf_behind;
  WORD32 *ptr_flag_border_buf_ahead = pstr_smooth_param->flag_border_buf_ahead;
  FLOAT32 frame_energy = pstr_smooth_param->frame_energy;
  FLOAT32 *ptr_frame_energy_buf_behind = pstr_smooth_param->frame_energy_buf_behind;
  FLOAT32 *ptr_frame_energy_buf_ahead = pstr_smooth_param->frame_energy_buf_ahead;
  WORD32 *ptr_smoothing_result_buf = pstr_smooth_param->smoothing_result_buf;
  WORD32 *ptr_init_result_behind = pstr_smooth_param->init_result_behind;
  WORD32 init_mode_decision_result = pstr_smooth_param->init_mode_decision_result;
  WORD32 i;

  WORD32 mode_decision_result;

  WORD32 num_music, num_speech;

  /* ---- update data arrays ---- */

  /* init_result_behind: drop the oldest entry, take over the head of "ahead" */
  for (i = 0; i < 99; i++) {
    ptr_init_result_behind[i] = ptr_init_result_behind[i + 1];
  }
  ptr_init_result_behind[99] = ptr_init_result_ahead[0];

  ptr_init_result_ahead[NFRAMEAHEAD - 1] = init_mode_decision_result;

  /* flag_border_buf_* and frame_energy_buf_*: same scheme with 10-entry histories */
  for (i = 0; i < 9; i++) {
    ptr_flag_border_buf_behind[i] = ptr_flag_border_buf_behind[i + 1];
    ptr_frame_energy_buf_behind[i] = ptr_frame_energy_buf_behind[i + 1];
  }
  ptr_flag_border_buf_behind[9] = ptr_flag_border_buf_ahead[0];
  ptr_frame_energy_buf_behind[9] = ptr_frame_energy_buf_ahead[0];

  ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] = flag_border;

  ptr_frame_energy_buf_ahead[NFRAMEAHEAD - 1] = frame_energy;

  /* ---- smoothing according to past results ---- */

  mode_decision_result = ptr_init_result_behind[99];

  /* update smoothing_result_buf: shift while no border was seen, otherwise
   * forget the past and restart the smoothing window */
  if (ptr_flag_border_buf_behind[9] == NO_BORDER) {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = ptr_smoothing_result_buf[i + 1];
    }
    pstr_smooth_param->num_smoothing++;
  } else {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = TBD;
    }
    pstr_smooth_param->num_smoothing = 1;
  }
  ptr_smoothing_result_buf[99] = ptr_init_result_behind[99];

  if (pstr_smooth_param->num_smoothing >= SMOOTHING_LENGTH) {
    num_music = 0;
    num_speech = 0;

    /* Majority vote over the SMOOTHING_LENGTH most recent results. The newest
     * result sits at index 99 (written just above), so the window is read as
     * 99 - i; indexing with 100 - i would read one element past the newest
     * slot and leave the newest result out of the vote. */
    for (i = 0; i < SMOOTHING_LENGTH; i++) {
      if ((ptr_smoothing_result_buf[99 - i] == SPEECH) ||
          (ptr_smoothing_result_buf[99 - i] == SPEECH_DEFINITE)) {
        num_speech++;
      } else {
        num_music++;
      }
    }

    /* smoothing: the majority wins unless the current initial decision is a
     * definite vote for the other class */
    if ((num_speech > num_music) && (init_mode_decision_result != MUSIC_DEFINITE)) {
      mode_decision_result = SPEECH;
    }
    if ((num_music > num_speech) && (init_mode_decision_result != SPEECH_DEFINITE)) {
      mode_decision_result = MUSIC;
    }
  }

  /* ---- correct according to energies and ahead mode decision results ---- */

  if ((mode_decision_result == MUSIC) && (ptr_frame_energy_buf_behind[9] <= 60)) {
    for (i = 0; i < NFRAMEAHEAD; i++) {
      if ((ptr_init_result_ahead[i] == SPEECH_DEFINITE) || (ptr_init_result_ahead[i] == SPEECH)) {
        pstr_smooth_param->flag_speech_definite = 1;
      }
    }
  }
  if ((pstr_smooth_param->flag_speech_definite == 1) && (mode_decision_result == MUSIC)) {
    mode_decision_result = SPEECH;
  } else {
    pstr_smooth_param->flag_speech_definite = 0;
  }

  /* ---- correct MUSIC mode ---- */

  /* count_small_energy counts consecutive frames with energy above 65 */
  if (ptr_frame_energy_buf_behind[9] <= 65) {
    pstr_smooth_param->count_small_energy = 0;
  } else {
    pstr_smooth_param->count_small_energy++;
  }
  if (((ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC) ||
       (ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (pstr_smooth_param->count_small_energy <= 30)) {
    pstr_smooth_param->flag_music_definite = 1;
  }
  if ((pstr_smooth_param->flag_music_definite == 1) &&
      ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE))) {
    mode_decision_result = MUSIC;
  } else {
    pstr_smooth_param->flag_music_definite = 0;
  }

  return mode_decision_result;
}
823
824
static WORD32 iusace_classification_ccfl(ia_classification_struct *pstr_sig_class,
825
                                         FLOAT32 *ptr_time_signal,
826
124k
                                         iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
827
124k
  WORD32 i;
828
124k
  ia_tonal_params_struct pstr_ton_params;
829
124k
  ia_smooth_params_struct smooth_param;
830
124k
  ia_mode_params_struct pstr_mode_params;
831
124k
  ia_spec_tilt_params_struct ptr_spec_params;
832
833
124k
  ia_classification_buf_struct *pstr_buffers = &(pstr_sig_class->buffers);
834
124k
  pFLOAT32 spec_tilt_buf = pstr_sig_class->spec_tilt_buf;
835
124k
  pWORD32 n_tonal = pstr_sig_class->n_tonal;
836
124k
  pWORD32 n_tonal_low_frequency = pstr_sig_class->n_tonal_low_frequency;
837
124k
  pWORD32 framecnt_xm = &(pstr_sig_class->framecnt_xm);
838
124k
  pWORD32 framecnt = &(pstr_sig_class->framecnt);
839
124k
  pFLOAT32 ave_n_tonal_short_buf = pstr_sig_class->ave_n_tonal_short_buf;
840
124k
  pFLOAT32 ave_n_tonal_buf = pstr_sig_class->ave_n_tonal_buf;
841
124k
  pFLOAT32 msd_spec_tilt_buf = pstr_sig_class->msd_spec_tilt_buf;
842
124k
  pFLOAT32 msd_spec_tilt_short_buf = pstr_sig_class->msd_spec_tilt_short_buf;
843
844
124k
  FLOAT32 n_tonal_low_frequency_ratio;    /* the ratio of distribution of the numbers */
845
                                          /* of tonal in the low frequency domain     */
846
124k
  FLOAT32 ave_n_tonal, ave_n_tonal_short; /**< the number of tonal */
847
124k
  FLOAT32 msd_spec_tilt;                  /* the long-term MSD of spectral tilt */
848
124k
  FLOAT32 msd_spec_tilt_short;            /* the short-term MSD of spectral tilt */
849
850
124k
  WORD32 init_mode_decision_result; /* the initial mode decision */
851
124k
  WORD32 flag_border = NO_BORDER;   /* flag of current border */
852
853
124k
  WORD32 mode_decision_result; /* final mode decision result */
854
855
124k
  if (pstr_sig_class->init_flag == 0) {
856
    /* initialize */
857
1.57k
    pstr_sig_class->init_flag = 1;
858
859
9.44k
    for (i = 0; i < 5; i++) {
860
7.87k
      n_tonal[i] = 0;
861
7.87k
      n_tonal_low_frequency[i] = 0;
862
7.87k
      spec_tilt_buf[i] = 0;
863
7.87k
      pstr_buffers->init_result_behind[i] = TBD;
864
7.87k
      pstr_buffers->smoothing_result_buf[i] = TBD;
865
866
7.87k
      ave_n_tonal_short_buf[i] = 0;
867
7.87k
      ave_n_tonal_buf[i] = 0;
868
7.87k
      msd_spec_tilt_buf[i] = 0;
869
7.87k
      msd_spec_tilt_short_buf[i] = 0;
870
871
7.87k
      pstr_buffers->frame_energy_buf_behind[i] = 0;
872
7.87k
      pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
873
7.87k
    }
874
9.44k
    for (; i < 10; i++) {
875
7.87k
      n_tonal[i] = 0;
876
7.87k
      n_tonal_low_frequency[i] = 0;
877
7.87k
      spec_tilt_buf[i] = 0;
878
7.87k
      pstr_buffers->init_result_behind[i] = TBD;
879
7.87k
      pstr_buffers->smoothing_result_buf[i] = TBD;
880
881
7.87k
      pstr_buffers->frame_energy_buf_behind[i] = 0;
882
7.87k
      pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
883
7.87k
    }
884
885
143k
    for (; i < 100; i++) {
886
141k
      n_tonal[i] = 0;
887
141k
      n_tonal_low_frequency[i] = 0;
888
141k
      spec_tilt_buf[i] = 0;
889
141k
      pstr_buffers->init_result_behind[i] = TBD;
890
141k
      pstr_buffers->smoothing_result_buf[i] = TBD;
891
141k
    }
892
3.14k
    for (i = 0; i < NFRAMEAHEAD; i++) {
893
1.57k
      pstr_buffers->frame_energy_buf_ahead[i] = 0;
894
1.57k
      pstr_buffers->flag_border_buf_ahead[i] = NO_BORDER;
895
1.57k
      pstr_buffers->init_result_ahead[i] = TBD;
896
1.57k
    }
897
1.57k
  }
898
899
124k
  *framecnt += 1;
900
124k
  *framecnt_xm += 1;
901
902
124k
  pstr_ton_params.time_signal = (FLOAT32 *)ptr_time_signal;
903
124k
  pstr_ton_params.framecnt_xm = *framecnt_xm;
904
124k
  pstr_ton_params.n_tonal = n_tonal;
905
124k
  pstr_ton_params.n_tonal_low_frequency = n_tonal_low_frequency;
906
124k
  pstr_ton_params.n_tonal_low_frequency_ratio = &n_tonal_low_frequency_ratio;
907
124k
  pstr_ton_params.ave_n_tonal = &ave_n_tonal;
908
124k
  pstr_ton_params.ave_n_tonal_short = &ave_n_tonal_short;
909
  /** analysis tonal
910
   */
911
124k
  iusace_tonal_analysis(&pstr_ton_params, pstr_scratch, ccfl);
912
913
124k
  ptr_spec_params.time_signal = ptr_time_signal;
914
124k
  ptr_spec_params.framecnt_xm = *framecnt_xm;
915
124k
  ptr_spec_params.spec_tilt_buf = spec_tilt_buf;
916
124k
  ptr_spec_params.msd_spec_tilt = &msd_spec_tilt;
917
124k
  ptr_spec_params.msd_spec_tilt_short = &msd_spec_tilt_short;
918
  /** analysis spectral tilt
919
   */
920
124k
  iusace_spectral_tilt_analysis(&ptr_spec_params, ccfl);
921
922
124k
  pstr_mode_params.framecnt = *framecnt;
923
124k
  pstr_mode_params.framecnt_xm = framecnt_xm;
924
124k
  pstr_mode_params.flag_border = &flag_border;
925
124k
  pstr_mode_params.ave_n_tonal_short = ave_n_tonal_short;
926
124k
  pstr_mode_params.ave_n_tonal = ave_n_tonal;
927
124k
  pstr_mode_params.ave_n_tonal_short_buf = ave_n_tonal_short_buf;
928
124k
  pstr_mode_params.ave_n_tonal_buf = ave_n_tonal_buf;
929
124k
  pstr_mode_params.msd_spec_tilt = msd_spec_tilt;
930
124k
  pstr_mode_params.msd_spec_tilt_short = msd_spec_tilt_short;
931
124k
  pstr_mode_params.msd_spec_tilt_buf = msd_spec_tilt_buf;
932
124k
  pstr_mode_params.msd_spec_tilt_short_buf = msd_spec_tilt_short_buf;
933
124k
  pstr_mode_params.n_tonal_low_frequency_ratio = n_tonal_low_frequency_ratio;
934
124k
  pstr_mode_params.frame_energy = ptr_spec_params.frame_energy;
935
  /** initial mode decision and boundary decisions
936
   */
937
124k
  init_mode_decision_result = iusace_init_mode_decision(&pstr_mode_params);
938
939
124k
  smooth_param.flag_border_buf_behind = pstr_buffers->flag_border_buf_behind;
940
124k
  smooth_param.flag_border_buf_ahead = pstr_buffers->flag_border_buf_ahead;
941
124k
  smooth_param.frame_energy = ptr_spec_params.frame_energy;
942
124k
  smooth_param.frame_energy_buf_behind = pstr_buffers->frame_energy_buf_behind;
943
124k
  smooth_param.frame_energy_buf_ahead = pstr_buffers->frame_energy_buf_ahead;
944
124k
  smooth_param.smoothing_result_buf = pstr_buffers->smoothing_result_buf;
945
124k
  smooth_param.init_result_ahead = pstr_buffers->init_result_ahead;
946
124k
  smooth_param.flag_border = flag_border;
947
124k
  smooth_param.init_result_behind = pstr_buffers->init_result_behind;
948
124k
  smooth_param.init_mode_decision_result = init_mode_decision_result;
949
124k
  smooth_param.flag_speech_definite = 0;
950
124k
  smooth_param.count_small_energy = 0;
951
124k
  smooth_param.flag_music_definite = 0;
952
124k
  smooth_param.num_smoothing = 0;
953
  /* smoothing */
954
124k
  mode_decision_result = iusace_smoothing_mode_decision(&smooth_param);
955
956
124k
  return mode_decision_result;
957
124k
}
958
959
VOID iusace_classification(ia_classification_struct *pstr_sig_class,
960
124k
                           iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
961
124k
  WORD32 n_frames, n_class, avg_cls, nf;
962
124k
  WORD32 i;
963
124k
  FLOAT32 *ptr_time_signal = pstr_scratch->p_time_signal;
964
124k
  WORD32 mode_decision_result;
965
966
124k
  n_frames = pstr_sig_class->n_buffer_samples / ccfl;
967
968
249k
  for (nf = 0; nf < n_frames; nf++) {
969
117M
    for (i = 0; i < ccfl; i++) {
970
117M
      ptr_time_signal[i] = pstr_sig_class->input_samples[ccfl * nf + i];
971
117M
    }
972
973
    /* classification of ccfl-frame */
974
124k
    mode_decision_result =
975
124k
        iusace_classification_ccfl(pstr_sig_class, ptr_time_signal, pstr_scratch, ccfl);
976
977
    /* coding mode decision of 1024-frame */
978
124k
    if ((mode_decision_result == MUSIC) || (mode_decision_result == MUSIC_DEFINITE)) {
979
23.4k
      pstr_sig_class->coding_mode = FD_MODE;
980
101k
    } else if ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE)) {
981
99.9k
      pstr_sig_class->coding_mode = TD_MODE;
982
99.9k
    }
983
984
124k
    pstr_sig_class->class_buf[pstr_sig_class->n_buf_class + nf] = pstr_sig_class->coding_mode;
985
124k
    pstr_sig_class->pre_mode = pstr_sig_class->coding_mode;
986
124k
  }
987
988
  /* merge ccfl-frame results */
989
124k
  pstr_sig_class->n_buf_class += n_frames;
990
124k
  n_class = (pstr_sig_class->n_class_frames > pstr_sig_class->n_buf_class)
991
124k
                ? pstr_sig_class->n_buf_class
992
124k
                : pstr_sig_class->n_class_frames;
993
124k
  {
994
124k
    WORD32 min_cls, max_cls;
995
996
124k
    min_cls = max_cls = pstr_sig_class->class_buf[0];
997
124k
    for (i = 1; i < n_class; i++) {
998
0
      if (pstr_sig_class->class_buf[i] > max_cls) {
999
0
        max_cls = pstr_sig_class->class_buf[i];
1000
0
      } else if (pstr_sig_class->class_buf[i] < min_cls) {
1001
0
        min_cls = pstr_sig_class->class_buf[i];
1002
0
      }
1003
0
    }
1004
1005
124k
    avg_cls = 0;
1006
249k
    for (i = 0; i < n_class; i++) {
1007
124k
      if (pstr_sig_class->class_buf[i] == max_cls) {
1008
124k
        avg_cls += 1;
1009
124k
      }
1010
124k
      if (pstr_sig_class->class_buf[i] == min_cls) {
1011
124k
        avg_cls += -1;
1012
124k
      }
1013
124k
    }
1014
1015
124k
    if (avg_cls > 0) {
1016
0
      pstr_sig_class->coding_mode = max_cls;
1017
124k
    } else {
1018
124k
      pstr_sig_class->coding_mode = min_cls;
1019
124k
    }
1020
124k
  }
1021
1022
  /* shift, save pre_mode and unused class */
1023
124k
  if (n_class > 0) {
1024
124k
    pstr_sig_class->pre_mode = pstr_sig_class->class_buf[n_class - 1];
1025
124k
  }
1026
124k
  pstr_sig_class->n_buf_class -= n_class;
1027
124k
  pstr_sig_class->n_buffer_samples -= ccfl * n_frames;
1028
1029
124k
  WORD32 minimum = MIN(pstr_sig_class->n_buf_class, pstr_sig_class->n_buffer_samples);
1030
124k
  if (minimum == pstr_sig_class->n_buf_class) {
1031
124k
    for (i = 0; i < minimum; i++) {
1032
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1033
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1034
0
    }
1035
1036
    /* shift, save unused samples */
1037
124k
    for (; i < pstr_sig_class->n_buffer_samples; i++) {
1038
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1039
0
    }
1040
124k
  } else {
1041
0
    for (i = 0; i < minimum; i++) {
1042
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1043
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1044
0
    }
1045
1046
    /* shift, save unused samples */
1047
0
    for (; i < pstr_sig_class->n_buf_class; i++) {
1048
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1049
0
    }
1050
0
  }
1051
124k
}
1052
1053
1.60k
/**
 * Puts the signal classifier into its start-up state: counters are zeroed,
 * the previous mode is set to FD_MODE and the sample and analysis history
 * buffers are cleared.
 *
 * pstr_sig_class  classifier state to reset
 */
VOID iusace_init_classification(ia_classification_struct *pstr_sig_class) {
  /* Frame counters; a cleared init_flag makes iusace_classification_ccfl()
   * reset its history buffers on its next call. */
  pstr_sig_class->init_flag = 0;
  pstr_sig_class->framecnt = 0;
  pstr_sig_class->framecnt_xm = 0;

  /* mode bookkeeping */
  pstr_sig_class->pre_mode = FD_MODE;
  pstr_sig_class->is_switch_mode = 1;
  pstr_sig_class->n_class_frames = 2;
  pstr_sig_class->n_buf_class = 0;

  /* input sample buffer */
  pstr_sig_class->n_buffer_samples = 0;
  memset(pstr_sig_class->input_samples, 0, 2 * 3840 * sizeof(FLOAT32));

  /* decision, energy and border history */
  memset(&pstr_sig_class->buffers, 0, sizeof(pstr_sig_class->buffers));

  /* 100-entry analysis histories */
  memset(pstr_sig_class->spec_tilt_buf, 0, 100 * sizeof(FLOAT32));
  memset(pstr_sig_class->n_tonal, 0, 100 * sizeof(WORD32));
  memset(pstr_sig_class->n_tonal_low_frequency, 0, 100 * sizeof(WORD32));

  /* 5-entry analysis histories */
  memset(pstr_sig_class->msd_spec_tilt_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->msd_spec_tilt_short_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->ave_n_tonal_short_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->ave_n_tonal_buf, 0, 5 * sizeof(FLOAT32));
}