Coverage Report

Created: 2026-01-09 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/encoder/ixheaace_signal_classifier.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2023 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
#include <string.h>
21
#include <math.h>
22
#include "iusace_type_def.h"
23
#include "iusace_cnst.h"
24
25
#include "iusace_fd_quant.h"
26
#include "iusace_bitbuffer.h"
27
#include "impd_drc_common_enc.h"
28
#include "impd_drc_uni_drc.h"
29
#include "impd_drc_api.h"
30
#include "impd_drc_uni_drc_eq.h"
31
#include "impd_drc_uni_drc_filter_bank.h"
32
#include "impd_drc_gain_enc.h"
33
#include "impd_drc_struct_def.h"
34
35
#include "ixheaace_memory_standards.h"
36
#include "iusace_tns_usac.h"
37
#include "iusace_psy_mod.h"
38
#include "iusace_config.h"
39
#include "iusace_signal_classifier.h"
40
#include "iusace_fft.h"
41
#include "iusace_block_switch_const.h"
42
#include "iusace_block_switch_struct_def.h"
43
#include "iusace_cnst.h"
44
#include "iusace_ms.h"
45
#include "ixheaace_adjust_threshold_data.h"
46
#include "iusace_fd_qc_util.h"
47
#include "ixheaace_sbr_header.h"
48
#include "ixheaace_config.h"
49
#include "ixheaace_asc_write.h"
50
#include "iusace_main.h"
51
52
123k
/**
 * Converts an interleaved complex spectrum into a level-normalised power
 * density spectrum, in place.
 *
 * Bin k occupies ptr_input[2 * k] (real) and ptr_input[2 * k + 1] (imaginary).
 * On return ptr_input[2 * k] holds the power of bin k on a log10 (dB) scale for
 * k = 0 .. (ccfl >> 1) - 1, shifted so that the strongest bin sits at 96.
 * The odd (imaginary) slots are not modified.
 *
 * @param ptr_input  interleaved spectrum; ccfl FLOAT32 values are accessed
 * @param ccfl       frame length; 1024 selects LOG_1024_BASE_10, every other
 *                   value selects LOG_768_BASE_10
 */
static VOID iusace_calc_pds(FLOAT32 *ptr_input, WORD32 ccfl) {
  const WORD32 num_bins = ccfl >> 1;
  const FLOAT64 log_ccfl_base_10 = (ccfl == 1024) ? LOG_1024_BASE_10 : LOG_768_BASE_10;
  FLOAT64 max_pow;
  FLOAT64 delta;
  WORD32 bin;

  max_pow = MAX(
      10 * (log10(ptr_input[0] * ptr_input[0] + ptr_input[1] * ptr_input[1]) - log_ccfl_base_10) +
          10e-15,
      MIN_POW);

  /* Store the level of bin 0 as well. Previously it was only used for the peak
   * search, which left the raw FFT real part in ptr_input[0]; that linear value
   * then received the dB offset below and was compared against dB values by the
   * tonal detector (bin 2 looks two bins down). */
  ptr_input[0] = (FLOAT32)max_pow;

  for (bin = 1; bin < num_bins; bin++) {
    FLOAT32 *ptr_bin = &ptr_input[2 * bin];

    /* squared magnitude is used directly, so no sqrt is needed before the log */
    ptr_bin[0] = (FLOAT32)MAX(
        10 * (log10(ptr_bin[0] * ptr_bin[0] + ptr_bin[1] * ptr_bin[1]) - log_ccfl_base_10) +
            10e-15,
        MIN_POW);

    max_pow = MAX(max_pow, ptr_bin[0]);
  }

  /* Normalized to reference sound pressure level 96 dB */
  delta = 96 - max_pow;

  for (bin = 0; bin < num_bins; bin++) {
    ptr_input[2 * bin] = ptr_input[2 * bin] + (FLOAT32)delta;
  }
}
81
82
/**
 * Returns how many bins to either side of a candidate peak must be checked.
 *
 * The reach grows with the bin index; the band edges differ between the
 * FRAME_LEN_LONG layout and the shorter layout used for every other ccfl.
 * Callers only pass bins inside the candidate range (2 .. 499 or 2 .. 374).
 */
static WORD32 iusace_tonal_neighbour_reach(WORD32 bin, WORD32 ccfl) {
  if (ccfl == FRAME_LEN_LONG) {
    if (bin < 62) return 2;
    if (bin < 126) return 3;
    if (bin < 254) return 6;
    return 12;
  }

  if (bin < 47) return 2;
  if (bin < 95) return 3;
  if (bin < 194) return 5;
  return 9;
}

/**
 * Tests whether ptr_spec[bin] is a tonal peak.
 *
 * The bin must be a local maximum (strictly above its lower neighbour, not
 * below its upper neighbour) and must exceed every bin at distance
 * 2 .. reach on both sides by at least 7.
 *
 * @return 1 when all conditions hold, 0 otherwise
 */
static WORD32 iusace_is_tonal_peak(const FLOAT32 *ptr_spec, WORD32 bin, WORD32 reach) {
  WORD32 dist;

  if (!(ptr_spec[bin] > ptr_spec[bin - 1] && ptr_spec[bin] >= ptr_spec[bin + 1])) {
    return 0;
  }

  for (dist = 2; dist <= reach; dist++) {
    /* written as !(x >= 7) so that NaN inputs are rejected, as before */
    if (!(ptr_spec[bin] - ptr_spec[bin - dist] >= 7)) return 0;
    if (!(ptr_spec[bin] - ptr_spec[bin + dist] >= 7)) return 0;
  }

  return 1;
}

/**
 * Marks the tonal bins of a power density spectrum.
 *
 * The even slots of ptr_input (one value per bin) are copied to ptr_scratch.
 * A bin is flagged when it is a tonal peak (see iusace_is_tonal_peak) and the
 * combined level of the peak and its two direct neighbours is not below the
 * absolute threshold table entry for that bin. From bin 324 (long frames) or
 * bin 243 (otherwise) upwards the threshold is raised by 20.
 *
 * @param ptr_input       interleaved spectrum; ptr_input[2 * k] is read for
 *                        k = 0 .. (ccfl >> 1) - 1
 * @param ptr_tonal_flag  output, 512 entries written when ccfl equals
 *                        FRAME_LEN_LONG, 384 otherwise; 1 = tonal, 0 = not
 * @param ptr_scratch     work buffer receiving the ccfl >> 1 copied levels
 * @param ccfl            frame length; any value other than FRAME_LEN_LONG is
 *                        handled with the 768-sample tables
 */
static VOID iusace_find_tonal(FLOAT32 *ptr_input, WORD32 *ptr_tonal_flag, FLOAT32 *ptr_scratch,
                              WORD32 ccfl) {
  const WORD32 is_long = (ccfl == FRAME_LEN_LONG);
  const WORD32 num_flags = is_long ? 512 : 384;
  const WORD32 candidate_end = is_long ? 500 : 375; /* exclusive */
  FLOAT64 tonal_spl;
  FLOAT64 absolute_threshold_xm;
  WORD32 bin;

  for (bin = 0; bin < ccfl >> 1; bin++) {
    ptr_scratch[bin] = ptr_input[2 * bin];
  }

  for (bin = 0; bin < num_flags; bin++) {
    ptr_tonal_flag[bin] = 0;
  }

  /* Candidates start at bin 2 and stop early enough that bin + reach stays
   * inside the copied spectrum (499 + 12 = 511, 374 + 9 = 383). */
  for (bin = 2; bin < candidate_end; bin++) {
    if (iusace_is_tonal_peak(ptr_scratch, bin, iusace_tonal_neighbour_reach(bin, ccfl))) {
      ptr_tonal_flag[bin] = 1;
    }
  }

  /* Drop flagged bins whose level is below the absolute threshold. Only bins
   * >= 2 can be flagged, so ptr_scratch[bin - 1] is always a valid read. */
  for (bin = 0; bin < num_flags; bin++) {
    if (ptr_tonal_flag[bin] != 1) {
      continue;
    }

    /* compute the SPL of tonal */
    tonal_spl =
        10 * log10(pow(10, (ptr_scratch[bin - 1] / 10)) + pow(10, (ptr_scratch[bin] / 10)) +
                   pow(10, (ptr_scratch[bin + 1] / 10)));

    if (is_long) {
      if (bin >= 324) {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[bin] + 20;
      } else {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[bin];
      }
    } else {
      if (bin >= 243) {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[bin] + 20;
      } else {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[bin];
      }
    }

    if (tonal_spl < absolute_threshold_xm) {
      ptr_tonal_flag[bin] = 0;
    }
  }
}
293
294
static VOID iusace_tonal_analysis(ia_tonal_params_struct *pstr_ton_params,
295
123k
                                  iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
296
123k
  FLOAT32 *ptr_complex_fft = pstr_scratch->p_complex_fft;
297
123k
  WORD32 *ptr_tonal_flag = pstr_scratch->p_tonal_flag;
298
123k
  FLOAT32 *ptr_time_sig = pstr_ton_params->time_signal;
299
123k
  WORD32 framecnt_xm = pstr_ton_params->framecnt_xm;
300
123k
  WORD32 *ptr_n_tonal = pstr_ton_params->n_tonal;
301
123k
  WORD32 *ptr_n_tonal_low_frequency = pstr_ton_params->n_tonal_low_frequency;
302
123k
  FLOAT32 *ptr_n_tonal_low_frequency_ratio = pstr_ton_params->n_tonal_low_frequency_ratio;
303
123k
  FLOAT32 *ave_n_tonal = pstr_ton_params->ave_n_tonal;
304
123k
  FLOAT32 *ave_n_tonal_short = pstr_ton_params->ave_n_tonal_short;
305
123k
  WORD32 i;
306
123k
  WORD32 fft_size = ccfl;
307
308
123k
  WORD32 frame_length;
309
123k
  WORD32 n_tonal_total, n_tonal_low_frequency_total;
310
311
116M
  for (i = 0; i < ccfl; i++) {
312
116M
    ptr_complex_fft[2 * i] = (FLOAT32)(
313
116M
        ptr_time_sig[i] * ((ccfl == 1024) ? iusace_classify_arrays.hanning_window_1024[i]
314
116M
                                          : iusace_classify_arrays.hanning_window_768[i]));
315
116M
    ptr_complex_fft[2 * i + 1] = 0;
316
116M
  }
317
318
123k
  iusace_complex_fft(ptr_complex_fft, fft_size, pstr_scratch);
319
320
  /* compute power density spectrum */
321
  /* re_fft contains the resulting pds */
322
123k
  iusace_calc_pds(ptr_complex_fft, ccfl);
323
324
  /* detect tonal */
325
123k
  iusace_find_tonal(ptr_complex_fft, ptr_tonal_flag, pstr_scratch->p_pow_spec, ccfl);
326
327
  /* update n_tonal, n_tonal_low_frequency */
328
12.3M
  for (i = 0; i < 99; i++) {
329
12.2M
    ptr_n_tonal[i] = ptr_n_tonal[i + 1];
330
12.2M
    ptr_n_tonal_low_frequency[i] = ptr_n_tonal_low_frequency[i + 1];
331
12.2M
  }
332
123k
  ptr_n_tonal[99] = 0;
333
58.1M
  for (i = 0; i<ccfl>> 1; i++) {
334
58.0M
    ptr_n_tonal[99] += ptr_tonal_flag[i];
335
58.0M
  }
336
123k
  ptr_n_tonal_low_frequency[99] = 0;
337
19.8M
  for (i = 0; i < INDEXOFLOWFREQUENCY; i++) {
338
19.7M
    ptr_n_tonal_low_frequency[99] += ptr_tonal_flag[i];
339
19.7M
  }
340
341
  /* compute long-term AVE and the ratio of distribution in low-frequency domain */
342
123k
  if (framecnt_xm < AVE_TONAL_LENGTH) {
343
74.6k
    frame_length = framecnt_xm;
344
74.6k
  } else {
345
48.7k
    frame_length = AVE_TONAL_LENGTH;
346
48.7k
  }
347
348
123k
  n_tonal_total = 0;
349
123k
  n_tonal_low_frequency_total = 0;
350
7.84M
  for (i = 0; i < frame_length; i++) {
351
7.72M
    n_tonal_total += ptr_n_tonal[99 - i];
352
7.72M
    n_tonal_low_frequency_total += ptr_n_tonal_low_frequency[99 - i];
353
7.72M
  }
354
355
123k
  *ave_n_tonal = (FLOAT32)n_tonal_total / frame_length;
356
357
123k
  if (n_tonal_total == 0) {
358
28.0k
    *ptr_n_tonal_low_frequency_ratio = 1;
359
95.3k
  } else {
360
95.3k
    *ptr_n_tonal_low_frequency_ratio = (FLOAT32)n_tonal_low_frequency_total / n_tonal_total;
361
95.3k
  }
362
363
  /* compute the short-term AVE */
364
123k
  if (framecnt_xm < AVE_TONAL_LENGTH_SHORT) {
365
12.5k
    frame_length = framecnt_xm;
366
110k
  } else {
367
110k
    frame_length = AVE_TONAL_LENGTH_SHORT;
368
110k
  }
369
370
123k
  n_tonal_total = 0;
371
1.29M
  for (i = 0; i < frame_length; i++) {
372
1.16M
    n_tonal_total += ptr_n_tonal[99 - i];
373
1.16M
  }
374
375
123k
  *ave_n_tonal_short = (FLOAT32)n_tonal_total / frame_length;
376
123k
  return;
377
123k
}
378
379
static VOID iusace_spectral_tilt_analysis(ia_spec_tilt_params_struct *ptr_spec_params,
380
123k
                                          WORD32 ccfl) {
381
123k
  FLOAT32 *ptr_time_signal = ptr_spec_params->time_signal;
382
123k
  WORD32 framecnt_xm = ptr_spec_params->framecnt_xm;
383
123k
  FLOAT32 *ptr_spec_tilt_buf = ptr_spec_params->spec_tilt_buf;
384
123k
  FLOAT32 *ptr_msd_spec_tilt = ptr_spec_params->msd_spec_tilt;
385
123k
  FLOAT32 *ptr_msd_spec_tilt_short = ptr_spec_params->msd_spec_tilt_short;
386
123k
  WORD32 i;
387
123k
  WORD32 frame_length;
388
389
123k
  FLOAT32 r0, r1;
390
123k
  FLOAT32 spec_tilt;
391
123k
  FLOAT32 ave_spec_tilt;
392
393
  /* compute spectral tilt */
394
123k
  r0 = 0;
395
123k
  r1 = 0;
396
116M
  for (i = 0; i < ccfl - 1; i++) {
397
115M
    r0 += ptr_time_signal[i] * ptr_time_signal[i];
398
115M
    r1 += ptr_time_signal[i] * ptr_time_signal[i + 1];
399
115M
  }
400
123k
  r0 += ptr_time_signal[i] * ptr_time_signal[i];
401
402
123k
  if (r0 == 0) {
403
3.70k
    spec_tilt = 1.0f;
404
119k
  } else {
405
119k
    spec_tilt = r1 / r0;
406
119k
  }
407
408
  /* update spec_tilt_buf */
409
12.3M
  for (i = 0; i < 100 - 1; i++) {
410
12.2M
    ptr_spec_tilt_buf[i] = ptr_spec_tilt_buf[i + 1];
411
12.2M
  }
412
123k
  ptr_spec_tilt_buf[99] = spec_tilt;
413
414
  /* compute the long-term mean square deviation of the spectral tilt */
415
123k
  if (framecnt_xm < SPECTRAL_TILT_LENGTH) {
416
66.6k
    frame_length = framecnt_xm;
417
66.6k
  } else {
418
56.7k
    frame_length = SPECTRAL_TILT_LENGTH;
419
56.7k
  }
420
421
123k
  ave_spec_tilt = 0;
422
6.80M
  for (i = 0; i < frame_length; i++) {
423
6.67M
    ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
424
6.67M
  }
425
123k
  ave_spec_tilt /= frame_length;
426
427
123k
  *ptr_msd_spec_tilt = 0;
428
6.80M
  for (i = 0; i < frame_length; i++) {
429
6.67M
    *ptr_msd_spec_tilt +=
430
6.67M
        (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
431
6.67M
  }
432
123k
  *ptr_msd_spec_tilt /= frame_length;
433
434
  /* compute the short-term mean square deviation of the spectral tilt */
435
123k
  if (framecnt_xm < SPECTRAL_TILT_LENGTH_SHORT) {
436
24.5k
    frame_length = framecnt_xm;
437
98.8k
  } else {
438
98.8k
    frame_length = SPECTRAL_TILT_LENGTH_SHORT;
439
98.8k
  }
440
441
123k
  ave_spec_tilt = 0;
442
2.33M
  for (i = 0; i < frame_length; i++) {
443
2.20M
    ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
444
2.20M
  }
445
123k
  ave_spec_tilt /= frame_length;
446
447
123k
  *ptr_msd_spec_tilt_short = 0;
448
2.33M
  for (i = 0; i < frame_length; i++) {
449
2.20M
    *ptr_msd_spec_tilt_short +=
450
2.20M
        (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
451
2.20M
  }
452
123k
  *ptr_msd_spec_tilt_short /= frame_length;
453
454
  /* compute the energy of current frame */
455
123k
  if (r0 <= 1) {
456
4.90k
    ptr_spec_params->frame_energy = 0;
457
118k
  } else {
458
118k
    ptr_spec_params->frame_energy = (FLOAT32)(10 * log(r0) / log(10));
459
118k
  }
460
123k
  return;
461
123k
}
462
463
123k
/**
 * Makes the initial (unsmoothed) speech/music decision for one frame.
 *
 * The function first resets *flag_border to NO_BORDER, pushes the current
 * tilt and tonal statistics into their 5-entry histories, evaluates the
 * border rules, and then walks an ordered rule list in which the first
 * matching rule fixes the mode. It may overwrite *framecnt_xm (set to 10
 * whenever a border is raised) and *flag_border.
 *
 * NOTE(review): every hit counter below and border_state are plain locals
 * that start at 0 on each call, so a counter can reach at most 1 here. The
 * rules that require a count of 15, 100 or 400 therefore cannot fire in this
 * function as written. They presumably were meant to use state that persists
 * across frames - confirm before relying on the tilt-based borders.
 *
 * @param pstr_mode_params  in/out parameter bundle (statistics, histories,
 *                          framecnt, and the framecnt_xm / flag_border pointers)
 * @return one of MUSIC_DEFINITE, SPEECH_DEFINITE, MUSIC, SPEECH
 */
static WORD32 iusace_init_mode_decision(ia_mode_params_struct *pstr_mode_params) {
  const WORD32 frame_idx = pstr_mode_params->framecnt;
  WORD32 *ptr_cnt_xm = pstr_mode_params->framecnt_xm;
  WORD32 *ptr_border = pstr_mode_params->flag_border;
  const FLOAT32 tonal_short = pstr_mode_params->ave_n_tonal_short;
  const FLOAT32 tonal_long = pstr_mode_params->ave_n_tonal;
  FLOAT32 *ptr_tonal_short_hist = pstr_mode_params->ave_n_tonal_short_buf;
  FLOAT32 *ptr_tonal_long_hist = pstr_mode_params->ave_n_tonal_buf;
  const FLOAT32 tilt_msd = pstr_mode_params->msd_spec_tilt;
  const FLOAT32 tilt_msd_short = pstr_mode_params->msd_spec_tilt_short;
  FLOAT32 *ptr_tilt_hist = pstr_mode_params->msd_spec_tilt_buf;
  FLOAT32 *ptr_tilt_short_hist = pstr_mode_params->msd_spec_tilt_short_buf;
  const FLOAT32 low_freq_ratio = pstr_mode_params->n_tonal_low_frequency_ratio;
  const FLOAT32 energy = pstr_mode_params->frame_energy;

  WORD32 decision = TBD;
  WORD32 hits_strict;
  WORD32 hits_loose;
  WORD32 hits_music_speech = 0;
  WORD32 hits_music;
  WORD32 hits_loud;
  WORD32 tonal_border_seen = 0;
  WORD32 border_state = 0;
  WORD32 slot;

  *ptr_border = NO_BORDER;

  /* ---- border decision according to spectral tilt ---- */

  /* age the tilt histories; slot 4 is the newest, slot 0 the oldest */
  for (slot = 1; slot < 5; slot++) {
    ptr_tilt_hist[slot - 1] = ptr_tilt_hist[slot];
    ptr_tilt_short_hist[slot - 1] = ptr_tilt_short_hist[slot];
  }
  ptr_tilt_hist[4] = tilt_msd;
  ptr_tilt_short_hist[4] = tilt_msd_short;

  /* speech->music, strict border */
  hits_strict = ((tilt_msd >= 0.014) && (tilt_msd_short <= 0.000005)) ? 1 : 0;
  if ((*ptr_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (*ptr_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
      (border_state != BORDER_SPEECH_MUSIC_DEFINITE) && (hits_strict >= 15) &&
      (*ptr_cnt_xm >= 300)) {
    *ptr_cnt_xm = 10;
    *ptr_border = BORDER_SPEECH_MUSIC;
  }

  /* speech->music, relatively loose border */
  hits_loose = ((tilt_msd >= 0.0025) && (tilt_msd_short <= 0.000003)) ? 1 : 0;
  if ((*ptr_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (*ptr_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
      (border_state != BORDER_SPEECH_MUSIC_DEFINITE) && (hits_loose >= 15) &&
      (*ptr_cnt_xm >= 300)) {
    *ptr_cnt_xm = 10;
    *ptr_border = BORDER_SPEECH_MUSIC;
  }

  /* music->speech, judged on the oldest history entries */
  if ((ptr_tilt_hist[0] <= 0.0003) && (ptr_tilt_short_hist[0] <= 0.0002)) {
    hits_music_speech++;
  }
  if ((*ptr_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (*ptr_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
      (border_state != BORDER_MUSIC_SPEECH_DEFINITE) && (hits_music_speech >= 100) &&
      (tilt_msd >= 0.0008) && (tilt_msd_short >= 0.0025) && (*ptr_cnt_xm >= 20)) {
    *ptr_cnt_xm = 10;
    *ptr_border = BORDER_MUSIC_SPEECH;
  }

  /* ---- border decision according to tonal statistics ---- */

  for (slot = 1; slot < 5; slot++) {
    ptr_tonal_short_hist[slot - 1] = ptr_tonal_short_hist[slot];
    ptr_tonal_long_hist[slot - 1] = ptr_tonal_long_hist[slot];
  }
  ptr_tonal_short_hist[4] = tonal_short;
  ptr_tonal_long_hist[4] = tonal_long;

  /* music->speech */
  if ((ptr_tonal_long_hist[0] >= 12) && (ptr_tonal_long_hist[0] < 15) &&
      (ptr_tonal_long_hist[0] - ptr_tonal_short_hist[0] >= 5) && (*ptr_cnt_xm >= 20) &&
      (tonal_short - ptr_tonal_short_hist[0] < 5)) {
    *ptr_cnt_xm = 10;
    tonal_border_seen = 1;
    *ptr_border = BORDER_MUSIC_SPEECH_DEFINITE;
  }

  /* ---- border update according to energy ---- */

  hits_loud = (energy <= 60) ? 0 : 1;

  if ((*ptr_border == BORDER_MUSIC_SPEECH) && (hits_loud <= 5)) {
    *ptr_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *ptr_cnt_xm = 10;
  }

  /* ---- mode decision: ordered rules, first match wins ---- */

  /* short-term tonal count */
  if (tonal_short >= 19) {
    decision = MUSIC_DEFINITE;
  } else if (tonal_short <= 1.5) {
    decision = SPEECH_DEFINITE;
  }

  /* a large short-term tilt deviation overrides whatever was chosen above */
  if (tilt_msd_short >= 0.02) {
    decision = SPEECH_DEFINITE;
  }

  if (decision == TBD) {
    if ((tilt_msd_short <= 0.00000025) && (frame_idx >= 10)) {
      decision = MUSIC_DEFINITE;
    } else if ((tonal_border_seen == 1) && (tonal_short <= 12) && (*ptr_cnt_xm <= 150)) {
      /* shortly after a tonal music->speech border */
      decision = SPEECH;
    } else if (tonal_long <= 3) {
      decision = SPEECH_DEFINITE;
    } else if (tonal_long >= 15) {
      decision = MUSIC_DEFINITE;
    } else if (tonal_short >= 17) {
      decision = MUSIC_DEFINITE;
    } else if (tilt_msd >= 0.01) {
      decision = SPEECH_DEFINITE;
    } else if ((frame_idx >= 10) && (tilt_msd <= 0.00004)) {
      decision = MUSIC_DEFINITE;
    } else if (low_freq_ratio <= 0.91) {
      decision = MUSIC_DEFINITE;
    } else if ((tilt_msd <= 0.0002) && (*ptr_cnt_xm >= 15)) {
      decision = MUSIC;
    } else if (low_freq_ratio >= 0.95) {
      decision = SPEECH;
    } else if (low_freq_ratio <= 0.935) {
      decision = MUSIC;
    } else {
      /* everything left over is treated as speech */
      decision = SPEECH;
    }
  }

  /* ---- MUSIC decision according to changes of the tilt MSD ---- */

  hits_music = ((tilt_msd <= 0.007) && (decision != SPEECH_DEFINITE) && (decision != SPEECH))
                   ? 1
                   : 0;

  if ((decision != SPEECH_DEFINITE) && (hits_music >= 400) &&
      (border_state != BORDER_MUSIC_SPEECH_DEFINITE)) {
    decision = MUSIC;
  }

  /* ---- border flag bookkeeping ---- */

  if (*ptr_border != NO_BORDER) {
    border_state = *ptr_border;
  }

  /* a definite music frame right after a music->speech border flips it back */
  if (((border_state == BORDER_MUSIC_SPEECH) || (border_state == BORDER_MUSIC_SPEECH_DEFINITE)) &&
      (decision == MUSIC_DEFINITE) && (*ptr_cnt_xm >= 20)) {
    *ptr_border = BORDER_SPEECH_MUSIC_DEFINITE;
    *ptr_cnt_xm = 10;
    border_state = *ptr_border;
  }

  /* a definite speech frame right after a speech->music border flips it back */
  if (((border_state == BORDER_SPEECH_MUSIC) || (border_state == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (decision == SPEECH_DEFINITE) && (*ptr_cnt_xm >= 20)) {
    *ptr_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *ptr_cnt_xm = 10;
  }

  return decision;
}
695
696
123k
/**
 * Smooths the initial mode decision using past results, border flags and
 * frame energies, and returns the final mode for the frame that leaves the
 * look-ahead buffer.
 *
 * Side effects on pstr_smooth_param: the "behind" histories are aged by one
 * entry and fed from slot 0 of the matching "ahead" buffer, the newest inputs
 * are stored in slot NFRAMEAHEAD - 1 of the "ahead" buffers, and
 * num_smoothing, flag_speech_definite, count_small_energy and
 * flag_music_definite are updated.
 *
 * @param pstr_smooth_param  in/out smoothing state and current-frame inputs
 * @return the mode taken from the look-ahead buffer, possibly replaced by
 *         SPEECH or MUSIC by the smoothing and correction rules
 */
static WORD32 iusace_smoothing_mode_decision(ia_smooth_params_struct *pstr_smooth_param) {
  WORD32 *ptr_init_result_ahead = pstr_smooth_param->init_result_ahead;
  WORD32 flag_border = pstr_smooth_param->flag_border;
  WORD32 *ptr_flag_border_buf_behind = pstr_smooth_param->flag_border_buf_behind;
  WORD32 *ptr_flag_border_buf_ahead = pstr_smooth_param->flag_border_buf_ahead;
  FLOAT32 frame_energy = pstr_smooth_param->frame_energy;
  FLOAT32 *ptr_frame_energy_buf_behind = pstr_smooth_param->frame_energy_buf_behind;
  FLOAT32 *ptr_frame_energy_buf_ahead = pstr_smooth_param->frame_energy_buf_ahead;
  WORD32 *ptr_smoothing_result_buf = pstr_smooth_param->smoothing_result_buf;
  WORD32 *ptr_init_result_behind = pstr_smooth_param->init_result_behind;
  WORD32 init_mode_decision_result = pstr_smooth_param->init_mode_decision_result;
  WORD32 mode_decision_result;
  WORD32 num_music, num_speech;
  WORD32 i;

  /* age init_result_behind and move the oldest look-ahead result into slot 99;
   * this must happen before the look-ahead slot is overwritten below */
  for (i = 0; i < 99; i++) {
    ptr_init_result_behind[i] = ptr_init_result_behind[i + 1];
  }
  ptr_init_result_behind[99] = ptr_init_result_ahead[0];
  ptr_init_result_ahead[NFRAMEAHEAD - 1] = init_mode_decision_result;

  /* same scheme for the border flags and frame energies (10-entry histories) */
  for (i = 0; i < 9; i++) {
    ptr_flag_border_buf_behind[i] = ptr_flag_border_buf_behind[i + 1];
    ptr_frame_energy_buf_behind[i] = ptr_frame_energy_buf_behind[i + 1];
  }
  ptr_flag_border_buf_behind[9] = ptr_flag_border_buf_ahead[0];
  ptr_frame_energy_buf_behind[9] = ptr_frame_energy_buf_ahead[0];
  ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] = flag_border;
  ptr_frame_energy_buf_ahead[NFRAMEAHEAD - 1] = frame_energy;

  /* start from the unsmoothed result of the frame being decided */
  mode_decision_result = ptr_init_result_behind[99];

  /* update smoothing_result_buf: keep the history while no border was seen,
   * otherwise restart it */
  if (ptr_flag_border_buf_behind[9] == NO_BORDER) {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = ptr_smoothing_result_buf[i + 1];
    }
    pstr_smooth_param->num_smoothing++;
  } else {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = TBD;
    }
    pstr_smooth_param->num_smoothing = 1;
  }
  ptr_smoothing_result_buf[99] = ptr_init_result_behind[99];

  if (pstr_smooth_param->num_smoothing >= SMOOTHING_LENGTH) {
    num_music = 0;
    num_speech = 0;

    /* Majority vote over the SMOOTHING_LENGTH newest results. The newest entry
     * lives at index 99, so the window is 99, 98, ...; the previous 100 - i
     * indexing read one element past the last slot written above.
     * NOTE(review): assumes SMOOTHING_LENGTH <= 100 - confirm against its
     * definition. */
    for (i = 0; i < SMOOTHING_LENGTH; i++) {
      if ((ptr_smoothing_result_buf[99 - i] == SPEECH) ||
          (ptr_smoothing_result_buf[99 - i] == SPEECH_DEFINITE)) {
        num_speech++;
      } else {
        num_music++;
      }
    }

    /* the majority wins unless the current initial decision is definite for
     * the other class */
    if ((num_speech > num_music) && (init_mode_decision_result != MUSIC_DEFINITE)) {
      mode_decision_result = SPEECH;
    }
    if ((num_music > num_speech) && (init_mode_decision_result != SPEECH_DEFINITE)) {
      mode_decision_result = MUSIC;
    }
  }

  /* correct according to energies and ahead mode decision results: a quiet
   * MUSIC frame followed by speech in the look-ahead is turned into SPEECH */
  if ((mode_decision_result == MUSIC) && (ptr_frame_energy_buf_behind[9] <= 60)) {
    for (i = 0; i < NFRAMEAHEAD; i++) {
      if ((ptr_init_result_ahead[i] == SPEECH_DEFINITE) || (ptr_init_result_ahead[i] == SPEECH)) {
        pstr_smooth_param->flag_speech_definite = 1;
      }
    }
  }
  if ((pstr_smooth_param->flag_speech_definite == 1) && (mode_decision_result == MUSIC)) {
    mode_decision_result = SPEECH;
  } else {
    pstr_smooth_param->flag_speech_definite = 0;
  }

  /* correct MUSIC mode: the counter is reset by an energy of at most 65 and
   * incremented otherwise */
  if (ptr_frame_energy_buf_behind[9] <= 65) {
    pstr_smooth_param->count_small_energy = 0;
  } else {
    pstr_smooth_param->count_small_energy++;
  }
  if (((ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC) ||
       (ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (pstr_smooth_param->count_small_energy <= 30)) {
    pstr_smooth_param->flag_music_definite = 1;
  }
  if ((pstr_smooth_param->flag_music_definite == 1) &&
      ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE))) {
    mode_decision_result = MUSIC;
  } else {
    pstr_smooth_param->flag_music_definite = 0;
  }

  return mode_decision_result;
}
823
824
static WORD32 iusace_classification_ccfl(ia_classification_struct *pstr_sig_class,
825
                                         FLOAT32 *ptr_time_signal,
826
123k
                                         iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
827
123k
  WORD32 i;
828
123k
  ia_tonal_params_struct pstr_ton_params;
829
123k
  ia_smooth_params_struct smooth_param;
830
123k
  ia_mode_params_struct pstr_mode_params;
831
123k
  ia_spec_tilt_params_struct ptr_spec_params;
832
833
123k
  ia_classification_buf_struct *pstr_buffers = &(pstr_sig_class->buffers);
834
123k
  pFLOAT32 spec_tilt_buf = pstr_sig_class->spec_tilt_buf;
835
123k
  pWORD32 n_tonal = pstr_sig_class->n_tonal;
836
123k
  pWORD32 n_tonal_low_frequency = pstr_sig_class->n_tonal_low_frequency;
837
123k
  pWORD32 framecnt_xm = &(pstr_sig_class->framecnt_xm);
838
123k
  pWORD32 framecnt = &(pstr_sig_class->framecnt);
839
123k
  pFLOAT32 ave_n_tonal_short_buf = pstr_sig_class->ave_n_tonal_short_buf;
840
123k
  pFLOAT32 ave_n_tonal_buf = pstr_sig_class->ave_n_tonal_buf;
841
123k
  pFLOAT32 msd_spec_tilt_buf = pstr_sig_class->msd_spec_tilt_buf;
842
123k
  pFLOAT32 msd_spec_tilt_short_buf = pstr_sig_class->msd_spec_tilt_short_buf;
843
844
123k
  FLOAT32 n_tonal_low_frequency_ratio;    /* the ratio of distribution of the numbers */
845
                                          /* of tonal in the low frequency domain     */
846
123k
  FLOAT32 ave_n_tonal, ave_n_tonal_short; /**< the number of tonal */
847
123k
  FLOAT32 msd_spec_tilt;                  /* the long-term MSD of spectral tilt */
848
123k
  FLOAT32 msd_spec_tilt_short;            /* the short-term MSD of spectral tilt */
849
850
123k
  WORD32 init_mode_decision_result; /* the initial mode decision */
851
123k
  WORD32 flag_border = NO_BORDER;   /* flag of current border */
852
853
123k
  WORD32 mode_decision_result; /* final mode decision result */
854
855
123k
  if (pstr_sig_class->init_flag == 0) {
856
    /* initialize */
857
1.53k
    pstr_sig_class->init_flag = 1;
858
859
9.22k
    for (i = 0; i < 5; i++) {
860
7.68k
      n_tonal[i] = 0;
861
7.68k
      n_tonal_low_frequency[i] = 0;
862
7.68k
      spec_tilt_buf[i] = 0;
863
7.68k
      pstr_buffers->init_result_behind[i] = TBD;
864
7.68k
      pstr_buffers->smoothing_result_buf[i] = TBD;
865
866
7.68k
      ave_n_tonal_short_buf[i] = 0;
867
7.68k
      ave_n_tonal_buf[i] = 0;
868
7.68k
      msd_spec_tilt_buf[i] = 0;
869
7.68k
      msd_spec_tilt_short_buf[i] = 0;
870
871
7.68k
      pstr_buffers->frame_energy_buf_behind[i] = 0;
872
7.68k
      pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
873
7.68k
    }
874
9.22k
    for (; i < 10; i++) {
875
7.68k
      n_tonal[i] = 0;
876
7.68k
      n_tonal_low_frequency[i] = 0;
877
7.68k
      spec_tilt_buf[i] = 0;
878
7.68k
      pstr_buffers->init_result_behind[i] = TBD;
879
7.68k
      pstr_buffers->smoothing_result_buf[i] = TBD;
880
881
7.68k
      pstr_buffers->frame_energy_buf_behind[i] = 0;
882
7.68k
      pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
883
7.68k
    }
884
885
139k
    for (; i < 100; i++) {
886
138k
      n_tonal[i] = 0;
887
138k
      n_tonal_low_frequency[i] = 0;
888
138k
      spec_tilt_buf[i] = 0;
889
138k
      pstr_buffers->init_result_behind[i] = TBD;
890
138k
      pstr_buffers->smoothing_result_buf[i] = TBD;
891
138k
    }
892
3.07k
    for (i = 0; i < NFRAMEAHEAD; i++) {
893
1.53k
      pstr_buffers->frame_energy_buf_ahead[i] = 0;
894
1.53k
      pstr_buffers->flag_border_buf_ahead[i] = NO_BORDER;
895
1.53k
      pstr_buffers->init_result_ahead[i] = TBD;
896
1.53k
    }
897
1.53k
  }
898
899
123k
  *framecnt += 1;
900
123k
  *framecnt_xm += 1;
901
902
123k
  pstr_ton_params.time_signal = (FLOAT32 *)ptr_time_signal;
903
123k
  pstr_ton_params.framecnt_xm = *framecnt_xm;
904
123k
  pstr_ton_params.n_tonal = n_tonal;
905
123k
  pstr_ton_params.n_tonal_low_frequency = n_tonal_low_frequency;
906
123k
  pstr_ton_params.n_tonal_low_frequency_ratio = &n_tonal_low_frequency_ratio;
907
123k
  pstr_ton_params.ave_n_tonal = &ave_n_tonal;
908
123k
  pstr_ton_params.ave_n_tonal_short = &ave_n_tonal_short;
909
  /** analysis tonal
910
   */
911
123k
  iusace_tonal_analysis(&pstr_ton_params, pstr_scratch, ccfl);
912
913
123k
  ptr_spec_params.time_signal = ptr_time_signal;
914
123k
  ptr_spec_params.framecnt_xm = *framecnt_xm;
915
123k
  ptr_spec_params.spec_tilt_buf = spec_tilt_buf;
916
123k
  ptr_spec_params.msd_spec_tilt = &msd_spec_tilt;
917
123k
  ptr_spec_params.msd_spec_tilt_short = &msd_spec_tilt_short;
918
  /** analysis spectral tilt
919
   */
920
123k
  iusace_spectral_tilt_analysis(&ptr_spec_params, ccfl);
921
922
123k
  pstr_mode_params.framecnt = *framecnt;
923
123k
  pstr_mode_params.framecnt_xm = framecnt_xm;
924
123k
  pstr_mode_params.flag_border = &flag_border;
925
123k
  pstr_mode_params.ave_n_tonal_short = ave_n_tonal_short;
926
123k
  pstr_mode_params.ave_n_tonal = ave_n_tonal;
927
123k
  pstr_mode_params.ave_n_tonal_short_buf = ave_n_tonal_short_buf;
928
123k
  pstr_mode_params.ave_n_tonal_buf = ave_n_tonal_buf;
929
123k
  pstr_mode_params.msd_spec_tilt = msd_spec_tilt;
930
123k
  pstr_mode_params.msd_spec_tilt_short = msd_spec_tilt_short;
931
123k
  pstr_mode_params.msd_spec_tilt_buf = msd_spec_tilt_buf;
932
123k
  pstr_mode_params.msd_spec_tilt_short_buf = msd_spec_tilt_short_buf;
933
123k
  pstr_mode_params.n_tonal_low_frequency_ratio = n_tonal_low_frequency_ratio;
934
123k
  pstr_mode_params.frame_energy = ptr_spec_params.frame_energy;
935
  /** initial mode decision and boundary decisions
936
   */
937
123k
  init_mode_decision_result = iusace_init_mode_decision(&pstr_mode_params);
938
939
123k
  smooth_param.flag_border_buf_behind = pstr_buffers->flag_border_buf_behind;
940
123k
  smooth_param.flag_border_buf_ahead = pstr_buffers->flag_border_buf_ahead;
941
123k
  smooth_param.frame_energy = ptr_spec_params.frame_energy;
942
123k
  smooth_param.frame_energy_buf_behind = pstr_buffers->frame_energy_buf_behind;
943
123k
  smooth_param.frame_energy_buf_ahead = pstr_buffers->frame_energy_buf_ahead;
944
123k
  smooth_param.smoothing_result_buf = pstr_buffers->smoothing_result_buf;
945
123k
  smooth_param.init_result_ahead = pstr_buffers->init_result_ahead;
946
123k
  smooth_param.flag_border = flag_border;
947
123k
  smooth_param.init_result_behind = pstr_buffers->init_result_behind;
948
123k
  smooth_param.init_mode_decision_result = init_mode_decision_result;
949
123k
  smooth_param.flag_speech_definite = 0;
950
123k
  smooth_param.count_small_energy = 0;
951
123k
  smooth_param.flag_music_definite = 0;
952
123k
  smooth_param.num_smoothing = 0;
953
  /* smoothing */
954
123k
  mode_decision_result = iusace_smoothing_mode_decision(&smooth_param);
955
956
123k
  return mode_decision_result;
957
123k
}
958
959
VOID iusace_classification(ia_classification_struct *pstr_sig_class,
960
123k
                           iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
961
123k
  WORD32 n_frames, n_class, avg_cls, nf;
962
123k
  WORD32 i;
963
123k
  FLOAT32 *ptr_time_signal = pstr_scratch->p_time_signal;
964
123k
  WORD32 mode_decision_result;
965
966
123k
  n_frames = pstr_sig_class->n_buffer_samples / ccfl;
967
968
246k
  for (nf = 0; nf < n_frames; nf++) {
969
116M
    for (i = 0; i < ccfl; i++) {
970
116M
      ptr_time_signal[i] = pstr_sig_class->input_samples[ccfl * nf + i];
971
116M
    }
972
973
    /* classification of ccfl-frame */
974
123k
    mode_decision_result =
975
123k
        iusace_classification_ccfl(pstr_sig_class, ptr_time_signal, pstr_scratch, ccfl);
976
977
    /* coding mode decision of 1024-frame */
978
123k
    if ((mode_decision_result == MUSIC) || (mode_decision_result == MUSIC_DEFINITE)) {
979
24.1k
      pstr_sig_class->coding_mode = FD_MODE;
980
99.2k
    } else if ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE)) {
981
97.7k
      pstr_sig_class->coding_mode = TD_MODE;
982
97.7k
    }
983
984
123k
    pstr_sig_class->class_buf[pstr_sig_class->n_buf_class + nf] = pstr_sig_class->coding_mode;
985
123k
    pstr_sig_class->pre_mode = pstr_sig_class->coding_mode;
986
123k
  }
987
988
  /* merge ccfl-frame results */
989
123k
  pstr_sig_class->n_buf_class += n_frames;
990
123k
  n_class = (pstr_sig_class->n_class_frames > pstr_sig_class->n_buf_class)
991
123k
                ? pstr_sig_class->n_buf_class
992
123k
                : pstr_sig_class->n_class_frames;
993
123k
  {
994
123k
    WORD32 min_cls, max_cls;
995
996
123k
    min_cls = max_cls = pstr_sig_class->class_buf[0];
997
123k
    for (i = 1; i < n_class; i++) {
998
0
      if (pstr_sig_class->class_buf[i] > max_cls) {
999
0
        max_cls = pstr_sig_class->class_buf[i];
1000
0
      } else if (pstr_sig_class->class_buf[i] < min_cls) {
1001
0
        min_cls = pstr_sig_class->class_buf[i];
1002
0
      }
1003
0
    }
1004
1005
123k
    avg_cls = 0;
1006
246k
    for (i = 0; i < n_class; i++) {
1007
123k
      if (pstr_sig_class->class_buf[i] == max_cls) {
1008
123k
        avg_cls += 1;
1009
123k
      }
1010
123k
      if (pstr_sig_class->class_buf[i] == min_cls) {
1011
123k
        avg_cls += -1;
1012
123k
      }
1013
123k
    }
1014
1015
123k
    if (avg_cls > 0) {
1016
0
      pstr_sig_class->coding_mode = max_cls;
1017
123k
    } else {
1018
123k
      pstr_sig_class->coding_mode = min_cls;
1019
123k
    }
1020
123k
  }
1021
1022
  /* shift, save pre_mode and unused class */
1023
123k
  if (n_class > 0) {
1024
123k
    pstr_sig_class->pre_mode = pstr_sig_class->class_buf[n_class - 1];
1025
123k
  }
1026
123k
  pstr_sig_class->n_buf_class -= n_class;
1027
123k
  pstr_sig_class->n_buffer_samples -= ccfl * n_frames;
1028
1029
123k
  WORD32 minimum = MIN(pstr_sig_class->n_buf_class, pstr_sig_class->n_buffer_samples);
1030
123k
  if (minimum == pstr_sig_class->n_buf_class) {
1031
123k
    for (i = 0; i < minimum; i++) {
1032
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1033
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1034
0
    }
1035
1036
    /* shift, save unused samples */
1037
123k
    for (; i < pstr_sig_class->n_buffer_samples; i++) {
1038
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1039
0
    }
1040
123k
  } else {
1041
0
    for (i = 0; i < minimum; i++) {
1042
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1043
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1044
0
    }
1045
1046
    /* shift, save unused samples */
1047
0
    for (; i < pstr_sig_class->n_buf_class; i++) {
1048
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1049
0
    }
1050
0
  }
1051
123k
}
1052
1053
1.56k
/**
 * Brings the signal classifier state to its initial condition.
 *
 * Sets the previous mode to FD_MODE, empties the sample and class buffers,
 * configures two class frames per merge, enables mode switching, and clears
 * all frame counters and analysis histories. init_flag is cleared as well, so
 * the per-frame classifier runs its own buffer initialisation on the first
 * frame it sees.
 *
 * @param pstr_sig_class  classifier state to initialise
 */
VOID iusace_init_classification(ia_classification_struct *pstr_sig_class) {
  /* mode bookkeeping */
  pstr_sig_class->pre_mode = FD_MODE;
  pstr_sig_class->is_switch_mode = 1;
  pstr_sig_class->n_class_frames = 2;
  pstr_sig_class->n_buf_class = 0;

  /* frame counters and first-frame marker */
  pstr_sig_class->init_flag = 0;
  pstr_sig_class->framecnt = 0;
  pstr_sig_class->framecnt_xm = 0;

  /* buffered input: 3840 * 2 FLOAT32 entries are cleared */
  pstr_sig_class->n_buffer_samples = 0;
  memset(pstr_sig_class->input_samples, 0, 2 * 3840 * sizeof(FLOAT32));

  /* decision history */
  memset(&pstr_sig_class->buffers, 0, sizeof(pstr_sig_class->buffers));

  /* 100-entry analysis histories */
  memset(pstr_sig_class->spec_tilt_buf, 0, 100 * sizeof(pstr_sig_class->spec_tilt_buf[0]));
  memset(pstr_sig_class->n_tonal, 0, 100 * sizeof(pstr_sig_class->n_tonal[0]));
  memset(pstr_sig_class->n_tonal_low_frequency, 0,
         100 * sizeof(pstr_sig_class->n_tonal_low_frequency[0]));

  /* 5-entry averaged feature histories */
  memset(pstr_sig_class->msd_spec_tilt_buf, 0, 5 * sizeof(pstr_sig_class->msd_spec_tilt_buf[0]));
  memset(pstr_sig_class->msd_spec_tilt_short_buf, 0,
         5 * sizeof(pstr_sig_class->msd_spec_tilt_short_buf[0]));
  memset(pstr_sig_class->ave_n_tonal_short_buf, 0,
         5 * sizeof(pstr_sig_class->ave_n_tonal_short_buf[0]));
  memset(pstr_sig_class->ave_n_tonal_buf, 0, 5 * sizeof(pstr_sig_class->ave_n_tonal_buf[0]));
}