Coverage Report

Created: 2025-12-10 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxaac/encoder/ixheaace_signal_classifier.c
Line
Count
Source
1
/******************************************************************************
2
 *                                                                            *
3
 * Copyright (C) 2023 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
#include <string.h>
21
#include <math.h>
22
#include "iusace_type_def.h"
23
#include "iusace_cnst.h"
24
25
#include "iusace_fd_quant.h"
26
#include "iusace_bitbuffer.h"
27
#include "impd_drc_common_enc.h"
28
#include "impd_drc_uni_drc.h"
29
#include "impd_drc_api.h"
30
#include "impd_drc_uni_drc_eq.h"
31
#include "impd_drc_uni_drc_filter_bank.h"
32
#include "impd_drc_gain_enc.h"
33
#include "impd_drc_struct_def.h"
34
35
#include "ixheaace_memory_standards.h"
36
#include "iusace_tns_usac.h"
37
#include "iusace_psy_mod.h"
38
#include "iusace_config.h"
39
#include "iusace_signal_classifier.h"
40
#include "iusace_fft.h"
41
#include "iusace_block_switch_const.h"
42
#include "iusace_block_switch_struct_def.h"
43
#include "iusace_cnst.h"
44
#include "iusace_ms.h"
45
#include "ixheaace_adjust_threshold_data.h"
46
#include "iusace_fd_qc_util.h"
47
#include "ixheaace_sbr_header.h"
48
#include "ixheaace_config.h"
49
#include "ixheaace_asc_write.h"
50
#include "iusace_main.h"
51
52
104k
/**
 * Converts an interleaved complex FFT buffer into a normalized power density
 * spectrum (PDS), in place.
 *
 * For every bin k in [0, ccfl / 2) the value
 *   10 * (log10(re^2 + im^2) - log_ccfl_base_10) + 10e-15, floored at MIN_POW,
 * is written to ptr_input[2 * k] (the slot that held the real part). All bins
 * are then shifted by a common offset so that the strongest bin equals 96.
 * The odd (imaginary) slots are not modified.
 *
 * @param ptr_input interleaved re/im data; read and overwritten as described
 * @param ccfl      frame length; 1024 selects LOG_1024_BASE_10, any other
 *                  value selects LOG_768_BASE_10
 */
static VOID iusace_calc_pds(FLOAT32 *ptr_input, WORD32 ccfl) {
  WORD32 i;
  FLOAT64 pow_db, max_pow, delta;
  FLOAT64 log_ccfl_base_10 = (ccfl == 1024) ? LOG_1024_BASE_10 : LOG_768_BASE_10;

  /* Bin 0 seeds the running maximum. Evaluate the log expression once and
   * clamp afterwards instead of expanding it twice inside MAX(). */
  pow_db =
      10 * (log10(ptr_input[0] * ptr_input[0] + ptr_input[1] * ptr_input[1]) - log_ccfl_base_10) +
      10e-15;
  max_pow = MAX(pow_db, MIN_POW);

  /* Fix: bin 0 took part in the maximum search but its PDS value was never
   * written back, so ptr_input[0] kept the raw FFT real part and only received
   * the normalization offset below. Store it like every other bin. */
  ptr_input[0] = (FLOAT32)max_pow;

  for (i = 1; i < (ccfl >> 1); i++) {
    /* removed the sqrt along with clubbing the for loops */
    pow_db = 10 * (log10(ptr_input[2 * i] * ptr_input[2 * i] +
                         ptr_input[2 * i + 1] * ptr_input[2 * i + 1]) -
                   log_ccfl_base_10) +
             10e-15;
    ptr_input[2 * i] = (FLOAT32)MAX(pow_db, MIN_POW);

    max_pow = MAX(max_pow, ptr_input[2 * i]);
  }

  /* Normalized to reference sound pressure level 96 dB */
  delta = 96 - max_pow;

  for (i = 0; i < (ccfl >> 1); i++) {
    ptr_input[2 * i] = ptr_input[2 * i] + (FLOAT32)delta;
  }
  return;
}
81
82
/**
 * Marks tonal components in a power density spectrum.
 *
 * The even slots of ptr_input (one value per bin) are copied to ptr_scratch.
 * A bin is flagged when it is a local maximum (strictly above its left
 * neighbour, not below its right neighbour) and exceeds every bin at distance
 * 2..reach on both sides by at least 7. The reach depends on the band the bin
 * falls into. Flagged bins whose combined three-bin level is below the
 * absolute threshold table entry (raised by 20 from a fixed bin upwards) are
 * cleared again.
 *
 * @param ptr_input      spectrum with one value at every even index
 * @param ptr_tonal_flag output flags, 512 entries written when
 *                       ccfl == FRAME_LEN_LONG, otherwise 384
 * @param ptr_scratch    work buffer receiving ccfl / 2 spectrum values
 * @param ccfl           frame length
 */
static VOID iusace_find_tonal(FLOAT32 *ptr_input, WORD32 *ptr_tonal_flag, FLOAT32 *ptr_scratch,
                              WORD32 ccfl) {
  /* Per band: {first bin, one past last bin, neighbourhood reach}. */
  static const WORD32 bands_long[4][3] = {
      {2, 62, 2}, {62, 126, 3}, {126, 254, 6}, {254, 500, 12}};
  static const WORD32 bands_other[4][3] = {
      {2, 47, 2}, {47, 95, 3}, {95, 194, 5}, {194, 375, 9}};

  const WORD32 is_long = (ccfl == FRAME_LEN_LONG);
  const WORD32(*bands)[3] = is_long ? bands_long : bands_other;
  const WORD32 num_bins = is_long ? 512 : 384;
  WORD32 bin, band, ofs;

  for (bin = 0; bin < (ccfl >> 1); bin++) {
    ptr_scratch[bin] = ptr_input[2 * bin];
  }

  memset(ptr_tonal_flag, 0, (size_t)num_bins * sizeof(*ptr_tonal_flag));

  /* Peak picking, band by band in ascending bin order. */
  for (band = 0; band < 4; band++) {
    const WORD32 reach = bands[band][2];

    for (bin = bands[band][0]; bin < bands[band][1]; bin++) {
      const FLOAT32 centre = ptr_scratch[bin];
      WORD32 is_peak;

      if (!(centre > ptr_scratch[bin - 1] && centre >= ptr_scratch[bin + 1])) {
        continue;
      }

      /* Verify centre - neighbour >= 7 for every neighbour at distance 2..reach */
      is_peak = 1;
      for (ofs = 2; is_peak && ofs <= reach; ofs++) {
        is_peak = (centre - ptr_scratch[bin - ofs] >= 7) && (centre - ptr_scratch[bin + ofs] >= 7);
      }

      if (is_peak) {
        ptr_tonal_flag[bin] = 1;
      }
    }
  }

  /* Drop candidates that stay below the absolute threshold. */
  for (bin = 0; bin < num_bins; bin++) {
    FLOAT64 tonal_spl;
    FLOAT64 absolute_threshold_xm;

    if (ptr_tonal_flag[bin] != 1) {
      continue;
    }

    /* compute the SPL of tonal */
    tonal_spl =
        10 * log10(pow(10, (ptr_scratch[bin - 1] / 10)) + pow(10, (ptr_scratch[bin] / 10)) +
                   pow(10, (ptr_scratch[bin + 1] / 10)));

    if (is_long) {
      if (bin >= 324) {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[bin] + 20;
      } else {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[bin];
      }
    } else {
      if (bin >= 243) {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[bin] + 20;
      } else {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[bin];
      }
    }

    if (tonal_spl < absolute_threshold_xm) {
      ptr_tonal_flag[bin] = 0;
    }
  }
  return;
}
293
294
static VOID iusace_tonal_analysis(ia_tonal_params_struct *pstr_ton_params,
295
104k
                                  iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
296
104k
  FLOAT32 *ptr_complex_fft = pstr_scratch->p_complex_fft;
297
104k
  WORD32 *ptr_tonal_flag = pstr_scratch->p_tonal_flag;
298
104k
  FLOAT32 *ptr_time_sig = pstr_ton_params->time_signal;
299
104k
  WORD32 framecnt_xm = pstr_ton_params->framecnt_xm;
300
104k
  WORD32 *ptr_n_tonal = pstr_ton_params->n_tonal;
301
104k
  WORD32 *ptr_n_tonal_low_frequency = pstr_ton_params->n_tonal_low_frequency;
302
104k
  FLOAT32 *ptr_n_tonal_low_frequency_ratio = pstr_ton_params->n_tonal_low_frequency_ratio;
303
104k
  FLOAT32 *ave_n_tonal = pstr_ton_params->ave_n_tonal;
304
104k
  FLOAT32 *ave_n_tonal_short = pstr_ton_params->ave_n_tonal_short;
305
104k
  WORD32 i;
306
104k
  WORD32 fft_size = ccfl;
307
308
104k
  WORD32 frame_length;
309
104k
  WORD32 n_tonal_total, n_tonal_low_frequency_total;
310
311
97.3M
  for (i = 0; i < ccfl; i++) {
312
97.1M
    ptr_complex_fft[2 * i] = (FLOAT32)(
313
97.1M
        ptr_time_sig[i] * ((ccfl == 1024) ? iusace_classify_arrays.hanning_window_1024[i]
314
97.1M
                                          : iusace_classify_arrays.hanning_window_768[i]));
315
97.1M
    ptr_complex_fft[2 * i + 1] = 0;
316
97.1M
  }
317
318
104k
  iusace_complex_fft(ptr_complex_fft, fft_size, pstr_scratch);
319
320
  /* compute power density spectrum */
321
  /* re_fft contains the resulting pds */
322
104k
  iusace_calc_pds(ptr_complex_fft, ccfl);
323
324
  /* detect tonal */
325
104k
  iusace_find_tonal(ptr_complex_fft, ptr_tonal_flag, pstr_scratch->p_pow_spec, ccfl);
326
327
  /* update n_tonal, n_tonal_low_frequency */
328
10.4M
  for (i = 0; i < 99; i++) {
329
10.3M
    ptr_n_tonal[i] = ptr_n_tonal[i + 1];
330
10.3M
    ptr_n_tonal_low_frequency[i] = ptr_n_tonal_low_frequency[i + 1];
331
10.3M
  }
332
104k
  ptr_n_tonal[99] = 0;
333
48.7M
  for (i = 0; i<ccfl>> 1; i++) {
334
48.5M
    ptr_n_tonal[99] += ptr_tonal_flag[i];
335
48.5M
  }
336
104k
  ptr_n_tonal_low_frequency[99] = 0;
337
16.8M
  for (i = 0; i < INDEXOFLOWFREQUENCY; i++) {
338
16.7M
    ptr_n_tonal_low_frequency[99] += ptr_tonal_flag[i];
339
16.7M
  }
340
341
  /* compute long-term AVE and the ratio of distribution in low-frequency domain */
342
104k
  if (framecnt_xm < AVE_TONAL_LENGTH) {
343
61.8k
    frame_length = framecnt_xm;
344
61.8k
  } else {
345
42.4k
    frame_length = AVE_TONAL_LENGTH;
346
42.4k
  }
347
348
104k
  n_tonal_total = 0;
349
104k
  n_tonal_low_frequency_total = 0;
350
6.67M
  for (i = 0; i < frame_length; i++) {
351
6.57M
    n_tonal_total += ptr_n_tonal[99 - i];
352
6.57M
    n_tonal_low_frequency_total += ptr_n_tonal_low_frequency[99 - i];
353
6.57M
  }
354
355
104k
  *ave_n_tonal = (FLOAT32)n_tonal_total / frame_length;
356
357
104k
  if (n_tonal_total == 0) {
358
21.1k
    *ptr_n_tonal_low_frequency_ratio = 1;
359
83.1k
  } else {
360
83.1k
    *ptr_n_tonal_low_frequency_ratio = (FLOAT32)n_tonal_low_frequency_total / n_tonal_total;
361
83.1k
  }
362
363
  /* compute the short-term AVE */
364
104k
  if (framecnt_xm < AVE_TONAL_LENGTH_SHORT) {
365
10.6k
    frame_length = framecnt_xm;
366
93.7k
  } else {
367
93.7k
    frame_length = AVE_TONAL_LENGTH_SHORT;
368
93.7k
  }
369
370
104k
  n_tonal_total = 0;
371
1.09M
  for (i = 0; i < frame_length; i++) {
372
988k
    n_tonal_total += ptr_n_tonal[99 - i];
373
988k
  }
374
375
104k
  *ave_n_tonal_short = (FLOAT32)n_tonal_total / frame_length;
376
104k
  return;
377
104k
}
378
379
static VOID iusace_spectral_tilt_analysis(ia_spec_tilt_params_struct *ptr_spec_params,
380
104k
                                          WORD32 ccfl) {
381
104k
  FLOAT32 *ptr_time_signal = ptr_spec_params->time_signal;
382
104k
  WORD32 framecnt_xm = ptr_spec_params->framecnt_xm;
383
104k
  FLOAT32 *ptr_spec_tilt_buf = ptr_spec_params->spec_tilt_buf;
384
104k
  FLOAT32 *ptr_msd_spec_tilt = ptr_spec_params->msd_spec_tilt;
385
104k
  FLOAT32 *ptr_msd_spec_tilt_short = ptr_spec_params->msd_spec_tilt_short;
386
104k
  WORD32 i;
387
104k
  WORD32 frame_length;
388
389
104k
  FLOAT32 r0, r1;
390
104k
  FLOAT32 spec_tilt;
391
104k
  FLOAT32 ave_spec_tilt;
392
393
  /* compute spectral tilt */
394
104k
  r0 = 0;
395
104k
  r1 = 0;
396
97.1M
  for (i = 0; i < ccfl - 1; i++) {
397
97.0M
    r0 += ptr_time_signal[i] * ptr_time_signal[i];
398
97.0M
    r1 += ptr_time_signal[i] * ptr_time_signal[i + 1];
399
97.0M
  }
400
104k
  r0 += ptr_time_signal[i] * ptr_time_signal[i];
401
402
104k
  if (r0 == 0) {
403
3.83k
    spec_tilt = 1.0f;
404
100k
  } else {
405
100k
    spec_tilt = r1 / r0;
406
100k
  }
407
408
  /* update spec_tilt_buf */
409
10.4M
  for (i = 0; i < 100 - 1; i++) {
410
10.3M
    ptr_spec_tilt_buf[i] = ptr_spec_tilt_buf[i + 1];
411
10.3M
  }
412
104k
  ptr_spec_tilt_buf[99] = spec_tilt;
413
414
  /* compute the long-term mean square deviation of the spectral tilt */
415
104k
  if (framecnt_xm < SPECTRAL_TILT_LENGTH) {
416
55.4k
    frame_length = framecnt_xm;
417
55.4k
  } else {
418
48.9k
    frame_length = SPECTRAL_TILT_LENGTH;
419
48.9k
  }
420
421
104k
  ave_spec_tilt = 0;
422
5.76M
  for (i = 0; i < frame_length; i++) {
423
5.66M
    ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
424
5.66M
  }
425
104k
  ave_spec_tilt /= frame_length;
426
427
104k
  *ptr_msd_spec_tilt = 0;
428
5.76M
  for (i = 0; i < frame_length; i++) {
429
5.66M
    *ptr_msd_spec_tilt +=
430
5.66M
        (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
431
5.66M
  }
432
104k
  *ptr_msd_spec_tilt /= frame_length;
433
434
  /* compute the short-term mean square deviation of the spectral tilt */
435
104k
  if (framecnt_xm < SPECTRAL_TILT_LENGTH_SHORT) {
436
20.8k
    frame_length = framecnt_xm;
437
83.5k
  } else {
438
83.5k
    frame_length = SPECTRAL_TILT_LENGTH_SHORT;
439
83.5k
  }
440
441
104k
  ave_spec_tilt = 0;
442
1.97M
  for (i = 0; i < frame_length; i++) {
443
1.86M
    ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
444
1.86M
  }
445
104k
  ave_spec_tilt /= frame_length;
446
447
104k
  *ptr_msd_spec_tilt_short = 0;
448
1.97M
  for (i = 0; i < frame_length; i++) {
449
1.86M
    *ptr_msd_spec_tilt_short +=
450
1.86M
        (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
451
1.86M
  }
452
104k
  *ptr_msd_spec_tilt_short /= frame_length;
453
454
  /* compute the energy of current frame */
455
104k
  if (r0 <= 1) {
456
4.94k
    ptr_spec_params->frame_energy = 0;
457
99.4k
  } else {
458
99.4k
    ptr_spec_params->frame_energy = (FLOAT32)(10 * log(r0) / log(10));
459
99.4k
  }
460
104k
  return;
461
104k
}
462
463
104k
/**
 * Produces the initial speech/music decision for one frame and detects
 * speech<->music borders.
 *
 * The function shifts the 5-entry histories of the tonal averages and of the
 * spectral-tilt MSDs, appends the current values, evaluates the border rules
 * and then walks a fixed priority list of threshold tests until one of them
 * assigns a mode. When a border is detected, *flag_border is set and
 * *framecnt_xm is reset to 10.
 *
 * @param pstr_mode_params current features, histories and in/out pointers
 * @return one of MUSIC_DEFINITE, SPEECH_DEFINITE, MUSIC or SPEECH
 *
 * NOTE(review): every counter below and border_state are automatic variables
 * that start at 0 on each call, so the ">= 15", ">= 100" and ">= 400" tests
 * cannot succeed within a single call. They look like state that was meant to
 * persist across frames - confirm before relying on those border rules.
 */
static WORD32 iusace_init_mode_decision(ia_mode_params_struct *pstr_mode_params) {
  const WORD32 framecnt = pstr_mode_params->framecnt;
  WORD32 *framecnt_xm = pstr_mode_params->framecnt_xm;
  WORD32 *flag_border = pstr_mode_params->flag_border;
  const FLOAT32 tonal_short = pstr_mode_params->ave_n_tonal_short;
  const FLOAT32 tonal_long = pstr_mode_params->ave_n_tonal;
  FLOAT32 *tonal_short_hist = pstr_mode_params->ave_n_tonal_short_buf;
  FLOAT32 *tonal_long_hist = pstr_mode_params->ave_n_tonal_buf;
  const FLOAT32 tilt_long = pstr_mode_params->msd_spec_tilt;
  const FLOAT32 tilt_short = pstr_mode_params->msd_spec_tilt_short;
  FLOAT32 *tilt_long_hist = pstr_mode_params->msd_spec_tilt_buf;
  FLOAT32 *tilt_short_hist = pstr_mode_params->msd_spec_tilt_short_buf;
  const FLOAT32 low_freq_ratio = pstr_mode_params->n_tonal_low_frequency_ratio;
  const FLOAT32 energy = pstr_mode_params->frame_energy;

  WORD32 result;
  WORD32 cnt_strict_sm = 0;  /* strict speech->music condition   */
  WORD32 cnt_loose_sm = 0;   /* relaxed speech->music condition  */
  WORD32 cnt_ms = 0;         /* music->speech condition          */
  WORD32 cnt_music = 0;      /* low tilt MSD while not speech    */
  WORD32 cnt_loud = 0;       /* frames with energy above 60      */
  WORD32 tonal_drop_seen = 0;
  WORD32 border_state = 0;
  WORD32 definite_border;

  *flag_border = NO_BORDER;

  /* ---- border decision based on the spectral tilt ---- */

  /* slide the tilt MSD histories and append the current values */
  memmove(tilt_long_hist, tilt_long_hist + 1, (5 - 1) * sizeof(*tilt_long_hist));
  memmove(tilt_short_hist, tilt_short_hist + 1, (5 - 1) * sizeof(*tilt_short_hist));
  tilt_long_hist[4] = tilt_long;
  tilt_short_hist[4] = tilt_short;

  /* speech->music, strict border */
  cnt_strict_sm = ((tilt_long >= 0.014) && (tilt_short <= 0.000005)) ? cnt_strict_sm + 1 : 0;

  definite_border = (*flag_border == BORDER_SPEECH_MUSIC_DEFINITE) ||
                    (*flag_border == BORDER_MUSIC_SPEECH_DEFINITE);
  if (!definite_border && (border_state != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (cnt_strict_sm >= 15) && (*framecnt_xm >= 300)) {
    *framecnt_xm = 10;
    *flag_border = BORDER_SPEECH_MUSIC;
  }

  /* speech->music, relatively loose border */
  cnt_loose_sm = ((tilt_long >= 0.0025) && (tilt_short <= 0.000003)) ? cnt_loose_sm + 1 : 0;

  definite_border = (*flag_border == BORDER_SPEECH_MUSIC_DEFINITE) ||
                    (*flag_border == BORDER_MUSIC_SPEECH_DEFINITE);
  if (!definite_border && (border_state != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (cnt_loose_sm >= 15) && (*framecnt_xm >= 300)) {
    *framecnt_xm = 10;
    *flag_border = BORDER_SPEECH_MUSIC;
  }

  /* music->speech */
  if ((tilt_long_hist[0] <= 0.0003) && (tilt_short_hist[0] <= 0.0002)) {
    cnt_ms++;
  }

  definite_border = (*flag_border == BORDER_SPEECH_MUSIC_DEFINITE) ||
                    (*flag_border == BORDER_MUSIC_SPEECH_DEFINITE);
  if (!definite_border && (border_state != BORDER_MUSIC_SPEECH_DEFINITE) && (cnt_ms >= 100) &&
      (tilt_long >= 0.0008) && (tilt_short >= 0.0025) && (*framecnt_xm >= 20)) {
    *framecnt_xm = 10;
    *flag_border = BORDER_MUSIC_SPEECH;
  }

  /* ---- border decision based on the tonal averages ---- */

  /* slide the tonal histories and append the current values */
  memmove(tonal_short_hist, tonal_short_hist + 1, (5 - 1) * sizeof(*tonal_short_hist));
  memmove(tonal_long_hist, tonal_long_hist + 1, (5 - 1) * sizeof(*tonal_long_hist));
  tonal_short_hist[4] = tonal_short;
  tonal_long_hist[4] = tonal_long;

  /* music->speech */
  if ((tonal_long_hist[0] >= 12) && (tonal_long_hist[0] < 15) &&
      (tonal_long_hist[0] - tonal_short_hist[0] >= 5) && (*framecnt_xm >= 20) &&
      (tonal_short - tonal_short_hist[0] < 5)) {
    *framecnt_xm = 10;
    tonal_drop_seen = 1;
    *flag_border = BORDER_MUSIC_SPEECH_DEFINITE;
  }

  /* ---- border update based on the frame energy ---- */
  cnt_loud = (energy <= 60) ? 0 : cnt_loud + 1;

  if ((*flag_border == BORDER_MUSIC_SPEECH) && (cnt_loud <= 5)) {
    *flag_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *framecnt_xm = 10;
  }

  /* ---- mode decision: the first matching rule wins ---- */
  if (tilt_short >= 0.02) {
    /* a large short-term tilt MSD overrides the short-term tonal rules */
    result = SPEECH_DEFINITE;
  } else if (tonal_short >= 19) {
    result = MUSIC_DEFINITE;
  } else if (tonal_short <= 1.5) {
    result = SPEECH_DEFINITE;
  } else if ((tilt_short <= 0.00000025) && (framecnt >= 10)) {
    result = MUSIC_DEFINITE;
  } else if ((tonal_drop_seen == 1) && (tonal_short <= 12) && (*framecnt_xm <= 150)) {
    /* tonal-based music->speech border seen in this frame */
    result = SPEECH;
  } else if (tonal_long <= 3) {
    result = SPEECH_DEFINITE;
  } else if (tonal_long >= 15) {
    result = MUSIC_DEFINITE;
  } else if (tonal_short >= 17) {
    result = MUSIC_DEFINITE;
  } else if (tilt_long >= 0.01) {
    result = SPEECH_DEFINITE;
  } else if ((framecnt >= 10) && (tilt_long <= 0.00004)) {
    result = MUSIC_DEFINITE;
  } else if (low_freq_ratio <= 0.91) {
    result = MUSIC_DEFINITE;
  } else if ((tilt_long <= 0.0002) && (*framecnt_xm >= 15)) {
    result = MUSIC;
  } else if (low_freq_ratio >= 0.95) {
    result = SPEECH;
  } else if (low_freq_ratio <= 0.935) {
    result = MUSIC;
  } else {
    /* everything that is still undecided is treated as speech */
    result = SPEECH;
  }

  /* ---- MUSIC decision from the evolution of the tilt MSD ---- */
  if ((tilt_long <= 0.007) && (result != SPEECH_DEFINITE)) {
    if (result != SPEECH) {
      cnt_music++;
    }
  } else {
    cnt_music = 0;
  }

  if ((result != SPEECH_DEFINITE) && (cnt_music >= 400) &&
      (border_state != BORDER_MUSIC_SPEECH_DEFINITE)) {
    result = MUSIC;
  }

  /* ---- border flag update ---- */
  if (*flag_border != NO_BORDER) {
    border_state = *flag_border;
  }

  /* promote to BORDER_SPEECH_MUSIC_DEFINITE */
  if (((border_state == BORDER_MUSIC_SPEECH) || (border_state == BORDER_MUSIC_SPEECH_DEFINITE)) &&
      (result == MUSIC_DEFINITE) && (*framecnt_xm >= 20)) {
    *flag_border = BORDER_SPEECH_MUSIC_DEFINITE;
    *framecnt_xm = 10;
    border_state = *flag_border;
  }

  /* promote to BORDER_MUSIC_SPEECH_DEFINITE */
  if (((border_state == BORDER_SPEECH_MUSIC) || (border_state == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (result == SPEECH_DEFINITE) && (*framecnt_xm >= 20)) {
    *flag_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *framecnt_xm = 10;
  }

  return result;
}
695
696
104k
/**
 * Smooths the initial per-frame mode decision using past results, border
 * flags and frame energies.
 *
 * The function shifts the "behind" histories (100 decisions, 10 border flags,
 * 10 energies), moves the oldest look-ahead entry into them and stores the
 * current frame in the last look-ahead slot. The decision returned is the one
 * that just entered the "behind" history, optionally replaced by a majority
 * vote over the last SMOOTHING_LENGTH smoothed results and then corrected by
 * the low-energy and border rules. The smoothing counters and the
 * flag_speech_definite / flag_music_definite members of pstr_smooth_param are
 * updated.
 *
 * @param pstr_smooth_param smoothing state and the current frame's inputs
 * @return the smoothed mode decision
 */
static WORD32 iusace_smoothing_mode_decision(ia_smooth_params_struct *pstr_smooth_param) {
  WORD32 *ptr_init_result_ahead = pstr_smooth_param->init_result_ahead;
  WORD32 flag_border = pstr_smooth_param->flag_border;
  WORD32 *ptr_flag_border_buf_behind = pstr_smooth_param->flag_border_buf_behind;
  WORD32 *ptr_flag_border_buf_ahead = pstr_smooth_param->flag_border_buf_ahead;
  FLOAT32 frame_energy = pstr_smooth_param->frame_energy;
  FLOAT32 *ptr_frame_energy_buf_behind = pstr_smooth_param->frame_energy_buf_behind;
  FLOAT32 *ptr_frame_energy_buf_ahead = pstr_smooth_param->frame_energy_buf_ahead;
  WORD32 *ptr_smoothing_result_buf = pstr_smooth_param->smoothing_result_buf;
  WORD32 *ptr_init_result_behind = pstr_smooth_param->init_result_behind;
  WORD32 init_mode_decision_result = pstr_smooth_param->init_mode_decision_result;
  WORD32 i;

  WORD32 mode_decision_result;

  WORD32 num_music, num_speech;

  /** update init_result_behind, init_result_ahead
   */
  for (i = 0; i < 99; i++) {
    ptr_init_result_behind[i] = ptr_init_result_behind[i + 1];
  }
  ptr_init_result_behind[99] = ptr_init_result_ahead[0];

  ptr_init_result_ahead[NFRAMEAHEAD - 1] = init_mode_decision_result;

  /** update flag_border_buf_behind, flag_border_buf_ahead
   * update frame_energy_buf_behind, frame_energy_buf_ahead
   */
  for (i = 0; i < 9; i++) {
    ptr_flag_border_buf_behind[i] = ptr_flag_border_buf_behind[i + 1];
    ptr_frame_energy_buf_behind[i] = ptr_frame_energy_buf_behind[i + 1];
  }
  ptr_flag_border_buf_behind[9] = ptr_flag_border_buf_ahead[0];
  ptr_frame_energy_buf_behind[9] = ptr_frame_energy_buf_ahead[0];

  ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] = flag_border;

  ptr_frame_energy_buf_ahead[NFRAMEAHEAD - 1] = frame_energy;

  /** smoothing according to past results
   */
  mode_decision_result = ptr_init_result_behind[99];

  /** update smoothing_result_buf: slide it while no border is present,
   * otherwise restart the smoothing history
   */
  if (ptr_flag_border_buf_behind[9] == NO_BORDER) {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = ptr_smoothing_result_buf[i + 1];
    }
    pstr_smooth_param->num_smoothing++;
  } else {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = TBD;
    }
    pstr_smooth_param->num_smoothing = 1;
  }
  ptr_smoothing_result_buf[99] = ptr_init_result_behind[99];

  if (pstr_smooth_param->num_smoothing >= SMOOTHING_LENGTH) {
    num_music = 0;
    num_speech = 0;

    /** smoothed result count
     *
     * Fix: the newest entry lives at index 99 and this function only ever
     * maintains indices 0..99, so the vote walks backwards from 99. The
     * previous "100 - i" started at index 100, beyond the maintained range,
     * and never looked at index 0.
     * NOTE(review): assumes SMOOTHING_LENGTH <= 100 so that 99 - i stays
     * non-negative - confirm against the macro definition.
     */
    for (i = 0; i < SMOOTHING_LENGTH; i++) {
      if ((ptr_smoothing_result_buf[99 - i] == SPEECH) ||
          (ptr_smoothing_result_buf[99 - i] == SPEECH_DEFINITE)) {
        num_speech++;
      } else {
        num_music++;
      }
    }

    /** smoothing: the majority wins unless the current initial decision is a
     * definite vote for the other class
     */
    if ((num_speech > num_music) && (init_mode_decision_result != MUSIC_DEFINITE)) {
      mode_decision_result = SPEECH;
    }
    if ((num_music > num_speech) && (init_mode_decision_result != SPEECH_DEFINITE)) {
      mode_decision_result = MUSIC;
    }
  }

  /** correct according to energies and ahead mode decision results
   */
  if ((mode_decision_result == MUSIC) && (ptr_frame_energy_buf_behind[9] <= 60)) {
    for (i = 0; i < NFRAMEAHEAD; i++) {
      if ((ptr_init_result_ahead[i] == SPEECH_DEFINITE) || (ptr_init_result_ahead[i] == SPEECH)) {
        pstr_smooth_param->flag_speech_definite = 1;
      }
    }
  }
  if ((pstr_smooth_param->flag_speech_definite == 1) && (mode_decision_result == MUSIC)) {
    mode_decision_result = SPEECH;
  } else {
    pstr_smooth_param->flag_speech_definite = 0;
  }

  /** correct MUSIC mode
   */
  if (ptr_frame_energy_buf_behind[9] <= 65) {
    pstr_smooth_param->count_small_energy = 0;
  } else {
    pstr_smooth_param->count_small_energy++;
  }
  if (((ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC) ||
       (ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (pstr_smooth_param->count_small_energy <= 30)) {
    pstr_smooth_param->flag_music_definite = 1;
  }
  if ((pstr_smooth_param->flag_music_definite == 1) &&
      ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE))) {
    mode_decision_result = MUSIC;
  } else {
    pstr_smooth_param->flag_music_definite = 0;
  }

  return mode_decision_result;
}
823
824
static WORD32 iusace_classification_ccfl(ia_classification_struct *pstr_sig_class,
825
                                         FLOAT32 *ptr_time_signal,
826
104k
                                         iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
827
104k
  WORD32 i;
828
104k
  ia_tonal_params_struct pstr_ton_params;
829
104k
  ia_smooth_params_struct smooth_param;
830
104k
  ia_mode_params_struct pstr_mode_params;
831
104k
  ia_spec_tilt_params_struct ptr_spec_params;
832
833
104k
  ia_classification_buf_struct *pstr_buffers = &(pstr_sig_class->buffers);
834
104k
  pFLOAT32 spec_tilt_buf = pstr_sig_class->spec_tilt_buf;
835
104k
  pWORD32 n_tonal = pstr_sig_class->n_tonal;
836
104k
  pWORD32 n_tonal_low_frequency = pstr_sig_class->n_tonal_low_frequency;
837
104k
  pWORD32 framecnt_xm = &(pstr_sig_class->framecnt_xm);
838
104k
  pWORD32 framecnt = &(pstr_sig_class->framecnt);
839
104k
  pFLOAT32 ave_n_tonal_short_buf = pstr_sig_class->ave_n_tonal_short_buf;
840
104k
  pFLOAT32 ave_n_tonal_buf = pstr_sig_class->ave_n_tonal_buf;
841
104k
  pFLOAT32 msd_spec_tilt_buf = pstr_sig_class->msd_spec_tilt_buf;
842
104k
  pFLOAT32 msd_spec_tilt_short_buf = pstr_sig_class->msd_spec_tilt_short_buf;
843
844
104k
  FLOAT32 n_tonal_low_frequency_ratio;    /* the ratio of distribution of the numbers */
845
                                          /* of tonal in the low frequency domain     */
846
104k
  FLOAT32 ave_n_tonal, ave_n_tonal_short; /**< the number of tonal */
847
104k
  FLOAT32 msd_spec_tilt;                  /* the long-term MSD of spectral tilt */
848
104k
  FLOAT32 msd_spec_tilt_short;            /* the short-term MSD of spectral tilt */
849
850
104k
  WORD32 init_mode_decision_result; /* the initial mode decision */
851
104k
  WORD32 flag_border = NO_BORDER;   /* flag of current border */
852
853
104k
  WORD32 mode_decision_result; /* final mode decision result */
854
855
104k
  if (pstr_sig_class->init_flag == 0) {
856
    /* initialize */
857
1.31k
    pstr_sig_class->init_flag = 1;
858
859
7.86k
    for (i = 0; i < 5; i++) {
860
6.55k
      n_tonal[i] = 0;
861
6.55k
      n_tonal_low_frequency[i] = 0;
862
6.55k
      spec_tilt_buf[i] = 0;
863
6.55k
      pstr_buffers->init_result_behind[i] = TBD;
864
6.55k
      pstr_buffers->smoothing_result_buf[i] = TBD;
865
866
6.55k
      ave_n_tonal_short_buf[i] = 0;
867
6.55k
      ave_n_tonal_buf[i] = 0;
868
6.55k
      msd_spec_tilt_buf[i] = 0;
869
6.55k
      msd_spec_tilt_short_buf[i] = 0;
870
871
6.55k
      pstr_buffers->frame_energy_buf_behind[i] = 0;
872
6.55k
      pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
873
6.55k
    }
874
7.86k
    for (; i < 10; i++) {
875
6.55k
      n_tonal[i] = 0;
876
6.55k
      n_tonal_low_frequency[i] = 0;
877
6.55k
      spec_tilt_buf[i] = 0;
878
6.55k
      pstr_buffers->init_result_behind[i] = TBD;
879
6.55k
      pstr_buffers->smoothing_result_buf[i] = TBD;
880
881
6.55k
      pstr_buffers->frame_energy_buf_behind[i] = 0;
882
6.55k
      pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
883
6.55k
    }
884
885
119k
    for (; i < 100; i++) {
886
117k
      n_tonal[i] = 0;
887
117k
      n_tonal_low_frequency[i] = 0;
888
117k
      spec_tilt_buf[i] = 0;
889
117k
      pstr_buffers->init_result_behind[i] = TBD;
890
117k
      pstr_buffers->smoothing_result_buf[i] = TBD;
891
117k
    }
892
2.62k
    for (i = 0; i < NFRAMEAHEAD; i++) {
893
1.31k
      pstr_buffers->frame_energy_buf_ahead[i] = 0;
894
1.31k
      pstr_buffers->flag_border_buf_ahead[i] = NO_BORDER;
895
1.31k
      pstr_buffers->init_result_ahead[i] = TBD;
896
1.31k
    }
897
1.31k
  }
898
899
104k
  *framecnt += 1;
900
104k
  *framecnt_xm += 1;
901
902
104k
  pstr_ton_params.time_signal = (FLOAT32 *)ptr_time_signal;
903
104k
  pstr_ton_params.framecnt_xm = *framecnt_xm;
904
104k
  pstr_ton_params.n_tonal = n_tonal;
905
104k
  pstr_ton_params.n_tonal_low_frequency = n_tonal_low_frequency;
906
104k
  pstr_ton_params.n_tonal_low_frequency_ratio = &n_tonal_low_frequency_ratio;
907
104k
  pstr_ton_params.ave_n_tonal = &ave_n_tonal;
908
104k
  pstr_ton_params.ave_n_tonal_short = &ave_n_tonal_short;
909
  /** analysis tonal
910
   */
911
104k
  iusace_tonal_analysis(&pstr_ton_params, pstr_scratch, ccfl);
912
913
104k
  ptr_spec_params.time_signal = ptr_time_signal;
914
104k
  ptr_spec_params.framecnt_xm = *framecnt_xm;
915
104k
  ptr_spec_params.spec_tilt_buf = spec_tilt_buf;
916
104k
  ptr_spec_params.msd_spec_tilt = &msd_spec_tilt;
917
104k
  ptr_spec_params.msd_spec_tilt_short = &msd_spec_tilt_short;
918
  /** analysis spectral tilt
919
   */
920
104k
  iusace_spectral_tilt_analysis(&ptr_spec_params, ccfl);
921
922
104k
  pstr_mode_params.framecnt = *framecnt;
923
104k
  pstr_mode_params.framecnt_xm = framecnt_xm;
924
104k
  pstr_mode_params.flag_border = &flag_border;
925
104k
  pstr_mode_params.ave_n_tonal_short = ave_n_tonal_short;
926
104k
  pstr_mode_params.ave_n_tonal = ave_n_tonal;
927
104k
  pstr_mode_params.ave_n_tonal_short_buf = ave_n_tonal_short_buf;
928
104k
  pstr_mode_params.ave_n_tonal_buf = ave_n_tonal_buf;
929
104k
  pstr_mode_params.msd_spec_tilt = msd_spec_tilt;
930
104k
  pstr_mode_params.msd_spec_tilt_short = msd_spec_tilt_short;
931
104k
  pstr_mode_params.msd_spec_tilt_buf = msd_spec_tilt_buf;
932
104k
  pstr_mode_params.msd_spec_tilt_short_buf = msd_spec_tilt_short_buf;
933
104k
  pstr_mode_params.n_tonal_low_frequency_ratio = n_tonal_low_frequency_ratio;
934
104k
  pstr_mode_params.frame_energy = ptr_spec_params.frame_energy;
935
  /** initial mode decision and boundary decisions
936
   */
937
104k
  init_mode_decision_result = iusace_init_mode_decision(&pstr_mode_params);
938
939
104k
  smooth_param.flag_border_buf_behind = pstr_buffers->flag_border_buf_behind;
940
104k
  smooth_param.flag_border_buf_ahead = pstr_buffers->flag_border_buf_ahead;
941
104k
  smooth_param.frame_energy = ptr_spec_params.frame_energy;
942
104k
  smooth_param.frame_energy_buf_behind = pstr_buffers->frame_energy_buf_behind;
943
104k
  smooth_param.frame_energy_buf_ahead = pstr_buffers->frame_energy_buf_ahead;
944
104k
  smooth_param.smoothing_result_buf = pstr_buffers->smoothing_result_buf;
945
104k
  smooth_param.init_result_ahead = pstr_buffers->init_result_ahead;
946
104k
  smooth_param.flag_border = flag_border;
947
104k
  smooth_param.init_result_behind = pstr_buffers->init_result_behind;
948
104k
  smooth_param.init_mode_decision_result = init_mode_decision_result;
949
104k
  smooth_param.flag_speech_definite = 0;
950
104k
  smooth_param.count_small_energy = 0;
951
104k
  smooth_param.flag_music_definite = 0;
952
104k
  smooth_param.num_smoothing = 0;
953
  /* smoothing */
954
104k
  mode_decision_result = iusace_smoothing_mode_decision(&smooth_param);
955
956
104k
  return mode_decision_result;
957
104k
}
958
959
VOID iusace_classification(ia_classification_struct *pstr_sig_class,
960
104k
                           iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
961
104k
  WORD32 n_frames, n_class, avg_cls, nf;
962
104k
  WORD32 i;
963
104k
  FLOAT32 *ptr_time_signal = pstr_scratch->p_time_signal;
964
104k
  WORD32 mode_decision_result;
965
966
104k
  n_frames = pstr_sig_class->n_buffer_samples / ccfl;
967
968
208k
  for (nf = 0; nf < n_frames; nf++) {
969
97.3M
    for (i = 0; i < ccfl; i++) {
970
97.1M
      ptr_time_signal[i] = pstr_sig_class->input_samples[ccfl * nf + i];
971
97.1M
    }
972
973
    /* classification of ccfl-frame */
974
104k
    mode_decision_result =
975
104k
        iusace_classification_ccfl(pstr_sig_class, ptr_time_signal, pstr_scratch, ccfl);
976
977
    /* coding mode decision of 1024-frame */
978
104k
    if ((mode_decision_result == MUSIC) || (mode_decision_result == MUSIC_DEFINITE)) {
979
20.7k
      pstr_sig_class->coding_mode = FD_MODE;
980
83.6k
    } else if ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE)) {
981
82.3k
      pstr_sig_class->coding_mode = TD_MODE;
982
82.3k
    }
983
984
104k
    pstr_sig_class->class_buf[pstr_sig_class->n_buf_class + nf] = pstr_sig_class->coding_mode;
985
104k
    pstr_sig_class->pre_mode = pstr_sig_class->coding_mode;
986
104k
  }
987
988
  /* merge ccfl-frame results */
989
104k
  pstr_sig_class->n_buf_class += n_frames;
990
104k
  n_class = (pstr_sig_class->n_class_frames > pstr_sig_class->n_buf_class)
991
104k
                ? pstr_sig_class->n_buf_class
992
104k
                : pstr_sig_class->n_class_frames;
993
104k
  {
994
104k
    WORD32 min_cls, max_cls;
995
996
104k
    min_cls = max_cls = pstr_sig_class->class_buf[0];
997
104k
    for (i = 1; i < n_class; i++) {
998
0
      if (pstr_sig_class->class_buf[i] > max_cls) {
999
0
        max_cls = pstr_sig_class->class_buf[i];
1000
0
      } else if (pstr_sig_class->class_buf[i] < min_cls) {
1001
0
        min_cls = pstr_sig_class->class_buf[i];
1002
0
      }
1003
0
    }
1004
1005
104k
    avg_cls = 0;
1006
208k
    for (i = 0; i < n_class; i++) {
1007
104k
      if (pstr_sig_class->class_buf[i] == max_cls) {
1008
104k
        avg_cls += 1;
1009
104k
      }
1010
104k
      if (pstr_sig_class->class_buf[i] == min_cls) {
1011
104k
        avg_cls += -1;
1012
104k
      }
1013
104k
    }
1014
1015
104k
    if (avg_cls > 0) {
1016
0
      pstr_sig_class->coding_mode = max_cls;
1017
104k
    } else {
1018
104k
      pstr_sig_class->coding_mode = min_cls;
1019
104k
    }
1020
104k
  }
1021
1022
  /* shift, save pre_mode and unused class */
1023
104k
  if (n_class > 0) {
1024
104k
    pstr_sig_class->pre_mode = pstr_sig_class->class_buf[n_class - 1];
1025
104k
  }
1026
104k
  pstr_sig_class->n_buf_class -= n_class;
1027
104k
  pstr_sig_class->n_buffer_samples -= ccfl * n_frames;
1028
1029
104k
  WORD32 minimum = MIN(pstr_sig_class->n_buf_class, pstr_sig_class->n_buffer_samples);
1030
104k
  if (minimum == pstr_sig_class->n_buf_class) {
1031
104k
    for (i = 0; i < minimum; i++) {
1032
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1033
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1034
0
    }
1035
1036
    /* shift, save unused samples */
1037
104k
    for (; i < pstr_sig_class->n_buffer_samples; i++) {
1038
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1039
0
    }
1040
104k
  } else {
1041
0
    for (i = 0; i < minimum; i++) {
1042
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1043
0
      pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
1044
0
    }
1045
1046
    /* shift, save unused samples */
1047
0
    for (; i < pstr_sig_class->n_buf_class; i++) {
1048
0
      pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
1049
0
    }
1050
0
  }
1051
104k
}
1052
1053
1.33k
/**
 * Resets the signal classifier state to its start-up values.
 *
 * Clears the sample buffer, the nested history buffers and the analysis
 * buffers, zeroes the counters, and sets the defaults: pre_mode = FD_MODE,
 * n_class_frames = 2, is_switch_mode = 1. init_flag is cleared so that the
 * per-frame classifier re-runs its own first-frame initialisation.
 *
 * @param pstr_sig_class  classifier state to reset
 */
VOID iusace_init_classification(ia_classification_struct *pstr_sig_class) {
  /* Mode and control defaults */
  pstr_sig_class->pre_mode = FD_MODE;
  pstr_sig_class->is_switch_mode = 1;
  pstr_sig_class->n_class_frames = 2;
  pstr_sig_class->init_flag = 0;

  /* Counters */
  pstr_sig_class->n_buffer_samples = 0;
  pstr_sig_class->n_buf_class = 0;
  pstr_sig_class->framecnt = 0;
  pstr_sig_class->framecnt_xm = 0;

  /* Input sample buffer and nested history buffers */
  memset(pstr_sig_class->input_samples, 0, sizeof(FLOAT32) * 3840 * 2);
  memset(&pstr_sig_class->buffers, 0, sizeof(ia_classification_buf_struct));

  /* 100-entry analysis buffers */
  memset(pstr_sig_class->spec_tilt_buf, 0, 100 * sizeof(FLOAT32));
  memset(pstr_sig_class->n_tonal, 0, 100 * sizeof(WORD32));
  memset(pstr_sig_class->n_tonal_low_frequency, 0, 100 * sizeof(WORD32));

  /* 5-entry analysis buffers */
  memset(pstr_sig_class->msd_spec_tilt_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->msd_spec_tilt_short_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->ave_n_tonal_short_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->ave_n_tonal_buf, 0, 5 * sizeof(FLOAT32));
}