Coverage Report

Created: 2025-12-31 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/speex/libspeex/vbr.c
Line
Count
Source
1
/* Copyright (C) 2002 Jean-Marc Valin
2
   File: vbr.c
3
4
   VBR-related routines
5
6
   Redistribution and use in source and binary forms, with or without
7
   modification, are permitted provided that the following conditions
8
   are met:
9
10
   - Redistributions of source code must retain the above copyright
11
   notice, this list of conditions and the following disclaimer.
12
13
   - Redistributions in binary form must reproduce the above copyright
14
   notice, this list of conditions and the following disclaimer in the
15
   documentation and/or other materials provided with the distribution.
16
17
   - Neither the name of the Xiph.org Foundation nor the names of its
18
   contributors may be used to endorse or promote products derived from
19
   this software without specific prior written permission.
20
21
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
25
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33
*/
34
35
#ifdef HAVE_CONFIG_H
36
#include "config.h"
37
#endif
38
39
#include "vbr.h"
40
#include <math.h>
41
42
43
34.7k
/* Square of v.  The argument is expanded twice, so it must not have side
   effects. */
#define sqr(v) ((v)*(v))

/* Energy floor: added to the frame energy before its log is taken, and used
   as the minimum frame energy for some of the noise-estimate updates. */
#define MIN_ENERGY 6000
/* Exponent applied to an energy (through pow()) before it is compared with
   or folded into the noise-level estimate. */
#define NOISE_POW .3
47
48
#ifndef DISABLE_VBR
49
50
/* Narrowband VBR thresholds: one row per mode (labelled at the right of each
   row), eleven columns per row.
   NOTE(review): the columns are presumably indexed by the VBR quality
   setting 0..10; the lookup is not in this file -- confirm in the encoder.
   Every row is non-increasing from left to right. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,-0.25f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.7f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  4.2f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f, 5.25f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f, 9.25f,  8.0f,  7.0f,  5.0f,  4.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  6.2f,  5.2f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 10.0f,  9.8f,  7.5f}, /* 24 kbps */
   /* Column 5 previously read 1.0f, which made this the only row of the VBR
      threshold tables that rises again (2.5, 1.0, 1.8).  It is set to 2.0f so
      the row is non-increasing like all the others. */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
61
62
63
/* High-band VBR thresholds: five rows (one per mode, labelled above each
   row) of eleven values.
   NOTE(review): column meaning is not visible in this file; presumably the
   VBR quality setting 0..10 -- confirm against the code that reads it. */
const float vbr_hb_thresh[5][11]={
   /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 6 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f},
   /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f},
   /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}
};
70
71
/* Thresholds for the ultra-wideband band (going by the table name): two rows
   (labelled above each row) of eleven values.
   NOTE(review): column meaning is not visible in this file; presumably the
   VBR quality setting 0..10 -- confirm against the code that reads it. */
const float vbr_uhb_thresh[2][11]={
   /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}
};
75
76
/* Put a VBRState into its starting condition.

   vbr: state to initialise.  Every field assigned below is overwritten
        unconditionally, so the previous contents do not matter. */
void vbr_init(VBRState *vbr)
{
   int slot=0;

   /* Quality and pitch memory start at zero; the energy trackers start at
      fixed non-zero values. */
   vbr->last_quality=0;
   vbr->last_pitch_coef=0;
   vbr->soft_pitch=0;
   vbr->accum_sum=0;
   vbr->last_energy=1;
   vbr->average_energy=1600000;

   /* Seed the noise estimate as though a weight of .05 had been accumulated
      at the MIN_ENERGY floor.  noise_level is the accumulated value divided
      by the accumulated weight. */
   vbr->consec_noise=0;
   vbr->noise_accum_count=.05;
   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;

   /* Fill the log-energy history with the log of the energy floor. */
   while (slot<VBR_MEMORY_SIZE)
   {
      vbr->last_log_energy[slot] = log(MIN_ENERGY);
      slot++;
   }
}
96
97
98
/*
99
  This function should analyse the signal and decide how critical the
100
  coding error will be perceptually. The following factors should be
101
  taken into account:
102
103
  -Attacks (positive energy derivative) should be coded with more bits
104
105
  -Stationary voiced segments should receive more bits
106
107
  -Segments with (very) low absolute energy should receive fewer bits (maybe
108
  only shaped noise?)
109
110
  -DTX for near-zero energy?
111
112
  -Stationary fricative segments should have fewer bits
113
114
  -Temporal masking: when energy slope is decreasing, decrease the bit-rate
115
116
  -Decrease bit-rate for males (low pitch)?
117
118
  -(wideband only) fewer bits in the high-band when the signal is very
119
  non-stationary (harder to notice high-frequency noise)???
120
121
*/
122
123
/* Analyse one frame and return the quality figure used for the VBR decision.

   vbr:        analysis state; read and updated on every call (energy
               averages, noise estimate, log-energy history, last quality).
   sig:        frame samples; indices 0..len-1 are read.
   len:        number of samples in sig.
   pitch:      not used by this function.
   pitch_coef: pitch coefficient of the frame; it enters the voicing measure
               and the final quality as an offset from .4.

   Returns the quality estimate.  The last step clamps it to at least -1. */
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
{
   int i;
   int half=len>>1;
   float e_first=0, e_second=0, e_total=0;
   float quality=7;
   float log_e;
   float change=0;
   float voicing;
   float e_pow;
   double pitch_off=pitch_coef-.4;
   double noise_cut;
   int noise_like=0;

   /* Energy of each half of the frame, then of the whole frame. */
   for (i=0;i<half;i++)
      e_first += ((float)sig[i])*sig[i];
   for (i=half;i<len;i++)
      e_second += ((float)sig[i])*sig[i];
   e_total=e_first+e_second;

   /* Non-stationarity: scaled sum of squared log-energy differences against
      the stored history, capped at 1. */
   log_e = log(e_total+MIN_ENERGY);
   for (i=0;i<VBR_MEMORY_SIZE;i++)
      change += sqr(log_e-vbr->last_log_energy[i]);
   change = change/(30*VBR_MEMORY_SIZE);
   if (change>1)
      change=1;

   /* Signed square of the pitch coefficient's distance from .4. */
   voicing = 3*pitch_off*fabs(pitch_off);

   vbr->average_energy = 0.9*vbr->average_energy + .1*e_total;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   e_pow = pow(e_total,NOISE_POW);

   /* While the accumulated weight is still below .06, replace the
      accumulator with the current frame if it is above the energy floor. */
   if (vbr->noise_accum_count<.06 && e_total>MIN_ENERGY)
      vbr->noise_accum = .05*e_pow;

   /* A frame counts as noise-like when it is weakly voiced, close to
      stationary and, except for the last test, near the noise level. */
   if (voicing<.3 && change < .2 && e_pow < 1.2*vbr->noise_level)
      noise_like=1;
   else if (voicing<.3 && change < .05 && e_pow < 1.5*vbr->noise_level)
      noise_like=1;
   else if (voicing<.4 && change < .05 && e_pow < 1.2*vbr->noise_level)
      noise_like=1;
   else if (voicing<0 && change < .05)
      noise_like=1;

   if (!noise_like)
   {
      vbr->consec_noise=0;
   } else {
      float capped;

      vbr->consec_noise++;
      /* The contribution of a single frame is limited to 3x the current
         noise level. */
      capped = (e_pow > 3*vbr->noise_level) ? 3*vbr->noise_level : e_pow;
      /* Only adapt from the fourth consecutive noise-like frame onwards. */
      if (vbr->consec_noise>=4)
      {
         vbr->noise_accum = .95*vbr->noise_accum + .05*capped;
         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
      }
   }

   /* Frames below the noise level (but above the energy floor) always feed
      the estimate. */
   if (e_pow < vbr->noise_level && e_total>MIN_ENERGY)
   {
      vbr->noise_accum = .95*vbr->noise_accum + .05*e_pow;
      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
   }

   if (e_total < 30000)
   {
      /* Very low absolute energy: one penalty step per threshold crossed. */
      quality -= .7;
      if (e_total < 10000)
         quality-=.7;
      if (e_total < 3000)
         quality-=.7;
   } else {
      float vs_last, vs_avg;

      /* Log energy ratio against the previous frame and against the running
         average (which already includes this frame). */
      vs_last = log((e_total+1)/(1+vbr->last_energy));
      vs_avg = log((e_total+1)/(1+vbr->average_energy));

      if (vs_avg<-5)
         vs_avg=-5;
      else if (vs_avg>2)
         vs_avg=2;

      if (vs_avg>0)
         quality += .6*vs_avg;
      else if (vs_avg<0)
         quality += .5*vs_avg;

      if (vs_last>0)
      {
         if (vs_last>5)
            vs_last=5;
         quality += vs_last;
      }

      /* Energy rising within the frame. */
      if (e_second > 1.6*e_first)
         quality += .5;
   }

   vbr->last_energy = e_total;
   vbr->soft_pitch = .8*vbr->soft_pitch + .2*pitch_coef;
   quality += 2.2*(pitch_off + (vbr->soft_pitch-.4));

   /* When quality drops, move only halfway from the previous value. */
   if (quality < vbr->last_quality)
      quality = .5*quality + .5*vbr->last_quality;
   if (quality<4)
      quality=4;
   else if (quality>10)
      quality=10;

   if (vbr->consec_noise>=3)
      quality=4;

   /* Penalty that grows with the log of the noise-like run length; it is
      zero when consec_noise is zero. */
   noise_cut = log(3.0 + vbr->consec_noise)-log(3);
   if (vbr->consec_noise)
      quality -= noise_cut;
   if (quality<0)
      quality=0;

   if (e_total<1600000)
   {
      if (vbr->consec_noise>2)
      {
         quality-=0.5*noise_cut;
         if (e_total<10000)
            quality-=0.5*noise_cut;
      }
      if (quality<0)
         quality=0;
      quality += .3*log(.0001+e_total/1600000.0);
   }
   if (quality<-1)
      quality=-1;

   vbr->last_pitch_coef = pitch_coef;
   vbr->last_quality = quality;

   /* Shift the log-energy history by one slot and store this frame first. */
   i=VBR_MEMORY_SIZE-1;
   while (i>0)
   {
      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
      i--;
   }
   vbr->last_log_energy[0] = log_e;

   return quality;
}
267
268
/* Counterpart to vbr_init().  The body performs no work: nothing is read
   from or written to the state, and nothing is released. */
void vbr_destroy(VBRState *vbr)
{
   (void)vbr;
}
271
272
#endif /* #ifndef DISABLE_VBR */