/src/speex/libspeex/vbr.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2002 Jean-Marc Valin |
2 | | File: vbr.c |
3 | | |
4 | | VBR-related routines |
5 | | |
6 | | Redistribution and use in source and binary forms, with or without |
7 | | modification, are permitted provided that the following conditions |
8 | | are met: |
9 | | |
10 | | - Redistributions of source code must retain the above copyright |
11 | | notice, this list of conditions and the following disclaimer. |
12 | | |
13 | | - Redistributions in binary form must reproduce the above copyright |
14 | | notice, this list of conditions and the following disclaimer in the |
15 | | documentation and/or other materials provided with the distribution. |
16 | | |
17 | | - Neither the name of the Xiph.org Foundation nor the names of its |
18 | | contributors may be used to endorse or promote products derived from |
19 | | this software without specific prior written permission. |
20 | | |
21 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
22 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
23 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
24 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
25 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
26 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
27 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
28 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
29 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
30 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
31 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | |
33 | | */ |
34 | | |
35 | | #ifdef HAVE_CONFIG_H |
36 | | #include "config.h" |
37 | | #endif |
38 | | |
39 | | #include "vbr.h" |
40 | | #include <math.h> |
41 | | |
42 | | |
/* Square of x. The argument is expanded twice, so it must have no side effects. */
#define sqr(x) ((x) * (x))

/* Energy floor: added to the frame energy before its log is taken, and used as
   the threshold a frame's energy must exceed before it may update the noise
   estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy before it is compared with the noise level. */
#define NOISE_POW .3
47 | | |
48 | | #ifndef DISABLE_VBR |
49 | | |
/* Narrowband VBR threshold table: 9 rows (one per mode, labelled on each row)
   of 11 entries each. Every row is non-increasing from left to right; the
   sixth entry of the "4 kbps" row is 2.0f so that this ordering also holds
   there (it sits between 2.5f and 1.8f).
   NOTE(review): presumably indexed [mode][quality] by the encoder -- confirm
   against the callers. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,-0.25f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.7f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  4.2f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f, 5.25f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f, 9.25f,  8.0f,  7.0f,  5.0f,  4.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  6.2f,  5.2f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 10.0f,  9.8f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
61 | | |
62 | | |
/* High-band VBR threshold table: 5 rows (labelled below) of 11 entries each. */
const float vbr_hb_thresh[5][11] = {
   /* silence */
   { -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f },
   /* 2 kbps */
   { -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f },
   /* 6 kbps */
   { 11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f },
   /* 10 kbps */
   { 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f },
   /* 18 kbps */
   { 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f }
};
70 | | |
/* Second high-band VBR threshold table: 2 rows (labelled below) of 11 entries each. */
const float vbr_uhb_thresh[2][11] = {
   /* silence */
   { -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f },
   /* 2 kbps */
   {  3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f }
};
75 | | |
76 | | void vbr_init(VBRState *vbr) |
77 | 2.94k | { |
78 | 2.94k | int i; |
79 | | |
80 | 2.94k | vbr->average_energy=1600000; |
81 | 2.94k | vbr->last_energy=1; |
82 | 2.94k | vbr->accum_sum=0; |
83 | 2.94k | vbr->soft_pitch=0; |
84 | 2.94k | vbr->last_pitch_coef=0; |
85 | 2.94k | vbr->last_quality=0; |
86 | | |
87 | 2.94k | vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); |
88 | 2.94k | vbr->noise_accum_count=.05; |
89 | 2.94k | vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; |
90 | 2.94k | vbr->consec_noise=0; |
91 | | |
92 | | |
93 | 17.6k | for (i=0;i<VBR_MEMORY_SIZE;i++) |
94 | 14.7k | vbr->last_log_energy[i] = log(MIN_ENERGY); |
95 | 2.94k | } |
96 | | |
97 | | |
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  -Attacks (positive energy derivative) should be coded with more bits

  -Stationary voiced segments should receive more bits

  -Segments with (very) low absolute energy should receive fewer bits (maybe
  only shaped noise?)

  -DTX for near-zero energy?

  -Stationary fricative segments should have fewer bits

  -Temporal masking: when energy slope is decreasing, decrease the bit-rate

  -Decrease bit-rate for males (low pitch)?

  -(wideband only) fewer bits in the high-band when signal is very
  non-stationary (harder to notice high-frequency noise)???

*/
122 | | |
123 | | float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef) |
124 | 16.4k | { |
125 | 16.4k | int i; |
126 | 16.4k | float ener=0, ener1=0, ener2=0; |
127 | 16.4k | float qual=7; |
128 | 16.4k | float log_energy; |
129 | 16.4k | float non_st=0; |
130 | 16.4k | float voicing; |
131 | 16.4k | float pow_ener; |
132 | | |
133 | 1.32M | for (i=0;i<len>>1;i++) |
134 | 1.31M | ener1 += ((float)sig[i])*sig[i]; |
135 | | |
136 | 1.32M | for (i=len>>1;i<len;i++) |
137 | 1.31M | ener2 += ((float)sig[i])*sig[i]; |
138 | 16.4k | ener=ener1+ener2; |
139 | | |
140 | 16.4k | log_energy = log(ener+MIN_ENERGY); |
141 | 98.5k | for (i=0;i<VBR_MEMORY_SIZE;i++) |
142 | 82.0k | non_st += sqr(log_energy-vbr->last_log_energy[i]); |
143 | 16.4k | non_st = non_st/(30*VBR_MEMORY_SIZE); |
144 | 16.4k | if (non_st>1) |
145 | 7.85k | non_st=1; |
146 | | |
147 | 16.4k | voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); |
148 | 16.4k | vbr->average_energy = 0.9*vbr->average_energy + .1*ener; |
149 | 16.4k | vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; |
150 | 16.4k | pow_ener = pow(ener,NOISE_POW); |
151 | 16.4k | if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) |
152 | 4.27k | vbr->noise_accum = .05*pow_ener; |
153 | | |
154 | 16.4k | if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) |
155 | 16.4k | || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) |
156 | 16.4k | || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) |
157 | 16.4k | || (voicing<0 && non_st < .05)) |
158 | 4.55k | { |
159 | 4.55k | float tmp; |
160 | | |
161 | 4.55k | vbr->consec_noise++; |
162 | 4.55k | if (pow_ener > 3*vbr->noise_level) |
163 | 14 | tmp = 3*vbr->noise_level; |
164 | 4.54k | else |
165 | 4.54k | tmp = pow_ener; |
166 | 4.55k | if (vbr->consec_noise>=4) |
167 | 1.56k | { |
168 | 1.56k | vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; |
169 | 1.56k | vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; |
170 | 1.56k | } |
171 | 11.8k | } else { |
172 | 11.8k | vbr->consec_noise=0; |
173 | 11.8k | } |
174 | | |
175 | 16.4k | if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) |
176 | 6.33k | { |
177 | 6.33k | vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; |
178 | 6.33k | vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; |
179 | 6.33k | } |
180 | | |
181 | | /* Checking for very low absolute energy */ |
182 | 16.4k | if (ener < 30000) |
183 | 6.14k | { |
184 | 6.14k | qual -= .7; |
185 | 6.14k | if (ener < 10000) |
186 | 5.71k | qual-=.7; |
187 | 6.14k | if (ener < 3000) |
188 | 5.36k | qual-=.7; |
189 | 10.2k | } else { |
190 | 10.2k | float short_diff, long_diff; |
191 | 10.2k | short_diff = log((ener+1)/(1+vbr->last_energy)); |
192 | 10.2k | long_diff = log((ener+1)/(1+vbr->average_energy)); |
193 | | /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ |
194 | | |
195 | 10.2k | if (long_diff<-5) |
196 | 1.93k | long_diff=-5; |
197 | 10.2k | if (long_diff>2) |
198 | 1.84k | long_diff=2; |
199 | | |
200 | 10.2k | if (long_diff>0) |
201 | 4.78k | qual += .6*long_diff; |
202 | 10.2k | if (long_diff<0) |
203 | 5.48k | qual += .5*long_diff; |
204 | 10.2k | if (short_diff>0) |
205 | 4.55k | { |
206 | 4.55k | if (short_diff>5) |
207 | 2.03k | short_diff=5; |
208 | 4.55k | qual += 1*short_diff; |
209 | 4.55k | } |
210 | | /* Checking for energy increases */ |
211 | 10.2k | if (ener2 > 1.6*ener1) |
212 | 2.49k | qual += .5; |
213 | 10.2k | } |
214 | 16.4k | vbr->last_energy = ener; |
215 | 16.4k | vbr->soft_pitch = .8*vbr->soft_pitch + .2*pitch_coef; |
216 | 16.4k | qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); |
217 | | |
218 | 16.4k | if (qual < vbr->last_quality) |
219 | 7.27k | qual = .5*qual + .5*vbr->last_quality; |
220 | 16.4k | if (qual<4) |
221 | 4.83k | qual=4; |
222 | 16.4k | if (qual>10) |
223 | 2.98k | qual=10; |
224 | | |
225 | | /* |
226 | | if (vbr->consec_noise>=2) |
227 | | qual-=1.3; |
228 | | if (vbr->consec_noise>=5) |
229 | | qual-=1.3; |
230 | | if (vbr->consec_noise>=12) |
231 | | qual-=1.3; |
232 | | */ |
233 | 16.4k | if (vbr->consec_noise>=3) |
234 | 2.30k | qual=4; |
235 | | |
236 | 16.4k | if (vbr->consec_noise) |
237 | 4.55k | qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); |
238 | 16.4k | if (qual<0) |
239 | 0 | qual=0; |
240 | | |
241 | 16.4k | if (ener<1600000) |
242 | 7.81k | { |
243 | 7.81k | if (vbr->consec_noise>2) |
244 | 2.10k | qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); |
245 | 7.81k | if (ener<10000&&vbr->consec_noise>2) |
246 | 2.00k | qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); |
247 | 7.81k | if (qual<0) |
248 | 0 | qual=0; |
249 | 7.81k | qual += .3*log(.0001+ener/1600000.0); |
250 | 7.81k | } |
251 | 16.4k | if (qual<-1) |
252 | 555 | qual=-1; |
253 | | |
254 | | /*printf ("%f %f %f %f\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level));*/ |
255 | | |
256 | 16.4k | vbr->last_pitch_coef = pitch_coef; |
257 | 16.4k | vbr->last_quality = qual; |
258 | | |
259 | 82.0k | for (i=VBR_MEMORY_SIZE-1;i>0;i--) |
260 | 65.6k | vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; |
261 | 16.4k | vbr->last_log_energy[0] = log_energy; |
262 | | |
263 | | /*printf ("VBR: %f %f %f %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, vbr->noise_level);*/ |
264 | | |
265 | 16.4k | return qual; |
266 | 16.4k | } |
267 | | |
268 | | void vbr_destroy(VBRState *vbr) |
269 | 2.94k | { |
270 | 2.94k | } |
271 | | |
272 | | #endif /* #ifndef DISABLE_VBR */ |