/src/ffmpeg/libavcodec/speexdec.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2002-2008 Xiph.org Foundation |
3 | | * Copyright 2002-2008 Jean-Marc Valin |
4 | | * Copyright 2005-2007 Analog Devices Inc. |
5 | | * Copyright 2005-2008 Commonwealth Scientific and Industrial Research Organisation (CSIRO) |
6 | | * Copyright 1993, 2002, 2006 David Rowe |
7 | | * Copyright 2003 EpicGames |
8 | | * Copyright 1992-1994 Jutta Degener, Carsten Bormann |
9 | | |
10 | | * Redistribution and use in source and binary forms, with or without |
11 | | * modification, are permitted provided that the following conditions |
12 | | * are met: |
13 | | |
14 | | * - Redistributions of source code must retain the above copyright |
15 | | * notice, this list of conditions and the following disclaimer. |
16 | | |
17 | | * - Redistributions in binary form must reproduce the above copyright |
18 | | * notice, this list of conditions and the following disclaimer in the |
19 | | * documentation and/or other materials provided with the distribution. |
20 | | |
21 | | * - Neither the name of the Xiph.org Foundation nor the names of its |
22 | | * contributors may be used to endorse or promote products derived from |
23 | | * this software without specific prior written permission. |
24 | | |
25 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
26 | | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
27 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
28 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
29 | | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
30 | | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
31 | | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
32 | | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
33 | | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
34 | | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
35 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
36 | | * |
37 | | * This file is part of FFmpeg. |
38 | | * |
39 | | * FFmpeg is free software; you can redistribute it and/or |
40 | | * modify it under the terms of the GNU Lesser General Public |
41 | | * License as published by the Free Software Foundation; either |
42 | | * version 2.1 of the License, or (at your option) any later version. |
43 | | * |
44 | | * FFmpeg is distributed in the hope that it will be useful, |
45 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
46 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
47 | | * Lesser General Public License for more details. |
48 | | * |
49 | | * You should have received a copy of the GNU Lesser General Public |
50 | | * License along with FFmpeg; if not, write to the Free Software |
51 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
52 | | */ |
53 | | |
54 | | #include "libavutil/avassert.h" |
55 | | #include "libavutil/avstring.h" |
56 | | #include "libavutil/float_dsp.h" |
57 | | #include "libavutil/intfloat.h" |
58 | | #include "libavutil/mem.h" |
59 | | #include "avcodec.h" |
60 | | #include "bytestream.h" |
61 | | #include "codec_internal.h" |
62 | | #include "decode.h" |
63 | | #include "get_bits.h" |
64 | | #include "speexdata.h" |
65 | | |
/* Number of entries in speex_modes[] and of per-mode decoder states */
#define SPEEX_NB_MODES 3
/* In-band request id that speex_inband_handler() routes to the stereo handler */
#define SPEEX_INBAND_STEREO 9

/* Length of the g0_mem / g1_mem filter memories */
#define QMF_ORDER 64
/* LPC order of the narrowband mode */
#define NB_ORDER 10
/* Samples per narrowband frame */
#define NB_FRAME_SIZE 160
/* Number of slots in a mode's submode table */
#define NB_SUBMODES 9
#define NB_SUBMODE_BITS 4
#define SB_SUBMODE_BITS 3

/* Samples per narrowband sub-frame, and sub-frames per frame */
#define NB_SUBFRAME_SIZE 40
#define NB_NB_SUBFRAMES 4
/* Narrowband pitch range limits */
#define NB_PITCH_START 17
#define NB_PITCH_END 144

/* Number of floats in the excitation buffer of a decoder state */
#define NB_DEC_BUFFER (NB_FRAME_SIZE + 2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 12)

/* Fill / copy n ELEMENTS (not bytes); the element size is taken from *dst */
#define SPEEX_MEMSET(dst, c, n) (memset((dst), (c), (n) * sizeof(*(dst))))
#define SPEEX_COPY(dst, src, n) (memcpy((dst), (src), (n) * sizeof(*(dst))))

/* Initial LSP value for coefficient i before the codebook corrections are added */
#define LSP_LINEAR(i) (.25f * (i) + .25f)
#define LSP_LINEAR_HIGH(i) (.3125f * (i) + .75f)
/* Scale a codebook entry by 1/256, 1/512 or 1/1024 */
#define LSP_DIV_256(x) (0.00390625f * (x))
#define LSP_DIV_512(x) (0.001953125f * (x))
#define LSP_DIV_1024(x) (0.0009765625f * (x))
91 | | |
/** Parameters handed to the long-term predictor (pitch) un-quantizer. */
typedef struct LtpParams {
    const int8_t *gain_cdbk;  /**< Gain codebook; read 4 entries per index by pitch_unquant_3tap() */
    int           gain_bits;  /**< Number of bits read for the gain index */
    int           pitch_bits; /**< Number of bits read for the pitch value */
} LtpParam;
97 | | |
98 | | static const LtpParam ltp_params_vlbr = { gain_cdbk_lbr, 5, 0 }; |
99 | | static const LtpParam ltp_params_lbr = { gain_cdbk_lbr, 5, 7 }; |
100 | | static const LtpParam ltp_params_med = { gain_cdbk_lbr, 5, 7 }; |
101 | | static const LtpParam ltp_params_nb = { gain_cdbk_nb, 7, 7 }; |
102 | | |
/** Layout of a split shape codebook as consumed by split_cb_shape_sign_unquant(). */
typedef struct SplitCodebookParams {
    int                subvect_size; /**< Samples per sub-vector */
    int                nb_subvect;   /**< Number of sub-vectors */
    const signed char *shape_cb;     /**< Shape codebook, subvect_size entries per index */
    int                shape_bits;   /**< Bits read per shape index */
    int                have_sign;    /**< Non-zero when a sign bit precedes each index */
} SplitCodebookParams;
110 | | |
111 | | static const SplitCodebookParams split_cb_nb_ulbr = { 20, 2, exc_20_32_table, 5, 0 }; |
112 | | static const SplitCodebookParams split_cb_nb_vlbr = { 10, 4, exc_10_16_table, 4, 0 }; |
113 | | static const SplitCodebookParams split_cb_nb_lbr = { 10, 4, exc_10_32_table, 5, 0 }; |
114 | | static const SplitCodebookParams split_cb_nb_med = { 8, 5, exc_8_128_table, 7, 0 }; |
115 | | static const SplitCodebookParams split_cb_nb = { 5, 8, exc_5_64_table, 6, 0 }; |
116 | | static const SplitCodebookParams split_cb_sb = { 5, 8, exc_5_256_table, 8, 0 }; |
117 | | static const SplitCodebookParams split_cb_high = { 8, 5, hexc_table, 7, 1 }; |
118 | | static const SplitCodebookParams split_cb_high_lbr= { 10, 4, hexc_10_32_table,5, 0 }; |
119 | | |
120 | | /** Quantizes LSPs */ |
121 | | typedef void (*lsp_quant_func)(float *, float *, int, GetBitContext *); |
122 | | |
123 | | /** Decodes quantized LSPs */ |
124 | | typedef void (*lsp_unquant_func)(float *, int, GetBitContext *); |
125 | | |
126 | | /** Long-term predictor quantization */ |
127 | | typedef int (*ltp_quant_func)(float *, float *, float *, |
128 | | float *, float *, float *, |
129 | | const void *, int, int, float, int, int, |
130 | | GetBitContext *, char *, float *, |
131 | | float *, int, int, int, float *); |
132 | | |
133 | | /** Long-term un-quantize */ |
134 | | typedef void (*ltp_unquant_func)(float *, float *, int, int, |
135 | | float, const void *, int, int *, |
136 | | float *, GetBitContext *, int, int, |
137 | | float, int); |
138 | | |
139 | | /** Innovation quantization function */ |
140 | | typedef void (*innovation_quant_func)(float *, float *, |
141 | | float *, float *, const void *, |
142 | | int, int, float *, float *, |
143 | | GetBitContext *, char *, int, int); |
144 | | |
145 | | /** Innovation unquantization function */ |
146 | | typedef void (*innovation_unquant_func)(float *, const void *, int, |
147 | | GetBitContext *, uint32_t *); |
148 | | |
149 | | typedef struct SpeexSubmode { |
150 | | int lbr_pitch; /**< Set to -1 for "normal" modes, otherwise encode pitch using |
151 | | a global pitch and allowing a +- lbr_pitch variation (for |
152 | | low not-rates)*/ |
153 | | int forced_pitch_gain; /**< Use the same (forced) pitch gain for all |
154 | | sub-frames */ |
155 | | int have_subframe_gain; /**< Number of bits to use as sub-frame innovation |
156 | | gain */ |
157 | | int double_codebook; /**< Apply innovation quantization twice for higher |
158 | | quality (and higher bit-rate)*/ |
159 | | lsp_unquant_func lsp_unquant; /**< LSP unquantization function */ |
160 | | |
161 | | ltp_unquant_func ltp_unquant; /**< Long-term predictor (pitch) un-quantizer */ |
162 | | const void *LtpParam; /**< Pitch parameters (options) */ |
163 | | |
164 | | innovation_unquant_func innovation_unquant; /**< Innovation un-quantization */ |
165 | | const void *innovation_params; /**< Innovation quantization parameters*/ |
166 | | |
167 | | float comb_gain; /**< Gain of enhancer comb filter */ |
168 | | } SpeexSubmode; |
169 | | |
170 | | typedef struct SpeexMode { |
171 | | int modeID; /**< ID of the mode */ |
172 | | int (*decode)(AVCodecContext *avctx, void *dec, GetBitContext *gb, float *out, int packets_left); |
173 | | int frame_size; /**< Size of frames used for decoding */ |
174 | | int subframe_size; /**< Size of sub-frames used for decoding */ |
175 | | int lpc_size; /**< Order of LPC filter */ |
176 | | float folding_gain; /**< Folding gain */ |
177 | | const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode */ |
178 | | int default_submode; /**< Default sub-mode to use when decoding */ |
179 | | } SpeexMode; |
180 | | |
181 | | typedef struct DecoderState { |
182 | | const SpeexMode *mode; |
183 | | int modeID; /**< ID of the decoder mode */ |
184 | | int first; /**< Is first frame */ |
185 | | int full_frame_size; /**< Length of full-band frames */ |
186 | | int is_wideband; /**< If wideband is present */ |
187 | | int count_lost; /**< Was the last frame lost? */ |
188 | | int frame_size; /**< Length of high-band frames */ |
189 | | int subframe_size; /**< Length of high-band sub-frames */ |
190 | | int nb_subframes; /**< Number of high-band sub-frames */ |
191 | | int lpc_size; /**< Order of high-band LPC analysis */ |
192 | | float last_ol_gain; /**< Open-loop gain for previous frame */ |
193 | | float *innov_save; /**< If non-NULL, innovation is copied here */ |
194 | | |
195 | | /* This is used in packet loss concealment */ |
196 | | int last_pitch; /**< Pitch of last correctly decoded frame */ |
197 | | float last_pitch_gain; /**< Pitch gain of last correctly decoded frame */ |
198 | | uint32_t seed; /**< Seed used for random number generation */ |
199 | | |
200 | | int encode_submode; |
201 | | const SpeexSubmode *const *submodes; /**< Sub-mode data */ |
202 | | int submodeID; /**< Activated sub-mode */ |
203 | | int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */ |
204 | | |
205 | | /* Vocoder data */ |
206 | | float voc_m1; |
207 | | float voc_m2; |
208 | | float voc_mean; |
209 | | int voc_offset; |
210 | | |
211 | | int dtx_enabled; |
212 | | int highpass_enabled; /**< Is the input filter enabled */ |
213 | | |
214 | | float *exc; /**< Start of excitation frame */ |
215 | | float mem_hp[2]; /**< High-pass filter memory */ |
216 | | float exc_buf[NB_DEC_BUFFER]; /**< Excitation buffer */ |
217 | | float old_qlsp[NB_ORDER]; /**< Quantized LSPs for previous frame */ |
218 | | float interp_qlpc[NB_ORDER]; /**< Interpolated quantized LPCs */ |
219 | | float mem_sp[NB_ORDER]; /**< Filter memory for synthesis signal */ |
220 | | float g0_mem[QMF_ORDER]; |
221 | | float g1_mem[QMF_ORDER]; |
222 | | float pi_gain[NB_NB_SUBFRAMES]; /**< Gain of LPC filter at theta=pi (fe/2) */ |
223 | | float exc_rms[NB_NB_SUBFRAMES]; /**< RMS of excitation per subframe */ |
224 | | } DecoderState; |
225 | | |
226 | | /* Default handler for user callbacks: skip it */ |
227 | | static int speex_default_user_handler(GetBitContext *gb, void *state, void *data) |
228 | 2.46k | { |
229 | 2.46k | const int req_size = get_bits(gb, 4); |
230 | 2.46k | skip_bits_long(gb, 5 + 8 * req_size); |
231 | 2.46k | return 0; |
232 | 2.46k | } |
233 | | |
/** Stereo side information; balance and e_ratio are written by speex_std_stereo(). */
typedef struct StereoState {
    /** Left/right balance info */
    float balance;
    /** Ratio of energies: E(left+right)/[E(left)+E(right)] */
    float e_ratio;
    /** Smoothed left channel gain */
    float smooth_left;
    /** Smoothed right channel gain */
    float smooth_right;
} StereoState;
240 | | |
241 | | typedef struct SpeexContext { |
242 | | AVClass *class; |
243 | | GetBitContext gb; |
244 | | |
245 | | int32_t version_id; /**< Version for Speex (for checking compatibility) */ |
246 | | int32_t rate; /**< Sampling rate used */ |
247 | | int32_t mode; /**< Mode used (0 for narrowband, 1 for wideband) */ |
248 | | int32_t bitstream_version; /**< Version ID of the bit-stream */ |
249 | | int32_t nb_channels; /**< Number of channels decoded */ |
250 | | int32_t bitrate; /**< Bit-rate used */ |
251 | | int32_t frame_size; /**< Size of frames */ |
252 | | int32_t vbr; /**< 1 for a VBR decoding, 0 otherwise */ |
253 | | int32_t frames_per_packet; /**< Number of frames stored per Ogg packet */ |
254 | | int32_t extra_headers; /**< Number of additional headers after the comments */ |
255 | | |
256 | | int pkt_size; |
257 | | |
258 | | StereoState stereo; |
259 | | DecoderState st[SPEEX_NB_MODES]; |
260 | | |
261 | | AVFloatDSPContext *fdsp; |
262 | | } SpeexContext; |
263 | | |
264 | | static void lsp_unquant_lbr(float *lsp, int order, GetBitContext *gb) |
265 | 135k | { |
266 | 135k | int id; |
267 | | |
268 | 1.49M | for (int i = 0; i < order; i++) |
269 | 1.35M | lsp[i] = LSP_LINEAR(i); |
270 | | |
271 | 135k | id = get_bits(gb, 6); |
272 | 1.49M | for (int i = 0; i < 10; i++) |
273 | 1.35M | lsp[i] += LSP_DIV_256(cdbk_nb[id * 10 + i]); |
274 | | |
275 | 135k | id = get_bits(gb, 6); |
276 | 814k | for (int i = 0; i < 5; i++) |
277 | 678k | lsp[i] += LSP_DIV_512(cdbk_nb_low1[id * 5 + i]); |
278 | | |
279 | 135k | id = get_bits(gb, 6); |
280 | 814k | for (int i = 0; i < 5; i++) |
281 | 678k | lsp[i + 5] += LSP_DIV_512(cdbk_nb_high1[id * 5 + i]); |
282 | 135k | } |
283 | | |
284 | | static void forced_pitch_unquant(float *exc, float *exc_out, int start, int end, |
285 | | float pitch_coef, const void *par, int nsf, |
286 | | int *pitch_val, float *gain_val, GetBitContext *gb, int count_lost, |
287 | | int subframe_offset, float last_pitch_gain, int cdbk_offset) |
288 | 356k | { |
289 | 356k | av_assert0(!isnan(pitch_coef)); |
290 | 356k | pitch_coef = fminf(pitch_coef, .99f); |
291 | 14.6M | for (int i = 0; i < nsf; i++) { |
292 | 14.2M | exc_out[i] = exc[i - start] * pitch_coef; |
293 | 14.2M | exc[i] = exc_out[i]; |
294 | 14.2M | } |
295 | 356k | pitch_val[0] = start; |
296 | 356k | gain_val[0] = gain_val[2] = 0.f; |
297 | 356k | gain_val[1] = pitch_coef; |
298 | 356k | } |
299 | | |
/**
 * Advance the generator state in seed[0] and return one pseudo-random float.
 *
 * The low 23 bits of the new state are OR-ed into the bit pattern
 * 0x3f800000 and reinterpreted as a float; 1.5 is subtracted and the
 * result is multiplied by std.
 */
static inline float speex_rand(float std, uint32_t *seed)
{
    const uint32_t one_bits      = 0x3f800000;
    const uint32_t mantissa_mask = 0x007fffff;
    uint32_t bits;

    seed[0] = 1664525 * seed[0] + 1013904223;
    bits    = one_bits | (mantissa_mask & seed[0]);

    return (av_int2float(bits) - 1.5f) * std;
}
313 | | |
314 | | static void noise_codebook_unquant(float *exc, const void *par, int nsf, |
315 | | GetBitContext *gb, uint32_t *seed) |
316 | 35.2k | { |
317 | 1.44M | for (int i = 0; i < nsf; i++) |
318 | 1.41M | exc[i] = speex_rand(1.f, seed); |
319 | 35.2k | } |
320 | | |
321 | | static void split_cb_shape_sign_unquant(float *exc, const void *par, int nsf, |
322 | | GetBitContext *gb, uint32_t *seed) |
323 | 620k | { |
324 | 620k | int subvect_size, nb_subvect, have_sign, shape_bits; |
325 | 620k | const SplitCodebookParams *params; |
326 | 620k | const signed char *shape_cb; |
327 | 620k | int signs[10], ind[10]; |
328 | | |
329 | 620k | params = par; |
330 | 620k | subvect_size = params->subvect_size; |
331 | 620k | nb_subvect = params->nb_subvect; |
332 | | |
333 | 620k | shape_cb = params->shape_cb; |
334 | 620k | have_sign = params->have_sign; |
335 | 620k | shape_bits = params->shape_bits; |
336 | | |
337 | | /* Decode codewords and gains */ |
338 | 3.01M | for (int i = 0; i < nb_subvect; i++) { |
339 | 2.38M | signs[i] = have_sign ? get_bits1(gb) : 0; |
340 | 2.38M | ind[i] = get_bitsz(gb, shape_bits); |
341 | 2.38M | } |
342 | | /* Compute decoded excitation */ |
343 | 3.01M | for (int i = 0; i < nb_subvect; i++) { |
344 | 2.38M | const float s = signs[i] ? -1.f : 1.f; |
345 | | |
346 | 27.2M | for (int j = 0; j < subvect_size; j++) |
347 | 24.8M | exc[subvect_size * i + j] += s * 0.03125f * shape_cb[ind[i] * subvect_size + j]; |
348 | 2.38M | } |
349 | 620k | } |
350 | | |
/* Field x of the currently active submode; expects a DecoderState pointer named `st` in scope */
#define SUBMODE(x) st->submodes[st->submodeID]->x

/*
 * Collapse three pitch taps g[0..2] into a single gain: |g[1]| plus, for
 * each outer tap, the tap itself when positive or minus half of it otherwise.
 * The argument is parenthesized so that expressions such as `p + 1` expand
 * correctly; note that it is evaluated several times.
 */
#define gain_3tap_to_1tap(g) (FFABS((g)[1]) + ((g)[0] > 0.f ? (g)[0] : -.5f * (g)[0]) + ((g)[2] > 0.f ? (g)[2] : -.5f * (g)[2]))
354 | | |
355 | | static void |
356 | | pitch_unquant_3tap(float *exc, float *exc_out, int start, int end, float pitch_coef, |
357 | | const void *par, int nsf, int *pitch_val, float *gain_val, GetBitContext *gb, |
358 | | int count_lost, int subframe_offset, float last_pitch_gain, int cdbk_offset) |
359 | 256k | { |
360 | 256k | int pitch, gain_index, gain_cdbk_size; |
361 | 256k | const int8_t *gain_cdbk; |
362 | 256k | const LtpParam *params; |
363 | 256k | float gain[3]; |
364 | | |
365 | 256k | params = (const LtpParam *)par; |
366 | 256k | gain_cdbk_size = 1 << params->gain_bits; |
367 | 256k | gain_cdbk = params->gain_cdbk + 4 * gain_cdbk_size * cdbk_offset; |
368 | | |
369 | 256k | pitch = get_bitsz(gb, params->pitch_bits); |
370 | 256k | pitch += start; |
371 | 256k | gain_index = get_bitsz(gb, params->gain_bits); |
372 | 256k | gain[0] = 0.015625f * gain_cdbk[gain_index * 4] + .5f; |
373 | 256k | gain[1] = 0.015625f * gain_cdbk[gain_index * 4 + 1] + .5f; |
374 | 256k | gain[2] = 0.015625f * gain_cdbk[gain_index * 4 + 2] + .5f; |
375 | | |
376 | 256k | if (count_lost && pitch > subframe_offset) { |
377 | 0 | float tmp = count_lost < 4 ? last_pitch_gain : 0.5f * last_pitch_gain; |
378 | 0 | float gain_sum; |
379 | |
|
380 | 0 | tmp = fminf(tmp, .95f); |
381 | 0 | gain_sum = gain_3tap_to_1tap(gain); |
382 | |
|
383 | 0 | if (gain_sum > tmp && gain_sum > 0.f) { |
384 | 0 | float fact = tmp / gain_sum; |
385 | 0 | for (int i = 0; i < 3; i++) |
386 | 0 | gain[i] *= fact; |
387 | 0 | } |
388 | 0 | } |
389 | | |
390 | 256k | pitch_val[0] = pitch; |
391 | 256k | gain_val[0] = gain[0]; |
392 | 256k | gain_val[1] = gain[1]; |
393 | 256k | gain_val[2] = gain[2]; |
394 | 256k | SPEEX_MEMSET(exc_out, 0, nsf); |
395 | | |
396 | 1.02M | for (int i = 0; i < 3; i++) { |
397 | 768k | int tmp1, tmp3; |
398 | 768k | int pp = pitch + 1 - i; |
399 | 768k | tmp1 = nsf; |
400 | 768k | if (tmp1 > pp) |
401 | 616k | tmp1 = pp; |
402 | 17.7M | for (int j = 0; j < tmp1; j++) |
403 | 17.0M | exc_out[j] += gain[2 - i] * exc[j - pp]; |
404 | 768k | tmp3 = nsf; |
405 | 768k | if (tmp3 > pp + pitch) |
406 | 582k | tmp3 = pp + pitch; |
407 | 11.1M | for (int j = tmp1; j < tmp3; j++) |
408 | 10.3M | exc_out[j] += gain[2 - i] * exc[j - pp - pitch]; |
409 | 768k | } |
410 | 256k | } |
411 | | |
412 | | static void lsp_unquant_nb(float *lsp, int order, GetBitContext *gb) |
413 | 17.3k | { |
414 | 17.3k | int id; |
415 | | |
416 | 190k | for (int i = 0; i < order; i++) |
417 | 173k | lsp[i] = LSP_LINEAR(i); |
418 | | |
419 | 17.3k | id = get_bits(gb, 6); |
420 | 190k | for (int i = 0; i < 10; i++) |
421 | 173k | lsp[i] += LSP_DIV_256(cdbk_nb[id * 10 + i]); |
422 | | |
423 | 17.3k | id = get_bits(gb, 6); |
424 | 103k | for (int i = 0; i < 5; i++) |
425 | 86.6k | lsp[i] += LSP_DIV_512(cdbk_nb_low1[id * 5 + i]); |
426 | | |
427 | 17.3k | id = get_bits(gb, 6); |
428 | 103k | for (int i = 0; i < 5; i++) |
429 | 86.6k | lsp[i] += LSP_DIV_1024(cdbk_nb_low2[id * 5 + i]); |
430 | | |
431 | 17.3k | id = get_bits(gb, 6); |
432 | 103k | for (int i = 0; i < 5; i++) |
433 | 86.6k | lsp[i + 5] += LSP_DIV_512(cdbk_nb_high1[id * 5 + i]); |
434 | | |
435 | 17.3k | id = get_bits(gb, 6); |
436 | 103k | for (int i = 0; i < 5; i++) |
437 | 86.6k | lsp[i + 5] += LSP_DIV_1024(cdbk_nb_high2[id * 5 + i]); |
438 | 17.3k | } |
439 | | |
440 | | static void lsp_unquant_high(float *lsp, int order, GetBitContext *gb) |
441 | 11.8k | { |
442 | 11.8k | int id; |
443 | | |
444 | 106k | for (int i = 0; i < order; i++) |
445 | 94.5k | lsp[i] = LSP_LINEAR_HIGH(i); |
446 | | |
447 | 11.8k | id = get_bits(gb, 6); |
448 | 106k | for (int i = 0; i < order; i++) |
449 | 94.5k | lsp[i] += LSP_DIV_256(high_lsp_cdbk[id * order + i]); |
450 | | |
451 | 11.8k | id = get_bits(gb, 6); |
452 | 106k | for (int i = 0; i < order; i++) |
453 | 94.5k | lsp[i] += LSP_DIV_512(high_lsp_cdbk2[id * order + i]); |
454 | 11.8k | } |
455 | | |
456 | | /* 2150 bps "vocoder-like" mode for comfort noise */ |
457 | | static const SpeexSubmode nb_submode1 = { |
458 | | 0, 1, 0, 0, lsp_unquant_lbr, forced_pitch_unquant, NULL, |
459 | | noise_codebook_unquant, NULL, -1.f |
460 | | }; |
461 | | |
462 | | /* 5.95 kbps very low bit-rate mode */ |
463 | | static const SpeexSubmode nb_submode2 = { |
464 | | 0, 0, 0, 0, lsp_unquant_lbr, pitch_unquant_3tap, <p_params_vlbr, |
465 | | split_cb_shape_sign_unquant, &split_cb_nb_vlbr, .6f |
466 | | }; |
467 | | |
468 | | /* 8 kbps low bit-rate mode */ |
469 | | static const SpeexSubmode nb_submode3 = { |
470 | | -1, 0, 1, 0, lsp_unquant_lbr, pitch_unquant_3tap, <p_params_lbr, |
471 | | split_cb_shape_sign_unquant, &split_cb_nb_lbr, .55f |
472 | | }; |
473 | | |
474 | | /* 11 kbps medium bit-rate mode */ |
475 | | static const SpeexSubmode nb_submode4 = { |
476 | | -1, 0, 1, 0, lsp_unquant_lbr, pitch_unquant_3tap, <p_params_med, |
477 | | split_cb_shape_sign_unquant, &split_cb_nb_med, .45f |
478 | | }; |
479 | | |
480 | | /* 15 kbps high bit-rate mode */ |
481 | | static const SpeexSubmode nb_submode5 = { |
482 | | -1, 0, 3, 0, lsp_unquant_nb, pitch_unquant_3tap, <p_params_nb, |
483 | | split_cb_shape_sign_unquant, &split_cb_nb, .25f |
484 | | }; |
485 | | |
486 | | /* 18.2 high bit-rate mode */ |
487 | | static const SpeexSubmode nb_submode6 = { |
488 | | -1, 0, 3, 0, lsp_unquant_nb, pitch_unquant_3tap, <p_params_nb, |
489 | | split_cb_shape_sign_unquant, &split_cb_sb, .15f |
490 | | }; |
491 | | |
492 | | /* 24.6 kbps high bit-rate mode */ |
493 | | static const SpeexSubmode nb_submode7 = { |
494 | | -1, 0, 3, 1, lsp_unquant_nb, pitch_unquant_3tap, <p_params_nb, |
495 | | split_cb_shape_sign_unquant, &split_cb_nb, 0.05f |
496 | | }; |
497 | | |
498 | | /* 3.95 kbps very low bit-rate mode */ |
499 | | static const SpeexSubmode nb_submode8 = { |
500 | | 0, 1, 0, 0, lsp_unquant_lbr, forced_pitch_unquant, NULL, |
501 | | split_cb_shape_sign_unquant, &split_cb_nb_ulbr, .5f |
502 | | }; |
503 | | |
504 | | static const SpeexSubmode wb_submode1 = { |
505 | | 0, 0, 1, 0, lsp_unquant_high, NULL, NULL, |
506 | | NULL, NULL, -1.f |
507 | | }; |
508 | | |
509 | | static const SpeexSubmode wb_submode2 = { |
510 | | 0, 0, 1, 0, lsp_unquant_high, NULL, NULL, |
511 | | split_cb_shape_sign_unquant, &split_cb_high_lbr, -1.f |
512 | | }; |
513 | | |
514 | | static const SpeexSubmode wb_submode3 = { |
515 | | 0, 0, 1, 0, lsp_unquant_high, NULL, NULL, |
516 | | split_cb_shape_sign_unquant, &split_cb_high, -1.f |
517 | | }; |
518 | | |
519 | | static const SpeexSubmode wb_submode4 = { |
520 | | 0, 0, 1, 1, lsp_unquant_high, NULL, NULL, |
521 | | split_cb_shape_sign_unquant, &split_cb_high, -1.f |
522 | | }; |
523 | | |
524 | | static int nb_decode(AVCodecContext *, void *, GetBitContext *, float *, int packets_left); |
525 | | static int sb_decode(AVCodecContext *, void *, GetBitContext *, float *, int packets_left); |
526 | | |
527 | | static const SpeexMode speex_modes[SPEEX_NB_MODES] = { |
528 | | { |
529 | | .modeID = 0, |
530 | | .decode = nb_decode, |
531 | | .frame_size = NB_FRAME_SIZE, |
532 | | .subframe_size = NB_SUBFRAME_SIZE, |
533 | | .lpc_size = NB_ORDER, |
534 | | .submodes = { |
535 | | NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, |
536 | | &nb_submode5, &nb_submode6, &nb_submode7, &nb_submode8 |
537 | | }, |
538 | | .default_submode = 5, |
539 | | }, |
540 | | { |
541 | | .modeID = 1, |
542 | | .decode = sb_decode, |
543 | | .frame_size = NB_FRAME_SIZE, |
544 | | .subframe_size = NB_SUBFRAME_SIZE, |
545 | | .lpc_size = 8, |
546 | | .folding_gain = 0.9f, |
547 | | .submodes = { |
548 | | NULL, &wb_submode1, &wb_submode2, &wb_submode3, &wb_submode4 |
549 | | }, |
550 | | .default_submode = 3, |
551 | | }, |
552 | | { |
553 | | .modeID = 2, |
554 | | .decode = sb_decode, |
555 | | .frame_size = 320, |
556 | | .subframe_size = 80, |
557 | | .lpc_size = 8, |
558 | | .folding_gain = 0.7f, |
559 | | .submodes = { |
560 | | NULL, &wb_submode1 |
561 | | }, |
562 | | .default_submode = 1, |
563 | | }, |
564 | | }; |
565 | | |
/**
 * Return sqrt(0.1 + mean of x[i]^2) over len samples.
 * The 0.1 bias keeps the result strictly positive.
 * len must be positive; this is enforced with av_assert0().
 */
static float compute_rms(const float *x, int len)
{
    float energy = 0.f;
    int n = 0;

    while (n < len) {
        energy += x[n] * x[n];
        n++;
    }

    av_assert0(len > 0);

    return sqrtf(.1f + energy / len);
}
576 | | |
/**
 * Scale LPC coefficient i by gamma^(i+1): lpc_out[i] = gamma^(i+1) * lpc_in[i].
 * The power is built up by repeated multiplication.
 */
static void bw_lpc(float gamma, const float *lpc_in,
                   float *lpc_out, int order)
{
    float weight = gamma;
    int k;

    for (k = 0; k < order; k++, weight *= gamma)
        lpc_out[k] = weight * lpc_in[k];
}
587 | | |
/**
 * All-pole (IIR) filter with persistent state.
 *
 * For each of the N samples: y[i] = x[i] + mem[0]; the ord-entry state
 * vector is then shifted down by one while den[j] * y[i] is subtracted from
 * each entry. mem is updated in place so a following call continues the
 * same filter. x[i] is read before y[i] is written.
 */
static void iir_mem(const float *x, const float *den,
                    float *y, int N, int ord, float *mem)
{
    const int last = ord - 1;
    int n, k;

    for (n = 0; n < N; n++) {
        const float out = x[n] + mem[0];
        const float neg = -out;

        for (k = 0; k < last; k++) {
            mem[k] = mem[k + 1] + den[k] * neg;
        }
        mem[last] = den[last] * neg;

        y[n] = out;
    }
}
600 | | |
/**
 * Second-order filter with two state variables kept in mem[0..1].
 *
 * wide selects one of two fixed coefficient sets (index 0 or 1).
 * x[i] is read before y[i] is written.
 */
static void highpass(const float *x, float *y, int len, float *mem, int wide)
{
    static const float Pcoef[2][3] = {
        { 1.00000f, -1.92683f, 0.93071f },
        { 1.00000f, -1.97226f, 0.97332f },
    };
    static const float Zcoef[2][3] = {
        { 0.96446f, -1.92879f, 0.96446f },
        { 0.98645f, -1.97277f, 0.98645f },
    };
    const float *poles = Pcoef[wide];
    const float *zeros = Zcoef[wide];
    int n;

    for (n = 0; n < len; n++) {
        const float in  = x[n];
        const float out = zeros[0] * in + mem[0];

        mem[0] = mem[1] + zeros[1] * in + -poles[1] * out;
        mem[1] = zeros[2] * in + -poles[2] * out;
        y[n]   = out;
    }
}
616 | | |
/* Median of three values; each argument may be evaluated more than once */
#define median3(a, b, c)                                   \
    ((a) < (b)                                             \
         ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a)))     \
         : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
620 | | |
621 | | static int speex_std_stereo(GetBitContext *gb, void *state, void *data) |
622 | 1.34k | { |
623 | 1.34k | StereoState *stereo = data; |
624 | 1.34k | float sign = get_bits1(gb) ? -1.f : 1.f; |
625 | | |
626 | 1.34k | stereo->balance = exp(sign * .25f * get_bits(gb, 5)); |
627 | 1.34k | stereo->e_ratio = e_ratio_quant[get_bits(gb, 2)]; |
628 | | |
629 | 1.34k | return 0; |
630 | 1.34k | } |
631 | | |
632 | | static int speex_inband_handler(GetBitContext *gb, void *state, StereoState *stereo) |
633 | 14.6k | { |
634 | 14.6k | int id = get_bits(gb, 4); |
635 | | |
636 | 14.6k | if (id == SPEEX_INBAND_STEREO) { |
637 | 1.34k | return speex_std_stereo(gb, state, stereo); |
638 | 13.3k | } else { |
639 | 13.3k | int adv; |
640 | | |
641 | 13.3k | if (id < 2) |
642 | 8.77k | adv = 1; |
643 | 4.55k | else if (id < 8) |
644 | 720 | adv = 4; |
645 | 3.83k | else if (id < 10) |
646 | 877 | adv = 8; |
647 | 2.95k | else if (id < 12) |
648 | 1.80k | adv = 16; |
649 | 1.14k | else if (id < 14) |
650 | 556 | adv = 32; |
651 | 593 | else |
652 | 593 | adv = 64; |
653 | 13.3k | skip_bits_long(gb, adv); |
654 | 13.3k | } |
655 | 13.3k | return 0; |
656 | 14.6k | } |
657 | | |
/**
 * Clean up a vector in place.
 *
 * Entries that are not normal floating-point numbers (zero, subnormal,
 * infinite, NaN) or whose magnitude is below 1e-8 become 0; all others are
 * clipped to [min_val, max_val].
 */
static void sanitize_values(float *vec, float min_val, float max_val, int len)
{
    for (int n = 0; n < len; n++) {
        const float v = vec[n];

        if (isnormal(v) && fabsf(v) >= 1e-8f)
            vec[n] = av_clipf(v, min_val, max_val);
        else
            vec[n] = 0.f;
    }
}
667 | | |
/** Write scale * x[i] to y[i] for i in [0, len). */
static void signal_mul(const float *x, float *y, float scale, int len)
{
    int n = 0;

    while (n < len) {
        y[n] = scale * x[n];
        n++;
    }
}
673 | | |
/**
 * Dot product of x and y, accumulated in groups of eight samples.
 *
 * Each group is summed into its own partial before being added to the
 * total. Whole groups are always processed, so when len is not a multiple
 * of 8 the last group reads past index len - 1.
 */
static float inner_prod(const float *x, const float *y, int len)
{
    float total = 0.f;

    for (int base = 0; base < len; base += 8) {
        float group = 0.f;

        for (int k = 0; k < 8; k++)
            group += x[base + k] * y[base + k];

        total += group;
    }

    return total;
}
693 | | |
694 | | static int interp_pitch(const float *exc, float *interp, int pitch, int len) |
695 | 577k | { |
696 | 577k | float corr[4][7], maxcorr; |
697 | 577k | int maxi, maxj; |
698 | | |
699 | 4.61M | for (int i = 0; i < 7; i++) |
700 | 4.03M | corr[0][i] = inner_prod(exc, exc - pitch - 3 + i, len); |
701 | 2.30M | for (int i = 0; i < 3; i++) { |
702 | 13.8M | for (int j = 0; j < 7; j++) { |
703 | 12.1M | int i1, i2; |
704 | 12.1M | float tmp = 0.f; |
705 | | |
706 | 12.1M | i1 = 3 - j; |
707 | 12.1M | if (i1 < 0) |
708 | 5.19M | i1 = 0; |
709 | 12.1M | i2 = 10 - j; |
710 | 12.1M | if (i2 > 7) |
711 | 5.19M | i2 = 7; |
712 | 76.1M | for (int k = i1; k < i2; k++) |
713 | 64.0M | tmp += shift_filt[i][k] * corr[0][j + k - 3]; |
714 | 12.1M | corr[i + 1][j] = tmp; |
715 | 12.1M | } |
716 | 1.73M | } |
717 | 577k | maxi = maxj = 0; |
718 | 577k | maxcorr = corr[0][0]; |
719 | 2.88M | for (int i = 0; i < 4; i++) { |
720 | 18.4M | for (int j = 0; j < 7; j++) { |
721 | 16.1M | if (corr[i][j] > maxcorr) { |
722 | 1.39M | maxcorr = corr[i][j]; |
723 | 1.39M | maxi = i; |
724 | 1.39M | maxj = j; |
725 | 1.39M | } |
726 | 16.1M | } |
727 | 2.30M | } |
728 | 46.7M | for (int i = 0; i < len; i++) { |
729 | 46.1M | float tmp = 0.f; |
730 | 46.1M | if (maxi > 0.f) { |
731 | 99.4M | for (int k = 0; k < 7; k++) |
732 | 87.0M | tmp += exc[i - (pitch - maxj + 3) + k - 3] * shift_filt[maxi - 1][k]; |
733 | 33.7M | } else { |
734 | 33.7M | tmp = exc[i - (pitch - maxj + 3)]; |
735 | 33.7M | } |
736 | 46.1M | interp[i] = tmp; |
737 | 46.1M | } |
738 | 577k | return pitch - maxj + 3; |
739 | 577k | } |
740 | | |
741 | | static void multicomb(const float *exc, float *new_exc, float *ak, int p, int nsf, |
742 | | int pitch, int max_pitch, float comb_gain) |
743 | 288k | { |
744 | 288k | float old_ener, new_ener; |
745 | 288k | float iexc0_mag, iexc1_mag, exc_mag; |
746 | 288k | float iexc[4 * NB_SUBFRAME_SIZE]; |
747 | 288k | float corr0, corr1, gain0, gain1; |
748 | 288k | float pgain1, pgain2; |
749 | 288k | float c1, c2, g1, g2; |
750 | 288k | float ngain, gg1, gg2; |
751 | 288k | int corr_pitch = pitch; |
752 | | |
753 | 288k | interp_pitch(exc, iexc, corr_pitch, 80); |
754 | 288k | if (corr_pitch > max_pitch) |
755 | 49.7k | interp_pitch(exc, iexc + nsf, 2 * corr_pitch, 80); |
756 | 238k | else |
757 | 238k | interp_pitch(exc, iexc + nsf, -corr_pitch, 80); |
758 | | |
759 | 288k | iexc0_mag = sqrtf(1000.f + inner_prod(iexc, iexc, nsf)); |
760 | 288k | iexc1_mag = sqrtf(1000.f + inner_prod(iexc + nsf, iexc + nsf, nsf)); |
761 | 288k | exc_mag = sqrtf(1.f + inner_prod(exc, exc, nsf)); |
762 | 288k | corr0 = inner_prod(iexc, exc, nsf); |
763 | 288k | corr1 = inner_prod(iexc + nsf, exc, nsf); |
764 | 288k | if (corr0 > iexc0_mag * exc_mag) |
765 | 268 | pgain1 = 1.f; |
766 | 288k | else |
767 | 288k | pgain1 = (corr0 / exc_mag) / iexc0_mag; |
768 | 288k | if (corr1 > iexc1_mag * exc_mag) |
769 | 273 | pgain2 = 1.f; |
770 | 288k | else |
771 | 288k | pgain2 = (corr1 / exc_mag) / iexc1_mag; |
772 | 288k | gg1 = exc_mag / iexc0_mag; |
773 | 288k | gg2 = exc_mag / iexc1_mag; |
774 | 288k | if (comb_gain > 0.f) { |
775 | 288k | c1 = .4f * comb_gain + .07f; |
776 | 288k | c2 = .5f + 1.72f * (c1 - .07f); |
777 | 288k | } else { |
778 | 0 | c1 = c2 = 0.f; |
779 | 0 | } |
780 | 288k | g1 = 1.f - c2 * pgain1 * pgain1; |
781 | 288k | g2 = 1.f - c2 * pgain2 * pgain2; |
782 | 288k | g1 = fmaxf(g1, c1); |
783 | 288k | g2 = fmaxf(g2, c1); |
784 | 288k | g1 = c1 / g1; |
785 | 288k | g2 = c1 / g2; |
786 | | |
787 | 288k | if (corr_pitch > max_pitch) { |
788 | 49.7k | gain0 = .7f * g1 * gg1; |
789 | 49.7k | gain1 = .3f * g2 * gg2; |
790 | 238k | } else { |
791 | 238k | gain0 = .6f * g1 * gg1; |
792 | 238k | gain1 = .6f * g2 * gg2; |
793 | 238k | } |
794 | 23.3M | for (int i = 0; i < nsf; i++) |
795 | 23.0M | new_exc[i] = exc[i] + (gain0 * iexc[i]) + (gain1 * iexc[i + nsf]); |
796 | 288k | new_ener = compute_rms(new_exc, nsf); |
797 | 288k | old_ener = compute_rms(exc, nsf); |
798 | | |
799 | 288k | old_ener = fmaxf(old_ener, 1.f); |
800 | 288k | new_ener = fmaxf(new_ener, 1.f); |
801 | 288k | old_ener = fminf(old_ener, new_ener); |
802 | 288k | ngain = old_ener / new_ener; |
803 | | |
804 | 23.3M | for (int i = 0; i < nsf; i++) |
805 | 23.0M | new_exc[i] *= ngain; |
806 | 288k | } |
807 | | |
808 | | static void lsp_interpolate(const float *old_lsp, const float *new_lsp, |
809 | | float *lsp, int len, int subframe, |
810 | | int nb_subframes, float margin) |
811 | 659k | { |
812 | 659k | const float tmp = (1.f + subframe) / nb_subframes; |
813 | | |
814 | 7.16M | for (int i = 0; i < len; i++) { |
815 | 6.50M | lsp[i] = (1.f - tmp) * old_lsp[i] + tmp * new_lsp[i]; |
816 | 6.50M | lsp[i] = av_clipf(lsp[i], margin, M_PI - margin); |
817 | 6.50M | } |
818 | 5.84M | for (int i = 1; i < len - 1; i++) { |
819 | 5.18M | lsp[i] = fmaxf(lsp[i], lsp[i - 1] + margin); |
820 | 5.18M | if (lsp[i] > lsp[i + 1] - margin) |
821 | 3.54k | lsp[i] = .5f * (lsp[i] + lsp[i + 1] - margin); |
822 | 5.18M | } |
823 | 659k | } |
824 | | |
825 | | static void lsp_to_lpc(const float *freq, float *ak, int lpcrdr) |
826 | 659k | { |
827 | 659k | float xout1, xout2, xin1, xin2; |
828 | 659k | float *pw, *n0; |
829 | 659k | float Wp[4 * NB_ORDER + 2] = { 0 }; |
830 | 659k | float x_freq[NB_ORDER]; |
831 | 659k | const int m = lpcrdr >> 1; |
832 | | |
833 | 659k | pw = Wp; |
834 | | |
835 | 659k | xin1 = xin2 = 1.f; |
836 | | |
837 | 7.16M | for (int i = 0; i < lpcrdr; i++) |
838 | 6.50M | x_freq[i] = -cosf(freq[i]); |
839 | | |
840 | | /* reconstruct P(z) and Q(z) by cascading second order |
841 | | * polynomials in form 1 - 2xz(-1) +z(-2), where x is the |
842 | | * LSP coefficient |
843 | | */ |
844 | 7.82M | for (int j = 0; j <= lpcrdr; j++) { |
845 | 7.16M | int i2 = 0; |
846 | 42.5M | for (int i = 0; i < m; i++, i2 += 2) { |
847 | 35.3M | n0 = pw + (i * 4); |
848 | 35.3M | xout1 = xin1 + 2.f * x_freq[i2 ] * n0[0] + n0[1]; |
849 | 35.3M | xout2 = xin2 + 2.f * x_freq[i2 + 1] * n0[2] + n0[3]; |
850 | 35.3M | n0[1] = n0[0]; |
851 | 35.3M | n0[3] = n0[2]; |
852 | 35.3M | n0[0] = xin1; |
853 | 35.3M | n0[2] = xin2; |
854 | 35.3M | xin1 = xout1; |
855 | 35.3M | xin2 = xout2; |
856 | 35.3M | } |
857 | 7.16M | xout1 = xin1 + n0[4]; |
858 | 7.16M | xout2 = xin2 - n0[5]; |
859 | 7.16M | if (j > 0) |
860 | 6.50M | ak[j - 1] = (xout1 + xout2) * 0.5f; |
861 | 7.16M | n0[4] = xin1; |
862 | 7.16M | n0[5] = xin2; |
863 | | |
864 | 7.16M | xin1 = 0.f; |
865 | 7.16M | xin2 = 0.f; |
866 | 7.16M | } |
867 | 659k | } |
868 | | |
869 | | static int nb_decode(AVCodecContext *avctx, void *ptr_st, |
870 | | GetBitContext *gb, float *out, int packets_left) |
871 | 1.25M | { |
872 | 1.25M | DecoderState *st = ptr_st; |
873 | 1.25M | float ol_gain = 0, ol_pitch_coef = 0, best_pitch_gain = 0, pitch_average = 0; |
874 | 1.25M | int m, pitch, wideband, ol_pitch = 0, best_pitch = 40; |
875 | 1.25M | SpeexContext *s = avctx->priv_data; |
876 | 1.25M | float innov[NB_SUBFRAME_SIZE]; |
877 | 1.25M | float exc32[NB_SUBFRAME_SIZE]; |
878 | 1.25M | float interp_qlsp[NB_ORDER]; |
879 | 1.25M | float qlsp[NB_ORDER]; |
880 | 1.25M | float ak[NB_ORDER]; |
881 | 1.25M | float pitch_gain[3] = { 0 }; |
882 | | |
883 | 1.25M | st->exc = st->exc_buf + 2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 6; |
884 | | |
885 | 1.25M | if (st->encode_submode) { |
886 | 1.26M | do { /* Search for next narrowband block (handle requests, skip wideband blocks) */ |
887 | 1.26M | if (get_bits_left(gb) < 5) |
888 | 10.5k | return AVERROR_INVALIDDATA; |
889 | 1.25M | wideband = get_bits1(gb); |
890 | 1.25M | if (wideband) /* Skip wideband block (for compatibility) */ { |
891 | 93.4k | int submode, advance; |
892 | | |
893 | 93.4k | submode = get_bits(gb, SB_SUBMODE_BITS); |
894 | 93.4k | advance = wb_skip_table[submode]; |
895 | 93.4k | advance -= SB_SUBMODE_BITS + 1; |
896 | 93.4k | if (advance < 0) |
897 | 21.8k | return AVERROR_INVALIDDATA; |
898 | 71.5k | skip_bits_long(gb, advance); |
899 | | |
900 | 71.5k | if (get_bits_left(gb) < 5) |
901 | 66.1k | return AVERROR_INVALIDDATA; |
902 | 5.45k | wideband = get_bits1(gb); |
903 | 5.45k | if (wideband) { |
904 | 2.48k | submode = get_bits(gb, SB_SUBMODE_BITS); |
905 | 2.48k | advance = wb_skip_table[submode]; |
906 | 2.48k | advance -= SB_SUBMODE_BITS + 1; |
907 | 2.48k | if (advance < 0) |
908 | 534 | return AVERROR_INVALIDDATA; |
909 | 1.95k | skip_bits_long(gb, advance); |
910 | 1.95k | wideband = get_bits1(gb); |
911 | 1.95k | if (wideband) { |
912 | 585 | av_log(avctx, AV_LOG_ERROR, "more than two wideband layers found\n"); |
913 | 585 | return AVERROR_INVALIDDATA; |
914 | 585 | } |
915 | 1.95k | } |
916 | 5.45k | } |
917 | 1.16M | if (get_bits_left(gb) < 4) |
918 | 814 | return AVERROR_INVALIDDATA; |
919 | 1.16M | m = get_bits(gb, 4); |
920 | 1.16M | if (m == 15) /* We found a terminator */ { |
921 | 1.35k | return AVERROR_INVALIDDATA; |
922 | 1.16M | } else if (m == 14) /* Speex in-band request */ { |
923 | 14.6k | int ret = speex_inband_handler(gb, st, &s->stereo); |
924 | 14.6k | if (ret) |
925 | 0 | return ret; |
926 | 1.15M | } else if (m == 13) /* User in-band request */ { |
927 | 2.46k | int ret = speex_default_user_handler(gb, st, NULL); |
928 | 2.46k | if (ret) |
929 | 0 | return ret; |
930 | 1.14M | } else if (m > 8) /* Invalid mode */ { |
931 | 14.5k | return AVERROR_INVALIDDATA; |
932 | 14.5k | } |
933 | 1.16M | } while (m > 8); |
934 | | |
935 | 1.13M | st->submodeID = m; /* Get the sub-mode that was used */ |
936 | 1.13M | } |
937 | | |
938 | | /* Shift all buffers by one frame */ |
939 | 1.13M | memmove(st->exc_buf, st->exc_buf + NB_FRAME_SIZE, (2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 12) * sizeof(float)); |
940 | | |
941 | | /* If null mode (no transmission), just set a couple things to zero */ |
942 | 1.13M | if (st->submodes[st->submodeID] == NULL) { |
943 | 980k | float lpc[NB_ORDER]; |
944 | 980k | float innov_gain = 0.f; |
945 | | |
946 | 980k | bw_lpc(0.93f, st->interp_qlpc, lpc, NB_ORDER); |
947 | 980k | innov_gain = compute_rms(st->exc, NB_FRAME_SIZE); |
948 | 157M | for (int i = 0; i < NB_FRAME_SIZE; i++) |
949 | 156M | st->exc[i] = speex_rand(innov_gain, &st->seed); |
950 | | |
951 | | /* Final signal synthesis from excitation */ |
952 | 980k | iir_mem(st->exc, lpc, out, NB_FRAME_SIZE, NB_ORDER, st->mem_sp); |
953 | 980k | st->count_lost = 0; |
954 | | |
955 | 980k | return 0; |
956 | 980k | } |
957 | | |
958 | | /* Unquantize LSPs */ |
959 | 153k | SUBMODE(lsp_unquant)(qlsp, NB_ORDER, gb); |
960 | | |
961 | | /* Damp memory if a frame was lost and the LSP changed too much */ |
962 | 153k | if (st->count_lost) { |
963 | 0 | float fact, lsp_dist = 0; |
964 | |
|
965 | 0 | for (int i = 0; i < NB_ORDER; i++) |
966 | 0 | lsp_dist = lsp_dist + FFABS(st->old_qlsp[i] - qlsp[i]); |
967 | 0 | fact = .6f * exp(-.2f * lsp_dist); |
968 | 0 | for (int i = 0; i < NB_ORDER; i++) |
969 | 0 | st->mem_sp[i] = fact * st->mem_sp[i]; |
970 | 0 | } |
971 | | |
972 | | /* Handle first frame and lost-packet case */ |
973 | 153k | if (st->first || st->count_lost) |
974 | 1.21k | memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp)); |
975 | | |
976 | | /* Get open-loop pitch estimation for low bit-rate pitch coding */ |
977 | 153k | if (SUBMODE(lbr_pitch) != -1) |
978 | 92.3k | ol_pitch = NB_PITCH_START + get_bits(gb, 7); |
979 | | |
980 | 153k | if (SUBMODE(forced_pitch_gain)) |
981 | 89.0k | ol_pitch_coef = 0.066667f * get_bits(gb, 4); |
982 | | |
983 | | /* Get global excitation gain */ |
984 | 153k | ol_gain = expf(get_bits(gb, 5) / 3.5f); |
985 | | |
986 | 153k | if (st->submodeID == 1) |
987 | 8.82k | st->dtx_enabled = get_bits(gb, 4) == 15; |
988 | | |
989 | 153k | if (st->submodeID > 1) |
990 | 144k | st->dtx_enabled = 0; |
991 | | |
992 | 765k | for (int sub = 0; sub < NB_NB_SUBFRAMES; sub++) { /* Loop on subframes */ |
993 | 612k | float *exc, *innov_save = NULL, tmp, ener; |
994 | 612k | int pit_min, pit_max, offset, q_energy; |
995 | | |
996 | 612k | offset = NB_SUBFRAME_SIZE * sub; /* Offset relative to start of frame */ |
997 | 612k | exc = st->exc + offset; /* Excitation */ |
998 | 612k | if (st->innov_save) /* Original signal */ |
999 | 598k | innov_save = st->innov_save + offset; |
1000 | | |
1001 | 612k | SPEEX_MEMSET(exc, 0, NB_SUBFRAME_SIZE); /* Reset excitation */ |
1002 | | |
1003 | | /* Adaptive codebook contribution */ |
1004 | 612k | av_assert0(SUBMODE(ltp_unquant)); |
1005 | | /* Handle pitch constraints if any */ |
1006 | 612k | if (SUBMODE(lbr_pitch) != -1) { |
1007 | 369k | int margin = SUBMODE(lbr_pitch); |
1008 | | |
1009 | 369k | if (margin) { |
1010 | 0 | pit_min = ol_pitch - margin + 1; |
1011 | 0 | pit_min = FFMAX(pit_min, NB_PITCH_START); |
1012 | 0 | pit_max = ol_pitch + margin; |
1013 | 0 | pit_max = FFMIN(pit_max, NB_PITCH_START); |
1014 | 369k | } else { |
1015 | 369k | pit_min = pit_max = ol_pitch; |
1016 | 369k | } |
1017 | 369k | } else { |
1018 | 243k | pit_min = NB_PITCH_START; |
1019 | 243k | pit_max = NB_PITCH_END; |
1020 | 243k | } |
1021 | | |
1022 | 612k | SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(LtpParam), |
1023 | 612k | NB_SUBFRAME_SIZE, &pitch, pitch_gain, gb, st->count_lost, offset, |
1024 | 612k | st->last_pitch_gain, 0); |
1025 | | |
1026 | 612k | sanitize_values(exc32, -32000, 32000, NB_SUBFRAME_SIZE); |
1027 | | |
1028 | 612k | tmp = gain_3tap_to_1tap(pitch_gain); |
1029 | | |
1030 | 612k | pitch_average += tmp; |
1031 | 612k | if ((tmp > best_pitch_gain && |
1032 | 59.3k | FFABS(2 * best_pitch - pitch) >= 3 && |
1033 | 58.4k | FFABS(3 * best_pitch - pitch) >= 4 && |
1034 | 56.7k | FFABS(4 * best_pitch - pitch) >= 5) || |
1035 | 556k | (tmp > .6f * best_pitch_gain && |
1036 | 81.7k | (FFABS(best_pitch - 2 * pitch) < 3 || |
1037 | 81.0k | FFABS(best_pitch - 3 * pitch) < 4 || |
1038 | 80.7k | FFABS(best_pitch - 4 * pitch) < 5)) || |
1039 | 554k | ((.67f * tmp) > best_pitch_gain && |
1040 | 3.40k | (FFABS(2 * best_pitch - pitch) < 3 || |
1041 | 2.47k | FFABS(3 * best_pitch - pitch) < 4 || |
1042 | 60.9k | FFABS(4 * best_pitch - pitch) < 5))) { |
1043 | 60.9k | best_pitch = pitch; |
1044 | 60.9k | if (tmp > best_pitch_gain) |
1045 | 59.2k | best_pitch_gain = tmp; |
1046 | 60.9k | } |
1047 | | |
1048 | 612k | memset(innov, 0, sizeof(innov)); |
1049 | | |
1050 | | /* Decode sub-frame gain correction */ |
1051 | 612k | if (SUBMODE(have_subframe_gain) == 3) { |
1052 | 69.3k | q_energy = get_bits(gb, 3); |
1053 | 69.3k | ener = exc_gain_quant_scal3[q_energy] * ol_gain; |
1054 | 543k | } else if (SUBMODE(have_subframe_gain) == 1) { |
1055 | 173k | q_energy = get_bits1(gb); |
1056 | 173k | ener = exc_gain_quant_scal1[q_energy] * ol_gain; |
1057 | 369k | } else { |
1058 | 369k | ener = ol_gain; |
1059 | 369k | } |
1060 | | |
1061 | 612k | av_assert0(SUBMODE(innovation_unquant)); |
1062 | | /* Fixed codebook contribution */ |
1063 | 612k | SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), NB_SUBFRAME_SIZE, gb, &st->seed); |
1064 | | /* De-normalize innovation and update excitation */ |
1065 | | |
1066 | 612k | signal_mul(innov, innov, ener, NB_SUBFRAME_SIZE); |
1067 | | |
1068 | | /* Decode second codebook (only for some modes) */ |
1069 | 612k | if (SUBMODE(double_codebook)) { |
1070 | 24.6k | float innov2[NB_SUBFRAME_SIZE] = { 0 }; |
1071 | | |
1072 | 24.6k | SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), NB_SUBFRAME_SIZE, gb, &st->seed); |
1073 | 24.6k | signal_mul(innov2, innov2, 0.454545f * ener, NB_SUBFRAME_SIZE); |
1074 | 1.00M | for (int i = 0; i < NB_SUBFRAME_SIZE; i++) |
1075 | 984k | innov[i] += innov2[i]; |
1076 | 24.6k | } |
1077 | 25.1M | for (int i = 0; i < NB_SUBFRAME_SIZE; i++) |
1078 | 24.4M | exc[i] = exc32[i] + innov[i]; |
1079 | 612k | if (innov_save) |
1080 | 598k | memcpy(innov_save, innov, sizeof(innov)); |
1081 | | |
1082 | | /* Vocoder mode */ |
1083 | 612k | if (st->submodeID == 1) { |
1084 | 35.2k | float g = ol_pitch_coef; |
1085 | | |
1086 | 35.2k | g = av_clipf(1.5f * (g - .2f), 0.f, 1.f); |
1087 | | |
1088 | 35.2k | SPEEX_MEMSET(exc, 0, NB_SUBFRAME_SIZE); |
1089 | 76.6k | while (st->voc_offset < NB_SUBFRAME_SIZE) { |
1090 | 41.3k | if (st->voc_offset >= 0) |
1091 | 41.3k | exc[st->voc_offset] = sqrtf(2.f * ol_pitch) * (g * ol_gain); |
1092 | 41.3k | st->voc_offset += ol_pitch; |
1093 | 41.3k | } |
1094 | 35.2k | st->voc_offset -= NB_SUBFRAME_SIZE; |
1095 | | |
1096 | 1.44M | for (int i = 0; i < NB_SUBFRAME_SIZE; i++) { |
1097 | 1.41M | float exci = exc[i]; |
1098 | 1.41M | exc[i] = (.7f * exc[i] + .3f * st->voc_m1) + ((1.f - .85f * g) * innov[i]) - .15f * g * st->voc_m2; |
1099 | 1.41M | st->voc_m1 = exci; |
1100 | 1.41M | st->voc_m2 = innov[i]; |
1101 | 1.41M | st->voc_mean = .8f * st->voc_mean + .2f * exc[i]; |
1102 | 1.41M | exc[i] -= st->voc_mean; |
1103 | 1.41M | } |
1104 | 35.2k | } |
1105 | 612k | } |
1106 | | |
1107 | 153k | if (st->lpc_enh_enabled && SUBMODE(comb_gain) > 0 && !st->count_lost) { |
1108 | 144k | multicomb(st->exc - NB_SUBFRAME_SIZE, out, st->interp_qlpc, NB_ORDER, |
1109 | 144k | 2 * NB_SUBFRAME_SIZE, best_pitch, 40, SUBMODE(comb_gain)); |
1110 | 144k | multicomb(st->exc + NB_SUBFRAME_SIZE, out + 2 * NB_SUBFRAME_SIZE, |
1111 | 144k | st->interp_qlpc, NB_ORDER, 2 * NB_SUBFRAME_SIZE, best_pitch, 40, |
1112 | 144k | SUBMODE(comb_gain)); |
1113 | 144k | } else { |
1114 | 8.82k | SPEEX_COPY(out, &st->exc[-NB_SUBFRAME_SIZE], NB_FRAME_SIZE); |
1115 | 8.82k | } |
1116 | | |
1117 | | /* If the last packet was lost, re-scale the excitation to obtain the same |
1118 | | * energy as encoded in ol_gain */ |
1119 | 153k | if (st->count_lost) { |
1120 | 0 | float exc_ener, gain; |
1121 | |
|
1122 | 0 | exc_ener = compute_rms(st->exc, NB_FRAME_SIZE); |
1123 | 0 | av_assert0(exc_ener + 1.f > 0.f); |
1124 | 0 | gain = fminf(ol_gain / (exc_ener + 1.f), 2.f); |
1125 | 0 | for (int i = 0; i < NB_FRAME_SIZE; i++) { |
1126 | 0 | st->exc[i] *= gain; |
1127 | 0 | out[i] = st->exc[i - NB_SUBFRAME_SIZE]; |
1128 | 0 | } |
1129 | 0 | } |
1130 | | |
1131 | 765k | for (int sub = 0; sub < NB_NB_SUBFRAMES; sub++) { /* Loop on subframes */ |
1132 | 612k | const int offset = NB_SUBFRAME_SIZE * sub; /* Offset relative to start of frame */ |
1133 | 612k | float pi_g = 1.f, *sp = out + offset; /* Original signal */ |
1134 | | |
1135 | 612k | lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, NB_ORDER, sub, NB_NB_SUBFRAMES, 0.002f); |
1136 | 612k | lsp_to_lpc(interp_qlsp, ak, NB_ORDER); /* Compute interpolated LPCs (unquantized) */ |
1137 | | |
1138 | 3.67M | for (int i = 0; i < NB_ORDER; i += 2) /* Compute analysis filter at w=pi */ |
1139 | 3.06M | pi_g += ak[i + 1] - ak[i]; |
1140 | 612k | st->pi_gain[sub] = pi_g; |
1141 | 612k | st->exc_rms[sub] = compute_rms(st->exc + offset, NB_SUBFRAME_SIZE); |
1142 | | |
1143 | 612k | iir_mem(sp, st->interp_qlpc, sp, NB_SUBFRAME_SIZE, NB_ORDER, st->mem_sp); |
1144 | | |
1145 | 612k | memcpy(st->interp_qlpc, ak, sizeof(st->interp_qlpc)); |
1146 | 612k | } |
1147 | | |
1148 | 153k | if (st->highpass_enabled) |
1149 | 153k | highpass(out, out, NB_FRAME_SIZE, st->mem_hp, st->is_wideband); |
1150 | | |
1151 | | /* Store the LSPs for interpolation in the next frame */ |
1152 | 153k | memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp)); |
1153 | | |
1154 | 153k | st->count_lost = 0; |
1155 | 153k | st->last_pitch = best_pitch; |
1156 | 153k | st->last_pitch_gain = .25f * pitch_average; |
1157 | 153k | st->last_ol_gain = ol_gain; |
1158 | 153k | st->first = 0; |
1159 | | |
1160 | 153k | return 0; |
1161 | 153k | } |
1162 | | |
/**
 * Two-band QMF synthesis: combine a low band and a high band into one
 * signal of N samples.
 *
 * @param x1   first band, N / 2 samples
 * @param x2   second band, N / 2 samples
 * @param a    filter taps, M values
 * @param y    output, N samples; it is written only after both inputs have
 *             been copied, so it may alias x1
 * @param N    output length; N / 2 + M / 2 must not exceed 352
 * @param M    filter length
 * @param mem1 filter memory for x1; only odd indices below M are used
 * @param mem2 filter memory for x2; only odd indices below M are used
 */
static void qmf_synth(const float *x1, const float *x2, const float *a, float *y, int N, int M, float *mem1, float *mem2)
{
    const int half_n = N >> 1, half_m = M >> 1;
    float rev1[352], rev2[352];

    /* Time-reversed inputs followed by the saved history. */
    for (int k = 0; k < half_n; k++) {
        rev1[k] = x1[half_n - 1 - k];
        rev2[k] = x2[half_n - 1 - k];
    }
    for (int k = 0; k < half_m; k++) {
        rev1[half_n + k] = mem1[2 * k + 1];
        rev2[half_n + k] = mem2[2 * k + 1];
    }

    /* Each pass produces four output samples from two input positions. */
    for (int i = 0; i < half_n; i += 2) {
        float acc0 = 0.f, acc1 = 0.f, acc2 = 0.f, acc3 = 0.f;
        float even1 = rev1[half_n - 2 - i];
        float even2 = rev2[half_n - 2 - i];

        for (int j = 0; j < half_m; j += 2) {
            const float *taps = a + 2 * j;
            const float odd1 = rev1[half_n - 1 + j - i];
            const float odd2 = rev2[half_n - 1 + j - i];
            const float odd_diff = odd1 - odd2;
            const float odd_sum  = odd1 + odd2;
            float even_diff = even1 - even2;
            float even_sum  = even1 + even2;

            acc0 += taps[0] * odd_diff;
            acc1 += taps[1] * odd_sum;
            acc2 += taps[0] * even_diff;
            acc3 += taps[1] * even_sum;

            even1     = rev1[half_n + j - i];
            even2     = rev2[half_n + j - i];
            even_diff = even1 - even2;
            even_sum  = even1 + even2;

            acc0 += taps[2] * even_diff;
            acc1 += taps[3] * even_sum;
            acc2 += taps[2] * odd_diff;
            acc3 += taps[3] * odd_sum;
        }

        y[2 * i    ] = 2.f * acc0;
        y[2 * i + 1] = 2.f * acc1;
        y[2 * i + 2] = 2.f * acc2;
        y[2 * i + 3] = 2.f * acc3;
    }

    /* Keep the most recent inputs as history for the next call. */
    for (int k = 0; k < half_m; k++) {
        mem1[2 * k + 1] = rev1[k];
        mem2[2 * k + 1] = rev2[k];
    }
}
1219 | | |
1220 | | static int sb_decode(AVCodecContext *avctx, void *ptr_st, |
1221 | | GetBitContext *gb, float *out, int packets_left) |
1222 | 1.94M | { |
1223 | 1.94M | SpeexContext *s = avctx->priv_data; |
1224 | 1.94M | DecoderState *st = ptr_st; |
1225 | 1.94M | float low_pi_gain[NB_NB_SUBFRAMES]; |
1226 | 1.94M | float low_exc_rms[NB_NB_SUBFRAMES]; |
1227 | 1.94M | float interp_qlsp[NB_ORDER]; |
1228 | 1.94M | int ret, wideband; |
1229 | 1.94M | float *low_innov_alias; |
1230 | 1.94M | float qlsp[NB_ORDER]; |
1231 | 1.94M | float ak[NB_ORDER]; |
1232 | 1.94M | const SpeexMode *mode; |
1233 | | |
1234 | 1.94M | mode = st->mode; |
1235 | | |
1236 | 1.94M | if (st->modeID > 0) { |
1237 | 1.94M | if (packets_left * s->frame_size < 2*st->frame_size) |
1238 | 305 | return AVERROR_INVALIDDATA; |
1239 | 1.94M | low_innov_alias = out + st->frame_size; |
1240 | 1.94M | s->st[st->modeID - 1].innov_save = low_innov_alias; |
1241 | 1.94M | ret = speex_modes[st->modeID - 1].decode(avctx, &s->st[st->modeID - 1], gb, out, packets_left); |
1242 | 1.94M | if (ret < 0) |
1243 | 224k | return ret; |
1244 | 1.94M | } |
1245 | | |
1246 | 1.72M | if (st->encode_submode) { /* Check "wideband bit" */ |
1247 | 1.72M | if (get_bits_left(gb) > 0) |
1248 | 1.50M | wideband = show_bits1(gb); |
1249 | 219k | else |
1250 | 219k | wideband = 0; |
1251 | 1.72M | if (wideband) { /* Regular wideband frame, read the submode */ |
1252 | 18.1k | wideband = get_bits1(gb); |
1253 | 18.1k | st->submodeID = get_bits(gb, SB_SUBMODE_BITS); |
1254 | 1.70M | } else { /* Was a narrowband frame, set "null submode" */ |
1255 | 1.70M | st->submodeID = 0; |
1256 | 1.70M | } |
1257 | 1.72M | if (st->submodeID != 0 && st->submodes[st->submodeID] == NULL) |
1258 | 2.61k | return AVERROR_INVALIDDATA; |
1259 | 1.72M | } |
1260 | | |
1261 | | /* If null mode (no transmission), just set a couple things to zero */ |
1262 | 1.71M | if (st->submodes[st->submodeID] == NULL) { |
1263 | 410M | for (int i = 0; i < st->frame_size; i++) |
1264 | 408M | out[st->frame_size + i] = 1e-15f; |
1265 | | |
1266 | 1.70M | st->first = 1; |
1267 | | |
1268 | | /* Final signal synthesis from excitation */ |
1269 | 1.70M | iir_mem(out + st->frame_size, st->interp_qlpc, out + st->frame_size, st->frame_size, st->lpc_size, st->mem_sp); |
1270 | | |
1271 | 1.70M | qmf_synth(out, out + st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem); |
1272 | | |
1273 | 1.70M | return 0; |
1274 | 1.70M | } |
1275 | | |
1276 | 11.8k | memcpy(low_pi_gain, s->st[st->modeID - 1].pi_gain, sizeof(low_pi_gain)); |
1277 | 11.8k | memcpy(low_exc_rms, s->st[st->modeID - 1].exc_rms, sizeof(low_exc_rms)); |
1278 | | |
1279 | 11.8k | SUBMODE(lsp_unquant)(qlsp, st->lpc_size, gb); |
1280 | | |
1281 | 11.8k | if (st->first) |
1282 | 6.12k | memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp)); |
1283 | | |
1284 | 59.0k | for (int sub = 0; sub < st->nb_subframes; sub++) { |
1285 | 47.2k | float filter_ratio, el, rl, rh; |
1286 | 47.2k | float *innov_save = NULL, *sp; |
1287 | 47.2k | float exc[80]; |
1288 | 47.2k | int offset; |
1289 | | |
1290 | 47.2k | offset = st->subframe_size * sub; |
1291 | 47.2k | sp = out + st->frame_size + offset; |
1292 | | /* Pointer for saving innovation */ |
1293 | 47.2k | if (st->innov_save) { |
1294 | 39.7k | innov_save = st->innov_save + 2 * offset; |
1295 | 39.7k | SPEEX_MEMSET(innov_save, 0, 2 * st->subframe_size); |
1296 | 39.7k | } |
1297 | | |
1298 | 47.2k | av_assert0(st->nb_subframes > 0); |
1299 | 47.2k | lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpc_size, sub, st->nb_subframes, 0.05f); |
1300 | 47.2k | lsp_to_lpc(interp_qlsp, ak, st->lpc_size); |
1301 | | |
1302 | | /* Calculate response ratio between the low and high filter in the middle |
1303 | | of the band (4000 Hz) */ |
1304 | 47.2k | st->pi_gain[sub] = 1.f; |
1305 | 47.2k | rh = 1.f; |
1306 | 236k | for (int i = 0; i < st->lpc_size; i += 2) { |
1307 | 189k | rh += ak[i + 1] - ak[i]; |
1308 | 189k | st->pi_gain[sub] += ak[i] + ak[i + 1]; |
1309 | 189k | } |
1310 | | |
1311 | 47.2k | rl = low_pi_gain[sub]; |
1312 | 47.2k | filter_ratio = (rl + .01f) / (rh + .01f); |
1313 | | |
1314 | 47.2k | SPEEX_MEMSET(exc, 0, st->subframe_size); |
1315 | 47.2k | if (!SUBMODE(innovation_unquant)) { |
1316 | 31.2k | const int x = get_bits(gb, 5); |
1317 | 31.2k | const float g = expf(.125f * (x - 10)) / filter_ratio; |
1318 | | |
1319 | 781k | for (int i = 0; i < st->subframe_size; i += 2) { |
1320 | 750k | exc[i ] = mode->folding_gain * low_innov_alias[offset + i ] * g; |
1321 | 750k | exc[i + 1] = -mode->folding_gain * low_innov_alias[offset + i + 1] * g; |
1322 | 750k | } |
1323 | 31.2k | } else { |
1324 | 15.9k | float gc, scale; |
1325 | | |
1326 | 15.9k | el = low_exc_rms[sub]; |
1327 | 15.9k | gc = 0.87360f * gc_quant_bound[get_bits(gb, 4)]; |
1328 | | |
1329 | 15.9k | if (st->subframe_size == 80) |
1330 | 0 | gc *= M_SQRT2; |
1331 | | |
1332 | 15.9k | scale = (gc * el) / filter_ratio; |
1333 | 15.9k | SUBMODE(innovation_unquant) |
1334 | 15.9k | (exc, SUBMODE(innovation_params), st->subframe_size, |
1335 | 15.9k | gb, &st->seed); |
1336 | | |
1337 | 15.9k | signal_mul(exc, exc, scale, st->subframe_size); |
1338 | 15.9k | if (SUBMODE(double_codebook)) { |
1339 | 3.16k | float innov2[80]; |
1340 | | |
1341 | 3.16k | SPEEX_MEMSET(innov2, 0, st->subframe_size); |
1342 | 3.16k | SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframe_size, gb, &st->seed); |
1343 | 3.16k | signal_mul(innov2, innov2, 0.4f * scale, st->subframe_size); |
1344 | 129k | for (int i = 0; i < st->subframe_size; i++) |
1345 | 126k | exc[i] += innov2[i]; |
1346 | 3.16k | } |
1347 | 15.9k | } |
1348 | | |
1349 | 47.2k | if (st->innov_save) { |
1350 | 1.62M | for (int i = 0; i < st->subframe_size; i++) |
1351 | 1.58M | innov_save[2 * i] = exc[i]; |
1352 | 39.7k | } |
1353 | | |
1354 | 47.2k | iir_mem(st->exc_buf, st->interp_qlpc, sp, st->subframe_size, st->lpc_size, st->mem_sp); |
1355 | 47.2k | memcpy(st->exc_buf, exc, sizeof(exc)); |
1356 | 47.2k | memcpy(st->interp_qlpc, ak, sizeof(st->interp_qlpc)); |
1357 | 47.2k | st->exc_rms[sub] = compute_rms(st->exc_buf, st->subframe_size); |
1358 | 47.2k | } |
1359 | | |
1360 | 11.8k | qmf_synth(out, out + st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem); |
1361 | 11.8k | memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp)); |
1362 | | |
1363 | 11.8k | st->first = 0; |
1364 | | |
1365 | 11.8k | return 0; |
1366 | 11.8k | } |
1367 | | |
1368 | | static int decoder_init(SpeexContext *s, DecoderState *st, const SpeexMode *mode) |
1369 | 3.67k | { |
1370 | 3.67k | st->mode = mode; |
1371 | 3.67k | st->modeID = mode->modeID; |
1372 | | |
1373 | 3.67k | st->first = 1; |
1374 | 3.67k | st->encode_submode = 1; |
1375 | 3.67k | st->is_wideband = st->modeID > 0; |
1376 | 3.67k | st->innov_save = NULL; |
1377 | | |
1378 | 3.67k | st->submodes = mode->submodes; |
1379 | 3.67k | st->submodeID = mode->default_submode; |
1380 | 3.67k | st->subframe_size = mode->subframe_size; |
1381 | 3.67k | st->lpc_size = mode->lpc_size; |
1382 | 3.67k | st->full_frame_size = (1 + (st->modeID > 0)) * mode->frame_size; |
1383 | 3.67k | st->nb_subframes = mode->frame_size / mode->subframe_size; |
1384 | 3.67k | st->frame_size = mode->frame_size; |
1385 | | |
1386 | 3.67k | st->lpc_enh_enabled = 1; |
1387 | | |
1388 | 3.67k | st->last_pitch = 40; |
1389 | 3.67k | st->count_lost = 0; |
1390 | 3.67k | st->seed = 1000; |
1391 | 3.67k | st->last_ol_gain = 0; |
1392 | | |
1393 | 3.67k | st->voc_m1 = st->voc_m2 = st->voc_mean = 0; |
1394 | 3.67k | st->voc_offset = 0; |
1395 | 3.67k | st->dtx_enabled = 0; |
1396 | 3.67k | st->highpass_enabled = mode->modeID == 0; |
1397 | | |
1398 | 3.67k | return 0; |
1399 | 3.67k | } |
1400 | | |
1401 | | static int parse_speex_extradata(AVCodecContext *avctx, |
1402 | | const uint8_t *extradata, int extradata_size) |
1403 | 312 | { |
1404 | 312 | SpeexContext *s = avctx->priv_data; |
1405 | 312 | const uint8_t *buf = av_strnstr(extradata, "Speex ", extradata_size); |
1406 | | |
1407 | 312 | if (!buf) |
1408 | 38 | return AVERROR_INVALIDDATA; |
1409 | | |
1410 | 274 | buf += 28; |
1411 | | |
1412 | 274 | s->version_id = bytestream_get_le32(&buf); |
1413 | 274 | buf += 4; |
1414 | 274 | s->rate = bytestream_get_le32(&buf); |
1415 | 274 | if (s->rate <= 0) |
1416 | 30 | return AVERROR_INVALIDDATA; |
1417 | 244 | s->mode = bytestream_get_le32(&buf); |
1418 | 244 | if (s->mode < 0 || s->mode >= SPEEX_NB_MODES) |
1419 | 71 | return AVERROR_INVALIDDATA; |
1420 | 173 | s->bitstream_version = bytestream_get_le32(&buf); |
1421 | 173 | if (s->bitstream_version != 4) |
1422 | 27 | return AVERROR_INVALIDDATA; |
1423 | 146 | s->nb_channels = bytestream_get_le32(&buf); |
1424 | 146 | if (s->nb_channels <= 0 || s->nb_channels > 2) |
1425 | 31 | return AVERROR_INVALIDDATA; |
1426 | 115 | s->bitrate = bytestream_get_le32(&buf); |
1427 | 115 | s->frame_size = bytestream_get_le32(&buf); |
1428 | 115 | if (s->frame_size < NB_FRAME_SIZE << (s->mode > 1) || |
1429 | 112 | s->frame_size > INT32_MAX >> (s->mode > 1)) |
1430 | 7 | return AVERROR_INVALIDDATA; |
1431 | 108 | s->frame_size = FFMIN(s->frame_size << (s->mode > 1), NB_FRAME_SIZE << s->mode); |
1432 | 108 | s->vbr = bytestream_get_le32(&buf); |
1433 | 108 | s->frames_per_packet = bytestream_get_le32(&buf); |
1434 | 108 | if (s->frames_per_packet <= 0 || |
1435 | 94 | s->frames_per_packet > 64 || |
1436 | 70 | s->frames_per_packet >= INT32_MAX / s->nb_channels / s->frame_size) |
1437 | 38 | return AVERROR_INVALIDDATA; |
1438 | 70 | s->extra_headers = bytestream_get_le32(&buf); |
1439 | | |
1440 | 70 | return 0; |
1441 | 108 | } |
1442 | | |
1443 | | static av_cold int speex_decode_init(AVCodecContext *avctx) |
1444 | 1.64k | { |
1445 | 1.64k | SpeexContext *s = avctx->priv_data; |
1446 | 1.64k | int ret; |
1447 | | |
1448 | 1.64k | s->fdsp = avpriv_float_dsp_alloc(0); |
1449 | 1.64k | if (!s->fdsp) |
1450 | 0 | return AVERROR(ENOMEM); |
1451 | | |
1452 | 1.64k | if (avctx->extradata && avctx->extradata_size >= 80) { |
1453 | 312 | ret = parse_speex_extradata(avctx, avctx->extradata, avctx->extradata_size); |
1454 | 312 | if (ret < 0) |
1455 | 242 | return ret; |
1456 | 1.33k | } else { |
1457 | 1.33k | s->rate = avctx->sample_rate; |
1458 | 1.33k | if (s->rate <= 0) |
1459 | 28 | return AVERROR_INVALIDDATA; |
1460 | | |
1461 | 1.30k | s->nb_channels = avctx->ch_layout.nb_channels; |
1462 | 1.30k | if (s->nb_channels <= 0 || s->nb_channels > 2) |
1463 | 62 | return AVERROR_INVALIDDATA; |
1464 | | |
1465 | 1.24k | switch (s->rate) { |
1466 | 10 | case 8000: s->mode = 0; break; |
1467 | 4 | case 16000: s->mode = 1; break; |
1468 | 1 | case 32000: s->mode = 2; break; |
1469 | 1.22k | default: s->mode = 2; |
1470 | 1.24k | } |
1471 | | |
1472 | 1.24k | s->frames_per_packet = 64; |
1473 | 1.24k | s->frame_size = NB_FRAME_SIZE << s->mode; |
1474 | 1.24k | } |
1475 | | |
1476 | 1.31k | if (avctx->codec_tag == MKTAG('S', 'P', 'X', 'N')) { |
1477 | 66 | int quality; |
1478 | | |
1479 | 66 | if (!avctx->extradata || avctx->extradata && avctx->extradata_size < 47) { |
1480 | 3 | av_log(avctx, AV_LOG_ERROR, "Missing or invalid extradata.\n"); |
1481 | 3 | return AVERROR_INVALIDDATA; |
1482 | 3 | } |
1483 | | |
1484 | 63 | quality = avctx->extradata[37]; |
1485 | 63 | if (quality > 10) { |
1486 | 6 | av_log(avctx, AV_LOG_ERROR, "Unsupported quality mode %d.\n", quality); |
1487 | 6 | return AVERROR_PATCHWELCOME; |
1488 | 6 | } |
1489 | | |
1490 | 57 | s->pkt_size = ((const uint8_t[]){ 5, 10, 15, 20, 20, 28, 28, 38, 38, 46, 62 })[quality]; |
1491 | | |
1492 | 57 | s->mode = 0; |
1493 | 57 | s->nb_channels = 1; |
1494 | 57 | s->rate = avctx->sample_rate; |
1495 | 57 | if (s->rate <= 0) |
1496 | 1 | return AVERROR_INVALIDDATA; |
1497 | 56 | s->frames_per_packet = 1; |
1498 | 56 | s->frame_size = NB_FRAME_SIZE; |
1499 | 56 | } |
1500 | | |
1501 | 1.30k | if (s->bitrate > 0) |
1502 | 43 | avctx->bit_rate = s->bitrate; |
1503 | 1.30k | av_channel_layout_uninit(&avctx->ch_layout); |
1504 | 1.30k | avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; |
1505 | 1.30k | avctx->ch_layout.nb_channels = s->nb_channels; |
1506 | 1.30k | avctx->sample_rate = s->rate; |
1507 | 1.30k | avctx->sample_fmt = AV_SAMPLE_FMT_FLT; |
1508 | | |
1509 | 4.97k | for (int m = 0; m <= s->mode; m++) { |
1510 | 3.67k | ret = decoder_init(s, &s->st[m], &speex_modes[m]); |
1511 | 3.67k | if (ret < 0) |
1512 | 0 | return ret; |
1513 | 3.67k | } |
1514 | | |
1515 | 1.30k | s->stereo.balance = 1.f; |
1516 | 1.30k | s->stereo.e_ratio = .5f; |
1517 | 1.30k | s->stereo.smooth_left = 1.f; |
1518 | 1.30k | s->stereo.smooth_right = 1.f; |
1519 | | |
1520 | 1.30k | return 0; |
1521 | 1.30k | } |
1522 | | |
1523 | | static void speex_decode_stereo(float *data, int frame_size, StereoState *stereo) |
1524 | 297k | { |
1525 | 297k | float balance, e_left, e_right, e_ratio; |
1526 | | |
1527 | 297k | balance = stereo->balance; |
1528 | 297k | e_ratio = stereo->e_ratio; |
1529 | | |
1530 | | /* These two are Q14, with max value just below 2. */ |
1531 | 297k | e_right = 1.f / sqrtf(e_ratio * (1.f + balance)); |
1532 | 297k | e_left = sqrtf(balance) * e_right; |
1533 | | |
1534 | 190M | for (int i = frame_size - 1; i >= 0; i--) { |
1535 | 190M | float tmp = data[i]; |
1536 | 190M | stereo->smooth_left = stereo->smooth_left * 0.98f + e_left * 0.02f; |
1537 | 190M | stereo->smooth_right = stereo->smooth_right * 0.98f + e_right * 0.02f; |
1538 | 190M | data[2 * i ] = stereo->smooth_left * tmp; |
1539 | 190M | data[2 * i + 1] = stereo->smooth_right * tmp; |
1540 | 190M | } |
1541 | 297k | } |
1542 | | |
1543 | | static int speex_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
1544 | | int *got_frame_ptr, AVPacket *avpkt) |
1545 | 531k | { |
1546 | 531k | SpeexContext *s = avctx->priv_data; |
1547 | 531k | int frames_per_packet = s->frames_per_packet; |
1548 | 531k | const float scale = 1.f / 32768.f; |
1549 | 531k | int buf_size = avpkt->size; |
1550 | 531k | float *dst; |
1551 | 531k | int ret; |
1552 | | |
1553 | 531k | if (s->pkt_size && avpkt->size == 62) |
1554 | 86 | buf_size = s->pkt_size; |
1555 | 531k | if ((ret = init_get_bits8(&s->gb, avpkt->data, buf_size)) < 0) |
1556 | 0 | return ret; |
1557 | | |
1558 | 531k | frame->nb_samples = FFALIGN(s->frame_size * frames_per_packet, 4); |
1559 | 531k | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
1560 | 0 | return ret; |
1561 | | |
1562 | 531k | dst = (float *)frame->extended_data[0]; |
1563 | 1.52M | for (int i = 0; i < frames_per_packet; i++) { |
1564 | 1.25M | ret = speex_modes[s->mode].decode(avctx, &s->st[s->mode], &s->gb, dst + i * s->frame_size, frames_per_packet - i); |
1565 | 1.25M | if (ret < 0) |
1566 | 119k | return ret; |
1567 | 1.13M | if (avctx->ch_layout.nb_channels == 2) |
1568 | 297k | speex_decode_stereo(dst + i * s->frame_size, s->frame_size, &s->stereo); |
1569 | 1.13M | if (get_bits_left(&s->gb) < 5 || |
1570 | 992k | show_bits(&s->gb, 5) == 15) { |
1571 | 139k | frames_per_packet = i + 1; |
1572 | 139k | break; |
1573 | 139k | } |
1574 | 1.13M | } |
1575 | | |
1576 | 412k | dst = (float *)frame->extended_data[0]; |
1577 | 412k | s->fdsp->vector_fmul_scalar(dst, dst, scale, frame->nb_samples * frame->ch_layout.nb_channels); |
1578 | 412k | frame->nb_samples = s->frame_size * frames_per_packet; |
1579 | | |
1580 | 412k | *got_frame_ptr = 1; |
1581 | | |
1582 | 412k | return (get_bits_count(&s->gb) + 7) >> 3; |
1583 | 531k | } |
1584 | | |
1585 | | static av_cold int speex_decode_close(AVCodecContext *avctx) |
1586 | 1.64k | { |
1587 | 1.64k | SpeexContext *s = avctx->priv_data; |
1588 | 1.64k | av_freep(&s->fdsp); |
1589 | 1.64k | return 0; |
1590 | 1.64k | } |
1591 | | |
1592 | | const FFCodec ff_speex_decoder = { |
1593 | | .p.name = "speex", |
1594 | | CODEC_LONG_NAME("Speex"), |
1595 | | .p.type = AVMEDIA_TYPE_AUDIO, |
1596 | | .p.id = AV_CODEC_ID_SPEEX, |
1597 | | .init = speex_decode_init, |
1598 | | FF_CODEC_DECODE_CB(speex_decode_frame), |
1599 | | .close = speex_decode_close, |
1600 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF, |
1601 | | .priv_data_size = sizeof(SpeexContext), |
1602 | | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, |
1603 | | }; |