/src/ffmpeg/libavcodec/amrnbdec.c
Line | Count | Source |
1 | | /* |
2 | | * AMR narrowband decoder |
3 | | * Copyright (c) 2006-2007 Robert Swain |
4 | | * Copyright (c) 2009 Colin McQuillan |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | |
24 | | /** |
25 | | * @file |
26 | | * AMR narrowband decoder |
27 | | * |
28 | | * This decoder uses floats for simplicity and so is not bit-exact. One |
29 | | * consequence is that differences in phase can accumulate. The test sequences |
30 | | * in 3GPP TS 26.074 can still be useful. |
31 | | * |
32 | | * - Comparing this file's output to the output of the ref decoder gives a |
33 | | * PSNR of 30 to 80 dB. Plotting the output samples shows a difference in |
34 | | * phase in some areas. |
35 | | * |
36 | | * - Comparing both decoders against their input, this decoder gives a similar |
37 | | * PSNR. If the test sequence homing frames are removed (this decoder does |
38 | | * not detect them), the PSNR is at least as good as the reference on 140 |
39 | | * out of 169 tests. |
40 | | */ |
41 | | |
42 | | |
43 | | #include <string.h> |
44 | | #include <math.h> |
45 | | |
46 | | #include "libavutil/channel_layout.h" |
47 | | #include "avcodec.h" |
48 | | #include "libavutil/common.h" |
49 | | #include "libavutil/avassert.h" |
50 | | #include "celp_math.h" |
51 | | #include "celp_filters.h" |
52 | | #include "acelp_filters.h" |
53 | | #include "acelp_vectors.h" |
54 | | #include "acelp_pitch_delay.h" |
55 | | #include "lsp.h" |
56 | | #include "amr.h" |
57 | | #include "codec_internal.h" |
58 | | #include "decode.h" |
59 | | |
60 | | #include "amrnbdata.h" |
61 | | |
62 | 2.69M | #define AMR_BLOCK_SIZE 160 ///< samples per frame |
63 | 191M | #define AMR_SAMPLE_BOUND 32768.0 ///< threshold for synthesis overflow |
64 | | |
65 | | /** |
66 | | * Scale from constructed speech to [-1,1] |
67 | | * |
68 | | * AMR is designed to produce 16-bit PCM samples (3GPP TS 26.090 4.2) but |
69 | | * upscales by two (section 6.2.2). |
70 | | * |
71 | | * Fundamentally, this scale is determined by energy_mean through |
72 | | * the fixed vector contribution to the excitation vector. |
73 | | */ |
74 | 1.19M | #define AMR_SAMPLE_SCALE (2.0 / 32768.0) |
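/* Equivalently 1.0 / 16384.0: the synthesis output, which is kept at 16-bit
 * scale internally (see AMR_SAMPLE_BOUND), is divided by 16384 rather than
 * 32768, folding the section 6.2.2 up-scaling by two into the conversion to
 * [-1,1) floats. */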
75 | | |
76 | | /** Prediction factor for 12.2kbit/s mode */ |
77 | 259k | #define PRED_FAC_MODE_12k2 0.65 |
78 | | |
79 | 12.4M | #define LSF_R_FAC (8000.0 / 32768.0) ///< LSF residual tables to Hertz |
80 | 1.21M | #define MIN_LSF_SPACING (50.0488 / 8000.0) ///< Ensures stability of LPC filter |
81 | 51.9k | #define PITCH_LAG_MIN_MODE_12k2 18 ///< Lower bound on decoded lag search in 12.2kbit/s mode |
82 | | |
83 | | /** Initial energy in dB. Also used for bad frames (unimplemented). */ |
84 | 5.01k | #define MIN_ENERGY -14.0 |
85 | | |
86 | | /** Maximum sharpening factor |
87 | | * |
88 | | * The specification says 0.8, which should be 13107, but the reference C code |
89 | | * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in g729dec.c.) |
90 | | */ |
91 | 2.89M | #define SHARP_MAX 0.79449462890625 |
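/* In the Q14 scaling used for the pitch gain tables in this file (the
 * 1/16384 factors below): 13017 / 16384 = 0.79449462890625, whereas the
 * spec's 0.8 would round to 13107 / 16384. */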
92 | | |
93 | | /** Number of impulse response coefficients used for tilt factor */ |
94 | 14.2M | #define AMR_TILT_RESPONSE 22 |
95 | | /** Tilt factor = 1st reflection coefficient * gamma_t */ |
96 | 4.62M | #define AMR_TILT_GAMMA_T 0.8 |
97 | | /** Adaptive gain control factor used in post-filter */ |
98 | 4.76M | #define AMR_AGC_ALPHA 0.9 |
99 | | |
100 | | typedef struct AMRContext { |
101 | | AMRNBFrame frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc) |
102 | | uint8_t bad_frame_indicator; ///< bad frame ? 1 : 0 |
103 | | enum Mode cur_frame_mode; |
104 | | |
105 | | int16_t prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector from previous subframe |
106 | | double lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame |
107 | | double prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame |
108 | | |
109 | | float lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing |
110 | | float lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector |
111 | | |
112 | | float lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes |
113 | | |
114 | | uint8_t pitch_lag_int; ///< integer part of pitch lag from current subframe |
115 | | |
116 | | float excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< current excitation and all necessary excitation history |
117 | | float *excitation; ///< pointer to the current excitation vector in excitation_buf |
118 | | |
119 | | float pitch_vector[AMR_SUBFRAME_SIZE]; ///< adaptive code book (pitch) vector |
120 | | float fixed_vector[AMR_SUBFRAME_SIZE]; ///< algebraic codebook (fixed) vector (must be kept zero between frames) |
121 | | |
122 | | float prediction_error[4]; ///< quantized prediction errors {20log10(^gamma_gc)} for previous four subframes |
123 | | float pitch_gain[5]; ///< quantized pitch gains for the current and previous four subframes |
124 | | float fixed_gain[5]; ///< quantized fixed gains for the current and previous four subframes |
125 | | |
126 | | float beta; ///< previous pitch_gain, bounded by [0.0,SHARP_MAX] |
127 | | uint8_t diff_count; ///< the number of subframes for which diff has been above 0.65 |
128 | | uint8_t hang_count; ///< the number of subframes since a hangover period started |
129 | | |
130 | | float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness processing to determine "onset" |
131 | | uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none |
132 | | uint8_t ir_filter_onset; ///< flag for impulse response filter strength |
133 | | |
134 | | float postfilter_mem[10]; ///< previous intermediate values in the formant filter |
135 | | float tilt_mem; ///< previous input to tilt compensation filter |
136 | | float postfilter_agc; ///< previous factor used for adaptive gain control |
137 | | float high_pass_mem[2]; ///< previous intermediate values in the high-pass filter |
138 | | |
139 | | float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples |
140 | | |
141 | | ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs |
142 | | ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs |
143 | | CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs |
144 | | CELPMContext celpm_ctx; ///< context for fixed point math operations |
145 | | |
146 | | } AMRContext; |
147 | | |
148 | | typedef struct AMRChannelsContext { |
149 | | AMRContext ch[2]; |
150 | | } AMRChannelsContext; |
151 | | |
152 | | /** Double version of ff_weighted_vector_sumf() */ |
153 | | static void weighted_vector_sumd(double *out, const double *in_a, |
154 | | const double *in_b, double weight_coeff_a, |
155 | | double weight_coeff_b, int length) |
156 | 51.9k | { |
157 | 51.9k | int i; |
158 | | |
159 | 571k | for (i = 0; i < length; i++) |
160 | 519k | out[i] = weight_coeff_a * in_a[i] |
161 | 519k | + weight_coeff_b * in_b[i]; |
162 | 51.9k | } |
163 | | |
164 | | static av_cold int amrnb_decode_init(AVCodecContext *avctx) |
165 | 1.27k | { |
166 | 1.27k | AMRChannelsContext *s = avctx->priv_data; |
167 | 1.27k | int i; |
168 | | |
169 | 1.27k | if (avctx->ch_layout.nb_channels > 2) { |
170 | 57 | avpriv_report_missing_feature(avctx, ">2 channel AMR"); |
171 | 57 | return AVERROR_PATCHWELCOME; |
172 | 57 | } |
173 | | |
174 | 1.21k | if (!avctx->ch_layout.nb_channels) { |
175 | 1.12k | av_channel_layout_uninit(&avctx->ch_layout); |
176 | 1.12k | avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; |
177 | 1.12k | } |
178 | 1.21k | if (!avctx->sample_rate) |
179 | 858 | avctx->sample_rate = 8000; |
180 | 1.21k | avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; |
181 | | |
182 | 2.46k | for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { |
183 | 1.25k | AMRContext *p = &s->ch[ch]; |
184 | | // p->excitation always points to the same position in p->excitation_buf |
185 | 1.25k | p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1]; |
186 | | |
187 | 13.7k | for (i = 0; i < LP_FILTER_ORDER; i++) { |
188 | 12.5k | p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15); |
189 | 12.5k | p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15); |
190 | 12.5k | } |
191 | | |
192 | 6.27k | for (i = 0; i < 4; i++) |
193 | 5.01k | p->prediction_error[i] = MIN_ENERGY; |
194 | | |
195 | 1.25k | ff_acelp_filter_init(&p->acelpf_ctx); |
196 | 1.25k | ff_acelp_vectors_init(&p->acelpv_ctx); |
197 | 1.25k | ff_celp_filter_init(&p->celpf_ctx); |
198 | 1.25k | ff_celp_math_init(&p->celpm_ctx); |
199 | 1.25k | } |
200 | | |
201 | 1.21k | return 0; |
202 | 1.27k | } |
203 | | |
204 | | |
205 | | /** |
206 | | * Unpack an RFC4867 speech frame into the AMR frame mode and parameters. |
207 | | * |
208 | | * The order of speech bits is specified by 3GPP TS 26.101. |
209 | | * |
210 | | * @param p the context |
211 | | * @param buf pointer to the input buffer |
212 | | * @param buf_size size of the input buffer |
213 | | * |
214 | | * @return the frame mode |
215 | | */ |
216 | | static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, |
217 | | int buf_size) |
218 | 1.72M | { |
219 | 1.72M | enum Mode mode; |
220 | | |
221 | | // Decode the first octet. |
222 | 1.72M | mode = buf[0] >> 3 & 0x0F; // frame type |
223 | 1.72M | p->bad_frame_indicator = (buf[0] & 0x4) != 0x4; // quality bit |
224 | | |
225 | 1.72M | if (mode >= N_MODES || buf_size < frame_sizes_nb[mode] + 1) { |
226 | 522k | return NO_DATA; |
227 | 522k | } |
228 | | |
229 | 1.20M | if (mode < MODE_DTX) |
230 | 1.19M | ff_amr_bit_reorder((uint16_t *) &p->frame, sizeof(AMRNBFrame), buf + 1, |
231 | 1.19M | amr_unpacking_bitmaps_per_mode[mode]); |
232 | | |
233 | 1.20M | return mode; |
234 | 1.72M | } |
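/*
 * Worked example of the first-octet parsing above, following the RFC 4867
 * layout where bits 6..3 hold the frame type (mode) and bit 2 the quality bit:
 *
 *   buf[0] = 0x3C = 0b00111100
 *   mode   = 0x3C >> 3 & 0x0F = 7        -> MODE_12k2
 *   Q      = 0x3C & 0x04      = nonzero  -> bad_frame_indicator = 0
 *
 * A 12.2 kbit/s frame carries 244 speech bits = 31 bytes once padded, so a
 * valid packet needs at least 32 bytes including the TOC octet.
 */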
235 | | |
236 | | |
237 | | /// @name AMR pitch LPC coefficient decoding functions |
238 | | /// @{ |
239 | | |
240 | | /** |
241 | | * Interpolate the LSF vector (used for fixed gain smoothing). |
242 | | * The interpolation is done over all four subframes even in MODE_12k2. |
243 | | * |
244 | | * @param[in] ctx The Context |
245 | | * @param[in,out] lsf_q LSFs in [0,1] for each subframe |
246 | | * @param[in] lsf_new New LSFs in [0,1] for subframe 4 |
247 | | */ |
248 | | static void interpolate_lsf(ACELPVContext *ctx, float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) |
249 | 1.19M | { |
250 | 1.19M | int i; |
251 | | |
252 | 5.95M | for (i = 0; i < 4; i++) |
253 | 4.76M | ctx->weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, |
254 | 4.76M | 0.25 * (3 - i), 0.25 * (i + 1), |
255 | 4.76M | LP_FILTER_ORDER); |
256 | 1.19M | } |
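/*
 * The weights above give, per subframe, (0.75, 0.25), (0.50, 0.50),
 * (0.25, 0.75) and (0.00, 1.00) on (previous frame's lsf_q[3], lsf_new),
 * i.e. a linear fade from the previous frame's final LSFs to the new ones;
 * the last iteration leaves lsf_q[3] equal to lsf_new for the next frame.
 */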
257 | | |
258 | | /** |
259 | | * Decode a set of 5 split-matrix quantized lsf indexes into an lsp vector. |
260 | | * |
261 | | * @param p the context |
262 | | * @param lsp output LSP vector |
263 | | * @param lsf_no_r LSF vector without the residual vector added |
264 | | * @param lsf_quantizer pointers to LSF dictionary tables |
265 | | * @param quantizer_offset offset in tables |
266 | | * @param sign sign flag applied to the entries taken from the 3rd dictionary table |
267 | | * @param update store data for computing the next frame's LSFs |
268 | | */ |
269 | | static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER], |
270 | | const float lsf_no_r[LP_FILTER_ORDER], |
271 | | const int16_t *lsf_quantizer[5], |
272 | | const int quantizer_offset, |
273 | | const int sign, const int update) |
274 | 51.9k | { |
275 | 51.9k | int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector |
276 | 51.9k | float lsf_q[LP_FILTER_ORDER]; // quantized LSF vector |
277 | 51.9k | int i; |
278 | | |
279 | 311k | for (i = 0; i < LP_FILTER_ORDER >> 1; i++) |
280 | 259k | memcpy(&lsf_r[i << 1], &lsf_quantizer[i][quantizer_offset], |
281 | 259k | 2 * sizeof(*lsf_r)); |
282 | | |
283 | 51.9k | if (sign) { |
284 | 23.0k | lsf_r[4] *= -1; |
285 | 23.0k | lsf_r[5] *= -1; |
286 | 23.0k | } |
287 | | |
288 | 51.9k | if (update) |
289 | 25.9k | memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); |
290 | | |
291 | 571k | for (i = 0; i < LP_FILTER_ORDER; i++) |
292 | 519k | lsf_q[i] = lsf_r[i] * (LSF_R_FAC / 8000.0) + lsf_no_r[i] * (1.0 / 8000.0); |
293 | | |
294 | 51.9k | ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); |
295 | | |
296 | 51.9k | if (update) |
297 | 25.9k | interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q); |
298 | | |
299 | 51.9k | ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER); |
300 | 51.9k | } |
301 | | |
302 | | /** |
303 | | * Decode a set of 5 split-matrix quantized lsf indexes into 2 lsp vectors. |
304 | | * |
305 | | * @param p pointer to the AMRContext |
306 | | */ |
307 | | static void lsf2lsp_5(AMRContext *p) |
308 | 25.9k | { |
309 | 25.9k | const uint16_t *lsf_param = p->frame.lsf; |
310 | 25.9k | float lsf_no_r[LP_FILTER_ORDER]; // LSFs without the residual vector |
311 | 25.9k | const int16_t *lsf_quantizer[5]; |
312 | 25.9k | int i; |
313 | | |
314 | 25.9k | lsf_quantizer[0] = lsf_5_1[lsf_param[0]]; |
315 | 25.9k | lsf_quantizer[1] = lsf_5_2[lsf_param[1]]; |
316 | 25.9k | lsf_quantizer[2] = lsf_5_3[lsf_param[2] >> 1]; |
317 | 25.9k | lsf_quantizer[3] = lsf_5_4[lsf_param[3]]; |
318 | 25.9k | lsf_quantizer[4] = lsf_5_5[lsf_param[4]]; |
319 | | |
320 | 285k | for (i = 0; i < LP_FILTER_ORDER; i++) |
321 | 259k | lsf_no_r[i] = p->prev_lsf_r[i] * LSF_R_FAC * PRED_FAC_MODE_12k2 + lsf_5_mean[i]; |
322 | | |
323 | 25.9k | lsf2lsp_for_mode12k2(p, p->lsp[1], lsf_no_r, lsf_quantizer, 0, lsf_param[2] & 1, 0); |
324 | 25.9k | lsf2lsp_for_mode12k2(p, p->lsp[3], lsf_no_r, lsf_quantizer, 2, lsf_param[2] & 1, 1); |
325 | | |
326 | | // interpolate LSP vectors at subframes 1 and 3 |
327 | 25.9k | weighted_vector_sumd(p->lsp[0], p->prev_lsp_sub4, p->lsp[1], 0.5, 0.5, LP_FILTER_ORDER); |
328 | 25.9k | weighted_vector_sumd(p->lsp[2], p->lsp[1] , p->lsp[3], 0.5, 0.5, LP_FILTER_ORDER); |
329 | 25.9k | } |
330 | | |
331 | | /** |
332 | | * Decode a set of 3 split-matrix quantized lsf indexes into an lsp vector. |
333 | | * |
334 | | * @param p pointer to the AMRContext |
335 | | */ |
336 | | static void lsf2lsp_3(AMRContext *p) |
337 | 1.16M | { |
338 | 1.16M | const uint16_t *lsf_param = p->frame.lsf; |
339 | 1.16M | int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector |
340 | 1.16M | float lsf_q[LP_FILTER_ORDER]; // quantized LSF vector |
341 | 1.16M | const int16_t *lsf_quantizer; |
342 | 1.16M | int i, j; |
343 | | |
344 | 1.16M | lsf_quantizer = (p->cur_frame_mode == MODE_7k95 ? lsf_3_1_MODE_7k95 : lsf_3_1)[lsf_param[0]]; |
345 | 1.16M | memcpy(lsf_r, lsf_quantizer, 3 * sizeof(*lsf_r)); |
346 | | |
347 | 1.16M | lsf_quantizer = lsf_3_2[lsf_param[1] << (p->cur_frame_mode <= MODE_5k15)]; |
348 | 1.16M | memcpy(lsf_r + 3, lsf_quantizer, 3 * sizeof(*lsf_r)); |
349 | | |
350 | 1.16M | lsf_quantizer = (p->cur_frame_mode <= MODE_5k15 ? lsf_3_3_MODE_5k15 : lsf_3_3)[lsf_param[2]]; |
351 | 1.16M | memcpy(lsf_r + 6, lsf_quantizer, 4 * sizeof(*lsf_r)); |
352 | | |
353 | | // calculate mean-removed LSF vector and add mean |
354 | 12.8M | for (i = 0; i < LP_FILTER_ORDER; i++) |
355 | 11.6M | lsf_q[i] = (lsf_r[i] + p->prev_lsf_r[i] * pred_fac[i]) * (LSF_R_FAC / 8000.0) + lsf_3_mean[i] * (1.0 / 8000.0); |
356 | | |
357 | 1.16M | ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); |
358 | | |
359 | | // store data for computing the next frame's LSFs |
360 | 1.16M | interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q); |
361 | 1.16M | memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); |
362 | | |
363 | 1.16M | ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER); |
364 | | |
365 | | // interpolate LSP vectors at subframes 1, 2 and 3 |
366 | 4.66M | for (i = 1; i <= 3; i++) |
367 | 38.4M | for(j = 0; j < LP_FILTER_ORDER; j++) |
368 | 34.9M | p->lsp[i-1][j] = p->prev_lsp_sub4[j] + |
369 | 34.9M | (p->lsp[3][j] - p->prev_lsp_sub4[j]) * 0.25 * i; |
370 | 1.16M | } |
371 | | |
372 | | /// @} |
373 | | |
374 | | |
375 | | /// @name AMR pitch vector decoding functions |
376 | | /// @{ |
377 | | |
378 | | /** |
379 | | * Like ff_decode_pitch_lag(), but with 1/6 resolution |
380 | | */ |
381 | | static void decode_pitch_lag_1_6(int *lag_int, int *lag_frac, int pitch_index, |
382 | | const int prev_lag_int, const int subframe) |
383 | 103k | { |
384 | 103k | if (subframe == 0 || subframe == 2) { |
385 | 51.9k | if (pitch_index < 463) { |
386 | 50.9k | *lag_int = (pitch_index + 107) * 10923 >> 16; |
387 | 50.9k | *lag_frac = pitch_index - *lag_int * 6 + 105; |
388 | 50.9k | } else { |
389 | 1.04k | *lag_int = pitch_index - 368; |
390 | 1.04k | *lag_frac = 0; |
391 | 1.04k | } |
392 | 51.9k | } else { |
393 | 51.9k | *lag_int = ((pitch_index + 5) * 10923 >> 16) - 1; |
394 | 51.9k | *lag_frac = pitch_index - *lag_int * 6 - 3; |
395 | 51.9k | *lag_int += av_clip(prev_lag_int - 5, PITCH_LAG_MIN_MODE_12k2, |
396 | 51.9k | PITCH_DELAY_MAX - 9); |
397 | 51.9k | } |
398 | 103k | } |
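/*
 * 10923 / 65536 ~= 1/6, so "* 10923 >> 16" is a fixed-point division by six.
 * For subframes 0 and 2 the 9-bit index therefore maps as, for example:
 *   pitch_index   0 -> lag_int  17, lag_frac 3   (lag 17 3/6)
 *   pitch_index 462 -> lag_int  94, lag_frac 3   (lag 94 3/6)
 *   pitch_index 463 -> lag_int  95, lag_frac 0
 *   pitch_index 511 -> lag_int 143, lag_frac 0
 * covering the 12.2 kbit/s range of [17 3/6, 94 3/6] in 1/6 steps plus the
 * integer lags [95, 143] (3GPP TS 26.090 section 5.6).
 */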
399 | | |
400 | | static void decode_pitch_vector(AMRContext *p, |
401 | | const AMRNBSubframe *amr_subframe, |
402 | | const int subframe) |
403 | 4.76M | { |
404 | 4.76M | int pitch_lag_int, pitch_lag_frac; |
405 | 4.76M | enum Mode mode = p->cur_frame_mode; |
406 | | |
407 | 4.76M | if (p->cur_frame_mode == MODE_12k2) { |
408 | 103k | decode_pitch_lag_1_6(&pitch_lag_int, &pitch_lag_frac, |
409 | 103k | amr_subframe->p_lag, p->pitch_lag_int, |
410 | 103k | subframe); |
411 | 4.66M | } else { |
412 | 4.66M | ff_decode_pitch_lag(&pitch_lag_int, &pitch_lag_frac, |
413 | 4.66M | amr_subframe->p_lag, |
414 | 4.66M | p->pitch_lag_int, subframe, |
415 | 4.66M | mode != MODE_4k75 && mode != MODE_5k15, |
416 | 4.66M | mode <= MODE_6k7 ? 4 : (mode == MODE_7k95 ? 5 : 6)); |
417 | 4.66M | pitch_lag_frac *= 2; |
418 | 4.66M | } |
419 | | |
420 | 4.76M | p->pitch_lag_int = pitch_lag_int; // store previous lag in a uint8_t |
421 | | |
422 | 4.76M | pitch_lag_int += pitch_lag_frac > 0; |
423 | | |
424 | | /* Calculate the pitch vector by interpolating the past excitation at the |
425 | | pitch lag using a b60 hamming windowed sinc function. */ |
426 | 4.76M | p->acelpf_ctx.acelp_interpolatef(p->excitation, |
427 | 4.76M | p->excitation + 1 - pitch_lag_int, |
428 | 4.76M | ff_b60_sinc, 6, |
429 | 4.76M | pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0), |
430 | 4.76M | 10, AMR_SUBFRAME_SIZE); |
431 | | |
432 | 4.76M | memcpy(p->pitch_vector, p->excitation, AMR_SUBFRAME_SIZE * sizeof(float)); |
433 | 4.76M | } |
434 | | |
435 | | /// @} |
436 | | |
437 | | |
438 | | /// @name AMR algebraic code book (fixed) vector decoding functions |
439 | | /// @{ |
440 | | |
441 | | /** |
442 | | * Decode a 10-bit algebraic codebook index from a 10.2 kbit/s frame. |
443 | | */ |
444 | | static void decode_10bit_pulse(int code, int pulse_position[8], |
445 | | int i1, int i2, int i3) |
446 | 210k | { |
447 | | // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of |
448 | | // the 3 pulses and the upper 7 bits being coded in base 5 |
449 | 210k | const uint8_t *positions = base_five_table[code >> 3]; |
450 | 210k | pulse_position[i1] = (positions[2] << 1) + ( code & 1); |
451 | 210k | pulse_position[i2] = (positions[1] << 1) + ((code >> 1) & 1); |
452 | 210k | pulse_position[i3] = (positions[0] << 1) + ((code >> 2) & 1); |
453 | 210k | } |
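/*
 * Each 10.2 kbit/s pulse sits on one of four interleaved tracks with ten
 * possible positions (0-9). A position is stored as 2 * d + b, where d is a
 * base-5 digit (0-4) and b a separately coded LSB, so three positions pack
 * into 7 + 3 bits: the 5^3 = 125 digit combinations fit in the 7-bit table
 * index and the three LSBs ride in the bottom bits of the code.
 */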
454 | | |
455 | | /** |
456 | | * Decode the algebraic codebook index to pulse positions and signs and |
457 | | * construct the algebraic codebook vector for MODE_10k2. |
458 | | * |
459 | | * @param fixed_index positions of the eight pulses |
460 | | * @param fixed_sparse pointer to the algebraic codebook vector |
461 | | */ |
462 | | static void decode_8_pulses_31bits(const int16_t *fixed_index, |
463 | | AMRFixed *fixed_sparse) |
464 | 105k | { |
465 | 105k | int pulse_position[8]; |
466 | 105k | int i, temp; |
467 | | |
468 | 105k | decode_10bit_pulse(fixed_index[4], pulse_position, 0, 4, 1); |
469 | 105k | decode_10bit_pulse(fixed_index[5], pulse_position, 2, 6, 5); |
470 | | |
471 | | // coded using 5+2 bits with the 2 LSBs being, individually, the LSB of 1 of |
472 | | // the 2 pulses and the upper 5 bits being coded in base 5 |
473 | 105k | temp = ((fixed_index[6] >> 2) * 25 + 12) >> 5; |
474 | 105k | pulse_position[3] = temp % 5; |
475 | 105k | pulse_position[7] = temp / 5; |
476 | 105k | if (pulse_position[7] & 1) |
477 | 46.0k | pulse_position[3] = 4 - pulse_position[3]; |
478 | 105k | pulse_position[3] = (pulse_position[3] << 1) + ( fixed_index[6] & 1); |
479 | 105k | pulse_position[7] = (pulse_position[7] << 1) + ((fixed_index[6] >> 1) & 1); |
480 | | |
481 | 105k | fixed_sparse->n = 8; |
482 | 526k | for (i = 0; i < 4; i++) { |
483 | 421k | const int pos1 = (pulse_position[i] << 2) + i; |
484 | 421k | const int pos2 = (pulse_position[i + 4] << 2) + i; |
485 | 421k | const float sign = fixed_index[i] ? -1.0 : 1.0; |
486 | 421k | fixed_sparse->x[i ] = pos1; |
487 | 421k | fixed_sparse->x[i + 4] = pos2; |
488 | 421k | fixed_sparse->y[i ] = sign; |
489 | 421k | fixed_sparse->y[i + 4] = pos2 < pos1 ? -sign : sign; |
490 | 421k | } |
491 | 105k | } |
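/*
 * Bit budget of the 31-bit index: two 10-bit codes for six pulses (above),
 * one 7-bit code for the remaining pair, and four sign bits, one per track;
 * the second pulse on a track takes the opposite sign when its position
 * precedes the first, as in the y[i + 4] assignment above.
 */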
492 | | |
493 | | /** |
494 | | * Decode the algebraic codebook index to pulse positions and signs, |
495 | | * then construct the algebraic codebook vector. |
496 | | * |
497 | | * nb of pulses | bits encoding pulses |
498 | | * For MODE_4k75 or MODE_5k15, 2 | 1-3, 4-6, 7 |
499 | | * MODE_5k9, 2 | 1, 2-4, 5-6, 7-9 |
500 | | * MODE_6k7, 3 | 1-3, 4, 5-7, 8, 9-11 |
501 | | * MODE_7k4 or MODE_7k95, 4 | 1-3, 4-6, 7-9, 10, 11-13 |
502 | | * |
503 | | * @param fixed_sparse pointer to the algebraic codebook vector |
504 | | * @param pulses algebraic codebook indexes |
505 | | * @param mode mode of the current frame |
506 | | * @param subframe current subframe number |
507 | | */ |
508 | | static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses, |
509 | | const enum Mode mode, const int subframe) |
510 | 4.76M | { |
511 | 4.76M | av_assert1(MODE_4k75 <= (signed)mode && mode <= MODE_12k2); |
512 | | |
513 | 4.76M | if (mode == MODE_12k2) { |
514 | 103k | ff_decode_10_pulses_35bits(pulses, fixed_sparse, gray_decode, 5, 3); |
515 | 4.66M | } else if (mode == MODE_10k2) { |
516 | 105k | decode_8_pulses_31bits(pulses, fixed_sparse); |
517 | 4.55M | } else { |
518 | 4.55M | int *pulse_position = fixed_sparse->x; |
519 | 4.55M | int i, pulse_subset; |
520 | 4.55M | const int fixed_index = pulses[0]; |
521 | | |
522 | 4.55M | if (mode <= MODE_5k15) { |
523 | 4.37M | pulse_subset = ((fixed_index >> 3) & 8) + (subframe << 1); |
524 | 4.37M | pulse_position[0] = ( fixed_index & 7) * 5 + track_position[pulse_subset]; |
525 | 4.37M | pulse_position[1] = ((fixed_index >> 3) & 7) * 5 + track_position[pulse_subset + 1]; |
526 | 4.37M | fixed_sparse->n = 2; |
527 | 4.37M | } else if (mode == MODE_5k9) { |
528 | 21.5k | pulse_subset = ((fixed_index & 1) << 1) + 1; |
529 | 21.5k | pulse_position[0] = ((fixed_index >> 1) & 7) * 5 + pulse_subset; |
530 | 21.5k | pulse_subset = (fixed_index >> 4) & 3; |
531 | 21.5k | pulse_position[1] = ((fixed_index >> 6) & 7) * 5 + pulse_subset + (pulse_subset == 3 ? 1 : 0); |
532 | 21.5k | fixed_sparse->n = pulse_position[0] == pulse_position[1] ? 1 : 2; |
533 | 158k | } else if (mode == MODE_6k7) { |
534 | 119k | pulse_position[0] = (fixed_index & 7) * 5; |
535 | 119k | pulse_subset = (fixed_index >> 2) & 2; |
536 | 119k | pulse_position[1] = ((fixed_index >> 4) & 7) * 5 + pulse_subset + 1; |
537 | 119k | pulse_subset = (fixed_index >> 6) & 2; |
538 | 119k | pulse_position[2] = ((fixed_index >> 8) & 7) * 5 + pulse_subset + 2; |
539 | 119k | fixed_sparse->n = 3; |
540 | 119k | } else { // mode <= MODE_7k95 |
541 | 39.0k | pulse_position[0] = gray_decode[ fixed_index & 7]; |
542 | 39.0k | pulse_position[1] = gray_decode[(fixed_index >> 3) & 7] + 1; |
543 | 39.0k | pulse_position[2] = gray_decode[(fixed_index >> 6) & 7] + 2; |
544 | 39.0k | pulse_subset = (fixed_index >> 9) & 1; |
545 | 39.0k | pulse_position[3] = gray_decode[(fixed_index >> 10) & 7] + pulse_subset + 3; |
546 | 39.0k | fixed_sparse->n = 4; |
547 | 39.0k | } |
548 | 13.8M | for (i = 0; i < fixed_sparse->n; i++) |
549 | 9.30M | fixed_sparse->y[i] = (pulses[1] >> i) & 1 ? 1.0 : -1.0; |
550 | 4.55M | } |
551 | 4.76M | } |
552 | | |
553 | | /** |
554 | | * Apply pitch lag to obtain the sharpened fixed vector (section 6.1.2) |
555 | | * |
556 | | * @param p the context |
557 | | * @param subframe unpacked amr subframe |
558 | | * @param mode mode of the current frame |
559 | | * @param fixed_sparse sparse representation of the fixed vector |
560 | | */ |
561 | | static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode, |
562 | | AMRFixed *fixed_sparse) |
563 | 4.76M | { |
564 | | // The spec suggests the current pitch gain is always used, but in other |
565 | | // modes the pitch and codebook gains are jointly quantized (sec 5.8.2) |
566 | | // so the codebook gain cannot depend on the quantized pitch gain. |
567 | 4.76M | if (mode == MODE_12k2) |
568 | 103k | p->beta = FFMIN(p->pitch_gain[4], 1.0); |
569 | | |
570 | 4.76M | fixed_sparse->pitch_lag = p->pitch_lag_int; |
571 | 4.76M | fixed_sparse->pitch_fac = p->beta; |
572 | | |
573 | | // Save pitch sharpening factor for the next subframe |
574 | | // MODE_4k75 only updates on the 2nd and 4th subframes - this follows from |
575 | | // the fact that the gains for two subframes are jointly quantized. |
576 | 4.76M | if (mode != MODE_4k75 || subframe & 1) |
577 | 2.89M | p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX); |
578 | 4.76M | } |
579 | | /// @} |
580 | | |
581 | | |
582 | | /// @name AMR gain decoding functions |
583 | | /// @{ |
584 | | |
585 | | /** |
586 | | * fixed gain smoothing |
587 | | * Note that where the spec specifies the "spectrum in the q domain" |
588 | | * in section 6.1.4, in fact frequencies should be used. |
589 | | * |
590 | | * @param p the context |
591 | | * @param lsf LSFs for the current subframe, in the range [0,1] |
592 | | * @param lsf_avg averaged LSFs |
593 | | * @param mode mode of the current frame |
594 | | * |
595 | | * @return fixed gain smoothed |
596 | | */ |
597 | | static float fixed_gain_smooth(AMRContext *p , const float *lsf, |
598 | | const float *lsf_avg, const enum Mode mode) |
599 | 4.76M | { |
600 | 4.76M | float diff = 0.0; |
601 | 4.76M | int i; |
602 | | |
603 | 52.4M | for (i = 0; i < LP_FILTER_ORDER; i++) |
604 | 47.6M | diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i]; |
605 | | |
606 | | // If diff is large for ten subframes, disable smoothing for a 40-subframe |
607 | | // hangover period. |
608 | 4.76M | p->diff_count++; |
609 | 4.76M | if (diff <= 0.65) |
610 | 3.98M | p->diff_count = 0; |
611 | | |
612 | 4.76M | if (p->diff_count > 10) { |
613 | 110k | p->hang_count = 0; |
614 | 110k | p->diff_count--; // don't let diff_count overflow |
615 | 110k | } |
616 | | |
617 | 4.76M | if (p->hang_count < 40) { |
618 | 489k | p->hang_count++; |
619 | 4.27M | } else if (mode < MODE_7k4 || mode == MODE_10k2) { |
620 | 4.15M | const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0); |
621 | 4.15M | const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] + |
622 | 4.15M | p->fixed_gain[2] + p->fixed_gain[3] + |
623 | 4.15M | p->fixed_gain[4]) * 0.2; |
624 | 4.15M | return smoothing_factor * p->fixed_gain[4] + |
625 | 4.15M | (1.0 - smoothing_factor) * fixed_gain_mean; |
626 | 4.15M | } |
627 | 604k | return p->fixed_gain[4]; |
628 | 4.76M | } |
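/*
 * In short: diff_count counts consecutive subframes with diff > 0.65; once
 * it exceeds 10, the 40-subframe (200 ms) hangover restarts. Only after the
 * hangover has elapsed, and only for the modes below 7.4 kbit/s plus
 * MODE_10k2, is the returned gain blended as
 *   k * fixed_gain[4] + (1 - k) * mean(fixed_gain[0..4]),
 * with k = clip(4 * diff - 1.6, 0, 1).
 */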
629 | | |
630 | | /** |
631 | | * Decode pitch gain and fixed gain factor (part of section 6.1.3). |
632 | | * |
633 | | * @param p the context |
634 | | * @param amr_subframe unpacked amr subframe |
635 | | * @param mode mode of the current frame |
636 | | * @param subframe current subframe number |
637 | | * @param fixed_gain_factor decoded gain correction factor |
638 | | */ |
639 | | static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe, |
640 | | const enum Mode mode, const int subframe, |
641 | | float *fixed_gain_factor) |
642 | 4.76M | { |
643 | 4.76M | if (mode == MODE_12k2 || mode == MODE_7k95) { |
644 | 122k | p->pitch_gain[4] = qua_gain_pit [amr_subframe->p_gain ] |
645 | 122k | * (1.0 / 16384.0); |
646 | 122k | *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain] |
647 | 122k | * (1.0 / 2048.0); |
648 | 4.64M | } else { |
649 | 4.64M | const uint16_t *gains; |
650 | | |
651 | 4.64M | if (mode >= MODE_6k7) { |
652 | 245k | gains = gains_high[amr_subframe->p_gain]; |
653 | 4.39M | } else if (mode >= MODE_5k15) { |
654 | 653k | gains = gains_low [amr_subframe->p_gain]; |
655 | 3.74M | } else { |
656 | | // gain index is only coded in subframes 0,2 for MODE_4k75 |
657 | 3.74M | gains = gains_MODE_4k75[(p->frame.subframe[subframe & 2].p_gain << 1) + (subframe & 1)]; |
658 | 3.74M | } |
659 | | |
660 | 4.64M | p->pitch_gain[4] = gains[0] * (1.0 / 16384.0); |
661 | 4.64M | *fixed_gain_factor = gains[1] * (1.0 / 4096.0); |
662 | 4.64M | } |
663 | 4.76M | } |
664 | | |
665 | | /// @} |
666 | | |
667 | | |
668 | | /// @name AMR preprocessing functions |
669 | | /// @{ |
670 | | |
671 | | /** |
672 | | * Circularly convolve a sparse fixed vector with a phase dispersion impulse |
673 | | * response filter (D.6.2 of G.729 and 6.1.5 of AMR). |
674 | | * |
675 | | * @param out vector with filter applied |
676 | | * @param in source vector |
677 | | * @param filter phase filter coefficients |
678 | | * |
679 | | * out[n] = sum(i,0,len-1){ in[i] * filter[(len + n - i)%len] } |
680 | | */ |
681 | | static void apply_ir_filter(float *out, const AMRFixed *in, |
682 | | const float *filter) |
683 | 570k | { |
684 | 570k | float filter1[AMR_SUBFRAME_SIZE], ///< filters at pitch lag*1 and *2 |
685 | 570k | filter2[AMR_SUBFRAME_SIZE]; |
686 | 570k | int lag = in->pitch_lag; |
687 | 570k | float fac = in->pitch_fac; |
688 | 570k | int i; |
689 | | |
690 | 570k | if (lag < AMR_SUBFRAME_SIZE) { |
691 | 480k | ff_celp_circ_addf(filter1, filter, filter, lag, fac, |
692 | 480k | AMR_SUBFRAME_SIZE); |
693 | | |
694 | 480k | if (lag < AMR_SUBFRAME_SIZE >> 1) |
695 | 61.7k | ff_celp_circ_addf(filter2, filter, filter1, lag, fac, |
696 | 61.7k | AMR_SUBFRAME_SIZE); |
697 | 480k | } |
698 | | |
699 | 570k | memset(out, 0, sizeof(float) * AMR_SUBFRAME_SIZE); |
700 | 1.77M | for (i = 0; i < in->n; i++) { |
701 | 1.20M | int x = in->x[i]; |
702 | 1.20M | float y = in->y[i]; |
703 | 1.20M | const float *filterp; |
704 | | |
705 | 1.20M | if (x >= AMR_SUBFRAME_SIZE - lag) { |
706 | 452k | filterp = filter; |
707 | 755k | } else if (x >= AMR_SUBFRAME_SIZE - (lag << 1)) { |
708 | 712k | filterp = filter1; |
709 | 712k | } else |
710 | 43.3k | filterp = filter2; |
711 | | |
712 | 1.20M | ff_celp_circ_addf(out, out, filterp, x, y, AMR_SUBFRAME_SIZE); |
713 | 1.20M | } |
714 | 570k | } |
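/*
 * For reference, a direct (and much slower) realization of the formula in
 * the comment above, written against a dense input vector; the function
 * name and dense-input interface are illustrative only, not part of the
 * codec. apply_ir_filter() instead folds the pitch repetition into
 * filter1/filter2 so that each sparse pulse costs one circular add.
 */
#if 0
static void circ_convolve_ref(float *out, const float *in,
                              const float *filter, int len)
{
    for (int n = 0; n < len; n++) {
        float sum = 0.0f;
        for (int i = 0; i < len; i++)
            sum += in[i] * filter[(len + n - i) % len];
        out[n] = sum;
    }
}
#endif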
715 | | |
716 | | /** |
717 | | * Reduce fixed vector sparseness by smoothing with one of three IR filters. |
718 | | * Also known as "adaptive phase dispersion". |
719 | | * |
720 | | * This implements 3GPP TS 26.090 section 6.1(5). |
721 | | * |
722 | | * @param p the context |
723 | | * @param fixed_sparse algebraic codebook vector |
724 | | * @param fixed_vector unfiltered fixed vector |
725 | | * @param fixed_gain smoothed gain |
726 | | * @param out space for modified vector if necessary |
727 | | */ |
728 | | static const float *anti_sparseness(AMRContext *p, AMRFixed *fixed_sparse, |
729 | | const float *fixed_vector, |
730 | | float fixed_gain, float *out) |
731 | 4.76M | { |
732 | 4.76M | int ir_filter_nr; |
733 | | |
734 | 4.76M | if (p->pitch_gain[4] < 0.6) { |
735 | 3.91M | ir_filter_nr = 0; // strong filtering |
736 | 3.91M | } else if (p->pitch_gain[4] < 0.9) { |
737 | 369k | ir_filter_nr = 1; // medium filtering |
738 | 369k | } else |
739 | 478k | ir_filter_nr = 2; // no filtering |
740 | | |
741 | | // detect 'onset' |
742 | 4.76M | if (fixed_gain > 2.0 * p->prev_sparse_fixed_gain) { |
743 | 502k | p->ir_filter_onset = 2; |
744 | 4.26M | } else if (p->ir_filter_onset) |
745 | 403k | p->ir_filter_onset--; |
746 | | |
747 | 4.76M | if (!p->ir_filter_onset) { |
748 | 4.04M | int i, count = 0; |
749 | | |
750 | 24.2M | for (i = 0; i < 5; i++) |
751 | 20.2M | if (p->pitch_gain[i] < 0.6) |
752 | 17.4M | count++; |
753 | 4.04M | if (count > 2) |
754 | 3.46M | ir_filter_nr = 0; |
755 | | |
756 | 4.04M | if (ir_filter_nr > p->prev_ir_filter_nr + 1) |
757 | 82.7k | ir_filter_nr--; |
758 | 4.04M | } else if (ir_filter_nr < 2) |
759 | 594k | ir_filter_nr++; |
760 | | |
761 | | // Disable filtering at very low levels of fixed_gain. |
762 | | // Note this step is not specified in the technical description but is in |
763 | | // the reference source in the function Ph_disp. |
764 | 4.76M | if (fixed_gain < 5.0) |
765 | 3.62M | ir_filter_nr = 2; |
766 | | |
767 | 4.76M | if (p->cur_frame_mode != MODE_7k4 && p->cur_frame_mode < MODE_10k2 |
768 | 4.53M | && ir_filter_nr < 2) { |
769 | 570k | apply_ir_filter(out, fixed_sparse, |
770 | 570k | (p->cur_frame_mode == MODE_7k95 ? |
771 | 5.81k | ir_filters_lookup_MODE_7k95 : |
772 | 570k | ir_filters_lookup)[ir_filter_nr]); |
773 | 570k | fixed_vector = out; |
774 | 570k | } |
775 | | |
776 | | // update ir filter strength history |
777 | 4.76M | p->prev_ir_filter_nr = ir_filter_nr; |
778 | 4.76M | p->prev_sparse_fixed_gain = fixed_gain; |
779 | | |
780 | 4.76M | return fixed_vector; |
781 | 4.76M | } |
782 | | |
783 | | /// @} |
784 | | |
785 | | |
786 | | /// @name AMR synthesis functions |
787 | | /// @{ |
788 | | |
789 | | /** |
790 | | * Conduct 10th order linear predictive coding synthesis. |
791 | | * |
792 | | * @param p pointer to the AMRContext |
793 | | * @param lpc pointer to the LPC coefficients |
794 | | * @param fixed_gain fixed codebook gain for synthesis |
795 | | * @param fixed_vector algebraic codebook vector |
796 | | * @param samples pointer to the output speech samples |
797 | | * @param overflow 16-bit overflow flag |
798 | | */ |
799 | | static int synthesis(AMRContext *p, float *lpc, |
800 | | float fixed_gain, const float *fixed_vector, |
801 | | float *samples, uint8_t overflow) |
802 | 4.80M | { |
803 | 4.80M | int i; |
804 | 4.80M | float excitation[AMR_SUBFRAME_SIZE]; |
805 | | |
806 | | // if an overflow has been detected, the pitch vector is scaled down by a |
807 | | // factor of 4 |
808 | 4.80M | if (overflow) |
809 | 1.85M | for (i = 0; i < AMR_SUBFRAME_SIZE; i++) |
810 | 1.80M | p->pitch_vector[i] *= 0.25; |
811 | | |
812 | 4.80M | p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, |
813 | 4.80M | p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); |
814 | | |
815 | | // emphasize pitch vector contribution |
816 | 4.80M | if (p->pitch_gain[4] > 0.5 && !overflow) { |
817 | 989k | float energy = p->celpm_ctx.dot_productf(excitation, excitation, |
818 | 989k | AMR_SUBFRAME_SIZE); |
819 | 989k | float pitch_factor = |
820 | 989k | p->pitch_gain[4] * |
821 | 989k | (p->cur_frame_mode == MODE_12k2 ? |
822 | 70.9k | 0.25 * FFMIN(p->pitch_gain[4], 1.0) : |
823 | 989k | 0.5 * FFMIN(p->pitch_gain[4], SHARP_MAX)); |
824 | | |
825 | 40.5M | for (i = 0; i < AMR_SUBFRAME_SIZE; i++) |
826 | 39.5M | excitation[i] += pitch_factor * p->pitch_vector[i]; |
827 | | |
828 | 989k | ff_scale_vector_to_given_sum_of_squares(excitation, excitation, energy, |
829 | 989k | AMR_SUBFRAME_SIZE); |
830 | 989k | } |
831 | | |
832 | 4.80M | p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, |
833 | 4.80M | AMR_SUBFRAME_SIZE, |
834 | 4.80M | LP_FILTER_ORDER); |
835 | | |
836 | | // detect overflow |
837 | 195M | for (i = 0; i < AMR_SUBFRAME_SIZE; i++) |
838 | 191M | if (fabsf(samples[i]) > AMR_SAMPLE_BOUND) { |
839 | 59.0k | return 1; |
840 | 59.0k | } |
841 | | |
842 | 4.75M | return 0; |
843 | 4.80M | } |
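/*
 * Note on the emphasis step above: pitch_factor * pitch_vector is added on
 * top of the normal excitation and the result is then rescaled back to the
 * pre-emphasis energy, so the emphasis shifts the balance between adaptive
 * and fixed contributions without changing the subframe energy.
 */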
844 | | |
845 | | /// @} |
846 | | |
847 | | |
848 | | /// @name AMR update functions |
849 | | /// @{ |
850 | | |
851 | | /** |
852 | | * Update buffers and history at the end of decoding a subframe. |
853 | | * |
854 | | * @param p pointer to the AMRContext |
855 | | */ |
856 | | static void update_state(AMRContext *p) |
857 | 4.76M | { |
858 | 4.76M | memcpy(p->prev_lsp_sub4, p->lsp[3], LP_FILTER_ORDER * sizeof(p->lsp[3][0])); |
859 | | |
860 | 4.76M | memmove(&p->excitation_buf[0], &p->excitation_buf[AMR_SUBFRAME_SIZE], |
861 | 4.76M | (PITCH_DELAY_MAX + LP_FILTER_ORDER + 1) * sizeof(float)); |
862 | | |
863 | 4.76M | memmove(&p->pitch_gain[0], &p->pitch_gain[1], 4 * sizeof(float)); |
864 | 4.76M | memmove(&p->fixed_gain[0], &p->fixed_gain[1], 4 * sizeof(float)); |
865 | | |
866 | 4.76M | memmove(&p->samples_in[0], &p->samples_in[AMR_SUBFRAME_SIZE], |
867 | 4.76M | LP_FILTER_ORDER * sizeof(float)); |
868 | 4.76M | } |
869 | | |
870 | | /// @} |
871 | | |
872 | | |
873 | | /// @name AMR Postprocessing functions |
874 | | /// @{ |
875 | | |
876 | | /** |
877 | | * Get the tilt factor of a formant filter from its transfer function |
878 | | * |
879 | | * @param p The Context |
880 | | * @param lpc_n LP_FILTER_ORDER coefficients of the numerator |
881 | | * @param lpc_d LP_FILTER_ORDER coefficients of the denominator |
882 | | */ |
883 | | static float tilt_factor(AMRContext *p, float *lpc_n, float *lpc_d) |
884 | 4.76M | { |
885 | 4.76M | float rh0, rh1; // autocorrelation at lag 0 and 1 |
886 | | |
887 | | // LP_FILTER_ORDER prior zeros are needed for ff_celp_lp_synthesis_filterf |
888 | 4.76M | float impulse_buffer[LP_FILTER_ORDER + AMR_TILT_RESPONSE] = { 0 }; |
889 | 4.76M | float *hf = impulse_buffer + LP_FILTER_ORDER; // start of impulse response |
890 | | |
891 | 4.76M | hf[0] = 1.0; |
892 | 4.76M | memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER); |
893 | 4.76M | p->celpf_ctx.celp_lp_synthesis_filterf(hf, lpc_d, hf, |
894 | 4.76M | AMR_TILT_RESPONSE, |
895 | 4.76M | LP_FILTER_ORDER); |
896 | | |
897 | 4.76M | rh0 = p->celpm_ctx.dot_productf(hf, hf, AMR_TILT_RESPONSE); |
898 | 4.76M | rh1 = p->celpm_ctx.dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1); |
899 | | |
900 | | // The spec only specifies this check for 12.2 and 10.2 kbit/s |
901 | | // modes. But in the ref source the tilt is always non-negative. |
902 | 4.76M | return rh1 >= 0.0 ? rh1 / rh0 * AMR_TILT_GAMMA_T : 0.0; |
903 | 4.76M | } |
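/*
 * rh1 / rh0 is the first reflection coefficient of the truncated impulse
 * response h(n), so the value returned here is the mu of the tilt
 * compensation filter 1 - mu * z^-1 applied by ff_tilt_compensation() in
 * postfilter() below.
 */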
904 | | |
905 | | /** |
906 | | * Perform adaptive post-filtering to enhance the quality of the speech. |
907 | | * See section 6.2.1. |
908 | | * |
909 | | * @param p pointer to the AMRContext |
910 | | * @param lpc interpolated LP coefficients for this subframe |
911 | | * @param buf_out output of the filter |
912 | | */ |
913 | | static void postfilter(AMRContext *p, float *lpc, float *buf_out) |
914 | 4.76M | { |
915 | 4.76M | int i; |
916 | 4.76M | float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input |
917 | | |
918 | 4.76M | float speech_gain = p->celpm_ctx.dot_productf(samples, samples, |
919 | 4.76M | AMR_SUBFRAME_SIZE); |
920 | | |
921 | 4.76M | float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter |
922 | 4.76M | const float *gamma_n, *gamma_d; // Formant filter factor table |
923 | 4.76M | float lpc_n[LP_FILTER_ORDER], lpc_d[LP_FILTER_ORDER]; // Transfer function coefficients |
924 | | |
925 | 4.76M | if (p->cur_frame_mode == MODE_12k2 || p->cur_frame_mode == MODE_10k2) { |
926 | 209k | gamma_n = ff_pow_0_7; |
927 | 209k | gamma_d = ff_pow_0_75; |
928 | 4.55M | } else { |
929 | 4.55M | gamma_n = ff_pow_0_55; |
930 | 4.55M | gamma_d = ff_pow_0_7; |
931 | 4.55M | } |
932 | | |
933 | 52.4M | for (i = 0; i < LP_FILTER_ORDER; i++) { |
934 | 47.6M | lpc_n[i] = lpc[i] * gamma_n[i]; |
935 | 47.6M | lpc_d[i] = lpc[i] * gamma_d[i]; |
936 | 47.6M | } |
937 | | |
938 | 4.76M | memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER); |
939 | 4.76M | p->celpf_ctx.celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, |
940 | 4.76M | AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); |
941 | 4.76M | memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE, |
942 | 4.76M | sizeof(float) * LP_FILTER_ORDER); |
943 | | |
944 | 4.76M | p->celpf_ctx.celp_lp_zero_synthesis_filterf(buf_out, lpc_n, |
945 | 4.76M | pole_out + LP_FILTER_ORDER, |
946 | 4.76M | AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); |
947 | | |
948 | 4.76M | ff_tilt_compensation(&p->tilt_mem, tilt_factor(p, lpc_n, lpc_d), buf_out, |
949 | 4.76M | AMR_SUBFRAME_SIZE); |
950 | | |
951 | 4.76M | ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE, |
952 | 4.76M | AMR_AGC_ALPHA, &p->postfilter_agc); |
953 | 4.76M | } |
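/*
 * The formant filter realized above is H(z) = A(z/gamma_n) / A(z/gamma_d)
 * (3GPP TS 26.090 section 6.2.1): lpc_d feeds the all-pole synthesis filter
 * and lpc_n the all-zero filter, with (gamma_n, gamma_d) = (0.7, 0.75) for
 * the 12.2 and 10.2 kbit/s modes and (0.55, 0.7) otherwise, followed by tilt
 * compensation and adaptive gain control towards the pre-filter energy.
 */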
954 | | |
955 | | /// @} |
956 | | |
957 | | static int amrnb_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
958 | | int *got_frame_ptr, AVPacket *avpkt) |
959 | 1.50M | { |
960 | | |
961 | 1.50M | AMRChannelsContext *s = avctx->priv_data; // pointer to private data |
962 | 1.50M | const uint8_t *buf = avpkt->data; |
963 | 1.50M | int buf_size = avpkt->size; |
964 | 1.50M | int ret; |
965 | | |
966 | | /* get output buffer */ |
967 | 1.50M | frame->nb_samples = AMR_BLOCK_SIZE; |
968 | 1.50M | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
969 | 0 | return ret; |
970 | | |
971 | 2.69M | for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { |
972 | 1.72M | AMRContext *p = &s->ch[ch]; |
973 | 1.72M | float fixed_gain_factor; |
974 | 1.72M | AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing |
975 | 1.72M | float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing |
976 | 1.72M | float synth_fixed_gain; // the fixed gain that synthesis should use |
977 | 1.72M | const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use |
978 | 1.72M | float *buf_out = (float *)frame->extended_data[ch]; |
979 | 1.72M | int channel_size; |
980 | 1.72M | int i, subframe; |
981 | | |
982 | 1.72M | p->cur_frame_mode = unpack_bitstream(p, buf, buf_size); |
983 | 1.72M | if (p->cur_frame_mode == NO_DATA) { |
984 | 522k | av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n"); |
985 | 522k | return AVERROR_INVALIDDATA; |
986 | 522k | } |
987 | 1.20M | if (p->cur_frame_mode == MODE_DTX) { |
988 | 10.8k | avpriv_report_missing_feature(avctx, "dtx mode"); |
989 | 10.8k | av_log(avctx, AV_LOG_INFO, "Note: libopencore_amrnb supports dtx\n"); |
990 | 10.8k | return AVERROR_PATCHWELCOME; |
991 | 10.8k | } |
992 | | |
993 | 1.19M | channel_size = frame_sizes_nb[p->cur_frame_mode] + 1; // +1 for the TOC (mode) byte; frame_sizes_nb[] is already rounded up to whole bytes |
994 | 1.19M | if (p->cur_frame_mode == MODE_12k2) { |
995 | 25.9k | lsf2lsp_5(p); |
996 | 25.9k | } else |
997 | 1.16M | lsf2lsp_3(p); |
998 | | |
999 | 5.95M | for (i = 0; i < 4; i++) |
1000 | 4.76M | ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5); |
1001 | | |
1002 | 5.95M | for (subframe = 0; subframe < 4; subframe++) { |
1003 | 4.76M | const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe]; |
1004 | | |
1005 | 4.76M | decode_pitch_vector(p, amr_subframe, subframe); |
1006 | | |
1007 | 4.76M | decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses, |
1008 | 4.76M | p->cur_frame_mode, subframe); |
1009 | | |
1010 | | // The fixed gain (section 6.1.3) depends on the fixed vector |
1011 | | // (section 6.1.2), but the fixed vector calculation uses |
1012 | | // pitch sharpening based on the pitch gain (section 6.1.3). |
1013 | | // So the correct order is: pitch gain, pitch sharpening, fixed gain. |
1014 | 4.76M | decode_gains(p, amr_subframe, p->cur_frame_mode, subframe, |
1015 | 4.76M | &fixed_gain_factor); |
1016 | | |
1017 | 4.76M | pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse); |
1018 | | |
1019 | 4.76M | if (fixed_sparse.pitch_lag == 0) { |
1020 | 0 | av_log(avctx, AV_LOG_ERROR, "The file is corrupted, pitch_lag = 0 is not allowed\n"); |
1021 | 0 | return AVERROR_INVALIDDATA; |
1022 | 0 | } |
1023 | 4.76M | ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0, |
1024 | 4.76M | AMR_SUBFRAME_SIZE); |
1025 | | |
1026 | 4.76M | p->fixed_gain[4] = |
1027 | 4.76M | ff_amr_set_fixed_gain(fixed_gain_factor, |
1028 | 4.76M | p->celpm_ctx.dot_productf(p->fixed_vector, |
1029 | 4.76M | p->fixed_vector, |
1030 | 4.76M | AMR_SUBFRAME_SIZE) / |
1031 | 4.76M | AMR_SUBFRAME_SIZE, |
1032 | 4.76M | p->prediction_error, |
1033 | 4.76M | energy_mean[p->cur_frame_mode], energy_pred_fac); |
1034 | | |
1035 | | // The excitation feedback is calculated without any processing such |
1036 | | // as fixed gain smoothing. This isn't mentioned in the specification. |
1037 | 195M | for (i = 0; i < AMR_SUBFRAME_SIZE; i++) |
1038 | 190M | p->excitation[i] *= p->pitch_gain[4]; |
1039 | 4.76M | ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4], |
1040 | 4.76M | AMR_SUBFRAME_SIZE); |
1041 | | |
1042 | | // In the ref decoder, excitation is stored with no fractional bits. |
1043 | | // This step prevents buzz in silent periods. The ref encoder can |
1044 | | // emit long sequences with pitch factor greater than one. This |
1045 | | // creates unwanted feedback if the excitation vector is nonzero. |
1046 | | // (e.g. test sequence T19_795.COD in 3GPP TS 26.074) |
1047 | 195M | for (i = 0; i < AMR_SUBFRAME_SIZE; i++) |
1048 | 190M | p->excitation[i] = truncf(p->excitation[i]); |
1049 | | |
1050 | | // Smooth fixed gain. |
1051 | | // The specification is ambiguous, but in the reference source, the |
1052 | | // smoothed value is NOT fed back into later fixed gain smoothing. |
1053 | 4.76M | synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe], |
1054 | 4.76M | p->lsf_avg, p->cur_frame_mode); |
1055 | | |
1056 | 4.76M | synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector, |
1057 | 4.76M | synth_fixed_gain, spare_vector); |
1058 | | |
1059 | 4.76M | if (synthesis(p, p->lpc[subframe], synth_fixed_gain, |
1060 | 4.76M | synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0)) |
1061 | | // overflow detected -> rerun synthesis scaling pitch vector down |
1062 | | // by a factor of 4, skipping pitch vector contribution emphasis |
1063 | | // and adaptive gain control |
1064 | 45.1k | synthesis(p, p->lpc[subframe], synth_fixed_gain, |
1065 | 45.1k | synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1); |
1066 | | |
1067 | 4.76M | postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE); |
1068 | | |
1069 | | // update buffers and history |
1070 | 4.76M | ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE); |
1071 | 4.76M | update_state(p); |
1072 | 4.76M | } |
1073 | | |
1074 | 1.19M | p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out, |
1075 | 1.19M | buf_out, highpass_zeros, |
1076 | 1.19M | highpass_poles, |
1077 | 1.19M | highpass_gain * AMR_SAMPLE_SCALE, |
1078 | 1.19M | p->high_pass_mem, AMR_BLOCK_SIZE); |
1079 | | |
1080 | | /* Update averaged lsf vector (used for fixed gain smoothing). |
1081 | | * |
1082 | | * Note that lsf_avg should not incorporate the current frame's LSFs |
1083 | | * for fixed_gain_smooth. |
1084 | | * The specification has an incorrect formula: the reference decoder uses |
1085 | | * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ |
1086 | 1.19M | p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], |
1087 | 1.19M | 0.84, 0.16, LP_FILTER_ORDER); |
1088 | 1.19M | buf += channel_size; |
1089 | 1.19M | buf_size -= channel_size; |
1090 | 1.19M | } |
1091 | | |
1092 | 970k | *got_frame_ptr = 1; |
1093 | | |
1094 | 970k | return buf - avpkt->data; |
1095 | 1.50M | } |
1096 | | |
1097 | | |
1098 | | const FFCodec ff_amrnb_decoder = { |
1099 | | .p.name = "amrnb", |
1100 | | CODEC_LONG_NAME("AMR-NB (Adaptive Multi-Rate NarrowBand)"), |
1101 | | .p.type = AVMEDIA_TYPE_AUDIO, |
1102 | | .p.id = AV_CODEC_ID_AMR_NB, |
1103 | | .priv_data_size = sizeof(AMRChannelsContext), |
1104 | | .init = amrnb_decode_init, |
1105 | | FF_CODEC_DECODE_CB(amrnb_decode_frame), |
1106 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF, |
1107 | | }; |