/src/ffmpeg/libavcodec/aac/aacdec.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Common parts of the AAC decoders |
3 | | * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) |
4 | | * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) |
5 | | * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com> |
6 | | * |
7 | | * AAC LATM decoder |
8 | | * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz> |
9 | | * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net> |
10 | | * |
11 | | * AAC decoder fixed-point implementation |
12 | | * Copyright (c) 2013 |
13 | | * MIPS Technologies, Inc., California. |
14 | | * |
15 | | * This file is part of FFmpeg. |
16 | | * |
17 | | * FFmpeg is free software; you can redistribute it and/or |
18 | | * modify it under the terms of the GNU Lesser General Public |
19 | | * License as published by the Free Software Foundation; either |
20 | | * version 2.1 of the License, or (at your option) any later version. |
21 | | * |
22 | | * FFmpeg is distributed in the hope that it will be useful, |
23 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
24 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
25 | | * Lesser General Public License for more details. |
26 | | * |
27 | | * You should have received a copy of the GNU Lesser General Public |
28 | | * License along with FFmpeg; if not, write to the Free Software |
29 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
30 | | */ |
31 | | |
32 | | /* We use several quantization functions here (Q31, Q30), |
33 | | * for which we need this to be defined for them to work as expected. */ |
34 | | #define USE_FIXED 1 |
35 | | |
36 | | #include "config_components.h" |
37 | | |
38 | | #include <limits.h> |
39 | | #include <stddef.h> |
40 | | |
41 | | #include "aacdec.h" |
42 | | #include "aacdec_tab.h" |
43 | | #include "aacdec_usac.h" |
44 | | |
45 | | #include "libavcodec/aac.h" |
46 | | #include "libavcodec/aac_defines.h" |
47 | | #include "libavcodec/aacsbr.h" |
48 | | #include "libavcodec/aactab.h" |
49 | | #include "libavcodec/adts_header.h" |
50 | | |
51 | | #include "libavcodec/avcodec.h" |
52 | | #include "libavcodec/internal.h" |
53 | | #include "libavcodec/codec_internal.h" |
54 | | #include "libavcodec/decode.h" |
55 | | #include "libavcodec/profiles.h" |
56 | | |
57 | | #include "libavutil/attributes.h" |
58 | | #include "libavutil/error.h" |
59 | | #include "libavutil/log.h" |
60 | | #include "libavutil/macros.h" |
61 | | #include "libavutil/mem.h" |
62 | | #include "libavutil/opt.h" |
63 | | #include "libavutil/tx.h" |
64 | | #include "libavutil/version.h" |
65 | | |
66 | | /* |
67 | | * supported tools |
68 | | * |
69 | | * Support? Name |
70 | | * N (code in SoC repo) gain control |
71 | | * Y block switching |
72 | | * Y window shapes - standard |
73 | | * N window shapes - Low Delay |
74 | | * Y filterbank - standard |
75 | | * N (code in SoC repo) filterbank - Scalable Sample Rate |
76 | | * Y Temporal Noise Shaping |
77 | | * Y Long Term Prediction |
78 | | * Y intensity stereo |
79 | | * Y channel coupling |
80 | | * Y frequency domain prediction |
81 | | * Y Perceptual Noise Substitution |
82 | | * Y Mid/Side stereo |
83 | | * N Scalable Inverse AAC Quantization |
84 | | * N Frequency Selective Switch |
85 | | * N upsampling filter |
86 | | * Y quantization & coding - AAC |
87 | | * N quantization & coding - TwinVQ |
88 | | * N quantization & coding - BSAC |
89 | | * N AAC Error Resilience tools |
90 | | * N Error Resilience payload syntax |
91 | | * N Error Protection tool |
92 | | * N CELP |
93 | | * N Silence Compression |
94 | | * N HVXC |
95 | | * N HVXC 4kbits/s VR |
96 | | * N Structured Audio tools |
97 | | * N Structured Audio Sample Bank Format |
98 | | * N MIDI |
99 | | * N Harmonic and Individual Lines plus Noise |
100 | | * N Text-To-Speech Interface |
101 | | * Y Spectral Band Replication |
102 | | * Y (not in this code) Layer-1 |
103 | | * Y (not in this code) Layer-2 |
104 | | * Y (not in this code) Layer-3 |
105 | | * N SinuSoidal Coding (Transient, Sinusoid, Noise) |
106 | | * Y Parametric Stereo |
107 | | * N Direct Stream Transfer |
108 | | * Y (not in fixed point code) Enhanced AAC Low Delay (ER AAC ELD) |
109 | | * |
110 | | * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication. |
111 | | * - HE AAC v2 comprises LC AAC with Spectral Band Replication and |
112 | | Parametric Stereo. |
113 | | */ |
114 | | |
/* Common tail of the "ran out of input" error message. It is kept as a bare
 * string literal so that a call site can prepend its own prefix through
 * literal concatenation, e.g. "decode_pce: " overread_err. */
#define overread_err "Input buffer exhausted before END element found\n"
116 | | |
117 | | static int count_channels(uint8_t (*layout)[3], int tags) |
118 | 94.5k | { |
119 | 94.5k | int i, sum = 0; |
120 | 541k | for (i = 0; i < tags; i++) { |
121 | 447k | int syn_ele = layout[i][0]; |
122 | 447k | int pos = layout[i][2]; |
123 | 447k | sum += (1 + (syn_ele == TYPE_CPE)) * |
124 | 447k | (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC); |
125 | 447k | } |
126 | 94.5k | return sum; |
127 | 94.5k | } |
128 | | |
129 | | /** |
130 | | * Check for the channel element in the current channel position configuration. |
131 | | * If it exists, make sure the appropriate element is allocated and map the |
132 | | * channel order to match the internal FFmpeg channel layout. |
133 | | * |
134 | | * @param che_pos current channel position configuration |
135 | | * @param type channel element type |
136 | | * @param id channel element id |
137 | | * @param channels count of the number of channels in the configuration |
138 | | * |
139 | | * @return Returns error status. 0 - OK, !0 - error |
140 | | */ |
141 | | static av_cold int che_configure(AACDecContext *ac, |
142 | | enum ChannelPosition che_pos, |
143 | | int type, int id, int *channels) |
144 | 4.09M | { |
145 | 4.09M | if (*channels >= MAX_CHANNELS) |
146 | 1.26k | return AVERROR_INVALIDDATA; |
147 | 4.09M | if (che_pos) { |
148 | 4.09M | if (!ac->che[type][id]) { |
149 | 104k | int ret = ac->proc.sbr_ctx_alloc_init(ac, &ac->che[type][id], type); |
150 | 104k | if (ret < 0) |
151 | 0 | return ret; |
152 | 104k | } |
153 | 4.09M | if (type != TYPE_CCE) { |
154 | 3.71M | if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) { |
155 | 890 | av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n"); |
156 | 890 | return AVERROR_INVALIDDATA; |
157 | 890 | } |
158 | 3.71M | ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0]; |
159 | 3.71M | if (type == TYPE_CPE || |
160 | 3.71M | (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) { |
161 | 1.55M | ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1]; |
162 | 1.55M | } |
163 | 3.71M | } |
164 | 4.09M | } else { |
165 | 280 | if (ac->che[type][id]) { |
166 | 23 | ac->proc.sbr_ctx_close(ac->che[type][id]); |
167 | 23 | } |
168 | 280 | av_freep(&ac->che[type][id]); |
169 | 280 | memset(ac->output_element, 0, sizeof(ac->output_element)); |
170 | 280 | } |
171 | 4.09M | return 0; |
172 | 4.09M | } |
173 | | |
174 | | static int frame_configure_elements(AVCodecContext *avctx) |
175 | 3.36M | { |
176 | 3.36M | AACDecContext *ac = avctx->priv_data; |
177 | 3.36M | int type, id, ch, ret; |
178 | | |
179 | | /* set channel pointers to internal buffers by default */ |
180 | 16.8M | for (type = 0; type < 4; type++) { |
181 | 874M | for (id = 0; id < MAX_ELEM_ID; id++) { |
182 | 860M | ChannelElement *che = ac->che[type][id]; |
183 | 860M | if (che) { |
184 | 73.1M | che->ch[0].output = che->ch[0].ret_buf; |
185 | 73.1M | che->ch[1].output = che->ch[1].ret_buf; |
186 | 73.1M | } |
187 | 860M | } |
188 | 13.4M | } |
189 | | |
190 | | /* get output buffer */ |
191 | 3.36M | av_frame_unref(ac->frame); |
192 | 3.36M | if (!avctx->ch_layout.nb_channels) |
193 | 22.1k | return 1; |
194 | | |
195 | 3.34M | ac->frame->nb_samples = 2048; |
196 | 3.34M | if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0) |
197 | 0 | return ret; |
198 | | |
199 | | /* map output channel pointers to AVFrame data */ |
200 | 13.0M | for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) { |
201 | 9.71M | if (ac->output_element[ch]) |
202 | 9.70M | ac->output_element[ch]->output = (void *)ac->frame->extended_data[ch]; |
203 | 9.71M | } |
204 | | |
205 | 3.34M | return 0; |
206 | 3.34M | } |
207 | | |
/**
 * Scratch record used while sniffing the channel order: ties one layout map
 * entry to the output channel bit(s) it was assigned.
 */
struct elem_to_channel {
    uint64_t av_position;  /* channel bit mask (1ULL << channel); for a CPE the OR of both channels */
    uint8_t syn_ele;       /* syntactic element type, taken from layout_map[i][0] */
    uint8_t elem_id;       /* element id, taken from layout_map[i][1] */
    uint8_t aac_position;  /* AAC channel position the entry was assigned under */
};
214 | | |
215 | | static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID], |
216 | | uint8_t (*layout_map)[3], int offset, uint64_t left, |
217 | | uint64_t right, int pos, uint64_t *layout) |
218 | 1.03M | { |
219 | 1.03M | if (layout_map[offset][0] == TYPE_CPE) { |
220 | 698k | e2c_vec[offset] = (struct elem_to_channel) { |
221 | 698k | .av_position = left | right, |
222 | 698k | .syn_ele = TYPE_CPE, |
223 | 698k | .elem_id = layout_map[offset][1], |
224 | 698k | .aac_position = pos |
225 | 698k | }; |
226 | 698k | if (e2c_vec[offset].av_position != UINT64_MAX) |
227 | 698k | *layout |= e2c_vec[offset].av_position; |
228 | | |
229 | 698k | return 1; |
230 | 698k | } else { |
231 | 340k | e2c_vec[offset] = (struct elem_to_channel) { |
232 | 340k | .av_position = left, |
233 | 340k | .syn_ele = TYPE_SCE, |
234 | 340k | .elem_id = layout_map[offset][1], |
235 | 340k | .aac_position = pos |
236 | 340k | }; |
237 | 340k | e2c_vec[offset + 1] = (struct elem_to_channel) { |
238 | 340k | .av_position = right, |
239 | 340k | .syn_ele = TYPE_SCE, |
240 | 340k | .elem_id = layout_map[offset + 1][1], |
241 | 340k | .aac_position = pos |
242 | 340k | }; |
243 | 340k | if (left != UINT64_MAX) |
244 | 340k | *layout |= left; |
245 | | |
246 | 340k | if (right != UINT64_MAX) |
247 | 340k | *layout |= right; |
248 | | |
249 | 340k | return 2; |
250 | 340k | } |
251 | 1.03M | } |
252 | | |
253 | | static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, |
254 | | int current) |
255 | 8.57M | { |
256 | 8.57M | int num_pos_channels = 0; |
257 | 8.57M | int first_cpe = 0; |
258 | 8.57M | int sce_parity = 0; |
259 | 8.57M | int i; |
260 | 13.0M | for (i = current; i < tags; i++) { |
261 | 7.76M | if (layout_map[i][2] != pos) |
262 | 3.18M | break; |
263 | 4.58M | if (layout_map[i][0] == TYPE_CPE) { |
264 | 1.49M | if (sce_parity) { |
265 | 840k | if (pos == AAC_CHANNEL_FRONT && !first_cpe) { |
266 | 751k | sce_parity = 0; |
267 | 751k | } else { |
268 | 89.0k | return -1; |
269 | 89.0k | } |
270 | 840k | } |
271 | 1.41M | num_pos_channels += 2; |
272 | 1.41M | first_cpe = 1; |
273 | 3.08M | } else { |
274 | 3.08M | num_pos_channels++; |
275 | 3.08M | sce_parity ^= (pos != AAC_CHANNEL_LFE); |
276 | 3.08M | } |
277 | 4.58M | } |
278 | 8.48M | if (sce_parity && |
279 | 8.48M | (pos == AAC_CHANNEL_FRONT && first_cpe)) |
280 | 561k | return -1; |
281 | | |
282 | 7.92M | return num_pos_channels; |
283 | 8.48M | } |
284 | | |
285 | | static int assign_channels(struct elem_to_channel e2c_vec[MAX_ELEM_ID], uint8_t (*layout_map)[3], |
286 | | uint64_t *layout, int tags, int layer, int pos, int *current) |
287 | 8.57M | { |
288 | 8.57M | int i = *current, j = 0; |
289 | 8.57M | int nb_channels = count_paired_channels(layout_map, tags, pos, i); |
290 | | |
291 | 8.57M | if (nb_channels < 0 || nb_channels > 5) |
292 | 678k | return 0; |
293 | | |
294 | 7.89M | if (pos == AAC_CHANNEL_LFE) { |
295 | 2.17M | while (nb_channels) { |
296 | 35.0k | if (ff_aac_channel_map[layer][pos - 1][j] == AV_CHAN_NONE) |
297 | 1.69k | return -1; |
298 | 33.3k | e2c_vec[i] = (struct elem_to_channel) { |
299 | 33.3k | .av_position = 1ULL << ff_aac_channel_map[layer][pos - 1][j], |
300 | 33.3k | .syn_ele = layout_map[i][0], |
301 | 33.3k | .elem_id = layout_map[i][1], |
302 | 33.3k | .aac_position = pos |
303 | 33.3k | }; |
304 | 33.3k | *layout |= e2c_vec[i].av_position; |
305 | 33.3k | i++; |
306 | 33.3k | j++; |
307 | 33.3k | nb_channels--; |
308 | 33.3k | } |
309 | 2.14M | *current = i; |
310 | | |
311 | 2.14M | return 0; |
312 | 2.14M | } |
313 | | |
314 | 6.58M | while (nb_channels & 1) { |
315 | 879k | if (ff_aac_channel_map[layer][pos - 1][0] == AV_CHAN_NONE) |
316 | 146 | return -1; |
317 | 879k | if (ff_aac_channel_map[layer][pos - 1][0] == AV_CHAN_UNUSED) |
318 | 54.9k | break; |
319 | 824k | e2c_vec[i] = (struct elem_to_channel) { |
320 | 824k | .av_position = 1ULL << ff_aac_channel_map[layer][pos - 1][0], |
321 | 824k | .syn_ele = layout_map[i][0], |
322 | 824k | .elem_id = layout_map[i][1], |
323 | 824k | .aac_position = pos |
324 | 824k | }; |
325 | 824k | *layout |= e2c_vec[i].av_position; |
326 | 824k | i++; |
327 | 824k | nb_channels--; |
328 | 824k | } |
329 | | |
330 | 5.75M | j = (pos != AAC_CHANNEL_SIDE) && nb_channels <= 3 ? 3 : 1; |
331 | 6.79M | while (nb_channels >= 2) { |
332 | 1.04M | if (ff_aac_channel_map[layer][pos - 1][j] == AV_CHAN_NONE || |
333 | 1.04M | ff_aac_channel_map[layer][pos - 1][j+1] == AV_CHAN_NONE) |
334 | 2.95k | return -1; |
335 | 1.03M | i += assign_pair(e2c_vec, layout_map, i, |
336 | 1.03M | 1ULL << ff_aac_channel_map[layer][pos - 1][j], |
337 | 1.03M | 1ULL << ff_aac_channel_map[layer][pos - 1][j+1], |
338 | 1.03M | pos, layout); |
339 | 1.03M | j += 2; |
340 | 1.03M | nb_channels -= 2; |
341 | 1.03M | } |
342 | 5.80M | while (nb_channels & 1) { |
343 | 53.4k | if (ff_aac_channel_map[layer][pos - 1][5] == AV_CHAN_NONE) |
344 | 593 | return -1; |
345 | 52.8k | e2c_vec[i] = (struct elem_to_channel) { |
346 | 52.8k | .av_position = 1ULL << ff_aac_channel_map[layer][pos - 1][5], |
347 | 52.8k | .syn_ele = layout_map[i][0], |
348 | 52.8k | .elem_id = layout_map[i][1], |
349 | 52.8k | .aac_position = pos |
350 | 52.8k | }; |
351 | 52.8k | *layout |= e2c_vec[i].av_position; |
352 | 52.8k | i++; |
353 | 52.8k | nb_channels--; |
354 | 52.8k | } |
355 | 5.75M | if (nb_channels) |
356 | 0 | return -1; |
357 | | |
358 | 5.75M | *current = i; |
359 | | |
360 | 5.75M | return 0; |
361 | 5.75M | } |
362 | | |
363 | | static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags) |
364 | 1.64M | { |
365 | 1.64M | int i, n, total_non_cc_elements; |
366 | 1.64M | struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } }; |
367 | 1.64M | uint64_t layout = 0; |
368 | | |
369 | 1.64M | if (FF_ARRAY_ELEMS(e2c_vec) < tags) |
370 | 0 | return 0; |
371 | | |
372 | 3.78M | for (n = 0, i = 0; n < 3 && i < tags; n++) { |
373 | 2.14M | int ret = assign_channels(e2c_vec, layout_map, &layout, tags, n, AAC_CHANNEL_FRONT, &i); |
374 | 2.14M | if (ret < 0) |
375 | 49 | return 0; |
376 | 2.14M | ret = assign_channels(e2c_vec, layout_map, &layout, tags, n, AAC_CHANNEL_SIDE, &i); |
377 | 2.14M | if (ret < 0) |
378 | 3.27k | return 0; |
379 | 2.14M | ret = assign_channels(e2c_vec, layout_map, &layout, tags, n, AAC_CHANNEL_BACK, &i); |
380 | 2.14M | if (ret < 0) |
381 | 369 | return 0; |
382 | 2.14M | ret = assign_channels(e2c_vec, layout_map, &layout, tags, n, AAC_CHANNEL_LFE, &i); |
383 | 2.14M | if (ret < 0) |
384 | 1.69k | return 0; |
385 | 2.14M | } |
386 | | |
387 | 1.63M | total_non_cc_elements = n = i; |
388 | | |
389 | 1.63M | if (layout == AV_CH_LAYOUT_22POINT2) { |
390 | | // For 22.2 reorder the result as needed |
391 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[2], e2c_vec[0]); // FL & FR first (final), FC third |
392 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[2], e2c_vec[1]); // FC second (final), FLc & FRc third |
393 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[6], e2c_vec[2]); // LFE1 third (final), FLc & FRc seventh |
394 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[4], e2c_vec[3]); // BL & BR fourth (final), SiL & SiR fifth |
395 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[6], e2c_vec[4]); // FLc & FRc fifth (final), SiL & SiR seventh |
396 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[7], e2c_vec[6]); // LFE2 seventh (final), SiL & SiR eight (final) |
397 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[9], e2c_vec[8]); // TpFL & TpFR ninth (final), TFC tenth (final) |
398 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[11], e2c_vec[10]); // TC eleventh (final), TpSiL & TpSiR twelth |
399 | 2.46k | FFSWAP(struct elem_to_channel, e2c_vec[12], e2c_vec[11]); // TpBL & TpBR twelth (final), TpSiL & TpSiR thirteenth (final) |
400 | 1.63M | } else { |
401 | | // For everything else, utilize the AV channel position define as a |
402 | | // stable sort. |
403 | 2.06M | do { |
404 | 2.06M | int next_n = 0; |
405 | 3.37M | for (i = 1; i < n; i++) |
406 | 1.31M | if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) { |
407 | 630k | FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]); |
408 | 630k | next_n = i; |
409 | 630k | } |
410 | 2.06M | n = next_n; |
411 | 2.06M | } while (n > 0); |
412 | | |
413 | 1.63M | } |
414 | | |
415 | 3.91M | for (i = 0; i < total_non_cc_elements; i++) { |
416 | 2.27M | layout_map[i][0] = e2c_vec[i].syn_ele; |
417 | 2.27M | layout_map[i][1] = e2c_vec[i].elem_id; |
418 | 2.27M | layout_map[i][2] = e2c_vec[i].aac_position; |
419 | 2.27M | } |
420 | | |
421 | 1.63M | return layout; |
422 | 1.64M | } |
423 | | |
424 | | /** |
425 | | * Save current output configuration if and only if it has been locked. |
426 | | */ |
427 | | static int push_output_configuration(AACDecContext *ac) |
428 | 777k | { |
429 | 777k | int pushed = 0; |
430 | | |
431 | 777k | if (ac->oc[1].status == OC_LOCKED || ac->oc[0].status == OC_NONE) { |
432 | 379k | ac->oc[0] = ac->oc[1]; |
433 | 379k | pushed = 1; |
434 | 379k | } |
435 | 777k | ac->oc[1].status = OC_NONE; |
436 | 777k | return pushed; |
437 | 777k | } |
438 | | |
439 | | /** |
440 | | * Restore the previous output configuration if and only if the current |
441 | | * configuration is unlocked. |
442 | | */ |
443 | | static void pop_output_configuration(AACDecContext *ac) |
444 | 1.20M | { |
445 | 1.20M | if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) { |
446 | 724k | ac->oc[1] = ac->oc[0]; |
447 | 724k | ac->avctx->ch_layout = ac->oc[1].ch_layout; |
448 | 724k | ff_aac_output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, |
449 | 724k | ac->oc[1].status, 0); |
450 | 724k | } |
451 | 1.20M | } |
452 | | |
453 | | /** |
454 | | * Configure output channel order based on the current program |
455 | | * configuration element. |
456 | | * |
457 | | * @return Returns error status. 0 - OK, !0 - error |
458 | | */ |
459 | | int ff_aac_output_configure(AACDecContext *ac, |
460 | | uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags, |
461 | | enum OCStatus oc_type, int get_new_frame) |
462 | 1.64M | { |
463 | 1.64M | AVCodecContext *avctx = ac->avctx; |
464 | 1.64M | int i, channels = 0, ret; |
465 | 1.64M | uint64_t layout = 0; |
466 | 1.64M | uint8_t id_map[TYPE_END][MAX_ELEM_ID] = {{ 0 }}; |
467 | 1.64M | uint8_t type_counts[TYPE_END] = { 0 }; |
468 | | |
469 | 1.64M | if (ac->oc[1].layout_map != layout_map) { |
470 | 687k | memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0])); |
471 | 687k | ac->oc[1].layout_map_tags = tags; |
472 | 687k | } |
473 | 5.76M | for (i = 0; i < tags; i++) { |
474 | 4.12M | int type = layout_map[i][0]; |
475 | 4.12M | int id = layout_map[i][1]; |
476 | 4.12M | id_map[type][id] = type_counts[type]++; |
477 | 4.12M | if (id_map[type][id] >= MAX_ELEM_ID) { |
478 | 0 | avpriv_request_sample(ac->avctx, "Too large remapped id"); |
479 | 0 | return AVERROR_PATCHWELCOME; |
480 | 0 | } |
481 | 4.12M | } |
482 | | // Try to sniff a reasonable channel order, otherwise output the |
483 | | // channels in the order the PCE declared them. |
484 | 1.64M | if (ac->output_channel_order == CHANNEL_ORDER_DEFAULT) |
485 | 1.64M | layout = sniff_channel_order(layout_map, tags); |
486 | 5.73M | for (i = 0; i < tags; i++) { |
487 | 4.09M | int type = layout_map[i][0]; |
488 | 4.09M | int id = layout_map[i][1]; |
489 | 4.09M | int iid = id_map[type][id]; |
490 | 4.09M | int position = layout_map[i][2]; |
491 | | // Allocate or free elements depending on if they are in the |
492 | | // current program configuration. |
493 | 4.09M | ret = che_configure(ac, position, type, iid, &channels); |
494 | 4.09M | if (ret < 0) |
495 | 2.15k | return ret; |
496 | 4.09M | ac->tag_che_map[type][id] = ac->che[type][iid]; |
497 | 4.09M | } |
498 | 1.63M | if (ac->oc[1].m4ac.ps == 1 && channels == 2) { |
499 | 425k | if (layout == AV_CH_FRONT_CENTER) { |
500 | 345k | layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT; |
501 | 345k | } else { |
502 | 79.8k | layout = 0; |
503 | 79.8k | } |
504 | 425k | } |
505 | | |
506 | 1.63M | av_channel_layout_uninit(&ac->oc[1].ch_layout); |
507 | 1.63M | if (layout) |
508 | 1.32M | av_channel_layout_from_mask(&ac->oc[1].ch_layout, layout); |
509 | 312k | else { |
510 | 312k | ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; |
511 | 312k | ac->oc[1].ch_layout.nb_channels = channels; |
512 | 312k | } |
513 | | |
514 | 1.63M | av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout); |
515 | 1.63M | ac->oc[1].status = oc_type; |
516 | | |
517 | 1.63M | if (get_new_frame) { |
518 | 358k | if ((ret = frame_configure_elements(ac->avctx)) < 0) |
519 | 0 | return ret; |
520 | 358k | } |
521 | | |
522 | 1.63M | return 0; |
523 | 1.63M | } |
524 | | |
525 | | static av_cold void flush(AVCodecContext *avctx) |
526 | 996k | { |
527 | 996k | AACDecContext *ac= avctx->priv_data; |
528 | 996k | int type, i, j; |
529 | | |
530 | 4.98M | for (type = 3; type >= 0; type--) { |
531 | 259M | for (i = 0; i < MAX_ELEM_ID; i++) { |
532 | 255M | ChannelElement *che = ac->che[type][i]; |
533 | 255M | if (che) { |
534 | 32.5M | for (j = 0; j <= 1; j++) { |
535 | 21.6M | memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved)); |
536 | 21.6M | } |
537 | 10.8M | } |
538 | 255M | } |
539 | 3.98M | } |
540 | | |
541 | 996k | ff_aac_usac_reset_state(ac, &ac->oc[1]); |
542 | 996k | } |
543 | | |
544 | | /** |
545 | | * Set up channel positions based on a default channel configuration |
546 | | * as specified in table 1.17. |
547 | | * |
548 | | * @return Returns error status. 0 - OK, !0 - error |
549 | | */ |
550 | | int ff_aac_set_default_channel_config(AACDecContext *ac, AVCodecContext *avctx, |
551 | | uint8_t (*layout_map)[3], |
552 | | int *tags, |
553 | | int channel_config) |
554 | 609k | { |
555 | 609k | if (channel_config < 1 || (channel_config > 7 && channel_config < 11) || |
556 | 609k | channel_config > 14) { |
557 | 1.67k | av_log(avctx, AV_LOG_ERROR, |
558 | 1.67k | "invalid default channel configuration (%d)\n", |
559 | 1.67k | channel_config); |
560 | 1.67k | return AVERROR_INVALIDDATA; |
561 | 1.67k | } |
562 | 608k | *tags = ff_tags_per_config[channel_config]; |
563 | 608k | memcpy(layout_map, ff_aac_channel_layout_map[channel_config - 1], |
564 | 608k | *tags * sizeof(*layout_map)); |
565 | | |
566 | | /* |
567 | | * AAC specification has 7.1(wide) as a default layout for 8-channel streams. |
568 | | * However, at least Nero AAC encoder encodes 7.1 streams using the default |
569 | | * channel config 7, mapping the side channels of the original audio stream |
570 | | * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD |
571 | | * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding |
572 | | * the incorrect streams as if they were correct (and as the encoder intended). |
573 | | * |
574 | | * As actual intended 7.1(wide) streams are very rare, default to assuming a |
575 | | * 7.1 layout was intended. |
576 | | */ |
577 | 608k | if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { |
578 | 7.79k | layout_map[2][2] = AAC_CHANNEL_BACK; |
579 | | |
580 | 7.79k | if (!ac || !ac->warned_71_wide++) { |
581 | 4.56k | av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout" |
582 | 4.56k | " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode" |
583 | 4.56k | " according to the specification instead.\n", FF_COMPLIANCE_STRICT); |
584 | 4.56k | } |
585 | 7.79k | } |
586 | | |
587 | 608k | return 0; |
588 | 609k | } |
589 | | |
/**
 * Look up the channel element that the syntactic element (type, elem_id)
 * of the current frame maps to.
 *
 * If m4ac.chan_config is 0 the tag map is returned as is. Otherwise the
 * element is chosen by position: the switch below is entered at the case for
 * the configured chan_config and runs through all following cases (none of
 * them ends in a break), each of which matches on the number of tags mapped
 * so far and on the element type. A successful match increments
 * ac->tags_mapped and stores the result in ac->tag_che_map.
 *
 * Before that, a CPE arriving first in a mono configuration, or an SCE
 * arriving first in a stereo configuration, triggers a trial reconfiguration
 * of the output.
 *
 * @return the mapped channel element (the tag_che_map entry for
 *         chan_config 0), or NULL if no rule matches or a reconfiguration
 *         fails
 */
ChannelElement *ff_aac_get_che(AACDecContext *ac, int type, int elem_id)
{
    /* For PCE based channel configurations map the channels solely based
     * on tags. */
    if (!ac->oc[1].m4ac.chan_config) {
        return ac->tag_che_map[type][elem_id];
    }
    // Allow single CPE stereo files to be signalled with mono configuration.
    if (!ac->tags_mapped && type == TYPE_CPE &&
        ac->oc[1].m4ac.chan_config == 1) {
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;
        push_output_configuration(ac);

        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");

        if (ff_aac_set_default_channel_config(ac, ac->avctx, layout_map,
                                              &layout_map_tags, 2) < 0)
            return NULL;
        if (ff_aac_output_configure(ac, layout_map, layout_map_tags,
                                    OC_TRIAL_FRAME, 1) < 0)
            return NULL;

        /* continue as a stereo configuration with m4ac.ps cleared */
        ac->oc[1].m4ac.chan_config = 2;
        ac->oc[1].m4ac.ps = 0;
    }
    // And vice-versa
    if (!ac->tags_mapped && type == TYPE_SCE &&
        ac->oc[1].m4ac.chan_config == 2) {
        uint8_t layout_map[MAX_ELEM_ID * 4][3];
        int layout_map_tags;
        push_output_configuration(ac);

        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");

        /* two front SCEs with ids 0 and 1 */
        layout_map_tags = 2;
        layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
        layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
        layout_map[0][1] = 0;
        layout_map[1][1] = 1;
        if (ff_aac_output_configure(ac, layout_map, layout_map_tags,
                                    OC_TRIAL_FRAME, 1) < 0)
            return NULL;

        if (ac->oc[1].m4ac.sbr)
            ac->oc[1].m4ac.ps = -1;
    }
    /* For indexed channel configurations map the channels solely based
     * on position. */
    switch (ac->oc[1].m4ac.chan_config) {
    case 14:
        if (ac->tags_mapped > 2 && ((type == TYPE_CPE && elem_id < 3) ||
                                    (type == TYPE_LFE && elem_id < 1))) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[type][elem_id];
        }
        /* no break: continues into case 13 */
    case 13:
        if (ac->tags_mapped > 3 && ((type == TYPE_CPE && elem_id < 8) ||
                                    (type == TYPE_SCE && elem_id < 6) ||
                                    (type == TYPE_LFE && elem_id < 2))) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[type][elem_id];
        }
        /* no break: continues into case 12 */
    case 12:
    case 7:
        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
        }
        /* no break: continues into case 11 */
    case 11:
        if (ac->tags_mapped == 3 && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* no break: continues into case 6 */
    case 6:
        /* Some streams incorrectly code 5.1 audio as
         * SCE[0] CPE[0] CPE[1] SCE[1]
         * instead of
         * SCE[0] CPE[0] CPE[1] LFE[0].
         * If we seem to have encountered such a stream, transfer
         * the LFE[0] element to the SCE[1]'s mapping */
        if (ac->tags_mapped == ff_tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            /* warn once, and only when the element is not the expected LFE[0] */
            if (!ac->warned_remapping_once && (type != TYPE_LFE || elem_id != 0)) {
                av_log(ac->avctx, AV_LOG_WARNING,
                       "This stream seems to incorrectly report its last channel as %s[%d], mapping to LFE[0]\n",
                       type == TYPE_SCE ? "SCE" : "LFE", elem_id);
                ac->warned_remapping_once++;
            }
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
        }
        /* no break: continues into case 5 */
    case 5:
        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
        }
        /* no break: continues into case 4 */
    case 4:
        /* Some streams incorrectly code 4.0 audio as
         * SCE[0] CPE[0] LFE[0]
         * instead of
         * SCE[0] CPE[0] SCE[1].
         * If we seem to have encountered such a stream, transfer
         * the SCE[1] element to the LFE[0]'s mapping */
        if (ac->tags_mapped == ff_tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            /* warn once, and only when the element is not the expected SCE[1] */
            if (!ac->warned_remapping_once && (type != TYPE_SCE || elem_id != 1)) {
                av_log(ac->avctx, AV_LOG_WARNING,
                       "This stream seems to incorrectly report its last channel as %s[%d], mapping to SCE[1]\n",
                       type == TYPE_SCE ? "SCE" : "LFE", elem_id);
                ac->warned_remapping_once++;
            }
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* NOTE(review): if ff_tags_per_config[4] is 3, the check above
         * already catches every case this condition could match, which
         * would make it unreachable -- confirm against the table. */
        if (ac->tags_mapped == 2 &&
            ac->oc[1].m4ac.chan_config == 4 &&
            type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* no break: continues into case 3 */
    case 3:
    case 2:
        /* the CPE is expected as the first element for config 2 and as the
         * second element for every other config that reaches this point */
        if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) &&
            type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
        } else if (ac->tags_mapped == 1 && ac->oc[1].m4ac.chan_config == 2 &&
                   type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* no break: continues into case 1 */
    case 1:
        if (!ac->tags_mapped && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
        }
        /* no break: continues into default */
    default:
        return NULL;
    }
}
729 | | |
730 | | /** |
731 | | * Decode an array of 4 bit element IDs, optionally interleaved with a |
732 | | * stereo/mono switching bit. |
733 | | * |
734 | | * @param type speaker type/position for these channels |
735 | | */ |
736 | | static void decode_channel_map(uint8_t layout_map[][3], |
737 | | enum ChannelPosition type, |
738 | | GetBitContext *gb, int n) |
739 | 282k | { |
740 | 1.33M | while (n--) { |
741 | 1.05M | enum RawDataBlockType syn_ele; |
742 | 1.05M | switch (type) { |
743 | 334k | case AAC_CHANNEL_FRONT: |
744 | 524k | case AAC_CHANNEL_BACK: |
745 | 785k | case AAC_CHANNEL_SIDE: |
746 | 785k | syn_ele = get_bits1(gb); |
747 | 785k | break; |
748 | 220k | case AAC_CHANNEL_CC: |
749 | 220k | skip_bits1(gb); |
750 | 220k | syn_ele = TYPE_CCE; |
751 | 220k | break; |
752 | 50.8k | case AAC_CHANNEL_LFE: |
753 | 50.8k | syn_ele = TYPE_LFE; |
754 | 50.8k | break; |
755 | 0 | default: |
756 | | // AAC_CHANNEL_OFF has no channel map |
757 | 0 | av_assert0(0); |
758 | 1.05M | } |
759 | 1.05M | layout_map[0][0] = syn_ele; |
760 | 1.05M | layout_map[0][1] = get_bits(gb, 4); |
761 | 1.05M | layout_map[0][2] = type; |
762 | 1.05M | layout_map++; |
763 | 1.05M | } |
764 | 282k | } |
765 | | |
766 | | static inline void relative_align_get_bits(GetBitContext *gb, |
767 | 56.4k | int reference_position) { |
768 | 56.4k | int n = (reference_position - get_bits_count(gb) & 7); |
769 | 56.4k | if (n) |
770 | 52.8k | skip_bits(gb, n); |
771 | 56.4k | } |
772 | | |
773 | | /** |
774 | | * Decode program configuration element; reference: table 4.2. |
775 | | * |
776 | | * @return Returns error status. 0 - OK, !0 - error |
777 | | */ |
778 | | static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac, |
779 | | uint8_t (*layout_map)[3], |
780 | | GetBitContext *gb, int byte_align_ref) |
781 | 123k | { |
782 | 123k | int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc; |
783 | 123k | int sampling_index; |
784 | 123k | int comment_len; |
785 | 123k | int tags; |
786 | | |
787 | 123k | skip_bits(gb, 2); // object_type |
788 | | |
789 | 123k | sampling_index = get_bits(gb, 4); |
790 | 123k | if (m4ac->sampling_index != sampling_index) |
791 | 111k | av_log(avctx, AV_LOG_WARNING, |
792 | 111k | "Sample rate index in program config element does not " |
793 | 111k | "match the sample rate index configured by the container.\n"); |
794 | | |
795 | 123k | num_front = get_bits(gb, 4); |
796 | 123k | num_side = get_bits(gb, 4); |
797 | 123k | num_back = get_bits(gb, 4); |
798 | 123k | num_lfe = get_bits(gb, 2); |
799 | 123k | num_assoc_data = get_bits(gb, 3); |
800 | 123k | num_cc = get_bits(gb, 4); |
801 | | |
802 | 123k | if (get_bits1(gb)) |
803 | 50.7k | skip_bits(gb, 4); // mono_mixdown_tag |
804 | 123k | if (get_bits1(gb)) |
805 | 49.2k | skip_bits(gb, 4); // stereo_mixdown_tag |
806 | | |
807 | 123k | if (get_bits1(gb)) |
808 | 33.4k | skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround |
809 | | |
810 | 123k | if (get_bits_left(gb) < 5 * (num_front + num_side + num_back + num_cc) + 4 *(num_lfe + num_assoc_data + num_cc)) { |
811 | 66.7k | av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err); |
812 | 66.7k | return -1; |
813 | 66.7k | } |
814 | 56.4k | decode_channel_map(layout_map , AAC_CHANNEL_FRONT, gb, num_front); |
815 | 56.4k | tags = num_front; |
816 | 56.4k | decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE, gb, num_side); |
817 | 56.4k | tags += num_side; |
818 | 56.4k | decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK, gb, num_back); |
819 | 56.4k | tags += num_back; |
820 | 56.4k | decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE, gb, num_lfe); |
821 | 56.4k | tags += num_lfe; |
822 | | |
823 | 56.4k | skip_bits_long(gb, 4 * num_assoc_data); |
824 | | |
825 | 56.4k | decode_channel_map(layout_map + tags, AAC_CHANNEL_CC, gb, num_cc); |
826 | 56.4k | tags += num_cc; |
827 | | |
828 | 56.4k | relative_align_get_bits(gb, byte_align_ref); |
829 | | |
830 | | /* comment field, first byte is length */ |
831 | 56.4k | comment_len = get_bits(gb, 8) * 8; |
832 | 56.4k | if (get_bits_left(gb) < comment_len) { |
833 | 17.9k | av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err); |
834 | 17.9k | return AVERROR_INVALIDDATA; |
835 | 17.9k | } |
836 | 38.5k | skip_bits_long(gb, comment_len); |
837 | 38.5k | return tags; |
838 | 56.4k | } |
839 | | |
840 | | /** |
841 | | * Decode GA "General Audio" specific configuration; reference: table 4.1. |
842 | | * |
843 | | * @param ac pointer to AACDecContext, may be null |
844 | | * @param avctx pointer to AVCCodecContext, used for logging |
845 | | * |
846 | | * @return Returns error status. 0 - OK, !0 - error |
847 | | */ |
848 | | static int decode_ga_specific_config(AACDecContext *ac, AVCodecContext *avctx, |
849 | | GetBitContext *gb, |
850 | | int get_bit_alignment, |
851 | | MPEG4AudioConfig *m4ac, |
852 | | int channel_config) |
853 | 99.8k | { |
854 | 99.8k | int extension_flag, ret, ep_config, res_flags; |
855 | 99.8k | uint8_t layout_map[MAX_ELEM_ID*4][3]; |
856 | 99.8k | int tags = 0; |
857 | | |
858 | 99.8k | m4ac->frame_length_short = get_bits1(gb); |
859 | 99.8k | if (m4ac->frame_length_short && m4ac->sbr == 1) { |
860 | 2.29k | avpriv_report_missing_feature(avctx, "SBR with 960 frame length"); |
861 | 2.29k | if (ac) ac->warned_960_sbr = 1; |
862 | 2.29k | m4ac->sbr = 0; |
863 | 2.29k | m4ac->ps = 0; |
864 | 2.29k | } |
865 | | |
866 | 99.8k | if (get_bits1(gb)) // dependsOnCoreCoder |
867 | 50.2k | skip_bits(gb, 14); // coreCoderDelay |
868 | 99.8k | extension_flag = get_bits1(gb); |
869 | | |
870 | 99.8k | if (m4ac->object_type == AOT_AAC_SCALABLE || |
871 | 99.8k | m4ac->object_type == AOT_ER_AAC_SCALABLE) |
872 | 0 | skip_bits(gb, 3); // layerNr |
873 | | |
874 | 99.8k | if (channel_config == 0) { |
875 | 22.7k | skip_bits(gb, 4); // element_instance_tag |
876 | 22.7k | tags = decode_pce(avctx, m4ac, layout_map, gb, get_bit_alignment); |
877 | 22.7k | if (tags < 0) |
878 | 4.89k | return tags; |
879 | 77.0k | } else { |
880 | 77.0k | if ((ret = ff_aac_set_default_channel_config(ac, avctx, layout_map, |
881 | 77.0k | &tags, channel_config))) |
882 | 410 | return ret; |
883 | 77.0k | } |
884 | | |
885 | 94.5k | if (count_channels(layout_map, tags) > 1) { |
886 | 84.0k | m4ac->ps = 0; |
887 | 84.0k | } else if (m4ac->sbr == 1 && m4ac->ps == -1) |
888 | 1.05k | m4ac->ps = 1; |
889 | | |
890 | 94.5k | if (ac && (ret = ff_aac_output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0))) |
891 | 1.31k | return ret; |
892 | | |
893 | 93.2k | if (extension_flag) { |
894 | 22.1k | switch (m4ac->object_type) { |
895 | 0 | case AOT_ER_BSAC: |
896 | 0 | skip_bits(gb, 5); // numOfSubFrame |
897 | 0 | skip_bits(gb, 11); // layer_length |
898 | 0 | break; |
899 | 764 | case AOT_ER_AAC_LC: |
900 | 764 | case AOT_ER_AAC_LTP: |
901 | 764 | case AOT_ER_AAC_SCALABLE: |
902 | 947 | case AOT_ER_AAC_LD: |
903 | 947 | res_flags = get_bits(gb, 3); |
904 | 947 | if (res_flags) { |
905 | 683 | avpriv_report_missing_feature(avctx, |
906 | 683 | "AAC data resilience (flags %x)", |
907 | 683 | res_flags); |
908 | 683 | return AVERROR_PATCHWELCOME; |
909 | 683 | } |
910 | 264 | break; |
911 | 22.1k | } |
912 | 21.4k | skip_bits1(gb); // extensionFlag3 (TBD in version 3) |
913 | 21.4k | } |
914 | 92.5k | switch (m4ac->object_type) { |
915 | 4.70k | case AOT_ER_AAC_LC: |
916 | 4.70k | case AOT_ER_AAC_LTP: |
917 | 4.70k | case AOT_ER_AAC_SCALABLE: |
918 | 5.93k | case AOT_ER_AAC_LD: |
919 | 5.93k | ep_config = get_bits(gb, 2); |
920 | 5.93k | if (ep_config) { |
921 | 475 | avpriv_report_missing_feature(avctx, |
922 | 475 | "epConfig %d", ep_config); |
923 | 475 | return AVERROR_PATCHWELCOME; |
924 | 475 | } |
925 | 92.5k | } |
926 | 92.0k | return 0; |
927 | 92.5k | } |
928 | | |
929 | | static int decode_eld_specific_config(AACDecContext *ac, AVCodecContext *avctx, |
930 | | GetBitContext *gb, |
931 | | MPEG4AudioConfig *m4ac, |
932 | | int channel_config) |
933 | 9.60k | { |
934 | 9.60k | int ret, ep_config, res_flags; |
935 | 9.60k | uint8_t layout_map[MAX_ELEM_ID*4][3]; |
936 | 9.60k | int tags = 0; |
937 | 9.60k | const int ELDEXT_TERM = 0; |
938 | | |
939 | 9.60k | m4ac->ps = 0; |
940 | 9.60k | m4ac->sbr = 0; |
941 | 9.60k | m4ac->frame_length_short = get_bits1(gb); |
942 | | |
943 | 9.60k | res_flags = get_bits(gb, 3); |
944 | 9.60k | if (res_flags) { |
945 | 1.33k | avpriv_report_missing_feature(avctx, |
946 | 1.33k | "AAC data resilience (flags %x)", |
947 | 1.33k | res_flags); |
948 | 1.33k | return AVERROR_PATCHWELCOME; |
949 | 1.33k | } |
950 | | |
951 | 8.27k | if (get_bits1(gb)) { // ldSbrPresentFlag |
952 | 88 | avpriv_report_missing_feature(avctx, |
953 | 88 | "Low Delay SBR"); |
954 | 88 | return AVERROR_PATCHWELCOME; |
955 | 88 | } |
956 | | |
957 | 510k | while (get_bits(gb, 4) != ELDEXT_TERM) { |
958 | 505k | int len = get_bits(gb, 4); |
959 | 505k | if (len == 15) |
960 | 1.58k | len += get_bits(gb, 8); |
961 | 505k | if (len == 15 + 255) |
962 | 860 | len += get_bits(gb, 16); |
963 | 505k | if (get_bits_left(gb) < len * 8 + 4) { |
964 | 2.97k | av_log(avctx, AV_LOG_ERROR, overread_err); |
965 | 2.97k | return AVERROR_INVALIDDATA; |
966 | 2.97k | } |
967 | 502k | skip_bits_long(gb, 8 * len); |
968 | 502k | } |
969 | | |
970 | 5.20k | if ((ret = ff_aac_set_default_channel_config(ac, avctx, layout_map, |
971 | 5.20k | &tags, channel_config))) |
972 | 1.26k | return ret; |
973 | | |
974 | 3.94k | if (ac && (ret = ff_aac_output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0))) |
975 | 0 | return ret; |
976 | | |
977 | 3.94k | ep_config = get_bits(gb, 2); |
978 | 3.94k | if (ep_config) { |
979 | 92 | avpriv_report_missing_feature(avctx, |
980 | 92 | "epConfig %d", ep_config); |
981 | 92 | return AVERROR_PATCHWELCOME; |
982 | 92 | } |
983 | 3.85k | return 0; |
984 | 3.94k | } |
985 | | |
986 | | /** |
987 | | * Decode audio specific configuration; reference: table 1.13. |
988 | | * |
989 | | * @param ac pointer to AACDecContext, may be null |
990 | | * @param avctx pointer to AVCCodecContext, used for logging |
991 | | * @param m4ac pointer to MPEG4AudioConfig, used for parsing |
992 | | * @param gb buffer holding an audio specific config |
993 | | * @param get_bit_alignment relative alignment for byte align operations |
994 | | * @param sync_extension look for an appended sync extension |
995 | | * |
996 | | * @return Returns error status or number of consumed bits. <0 - error |
997 | | */ |
998 | | static int decode_audio_specific_config_gb(AACDecContext *ac, |
999 | | AVCodecContext *avctx, |
1000 | | OutputConfiguration *oc, |
1001 | | GetBitContext *gb, |
1002 | | int get_bit_alignment, |
1003 | | int sync_extension) |
1004 | 125k | { |
1005 | 125k | int i, ret; |
1006 | 125k | GetBitContext gbc = *gb; |
1007 | 125k | MPEG4AudioConfig *m4ac = &oc->m4ac; |
1008 | 125k | MPEG4AudioConfig m4ac_bak = *m4ac; |
1009 | | |
1010 | 125k | if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) { |
1011 | 6.87k | *m4ac = m4ac_bak; |
1012 | 6.87k | return AVERROR_INVALIDDATA; |
1013 | 6.87k | } |
1014 | | |
1015 | 118k | if (m4ac->sampling_index > 12) { |
1016 | 894 | av_log(avctx, AV_LOG_ERROR, |
1017 | 894 | "invalid sampling rate index %d\n", |
1018 | 894 | m4ac->sampling_index); |
1019 | 894 | *m4ac = m4ac_bak; |
1020 | 894 | return AVERROR_INVALIDDATA; |
1021 | 894 | } |
1022 | 117k | if (m4ac->object_type == AOT_ER_AAC_LD && |
1023 | 117k | (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) { |
1024 | 273 | av_log(avctx, AV_LOG_ERROR, |
1025 | 273 | "invalid low delay sampling rate index %d\n", |
1026 | 273 | m4ac->sampling_index); |
1027 | 273 | *m4ac = m4ac_bak; |
1028 | 273 | return AVERROR_INVALIDDATA; |
1029 | 273 | } |
1030 | | |
1031 | 117k | skip_bits_long(gb, i); |
1032 | | |
1033 | 117k | switch (m4ac->object_type) { |
1034 | 12.9k | case AOT_AAC_MAIN: |
1035 | 33.0k | case AOT_AAC_LC: |
1036 | 43.9k | case AOT_AAC_SSR: |
1037 | 92.4k | case AOT_AAC_LTP: |
1038 | 98.5k | case AOT_ER_AAC_LC: |
1039 | 99.8k | case AOT_ER_AAC_LD: |
1040 | 99.8k | if ((ret = decode_ga_specific_config(ac, avctx, gb, get_bit_alignment, |
1041 | 99.8k | &oc->m4ac, m4ac->chan_config)) < 0) |
1042 | 7.77k | return ret; |
1043 | 92.0k | break; |
1044 | 92.0k | case AOT_ER_AAC_ELD: |
1045 | 9.60k | if ((ret = decode_eld_specific_config(ac, avctx, gb, |
1046 | 9.60k | &oc->m4ac, m4ac->chan_config)) < 0) |
1047 | 5.75k | return ret; |
1048 | 3.85k | break; |
1049 | 3.85k | #if CONFIG_AAC_DECODER |
1050 | 3.85k | case AOT_USAC: |
1051 | 1.48k | if ((ret = ff_aac_usac_config_decode(ac, avctx, gb, |
1052 | 1.48k | oc, m4ac->chan_config)) < 0) |
1053 | 265 | return ret; |
1054 | 1.22k | break; |
1055 | 1.22k | #endif |
1056 | 6.09k | default: |
1057 | 6.09k | avpriv_report_missing_feature(avctx, |
1058 | 6.09k | "Audio object type %s%d", |
1059 | 6.09k | m4ac->sbr == 1 ? "SBR+" : "", |
1060 | 6.09k | m4ac->object_type); |
1061 | 6.09k | return AVERROR(ENOSYS); |
1062 | 117k | } |
1063 | | |
1064 | 97.1k | ff_dlog(avctx, |
1065 | 97.1k | "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n", |
1066 | 97.1k | m4ac->object_type, m4ac->chan_config, m4ac->sampling_index, |
1067 | 97.1k | m4ac->sample_rate, m4ac->sbr, |
1068 | 97.1k | m4ac->ps); |
1069 | | |
1070 | 97.1k | return get_bits_count(gb); |
1071 | 117k | } |
1072 | | |
1073 | | static int decode_audio_specific_config(AACDecContext *ac, |
1074 | | AVCodecContext *avctx, |
1075 | | OutputConfiguration *oc, |
1076 | | const uint8_t *data, int64_t bit_size, |
1077 | | int sync_extension) |
1078 | 39.6k | { |
1079 | 39.6k | int i, ret; |
1080 | 39.6k | GetBitContext gb; |
1081 | | |
1082 | 39.6k | if (bit_size < 0 || bit_size > INT_MAX) { |
1083 | 0 | av_log(avctx, AV_LOG_ERROR, "Audio specific config size is invalid\n"); |
1084 | 0 | return AVERROR_INVALIDDATA; |
1085 | 0 | } |
1086 | | |
1087 | 39.6k | ff_dlog(avctx, "audio specific config size %d\n", (int)bit_size >> 3); |
1088 | 15.3M | for (i = 0; i < bit_size >> 3; i++) |
1089 | 15.3M | ff_dlog(avctx, "%02x ", data[i]); |
1090 | 39.6k | ff_dlog(avctx, "\n"); |
1091 | | |
1092 | 39.6k | if ((ret = init_get_bits(&gb, data, bit_size)) < 0) |
1093 | 0 | return ret; |
1094 | | |
1095 | 39.6k | return decode_audio_specific_config_gb(ac, avctx, oc, &gb, 0, |
1096 | 39.6k | sync_extension); |
1097 | 39.6k | } |
1098 | | |
1099 | | static av_cold int decode_close(AVCodecContext *avctx) |
1100 | 21.7k | { |
1101 | 21.7k | AACDecContext *ac = avctx->priv_data; |
1102 | | |
1103 | 65.3k | for (int i = 0; i < 2; i++) { |
1104 | 43.5k | OutputConfiguration *oc = &ac->oc[i]; |
1105 | 43.5k | AACUSACConfig *usac = &oc->usac; |
1106 | 52.1k | for (int j = 0; j < usac->nb_elems; j++) { |
1107 | 8.57k | AACUsacElemConfig *ec = &usac->elems[i]; |
1108 | 8.57k | av_freep(&ec->ext.pl_data); |
1109 | 8.57k | } |
1110 | 43.5k | } |
1111 | | |
1112 | 108k | for (int type = 0; type < FF_ARRAY_ELEMS(ac->che); type++) { |
1113 | 5.66M | for (int i = 0; i < MAX_ELEM_ID; i++) { |
1114 | 5.57M | if (ac->che[type][i]) { |
1115 | 104k | ac->proc.sbr_ctx_close(ac->che[type][i]); |
1116 | 104k | av_freep(&ac->che[type][i]); |
1117 | 104k | } |
1118 | 5.57M | } |
1119 | 87.0k | } |
1120 | | |
1121 | 21.7k | av_tx_uninit(&ac->mdct96); |
1122 | 21.7k | av_tx_uninit(&ac->mdct120); |
1123 | 21.7k | av_tx_uninit(&ac->mdct128); |
1124 | 21.7k | av_tx_uninit(&ac->mdct480); |
1125 | 21.7k | av_tx_uninit(&ac->mdct512); |
1126 | 21.7k | av_tx_uninit(&ac->mdct768); |
1127 | 21.7k | av_tx_uninit(&ac->mdct960); |
1128 | 21.7k | av_tx_uninit(&ac->mdct1024); |
1129 | 21.7k | av_tx_uninit(&ac->mdct_ltp); |
1130 | | |
1131 | | // Compiler will optimize this branch away. |
1132 | 21.7k | if (ac->is_fixed) |
1133 | 9.92k | av_freep(&ac->RENAME_FIXED(fdsp)); |
1134 | 11.8k | else |
1135 | 11.8k | av_freep(&ac->fdsp); |
1136 | | |
1137 | 21.7k | return 0; |
1138 | 21.7k | } |
1139 | | |
1140 | | static av_cold int init_dsp(AVCodecContext *avctx) |
1141 | 21.4k | { |
1142 | 21.4k | AACDecContext *ac = avctx->priv_data; |
1143 | 21.4k | int is_fixed = ac->is_fixed, ret; |
1144 | 21.4k | float scale_fixed, scale_float; |
1145 | 21.4k | const float *const scalep = is_fixed ? &scale_fixed : &scale_float; |
1146 | 21.4k | enum AVTXType tx_type = is_fixed ? AV_TX_INT32_MDCT : AV_TX_FLOAT_MDCT; |
1147 | | |
1148 | 21.4k | #define MDCT_INIT(s, fn, len, sval) \ |
1149 | 171k | scale_fixed = (sval) * 128.0f; \ |
1150 | 171k | scale_float = (sval) / 32768.0f; \ |
1151 | 171k | ret = av_tx_init(&s, &fn, tx_type, 1, len, scalep, 0); \ |
1152 | 171k | if (ret < 0) \ |
1153 | 171k | return ret |
1154 | | |
1155 | 21.4k | MDCT_INIT(ac->mdct96, ac->mdct96_fn, 96, 1.0/96); |
1156 | 21.4k | MDCT_INIT(ac->mdct120, ac->mdct120_fn, 120, 1.0/120); |
1157 | 21.4k | MDCT_INIT(ac->mdct128, ac->mdct128_fn, 128, 1.0/128); |
1158 | 21.4k | MDCT_INIT(ac->mdct480, ac->mdct480_fn, 480, 1.0/480); |
1159 | 21.4k | MDCT_INIT(ac->mdct512, ac->mdct512_fn, 512, 1.0/512); |
1160 | 21.4k | MDCT_INIT(ac->mdct768, ac->mdct768_fn, 768, 1.0/768); |
1161 | 21.4k | MDCT_INIT(ac->mdct960, ac->mdct960_fn, 960, 1.0/960); |
1162 | 21.4k | MDCT_INIT(ac->mdct1024, ac->mdct1024_fn, 1024, 1.0/1024); |
1163 | 21.4k | #undef MDCT_INIT |
1164 | | |
1165 | | /* LTP forward MDCT */ |
1166 | 21.4k | scale_fixed = -1.0; |
1167 | 21.4k | scale_float = -32786.0*2 + 36; |
1168 | 21.4k | ret = av_tx_init(&ac->mdct_ltp, &ac->mdct_ltp_fn, tx_type, 0, 1024, scalep, 0); |
1169 | 21.4k | if (ret < 0) |
1170 | 0 | return ret; |
1171 | | |
1172 | 21.4k | return 0; |
1173 | 21.4k | } |
1174 | | |
1175 | | av_cold int ff_aac_decode_init(AVCodecContext *avctx) |
1176 | 21.7k | { |
1177 | 21.7k | AACDecContext *ac = avctx->priv_data; |
1178 | 21.7k | int ret; |
1179 | | |
1180 | 21.7k | if (avctx->sample_rate > 96000) |
1181 | 130 | return AVERROR_INVALIDDATA; |
1182 | | |
1183 | 21.6k | ff_aacdec_common_init_once(); |
1184 | | |
1185 | 21.6k | ac->avctx = avctx; |
1186 | 21.6k | ac->oc[1].m4ac.sample_rate = avctx->sample_rate; |
1187 | | |
1188 | 21.6k | if (avctx->extradata_size > 0) { |
1189 | 1.74k | if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1], |
1190 | 1.74k | avctx->extradata, |
1191 | 1.74k | avctx->extradata_size * 8LL, |
1192 | 1.74k | 1)) < 0) |
1193 | 211 | return ret; |
1194 | 19.8k | } else { |
1195 | 19.8k | int sr, i; |
1196 | 19.8k | uint8_t layout_map[MAX_ELEM_ID*4][3]; |
1197 | 19.8k | int layout_map_tags; |
1198 | | |
1199 | 19.8k | sr = ff_aac_sample_rate_idx(avctx->sample_rate); |
1200 | 19.8k | ac->oc[1].m4ac.sampling_index = sr; |
1201 | 19.8k | ac->oc[1].m4ac.channels = avctx->ch_layout.nb_channels; |
1202 | 19.8k | ac->oc[1].m4ac.sbr = -1; |
1203 | 19.8k | ac->oc[1].m4ac.ps = -1; |
1204 | | |
1205 | 24.9k | for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++) |
1206 | 24.6k | if (ff_mpeg4audio_channels[i] == avctx->ch_layout.nb_channels) |
1207 | 19.6k | break; |
1208 | 19.8k | if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) { |
1209 | 259 | i = 0; |
1210 | 259 | } |
1211 | 19.8k | ac->oc[1].m4ac.chan_config = i; |
1212 | | |
1213 | 19.8k | if (ac->oc[1].m4ac.chan_config) { |
1214 | 254 | int ret = ff_aac_set_default_channel_config(ac, avctx, layout_map, |
1215 | 254 | &layout_map_tags, |
1216 | 254 | ac->oc[1].m4ac.chan_config); |
1217 | 254 | if (!ret) |
1218 | 254 | ff_aac_output_configure(ac, layout_map, layout_map_tags, |
1219 | 254 | OC_GLOBAL_HDR, 0); |
1220 | 0 | else if (avctx->err_recognition & AV_EF_EXPLODE) |
1221 | 0 | return AVERROR_INVALIDDATA; |
1222 | 254 | } |
1223 | 19.8k | } |
1224 | | |
1225 | 21.4k | if (avctx->ch_layout.nb_channels > MAX_CHANNELS) { |
1226 | 25 | av_log(avctx, AV_LOG_ERROR, "Too many channels\n"); |
1227 | 25 | return AVERROR_INVALIDDATA; |
1228 | 25 | } |
1229 | | |
1230 | 21.4k | ac->random_state = 0x1f2e3d4c; |
1231 | | |
1232 | 21.4k | return init_dsp(avctx); |
1233 | 21.4k | } |
1234 | | |
1235 | | /** |
1236 | | * Skip data_stream_element; reference: table 4.10. |
1237 | | */ |
1238 | | static int skip_data_stream_element(AACDecContext *ac, GetBitContext *gb) |
1239 | 127k | { |
1240 | 127k | int byte_align = get_bits1(gb); |
1241 | 127k | int count = get_bits(gb, 8); |
1242 | 127k | if (count == 255) |
1243 | 2.48k | count += get_bits(gb, 8); |
1244 | 127k | if (byte_align) |
1245 | 62.5k | align_get_bits(gb); |
1246 | | |
1247 | 127k | if (get_bits_left(gb) < 8 * count) { |
1248 | 32.0k | av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err); |
1249 | 32.0k | return AVERROR_INVALIDDATA; |
1250 | 32.0k | } |
1251 | 95.6k | skip_bits_long(gb, 8 * count); |
1252 | 95.6k | return 0; |
1253 | 127k | } |
1254 | | |
1255 | | static int decode_prediction(AACDecContext *ac, IndividualChannelStream *ics, |
1256 | | GetBitContext *gb) |
1257 | 118k | { |
1258 | 118k | int sfb; |
1259 | 118k | if (get_bits1(gb)) { |
1260 | 46.3k | ics->predictor_reset_group = get_bits(gb, 5); |
1261 | 46.3k | if (ics->predictor_reset_group == 0 || |
1262 | 46.3k | ics->predictor_reset_group > 30) { |
1263 | 7.66k | av_log(ac->avctx, AV_LOG_ERROR, |
1264 | 7.66k | "Invalid Predictor Reset Group.\n"); |
1265 | 7.66k | return AVERROR_INVALIDDATA; |
1266 | 7.66k | } |
1267 | 46.3k | } |
1268 | 2.97M | for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) { |
1269 | 2.86M | ics->prediction_used[sfb] = get_bits1(gb); |
1270 | 2.86M | } |
1271 | 111k | return 0; |
1272 | 118k | } |
1273 | | |
1274 | | /** |
1275 | | * Decode Long Term Prediction data; reference: table 4.xx. |
1276 | | */ |
1277 | | static void decode_ltp(AACDecContext *ac, LongTermPrediction *ltp, |
1278 | | GetBitContext *gb, uint8_t max_sfb) |
1279 | 129k | { |
1280 | 129k | int sfb; |
1281 | | |
1282 | 129k | ltp->lag = get_bits(gb, 11); |
1283 | 129k | if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed) |
1284 | 105k | ltp->coef_fixed = Q30(ff_ltp_coef[get_bits(gb, 3)]); |
1285 | 23.9k | else if (CONFIG_AAC_DECODER) |
1286 | 23.9k | ltp->coef = ff_ltp_coef[get_bits(gb, 3)]; |
1287 | | |
1288 | 3.14M | for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++) |
1289 | 3.01M | ltp->used[sfb] = get_bits1(gb); |
1290 | 129k | } |
1291 | | |
1292 | | /** |
1293 | | * Decode Individual Channel Stream info; reference: table 4.6. |
1294 | | */ |
1295 | | static int decode_ics_info(AACDecContext *ac, IndividualChannelStream *ics, |
1296 | | GetBitContext *gb) |
1297 | 1.39M | { |
1298 | 1.39M | const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac; |
1299 | 1.39M | const int aot = m4ac->object_type; |
1300 | 1.39M | const int sampling_index = m4ac->sampling_index; |
1301 | 1.39M | int ret_fail = AVERROR_INVALIDDATA; |
1302 | | |
1303 | 1.39M | if (aot != AOT_ER_AAC_ELD) { |
1304 | 1.37M | if (get_bits1(gb)) { |
1305 | 299k | av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n"); |
1306 | 299k | if (ac->avctx->err_recognition & AV_EF_BITSTREAM) |
1307 | 0 | return AVERROR_INVALIDDATA; |
1308 | 299k | } |
1309 | 1.37M | ics->window_sequence[1] = ics->window_sequence[0]; |
1310 | 1.37M | ics->window_sequence[0] = get_bits(gb, 2); |
1311 | 1.37M | if (aot == AOT_ER_AAC_LD && |
1312 | 1.37M | ics->window_sequence[0] != ONLY_LONG_SEQUENCE) { |
1313 | 4.10k | av_log(ac->avctx, AV_LOG_ERROR, |
1314 | 4.10k | "AAC LD is only defined for ONLY_LONG_SEQUENCE but " |
1315 | 4.10k | "window sequence %d found.\n", ics->window_sequence[0]); |
1316 | 4.10k | ics->window_sequence[0] = ONLY_LONG_SEQUENCE; |
1317 | 4.10k | return AVERROR_INVALIDDATA; |
1318 | 4.10k | } |
1319 | 1.37M | ics->use_kb_window[1] = ics->use_kb_window[0]; |
1320 | 1.37M | ics->use_kb_window[0] = get_bits1(gb); |
1321 | 1.37M | } |
1322 | 1.39M | ics->prev_num_window_groups = FFMAX(ics->num_window_groups, 1); |
1323 | 1.39M | ics->num_window_groups = 1; |
1324 | 1.39M | ics->group_len[0] = 1; |
1325 | 1.39M | if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { |
1326 | 98.5k | int i; |
1327 | 98.5k | ics->max_sfb = get_bits(gb, 4); |
1328 | 788k | for (i = 0; i < 7; i++) { |
1329 | 689k | if (get_bits1(gb)) { |
1330 | 461k | ics->group_len[ics->num_window_groups - 1]++; |
1331 | 461k | } else { |
1332 | 228k | ics->num_window_groups++; |
1333 | 228k | ics->group_len[ics->num_window_groups - 1] = 1; |
1334 | 228k | } |
1335 | 689k | } |
1336 | 98.5k | ics->num_windows = 8; |
1337 | 98.5k | if (m4ac->frame_length_short) { |
1338 | 11.7k | ics->swb_offset = ff_swb_offset_120[sampling_index]; |
1339 | 11.7k | ics->num_swb = ff_aac_num_swb_120[sampling_index]; |
1340 | 86.7k | } else { |
1341 | 86.7k | ics->swb_offset = ff_swb_offset_128[sampling_index]; |
1342 | 86.7k | ics->num_swb = ff_aac_num_swb_128[sampling_index]; |
1343 | 86.7k | } |
1344 | 98.5k | ics->tns_max_bands = ff_tns_max_bands_128[sampling_index]; |
1345 | 98.5k | ics->predictor_present = 0; |
1346 | 1.29M | } else { |
1347 | 1.29M | ics->max_sfb = get_bits(gb, 6); |
1348 | 1.29M | ics->num_windows = 1; |
1349 | 1.29M | if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) { |
1350 | 70.7k | if (m4ac->frame_length_short) { |
1351 | 10.7k | ics->swb_offset = ff_swb_offset_480[sampling_index]; |
1352 | 10.7k | ics->num_swb = ff_aac_num_swb_480[sampling_index]; |
1353 | 10.7k | ics->tns_max_bands = ff_tns_max_bands_480[sampling_index]; |
1354 | 60.0k | } else { |
1355 | 60.0k | ics->swb_offset = ff_swb_offset_512[sampling_index]; |
1356 | 60.0k | ics->num_swb = ff_aac_num_swb_512[sampling_index]; |
1357 | 60.0k | ics->tns_max_bands = ff_tns_max_bands_512[sampling_index]; |
1358 | 60.0k | } |
1359 | 70.7k | if (!ics->num_swb || !ics->swb_offset) { |
1360 | 598 | ret_fail = AVERROR_BUG; |
1361 | 598 | goto fail; |
1362 | 598 | } |
1363 | 1.22M | } else { |
1364 | 1.22M | if (m4ac->frame_length_short) { |
1365 | 56.2k | ics->num_swb = ff_aac_num_swb_960[sampling_index]; |
1366 | 56.2k | ics->swb_offset = ff_swb_offset_960[sampling_index]; |
1367 | 1.16M | } else { |
1368 | 1.16M | ics->num_swb = ff_aac_num_swb_1024[sampling_index]; |
1369 | 1.16M | ics->swb_offset = ff_swb_offset_1024[sampling_index]; |
1370 | 1.16M | } |
1371 | 1.22M | ics->tns_max_bands = ff_tns_max_bands_1024[sampling_index]; |
1372 | 1.22M | } |
1373 | 1.29M | if (aot != AOT_ER_AAC_ELD) { |
1374 | 1.27M | ics->predictor_present = get_bits1(gb); |
1375 | 1.27M | ics->predictor_reset_group = 0; |
1376 | 1.27M | } |
1377 | 1.29M | if (ics->predictor_present) { |
1378 | 334k | if (aot == AOT_AAC_MAIN) { |
1379 | 118k | if (decode_prediction(ac, ics, gb)) { |
1380 | 7.66k | goto fail; |
1381 | 7.66k | } |
1382 | 215k | } else if (aot == AOT_AAC_LC || |
1383 | 215k | aot == AOT_ER_AAC_LC) { |
1384 | 21.7k | av_log(ac->avctx, AV_LOG_ERROR, |
1385 | 21.7k | "Prediction is not allowed in AAC-LC.\n"); |
1386 | 21.7k | goto fail; |
1387 | 193k | } else { |
1388 | 193k | if (aot == AOT_ER_AAC_LD) { |
1389 | 1.75k | av_log(ac->avctx, AV_LOG_ERROR, |
1390 | 1.75k | "LTP in ER AAC LD not yet implemented.\n"); |
1391 | 1.75k | ret_fail = AVERROR_PATCHWELCOME; |
1392 | 1.75k | goto fail; |
1393 | 1.75k | } |
1394 | 192k | if ((ics->ltp.present = get_bits(gb, 1))) |
1395 | 120k | decode_ltp(ac, &ics->ltp, gb, ics->max_sfb); |
1396 | 192k | } |
1397 | 334k | } |
1398 | 1.29M | } |
1399 | | |
1400 | 1.36M | if (ics->max_sfb > ics->num_swb) { |
1401 | 104k | av_log(ac->avctx, AV_LOG_ERROR, |
1402 | 104k | "Number of scalefactor bands in group (%d) " |
1403 | 104k | "exceeds limit (%d).\n", |
1404 | 104k | ics->max_sfb, ics->num_swb); |
1405 | 104k | goto fail; |
1406 | 104k | } |
1407 | | |
1408 | 1.25M | return 0; |
1409 | 136k | fail: |
1410 | 136k | ics->max_sfb = 0; |
1411 | 136k | return ret_fail; |
1412 | 1.36M | } |
1413 | | |
1414 | | /** |
1415 | | * Decode band types (section_data payload); reference: table 4.46. |
1416 | | * |
1417 | | * @param band_type array of the used band type |
1418 | | * @param band_type_run_end array of the last scalefactor band of a band type run |
1419 | | * |
1420 | | * @return Returns error status. 0 - OK, !0 - error |
1421 | | */ |
1422 | | static int decode_band_types(AACDecContext *ac, SingleChannelElement *sce, |
1423 | | GetBitContext *gb) |
1424 | 1.44M | { |
1425 | 1.44M | IndividualChannelStream *ics = &sce->ics; |
1426 | 1.44M | const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5; |
1427 | | |
1428 | 2.92M | for (int g = 0; g < ics->num_window_groups; g++) { |
1429 | 1.68M | int k = 0; |
1430 | 3.17M | while (k < ics->max_sfb) { |
1431 | 1.69M | uint8_t sect_end = k; |
1432 | 1.69M | int sect_len_incr; |
1433 | 1.69M | int sect_band_type = get_bits(gb, 4); |
1434 | 1.69M | if (sect_band_type == 12) { |
1435 | 24.4k | av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n"); |
1436 | 24.4k | return AVERROR_INVALIDDATA; |
1437 | 24.4k | } |
1438 | 1.67M | do { |
1439 | 1.67M | sect_len_incr = get_bits(gb, bits); |
1440 | 1.67M | sect_end += sect_len_incr; |
1441 | 1.67M | if (get_bits_left(gb) < 0) { |
1442 | 30.8k | av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err); |
1443 | 30.8k | return AVERROR_INVALIDDATA; |
1444 | 30.8k | } |
1445 | 1.64M | if (sect_end > ics->max_sfb) { |
1446 | 142k | av_log(ac->avctx, AV_LOG_ERROR, |
1447 | 142k | "Number of bands (%d) exceeds limit (%d).\n", |
1448 | 142k | sect_end, ics->max_sfb); |
1449 | 142k | return AVERROR_INVALIDDATA; |
1450 | 142k | } |
1451 | 1.64M | } while (sect_len_incr == (1 << bits) - 1); |
1452 | 11.8M | for (; k < sect_end; k++) |
1453 | 10.3M | sce->band_type[g*ics->max_sfb + k] = sect_band_type; |
1454 | 1.49M | } |
1455 | 1.68M | } |
1456 | 1.24M | return 0; |
1457 | 1.44M | } |
1458 | | |
1459 | | /** |
1460 | | * Decode scalefactors; reference: table 4.47. |
1461 | | * |
1462 | | * @param global_gain first scalefactor value as scalefactors are differentially coded |
1463 | | * @param band_type array of the used band type |
1464 | | * @param band_type_run_end array of the last scalefactor band of a band type run |
1465 | | * @param sf array of scalefactors or intensity stereo positions |
1466 | | * |
1467 | | * @return Returns error status. 0 - OK, !0 - error |
1468 | | */ |
1469 | | static int decode_scalefactors(AACDecContext *ac, SingleChannelElement *sce, |
1470 | | GetBitContext *gb, unsigned int global_gain) |
1471 | 1.24M | { |
1472 | 1.24M | IndividualChannelStream *ics = &sce->ics; |
1473 | 1.24M | int offset[3] = { global_gain, global_gain - NOISE_OFFSET, 0 }; |
1474 | 1.24M | int clipped_offset; |
1475 | 1.24M | int noise_flag = 1; |
1476 | | |
1477 | 2.72M | for (int g = 0; g < ics->num_window_groups; g++) { |
1478 | 9.49M | for (int sfb = 0; sfb < ics->max_sfb; sfb++) { |
1479 | 8.00M | switch (sce->band_type[g*ics->max_sfb + sfb]) { |
1480 | 864k | case ZERO_BT: |
1481 | 864k | sce->sfo[g*ics->max_sfb + sfb] = 0; |
1482 | 864k | break; |
1483 | 40.9k | case INTENSITY_BT: /* fallthrough */ |
1484 | 885k | case INTENSITY_BT2: |
1485 | 885k | offset[2] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO; |
1486 | 885k | clipped_offset = av_clip(offset[2], -155, 100); |
1487 | 885k | if (offset[2] != clipped_offset) { |
1488 | 10.9k | avpriv_request_sample(ac->avctx, |
1489 | 10.9k | "If you heard an audible artifact, there may be a bug in the decoder. " |
1490 | 10.9k | "Clipped intensity stereo position (%d -> %d)", |
1491 | 10.9k | offset[2], clipped_offset); |
1492 | 10.9k | } |
1493 | 885k | sce->sfo[g*ics->max_sfb + sfb] = clipped_offset - 100; |
1494 | 885k | break; |
1495 | 4.68M | case NOISE_BT: |
1496 | 4.68M | if (noise_flag-- > 0) |
1497 | 271k | offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE; |
1498 | 4.41M | else |
1499 | 4.41M | offset[1] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO; |
1500 | 4.68M | clipped_offset = av_clip(offset[1], -100, 155); |
1501 | 4.68M | if (offset[1] != clipped_offset) { |
1502 | 564k | avpriv_request_sample(ac->avctx, |
1503 | 564k | "If you heard an audible artifact, there may be a bug in the decoder. " |
1504 | 564k | "Clipped noise gain (%d -> %d)", |
1505 | 564k | offset[1], clipped_offset); |
1506 | 564k | } |
1507 | 4.68M | sce->sfo[g*ics->max_sfb + sfb] = clipped_offset; |
1508 | 4.68M | break; |
1509 | 1.57M | default: |
1510 | 1.57M | offset[0] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO; |
1511 | 1.57M | if (offset[0] > 255U) { |
1512 | 734 | av_log(ac->avctx, AV_LOG_ERROR, |
1513 | 734 | "Scalefactor (%d) out of range.\n", offset[0]); |
1514 | 734 | return AVERROR_INVALIDDATA; |
1515 | 734 | } |
1516 | 1.57M | sce->sfo[g*ics->max_sfb + sfb] = offset[0] - 100; |
1517 | 1.57M | break; |
1518 | 8.00M | } |
1519 | 8.00M | } |
1520 | 1.48M | } |
1521 | | |
1522 | 1.24M | return 0; |
1523 | 1.24M | } |
1524 | | |
1525 | | /** |
1526 | | * Decode pulse data; reference: table 4.7. |
1527 | | */ |
1528 | | static int decode_pulses(Pulse *pulse, GetBitContext *gb, |
1529 | | const uint16_t *swb_offset, int num_swb) |
1530 | 168k | { |
1531 | 168k | int i, pulse_swb; |
1532 | 168k | pulse->num_pulse = get_bits(gb, 2) + 1; |
1533 | 168k | pulse_swb = get_bits(gb, 6); |
1534 | 168k | if (pulse_swb >= num_swb) |
1535 | 7.79k | return -1; |
1536 | 160k | pulse->pos[0] = swb_offset[pulse_swb]; |
1537 | 160k | pulse->pos[0] += get_bits(gb, 5); |
1538 | 160k | if (pulse->pos[0] >= swb_offset[num_swb]) |
1539 | 272 | return -1; |
1540 | 160k | pulse->amp[0] = get_bits(gb, 4); |
1541 | 396k | for (i = 1; i < pulse->num_pulse; i++) { |
1542 | 236k | pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1]; |
1543 | 236k | if (pulse->pos[i] >= swb_offset[num_swb]) |
1544 | 173 | return -1; |
1545 | 236k | pulse->amp[i] = get_bits(gb, 4); |
1546 | 236k | } |
1547 | 159k | return 0; |
1548 | 160k | } |
1549 | | |
1550 | | /** |
1551 | | * Decode Temporal Noise Shaping data; reference: table 4.48. |
1552 | | * |
1553 | | * @return Returns error status. 0 - OK, !0 - error |
1554 | | */ |
1555 | | int ff_aac_decode_tns(AACDecContext *ac, TemporalNoiseShaping *tns, |
1556 | | GetBitContext *gb, const IndividualChannelStream *ics) |
1557 | 98.6k | { |
1558 | 98.6k | int tns_max_order = INT32_MAX; |
1559 | 98.6k | const int is_usac = ac->oc[1].m4ac.object_type == AOT_USAC; |
1560 | 98.6k | int w, filt, i, coef_len, coef_res, coef_compress; |
1561 | 98.6k | const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE; |
1562 | | |
1563 | | /* USAC doesn't seem to have a limit */ |
1564 | 98.6k | if (!is_usac) |
1565 | 64.4k | tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12; |
1566 | | |
1567 | 307k | for (w = 0; w < ics->num_windows; w++) { |
1568 | 226k | if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) { |
1569 | 74.5k | coef_res = get_bits1(gb); |
1570 | | |
1571 | 181k | for (filt = 0; filt < tns->n_filt[w]; filt++) { |
1572 | 123k | int tmp2_idx; |
1573 | 123k | tns->length[w][filt] = get_bits(gb, 6 - 2 * is8); |
1574 | | |
1575 | 123k | if (is_usac) |
1576 | 20.0k | tns->order[w][filt] = get_bits(gb, 4 - is8); |
1577 | 103k | else |
1578 | 103k | tns->order[w][filt] = get_bits(gb, 5 - (2 * is8)); |
1579 | | |
1580 | 123k | if (tns->order[w][filt] > tns_max_order) { |
1581 | 17.1k | av_log(ac->avctx, AV_LOG_ERROR, |
1582 | 17.1k | "TNS filter order %d is greater than maximum %d.\n", |
1583 | 17.1k | tns->order[w][filt], tns_max_order); |
1584 | 17.1k | tns->order[w][filt] = 0; |
1585 | 17.1k | return AVERROR_INVALIDDATA; |
1586 | 17.1k | } |
1587 | 106k | if (tns->order[w][filt]) { |
1588 | 64.0k | tns->direction[w][filt] = get_bits1(gb); |
1589 | 64.0k | coef_compress = get_bits1(gb); |
1590 | 64.0k | coef_len = coef_res + 3 - coef_compress; |
1591 | 64.0k | tmp2_idx = 2 * coef_compress + coef_res; |
1592 | | |
1593 | 451k | for (i = 0; i < tns->order[w][filt]; i++) { |
1594 | 387k | if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed) |
1595 | 285k | tns->coef_fixed[w][filt][i] = Q31(ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]); |
1596 | 101k | else if (CONFIG_AAC_DECODER) |
1597 | 101k | tns->coef[w][filt][i] = ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]; |
1598 | 387k | } |
1599 | 64.0k | } |
1600 | 106k | } |
1601 | 74.5k | } |
1602 | 226k | } |
1603 | 81.5k | return 0; |
1604 | 98.6k | } |
1605 | | |
1606 | | /** |
1607 | | * Decode Mid/Side data; reference: table 4.54. |
1608 | | * |
1609 | | * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s; |
1610 | | * [1] mask is decoded from bitstream; [2] mask is all 1s; |
1611 | | * [3] reserved for scalable AAC |
1612 | | */ |
1613 | | static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb, |
1614 | | int ms_present) |
1615 | 65.2k | { |
1616 | 65.2k | int idx; |
1617 | 65.2k | int max_idx = cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb; |
1618 | 65.2k | cpe->max_sfb_ste = cpe->ch[0].ics.max_sfb; |
1619 | 65.2k | if (ms_present == 1) { |
1620 | 121k | for (idx = 0; idx < max_idx; idx++) |
1621 | 69.6k | cpe->ms_mask[idx] = get_bits1(gb); |
1622 | 51.9k | } else if (ms_present == 2) { |
1623 | 13.3k | memset(cpe->ms_mask, 1, max_idx * sizeof(cpe->ms_mask[0])); |
1624 | 13.3k | } |
1625 | 65.2k | } |
1626 | | |
1627 | | static void decode_gain_control(SingleChannelElement * sce, GetBitContext * gb) |
1628 | 50.0k | { |
1629 | | // wd_num, wd_test, aloc_size |
1630 | 50.0k | static const uint8_t gain_mode[4][3] = { |
1631 | 50.0k | {1, 0, 5}, // ONLY_LONG_SEQUENCE = 0, |
1632 | 50.0k | {2, 1, 2}, // LONG_START_SEQUENCE, |
1633 | 50.0k | {8, 0, 2}, // EIGHT_SHORT_SEQUENCE, |
1634 | 50.0k | {2, 1, 5}, // LONG_STOP_SEQUENCE |
1635 | 50.0k | }; |
1636 | | |
1637 | 50.0k | const int mode = sce->ics.window_sequence[0]; |
1638 | 50.0k | uint8_t bd, wd, ad; |
1639 | | |
1640 | | // FIXME: Store the gain control data on |sce| and do something with it. |
1641 | 50.0k | uint8_t max_band = get_bits(gb, 2); |
1642 | 121k | for (bd = 0; bd < max_band; bd++) { |
1643 | 182k | for (wd = 0; wd < gain_mode[mode][0]; wd++) { |
1644 | 111k | uint8_t adjust_num = get_bits(gb, 3); |
1645 | 392k | for (ad = 0; ad < adjust_num; ad++) { |
1646 | 281k | skip_bits(gb, 4 + ((wd == 0 && gain_mode[mode][1]) |
1647 | 281k | ? 4 |
1648 | 281k | : gain_mode[mode][2])); |
1649 | 281k | } |
1650 | 111k | } |
1651 | 71.0k | } |
1652 | 50.0k | } |
1653 | | |
1654 | | /** |
1655 | | * Decode an individual_channel_stream payload; reference: table 4.44. |
1656 | | * |
1657 | | * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. |
1658 | | * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.) |
1659 | | * |
1660 | | * @return Returns error status. 0 - OK, !0 - error |
1661 | | */ |
1662 | | int ff_aac_decode_ics(AACDecContext *ac, SingleChannelElement *sce, |
1663 | | GetBitContext *gb, int common_window, int scale_flag) |
1664 | 1.55M | { |
1665 | 1.55M | Pulse pulse; |
1666 | 1.55M | TemporalNoiseShaping *tns = &sce->tns; |
1667 | 1.55M | IndividualChannelStream *ics = &sce->ics; |
1668 | 1.55M | int global_gain, eld_syntax, er_syntax, pulse_present = 0; |
1669 | 1.55M | int ret; |
1670 | | |
1671 | 1.55M | eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; |
1672 | 1.55M | er_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC || |
1673 | 1.55M | ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP || |
1674 | 1.55M | ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD || |
1675 | 1.55M | ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; |
1676 | | |
1677 | | /* This assignment is to silence a GCC warning about the variable being used |
1678 | | * uninitialized when in fact it always is. |
1679 | | */ |
1680 | 1.55M | pulse.num_pulse = 0; |
1681 | | |
1682 | 1.55M | global_gain = get_bits(gb, 8); |
1683 | | |
1684 | 1.55M | if (!common_window && !scale_flag) { |
1685 | 1.13M | ret = decode_ics_info(ac, ics, gb); |
1686 | 1.13M | if (ret < 0) |
1687 | 110k | goto fail; |
1688 | 1.13M | } |
1689 | | |
1690 | 1.44M | if ((ret = decode_band_types(ac, sce, gb)) < 0) |
1691 | 197k | goto fail; |
1692 | 1.24M | if ((ret = decode_scalefactors(ac, sce, gb, global_gain)) < 0) |
1693 | 734 | goto fail; |
1694 | | |
1695 | 1.24M | ac->dsp.dequant_scalefactors(sce); |
1696 | | |
1697 | 1.24M | pulse_present = 0; |
1698 | 1.24M | if (!scale_flag) { |
1699 | 1.24M | if (!eld_syntax && (pulse_present = get_bits1(gb))) { |
1700 | 173k | if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { |
1701 | 5.15k | av_log(ac->avctx, AV_LOG_ERROR, |
1702 | 5.15k | "Pulse tool not allowed in eight short sequence.\n"); |
1703 | 5.15k | ret = AVERROR_INVALIDDATA; |
1704 | 5.15k | goto fail; |
1705 | 5.15k | } |
1706 | 168k | if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) { |
1707 | 8.23k | av_log(ac->avctx, AV_LOG_ERROR, |
1708 | 8.23k | "Pulse data corrupt or invalid.\n"); |
1709 | 8.23k | ret = AVERROR_INVALIDDATA; |
1710 | 8.23k | goto fail; |
1711 | 8.23k | } |
1712 | 168k | } |
1713 | 1.22M | tns->present = get_bits1(gb); |
1714 | 1.22M | if (tns->present && !er_syntax) { |
1715 | 57.7k | ret = ff_aac_decode_tns(ac, tns, gb, ics); |
1716 | 57.7k | if (ret < 0) |
1717 | 14.9k | goto fail; |
1718 | 57.7k | } |
1719 | 1.21M | if (!eld_syntax && get_bits1(gb)) { |
1720 | 50.0k | decode_gain_control(sce, gb); |
1721 | 50.0k | if (!ac->warned_gain_control) { |
1722 | 3.78k | avpriv_report_missing_feature(ac->avctx, "Gain control"); |
1723 | 3.78k | ac->warned_gain_control = 1; |
1724 | 3.78k | } |
1725 | 50.0k | } |
1726 | | // I see no textual basis in the spec for this occurring after SSR gain |
1727 | | // control, but this is what both reference and real implmentations do |
1728 | 1.21M | if (tns->present && er_syntax) { |
1729 | 6.72k | ret = ff_aac_decode_tns(ac, tns, gb, ics); |
1730 | 6.72k | if (ret < 0) |
1731 | 2.19k | goto fail; |
1732 | 6.72k | } |
1733 | 1.21M | } |
1734 | | |
1735 | 1.21M | ret = ac->proc.decode_spectrum_and_dequant(ac, gb, |
1736 | 1.21M | pulse_present ? &pulse : NULL, |
1737 | 1.21M | sce); |
1738 | 1.21M | if (ret < 0) |
1739 | 367 | goto fail; |
1740 | | |
1741 | 1.21M | if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window) |
1742 | 357k | ac->dsp.apply_prediction(ac, sce); |
1743 | | |
1744 | 1.21M | return 0; |
1745 | 339k | fail: |
1746 | 339k | tns->present = 0; |
1747 | 339k | return ret; |
1748 | 1.21M | } |
1749 | | |
1750 | | /** |
1751 | | * Decode a channel_pair_element; reference: table 4.4. |
1752 | | * |
1753 | | * @return Returns error status. 0 - OK, !0 - error |
1754 | | */ |
1755 | | static int decode_cpe(AACDecContext *ac, GetBitContext *gb, ChannelElement *cpe) |
1756 | 468k | { |
1757 | 468k | int i, ret, common_window, ms_present = 0; |
1758 | 468k | int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD; |
1759 | | |
1760 | 468k | common_window = eld_syntax || get_bits1(gb); |
1761 | 468k | if (common_window) { |
1762 | 267k | if (decode_ics_info(ac, &cpe->ch[0].ics, gb)) |
1763 | 30.1k | return AVERROR_INVALIDDATA; |
1764 | 237k | i = cpe->ch[1].ics.use_kb_window[0]; |
1765 | 237k | cpe->ch[1].ics = cpe->ch[0].ics; |
1766 | 237k | cpe->ch[1].ics.use_kb_window[1] = i; |
1767 | 237k | if (cpe->ch[1].ics.predictor_present && |
1768 | 237k | (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN)) |
1769 | 17.2k | if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1))) |
1770 | 9.36k | decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb); |
1771 | 237k | ms_present = get_bits(gb, 2); |
1772 | 237k | if (ms_present == 3) { |
1773 | 18.1k | av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n"); |
1774 | 18.1k | return AVERROR_INVALIDDATA; |
1775 | 218k | } else if (ms_present) |
1776 | 65.2k | decode_mid_side_stereo(cpe, gb, ms_present); |
1777 | 237k | } |
1778 | 420k | if ((ret = ff_aac_decode_ics(ac, &cpe->ch[0], gb, common_window, 0))) |
1779 | 69.8k | return ret; |
1780 | 350k | if ((ret = ff_aac_decode_ics(ac, &cpe->ch[1], gb, common_window, 0))) |
1781 | 61.9k | return ret; |
1782 | | |
1783 | 288k | if (common_window) { |
1784 | 197k | if (ms_present) |
1785 | 58.0k | ac->dsp.apply_mid_side_stereo(ac, cpe); |
1786 | 197k | if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) { |
1787 | 17.3k | ac->dsp.apply_prediction(ac, &cpe->ch[0]); |
1788 | 17.3k | ac->dsp.apply_prediction(ac, &cpe->ch[1]); |
1789 | 17.3k | } |
1790 | 197k | } |
1791 | | |
1792 | 288k | ac->dsp.apply_intensity_stereo(ac, cpe, ms_present); |
1793 | 288k | return 0; |
1794 | 350k | } |
1795 | | |
1796 | | /** |
1797 | | * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53. |
1798 | | * |
1799 | | * @return Returns number of bytes consumed. |
1800 | | */ |
1801 | | static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, |
1802 | | GetBitContext *gb) |
1803 | 10.2k | { |
1804 | 10.2k | int i; |
1805 | 10.2k | int num_excl_chan = 0; |
1806 | | |
1807 | 62.8k | do { |
1808 | 503k | for (i = 0; i < 7; i++) |
1809 | 440k | che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb); |
1810 | 62.8k | } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb)); |
1811 | | |
1812 | 10.2k | return num_excl_chan / 7; |
1813 | 10.2k | } |
1814 | | |
1815 | | /** |
1816 | | * Decode dynamic range information; reference: table 4.52. |
1817 | | * |
1818 | | * @return Returns number of bytes consumed. |
1819 | | */ |
1820 | | static int decode_dynamic_range(DynamicRangeControl *che_drc, |
1821 | | GetBitContext *gb) |
1822 | 12.9k | { |
1823 | 12.9k | int n = 1; |
1824 | 12.9k | int drc_num_bands = 1; |
1825 | 12.9k | int i; |
1826 | | |
1827 | | /* pce_tag_present? */ |
1828 | 12.9k | if (get_bits1(gb)) { |
1829 | 8.19k | che_drc->pce_instance_tag = get_bits(gb, 4); |
1830 | 8.19k | skip_bits(gb, 4); // tag_reserved_bits |
1831 | 8.19k | n++; |
1832 | 8.19k | } |
1833 | | |
1834 | | /* excluded_chns_present? */ |
1835 | 12.9k | if (get_bits1(gb)) { |
1836 | 10.2k | n += decode_drc_channel_exclusions(che_drc, gb); |
1837 | 10.2k | } |
1838 | | |
1839 | | /* drc_bands_present? */ |
1840 | 12.9k | if (get_bits1(gb)) { |
1841 | 5.95k | che_drc->band_incr = get_bits(gb, 4); |
1842 | 5.95k | che_drc->interpolation_scheme = get_bits(gb, 4); |
1843 | 5.95k | n++; |
1844 | 5.95k | drc_num_bands += che_drc->band_incr; |
1845 | 77.8k | for (i = 0; i < drc_num_bands; i++) { |
1846 | 71.8k | che_drc->band_top[i] = get_bits(gb, 8); |
1847 | 71.8k | n++; |
1848 | 71.8k | } |
1849 | 5.95k | } |
1850 | | |
1851 | | /* prog_ref_level_present? */ |
1852 | 12.9k | if (get_bits1(gb)) { |
1853 | 5.71k | che_drc->prog_ref_level = get_bits(gb, 7); |
1854 | 5.71k | skip_bits1(gb); // prog_ref_level_reserved_bits |
1855 | 5.71k | n++; |
1856 | 5.71k | } |
1857 | | |
1858 | 91.7k | for (i = 0; i < drc_num_bands; i++) { |
1859 | 78.8k | che_drc->dyn_rng_sgn[i] = get_bits1(gb); |
1860 | 78.8k | che_drc->dyn_rng_ctl[i] = get_bits(gb, 7); |
1861 | 78.8k | n++; |
1862 | 78.8k | } |
1863 | | |
1864 | 12.9k | return n; |
1865 | 12.9k | } |
1866 | | |
1867 | 8.99k | static int decode_fill(AACDecContext *ac, GetBitContext *gb, int len) { |
1868 | 8.99k | uint8_t buf[256]; |
1869 | 8.99k | int i, major, minor; |
1870 | | |
1871 | 8.99k | if (len < 13+7*8) |
1872 | 3.07k | goto unknown; |
1873 | | |
1874 | 5.92k | get_bits(gb, 13); len -= 13; |
1875 | | |
1876 | 173k | for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8) |
1877 | 167k | buf[i] = get_bits(gb, 8); |
1878 | | |
1879 | 5.92k | buf[i] = 0; |
1880 | 5.92k | if (ac->avctx->debug & FF_DEBUG_PICT_INFO) |
1881 | 0 | av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf); |
1882 | | |
1883 | 5.92k | if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){ |
1884 | 673 | ac->avctx->internal->skip_samples = 1024; |
1885 | 673 | } |
1886 | | |
1887 | 8.99k | unknown: |
1888 | 8.99k | skip_bits_long(gb, len); |
1889 | | |
1890 | 8.99k | return 0; |
1891 | 5.92k | } |
1892 | | |
1893 | | /** |
1894 | | * Decode extension data (incomplete); reference: table 4.51. |
1895 | | * |
1896 | | * @param cnt length of TYPE_FIL syntactic element in bytes |
1897 | | * |
1898 | | * @return Returns number of bytes consumed |
1899 | | */ |
1900 | | static int decode_extension_payload(AACDecContext *ac, GetBitContext *gb, int cnt, |
1901 | | ChannelElement *che, enum RawDataBlockType elem_type) |
1902 | 510k | { |
1903 | 510k | int crc_flag = 0; |
1904 | 510k | int res = cnt; |
1905 | 510k | int type = get_bits(gb, 4); |
1906 | | |
1907 | 510k | if (ac->avctx->debug & FF_DEBUG_STARTCODE) |
1908 | 0 | av_log(ac->avctx, AV_LOG_DEBUG, "extension type: %d len:%d\n", type, cnt); |
1909 | | |
1910 | 510k | switch (type) { // extension type |
1911 | 159k | case EXT_SBR_DATA_CRC: |
1912 | 159k | crc_flag++; |
1913 | 452k | case EXT_SBR_DATA: |
1914 | 452k | if (!che) { |
1915 | 2.55k | av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n"); |
1916 | 2.55k | return res; |
1917 | 449k | } else if (ac->oc[1].m4ac.frame_length_short) { |
1918 | 2.26k | if (!ac->warned_960_sbr) |
1919 | 100 | avpriv_report_missing_feature(ac->avctx, |
1920 | 100 | "SBR with 960 frame length"); |
1921 | 2.26k | ac->warned_960_sbr = 1; |
1922 | 2.26k | skip_bits_long(gb, 8 * cnt - 4); |
1923 | 2.26k | return res; |
1924 | 447k | } else if (!ac->oc[1].m4ac.sbr) { |
1925 | 871 | av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n"); |
1926 | 871 | skip_bits_long(gb, 8 * cnt - 4); |
1927 | 871 | return res; |
1928 | 446k | } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) { |
1929 | 9.38k | av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n"); |
1930 | 9.38k | skip_bits_long(gb, 8 * cnt - 4); |
1931 | 9.38k | return res; |
1932 | 437k | } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && |
1933 | 437k | ac->avctx->ch_layout.nb_channels == 1) { |
1934 | 9.40k | ac->oc[1].m4ac.sbr = 1; |
1935 | 9.40k | ac->oc[1].m4ac.ps = 1; |
1936 | 9.40k | ac->avctx->profile = AV_PROFILE_AAC_HE_V2; |
1937 | 9.40k | ff_aac_output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, |
1938 | 9.40k | ac->oc[1].status, 1); |
1939 | 427k | } else { |
1940 | 427k | ac->oc[1].m4ac.sbr = 1; |
1941 | 427k | ac->avctx->profile = AV_PROFILE_AAC_HE; |
1942 | 427k | } |
1943 | | |
1944 | 437k | ac->proc.sbr_decode_extension(ac, che, gb, crc_flag, cnt, elem_type); |
1945 | | |
1946 | 437k | if (ac->oc[1].m4ac.ps == 1 && !ac->warned_he_aac_mono) { |
1947 | 1.57k | av_log(ac->avctx, AV_LOG_VERBOSE, "Treating HE-AAC mono as stereo.\n"); |
1948 | 1.57k | ac->warned_he_aac_mono = 1; |
1949 | 1.57k | } |
1950 | 437k | break; |
1951 | 12.9k | case EXT_DYNAMIC_RANGE: |
1952 | 12.9k | res = decode_dynamic_range(&ac->che_drc, gb); |
1953 | 12.9k | break; |
1954 | 8.99k | case EXT_FILL: |
1955 | 8.99k | decode_fill(ac, gb, 8 * cnt - 4); |
1956 | 8.99k | break; |
1957 | 2.00k | case EXT_FILL_DATA: |
1958 | 2.77k | case EXT_DATA_ELEMENT: |
1959 | 36.1k | default: |
1960 | 36.1k | skip_bits_long(gb, 8 * cnt - 4); |
1961 | 36.1k | break; |
1962 | 510k | }; |
1963 | 495k | return res; |
1964 | 510k | } |
1965 | | |
1966 | | /** |
1967 | | * channel coupling transformation interface |
1968 | | * |
1969 | | * @param apply_coupling_method pointer to (in)dependent coupling function |
1970 | | */ |
1971 | | static void apply_channel_coupling(AACDecContext *ac, ChannelElement *cc, |
1972 | | enum RawDataBlockType type, int elem_id, |
1973 | | enum CouplingPoint coupling_point, |
1974 | | void (*apply_coupling_method)(AACDecContext *ac, SingleChannelElement *target, ChannelElement *cce, int index)) |
1975 | 2.20M | { |
1976 | 2.20M | int i, c; |
1977 | | |
1978 | 143M | for (i = 0; i < MAX_ELEM_ID; i++) { |
1979 | 141M | ChannelElement *cce = ac->che[TYPE_CCE][i]; |
1980 | 141M | int index = 0; |
1981 | | |
1982 | 141M | if (cce && cce->coup.coupling_point == coupling_point) { |
1983 | 3.59M | ChannelCoupling *coup = &cce->coup; |
1984 | | |
1985 | 8.57M | for (c = 0; c <= coup->num_coupled; c++) { |
1986 | 4.98M | if (coup->type[c] == type && coup->id_select[c] == elem_id) { |
1987 | 1.65M | if (coup->ch_select[c] != 1) { |
1988 | 1.64M | apply_coupling_method(ac, &cc->ch[0], cce, index); |
1989 | 1.64M | if (coup->ch_select[c] != 0) |
1990 | 222k | index++; |
1991 | 1.64M | } |
1992 | 1.65M | if (coup->ch_select[c] != 2) |
1993 | 1.50M | apply_coupling_method(ac, &cc->ch[1], cce, index++); |
1994 | 1.65M | } else |
1995 | 3.33M | index += 1 + (coup->ch_select[c] == 3); |
1996 | 4.98M | } |
1997 | 3.59M | } |
1998 | 141M | } |
1999 | 2.20M | } |
2000 | | |
2001 | | /** |
2002 | | * Convert spectral data to samples, applying all supported tools as appropriate. |
2003 | | */ |
2004 | | static void spectral_to_sample(AACDecContext *ac, int samples) |
2005 | 1.77M | { |
2006 | 1.77M | int i, type; |
2007 | 1.77M | void (*imdct_and_window)(AACDecContext *ac, SingleChannelElement *sce); |
2008 | 1.77M | switch (ac->oc[1].m4ac.object_type) { |
2009 | 8.06k | case AOT_ER_AAC_LD: |
2010 | 8.06k | imdct_and_window = ac->dsp.imdct_and_windowing_ld; |
2011 | 8.06k | break; |
2012 | 4.29k | case AOT_ER_AAC_ELD: |
2013 | 4.29k | imdct_and_window = ac->dsp.imdct_and_windowing_eld; |
2014 | 4.29k | break; |
2015 | 1.76M | default: |
2016 | 1.76M | if (ac->oc[1].m4ac.frame_length_short) |
2017 | 15.1k | imdct_and_window = ac->dsp.imdct_and_windowing_960; |
2018 | 1.75M | else |
2019 | 1.75M | imdct_and_window = ac->dsp.imdct_and_windowing; |
2020 | 1.77M | } |
2021 | 8.89M | for (type = 3; type >= 0; type--) { |
2022 | 462M | for (i = 0; i < MAX_ELEM_ID; i++) { |
2023 | 455M | ChannelElement *che = ac->che[type][i]; |
2024 | 455M | if (che && che->present) { |
2025 | 753k | if (type <= TYPE_CPE) |
2026 | 732k | apply_channel_coupling(ac, che, type, i, BEFORE_TNS, ac->dsp.apply_dependent_coupling); |
2027 | 753k | if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { |
2028 | 85.0k | if (che->ch[0].ics.predictor_present) { |
2029 | 40.3k | if (che->ch[0].ics.ltp.present) |
2030 | 27.6k | ac->dsp.apply_ltp(ac, &che->ch[0]); |
2031 | 40.3k | if (che->ch[1].ics.ltp.present && type == TYPE_CPE) |
2032 | 20.3k | ac->dsp.apply_ltp(ac, &che->ch[1]); |
2033 | 40.3k | } |
2034 | 85.0k | } |
2035 | 753k | if (che->ch[0].tns.present) |
2036 | 23.2k | ac->dsp.apply_tns(che->ch[0].coeffs, |
2037 | 23.2k | &che->ch[0].tns, &che->ch[0].ics, 1); |
2038 | 753k | if (che->ch[1].tns.present) |
2039 | 11.3k | ac->dsp.apply_tns(che->ch[1].coeffs, |
2040 | 11.3k | &che->ch[1].tns, &che->ch[1].ics, 1); |
2041 | 753k | if (type <= TYPE_CPE) |
2042 | 732k | apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, ac->dsp.apply_dependent_coupling); |
2043 | 753k | if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { |
2044 | 747k | imdct_and_window(ac, &che->ch[0]); |
2045 | 747k | if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) |
2046 | 84.1k | ac->dsp.update_ltp(ac, &che->ch[0]); |
2047 | 747k | if (type == TYPE_CPE) { |
2048 | 246k | imdct_and_window(ac, &che->ch[1]); |
2049 | 246k | if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) |
2050 | 40.6k | ac->dsp.update_ltp(ac, &che->ch[1]); |
2051 | 246k | } |
2052 | 747k | if (ac->oc[1].m4ac.sbr > 0) { |
2053 | 455k | ac->proc.sbr_apply(ac, che, type, |
2054 | 455k | che->ch[0].output, |
2055 | 455k | che->ch[1].output); |
2056 | 455k | } |
2057 | 747k | } |
2058 | 753k | if (type <= TYPE_CCE) |
2059 | 740k | apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, ac->dsp.apply_independent_coupling); |
2060 | 753k | ac->dsp.clip_output(ac, che, type, samples); |
2061 | 753k | che->present = 0; |
2062 | 454M | } else if (che) { |
2063 | 42.1M | av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d missing \n", type, i); |
2064 | 42.1M | } |
2065 | 455M | } |
2066 | 7.11M | } |
2067 | 1.77M | } |
2068 | | |
2069 | | static int parse_adts_frame_header(AACDecContext *ac, GetBitContext *gb) |
2070 | 319k | { |
2071 | 319k | int size; |
2072 | 319k | AACADTSHeaderInfo hdr_info; |
2073 | 319k | uint8_t layout_map[MAX_ELEM_ID*4][3]; |
2074 | 319k | int layout_map_tags, ret; |
2075 | | |
2076 | 319k | size = ff_adts_header_parse(gb, &hdr_info); |
2077 | 319k | if (size > 0) { |
2078 | 309k | if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) { |
2079 | | // This is 2 for "VLB " audio in NSV files. |
2080 | | // See samples/nsv/vlb_audio. |
2081 | 4.55k | avpriv_report_missing_feature(ac->avctx, |
2082 | 4.55k | "More than one AAC RDB per ADTS frame"); |
2083 | 4.55k | ac->warned_num_aac_frames = 1; |
2084 | 4.55k | } |
2085 | 309k | push_output_configuration(ac); |
2086 | 309k | if (hdr_info.chan_config) { |
2087 | 304k | ac->oc[1].m4ac.chan_config = hdr_info.chan_config; |
2088 | 304k | if ((ret = ff_aac_set_default_channel_config(ac, ac->avctx, |
2089 | 304k | layout_map, |
2090 | 304k | &layout_map_tags, |
2091 | 304k | hdr_info.chan_config)) < 0) |
2092 | 0 | return ret; |
2093 | 304k | if ((ret = ff_aac_output_configure(ac, layout_map, layout_map_tags, |
2094 | 304k | FFMAX(ac->oc[1].status, |
2095 | 304k | OC_TRIAL_FRAME), 0)) < 0) |
2096 | 0 | return ret; |
2097 | 304k | } else { |
2098 | 5.47k | ac->oc[1].m4ac.chan_config = 0; |
2099 | | /** |
2100 | | * dual mono frames in Japanese DTV can have chan_config 0 |
2101 | | * WITHOUT specifying PCE. |
2102 | | * thus, set dual mono as default. |
2103 | | */ |
2104 | 5.47k | if (ac->dmono_mode && ac->oc[0].status == OC_NONE) { |
2105 | 0 | layout_map_tags = 2; |
2106 | 0 | layout_map[0][0] = layout_map[1][0] = TYPE_SCE; |
2107 | 0 | layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT; |
2108 | 0 | layout_map[0][1] = 0; |
2109 | 0 | layout_map[1][1] = 1; |
2110 | 0 | if (ff_aac_output_configure(ac, layout_map, layout_map_tags, |
2111 | 0 | OC_TRIAL_FRAME, 0)) |
2112 | 0 | return -7; |
2113 | 0 | } |
2114 | 5.47k | } |
2115 | 309k | ac->oc[1].m4ac.sample_rate = hdr_info.sample_rate; |
2116 | 309k | ac->oc[1].m4ac.sampling_index = hdr_info.sampling_index; |
2117 | 309k | ac->oc[1].m4ac.object_type = hdr_info.object_type; |
2118 | 309k | ac->oc[1].m4ac.frame_length_short = 0; |
2119 | 309k | if (ac->oc[0].status != OC_LOCKED || |
2120 | 309k | ac->oc[0].m4ac.chan_config != hdr_info.chan_config || |
2121 | 309k | ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) { |
2122 | 184k | ac->oc[1].m4ac.sbr = -1; |
2123 | 184k | ac->oc[1].m4ac.ps = -1; |
2124 | 184k | } |
2125 | 309k | if (!hdr_info.crc_absent) |
2126 | 1.98k | skip_bits(gb, 16); |
2127 | 309k | } |
2128 | 319k | return size; |
2129 | 319k | } |
2130 | | |
2131 | | static int aac_decode_er_frame(AVCodecContext *avctx, AVFrame *frame, |
2132 | | int *got_frame_ptr, GetBitContext *gb) |
2133 | 41.8k | { |
2134 | 41.8k | AACDecContext *ac = avctx->priv_data; |
2135 | 41.8k | const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac; |
2136 | 41.8k | ChannelElement *che; |
2137 | 41.8k | int err, i; |
2138 | 41.8k | int samples = m4ac->frame_length_short ? 960 : 1024; |
2139 | 41.8k | int chan_config = m4ac->chan_config; |
2140 | 41.8k | int aot = m4ac->object_type; |
2141 | | |
2142 | 41.8k | if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) |
2143 | 27.3k | samples >>= 1; |
2144 | | |
2145 | 41.8k | ac->frame = frame; |
2146 | | |
2147 | 41.8k | if ((err = frame_configure_elements(avctx)) < 0) |
2148 | 0 | return err; |
2149 | | |
2150 | | // The AV_PROFILE_AAC_* defines are all object_type - 1 |
2151 | | // This may lead to an undefined profile being signaled |
2152 | 41.8k | ac->avctx->profile = aot - 1; |
2153 | | |
2154 | 41.8k | ac->tags_mapped = 0; |
2155 | | |
2156 | 41.8k | if (chan_config < 0 || (chan_config >= 8 && chan_config < 11) || chan_config >= 13) { |
2157 | 855 | avpriv_request_sample(avctx, "Unknown ER channel configuration %d", |
2158 | 855 | chan_config); |
2159 | 855 | return AVERROR_INVALIDDATA; |
2160 | 855 | } |
2161 | 116k | for (i = 0; i < ff_tags_per_config[chan_config]; i++) { |
2162 | 95.5k | const int elem_type = ff_aac_channel_layout_map[chan_config-1][i][0]; |
2163 | 95.5k | const int elem_id = ff_aac_channel_layout_map[chan_config-1][i][1]; |
2164 | 95.5k | if (!(che=ff_aac_get_che(ac, elem_type, elem_id))) { |
2165 | 0 | av_log(ac->avctx, AV_LOG_ERROR, |
2166 | 0 | "channel element %d.%d is not allocated\n", |
2167 | 0 | elem_type, elem_id); |
2168 | 0 | return AVERROR_INVALIDDATA; |
2169 | 0 | } |
2170 | 95.5k | che->present = 1; |
2171 | 95.5k | if (aot != AOT_ER_AAC_ELD) |
2172 | 73.0k | skip_bits(gb, 4); |
2173 | 95.5k | switch (elem_type) { |
2174 | 38.5k | case TYPE_SCE: |
2175 | 38.5k | err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0); |
2176 | 38.5k | break; |
2177 | 47.4k | case TYPE_CPE: |
2178 | 47.4k | err = decode_cpe(ac, gb, che); |
2179 | 47.4k | break; |
2180 | 9.54k | case TYPE_LFE: |
2181 | 9.54k | err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0); |
2182 | 9.54k | break; |
2183 | 95.5k | } |
2184 | 95.5k | if (err < 0) |
2185 | 20.2k | return err; |
2186 | 95.5k | } |
2187 | | |
2188 | 20.6k | spectral_to_sample(ac, samples); |
2189 | | |
2190 | 20.6k | if (!ac->frame->data[0] && samples) { |
2191 | 1.35k | av_log(avctx, AV_LOG_ERROR, "no frame data found\n"); |
2192 | 1.35k | return AVERROR_INVALIDDATA; |
2193 | 1.35k | } |
2194 | | |
2195 | 19.3k | ac->frame->nb_samples = samples; |
2196 | 19.3k | ac->frame->sample_rate = avctx->sample_rate; |
2197 | 19.3k | *got_frame_ptr = 1; |
2198 | | |
2199 | 19.3k | skip_bits_long(gb, get_bits_left(gb)); |
2200 | 19.3k | return 0; |
2201 | 20.6k | } |
2202 | | |
2203 | | static int decode_frame_ga(AVCodecContext *avctx, AACDecContext *ac, |
2204 | | GetBitContext *gb, int *got_frame_ptr) |
2205 | 2.74M | { |
2206 | 2.74M | int err; |
2207 | 2.74M | int is_dmono; |
2208 | 2.74M | int elem_id; |
2209 | 2.74M | enum RawDataBlockType elem_type, che_prev_type = TYPE_END; |
2210 | 2.74M | uint8_t che_presence[4][MAX_ELEM_ID] = {{0}}; |
2211 | 2.74M | ChannelElement *che = NULL, *che_prev = NULL; |
2212 | 2.74M | int samples = 0, multiplier, audio_found = 0, pce_found = 0, sce_count = 0; |
2213 | 2.74M | AVFrame *frame = ac->frame; |
2214 | | |
2215 | 2.74M | int payload_alignment = get_bits_count(gb); |
2216 | | // parse |
2217 | 3.88M | while ((elem_type = get_bits(gb, 3)) != TYPE_END) { |
2218 | 2.12M | elem_id = get_bits(gb, 4); |
2219 | | |
2220 | 2.12M | if (avctx->debug & FF_DEBUG_STARTCODE) |
2221 | 0 | av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id); |
2222 | | |
2223 | 2.12M | if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE) |
2224 | 13.3k | return AVERROR_INVALIDDATA; |
2225 | | |
2226 | 2.10M | if (elem_type < TYPE_DSE) { |
2227 | 1.34M | if (che_presence[elem_type][elem_id]) { |
2228 | 45.8k | int error = che_presence[elem_type][elem_id] > 1; |
2229 | 45.8k | av_log(ac->avctx, error ? AV_LOG_ERROR : AV_LOG_DEBUG, "channel element %d.%d duplicate\n", |
2230 | 45.8k | elem_type, elem_id); |
2231 | 45.8k | if (error) |
2232 | 6.71k | return AVERROR_INVALIDDATA; |
2233 | 45.8k | } |
2234 | 1.33M | che_presence[elem_type][elem_id]++; |
2235 | | |
2236 | 1.33M | if (!(che=ff_aac_get_che(ac, elem_type, elem_id))) { |
2237 | 186k | av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", |
2238 | 186k | elem_type, elem_id); |
2239 | 186k | return AVERROR_INVALIDDATA; |
2240 | 186k | } |
2241 | 1.15M | samples = ac->oc[1].m4ac.frame_length_short ? 960 : 1024; |
2242 | 1.15M | che->present = 1; |
2243 | 1.15M | } |
2244 | | |
2245 | 1.91M | switch (elem_type) { |
2246 | | |
2247 | 714k | case TYPE_SCE: |
2248 | 714k | err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0); |
2249 | 714k | audio_found = 1; |
2250 | 714k | sce_count++; |
2251 | 714k | break; |
2252 | | |
2253 | 421k | case TYPE_CPE: |
2254 | 421k | err = decode_cpe(ac, gb, che); |
2255 | 421k | audio_found = 1; |
2256 | 421k | break; |
2257 | | |
2258 | 12.9k | case TYPE_CCE: |
2259 | 12.9k | err = ac->proc.decode_cce(ac, gb, che); |
2260 | 12.9k | break; |
2261 | | |
2262 | 4.95k | case TYPE_LFE: |
2263 | 4.95k | err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0); |
2264 | 4.95k | audio_found = 1; |
2265 | 4.95k | break; |
2266 | | |
2267 | 127k | case TYPE_DSE: |
2268 | 127k | err = skip_data_stream_element(ac, gb); |
2269 | 127k | break; |
2270 | | |
2271 | 101k | case TYPE_PCE: { |
2272 | 101k | uint8_t layout_map[MAX_ELEM_ID*4][3] = {{0}}; |
2273 | 101k | int tags; |
2274 | | |
2275 | 101k | int pushed = push_output_configuration(ac); |
2276 | 101k | if (pce_found && !pushed) |
2277 | 913 | return AVERROR_INVALIDDATA; |
2278 | | |
2279 | 100k | tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb, |
2280 | 100k | payload_alignment); |
2281 | 100k | if (tags < 0) { |
2282 | 79.8k | err = tags; |
2283 | 79.8k | break; |
2284 | 79.8k | } |
2285 | 20.6k | if (pce_found) { |
2286 | 63 | av_log(avctx, AV_LOG_ERROR, |
2287 | 63 | "Not evaluating a further program_config_element as this construct is dubious at best.\n"); |
2288 | 63 | pop_output_configuration(ac); |
2289 | 20.5k | } else { |
2290 | 20.5k | err = ff_aac_output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1); |
2291 | 20.5k | if (!err) |
2292 | 19.9k | ac->oc[1].m4ac.chan_config = 0; |
2293 | 20.5k | pce_found = 1; |
2294 | 20.5k | } |
2295 | 20.6k | break; |
2296 | 100k | } |
2297 | | |
2298 | 532k | case TYPE_FIL: |
2299 | 532k | if (elem_id == 15) |
2300 | 128k | elem_id += get_bits(gb, 8) - 1; |
2301 | 532k | if (get_bits_left(gb) < 8 * elem_id) { |
2302 | 19.0k | av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err); |
2303 | 19.0k | return AVERROR_INVALIDDATA; |
2304 | 19.0k | } |
2305 | 513k | err = 0; |
2306 | 1.02M | while (elem_id > 0) { |
2307 | 510k | int ret = decode_extension_payload(ac, gb, elem_id, che_prev, che_prev_type); |
2308 | 510k | if (ret < 0) { |
2309 | 0 | err = ret; |
2310 | 0 | break; |
2311 | 0 | } |
2312 | 510k | elem_id -= ret; |
2313 | 510k | } |
2314 | 513k | break; |
2315 | | |
2316 | 0 | default: |
2317 | 0 | err = AVERROR_BUG; /* should not happen, but keeps compiler happy */ |
2318 | 0 | break; |
2319 | 1.91M | } |
2320 | | |
2321 | 1.89M | if (elem_type < TYPE_DSE) { |
2322 | 1.15M | che_prev = che; |
2323 | 1.15M | che_prev_type = elem_type; |
2324 | 1.15M | } |
2325 | | |
2326 | 1.89M | if (err) |
2327 | 480k | return err; |
2328 | | |
2329 | 1.41M | if (get_bits_left(gb) < 3) { |
2330 | 270k | av_log(avctx, AV_LOG_ERROR, overread_err); |
2331 | 270k | return AVERROR_INVALIDDATA; |
2332 | 270k | } |
2333 | 1.41M | } |
2334 | | |
2335 | 1.76M | if (!avctx->ch_layout.nb_channels) |
2336 | 4.16k | return 0; |
2337 | | |
2338 | 1.75M | multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0; |
2339 | 1.75M | samples <<= multiplier; |
2340 | | |
2341 | 1.75M | spectral_to_sample(ac, samples); |
2342 | | |
2343 | 1.75M | if (ac->oc[1].status && audio_found) { |
2344 | 163k | avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier; |
2345 | 163k | avctx->frame_size = samples; |
2346 | 163k | ac->oc[1].status = OC_LOCKED; |
2347 | 163k | } |
2348 | | |
2349 | 1.75M | if (!ac->frame->data[0] && samples) { |
2350 | 0 | av_log(avctx, AV_LOG_ERROR, "no frame data found\n"); |
2351 | 0 | return AVERROR_INVALIDDATA; |
2352 | 0 | } |
2353 | | |
2354 | 1.75M | if (samples) { |
2355 | 166k | ac->frame->nb_samples = samples; |
2356 | 166k | ac->frame->sample_rate = avctx->sample_rate; |
2357 | 166k | *got_frame_ptr = 1; |
2358 | 1.59M | } else { |
2359 | 1.59M | av_frame_unref(ac->frame); |
2360 | 1.59M | *got_frame_ptr = 0; |
2361 | 1.59M | } |
2362 | | |
2363 | | /* for dual-mono audio (SCE + SCE) */ |
2364 | 1.75M | is_dmono = ac->dmono_mode && sce_count == 2 && |
2365 | 1.75M | !av_channel_layout_compare(&ac->oc[1].ch_layout, |
2366 | 0 | &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO); |
2367 | 1.75M | if (is_dmono) { |
2368 | 0 | if (ac->dmono_mode == 1) |
2369 | 0 | frame->data[1] = frame->data[0]; |
2370 | 0 | else if (ac->dmono_mode == 2) |
2371 | 0 | frame->data[0] = frame->data[1]; |
2372 | 0 | } |
2373 | | |
2374 | 1.75M | return 0; |
2375 | 1.75M | } |
2376 | | |
2377 | | static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame, |
2378 | | int *got_frame_ptr, GetBitContext *gb, |
2379 | | const AVPacket *avpkt) |
2380 | 2.97M | { |
2381 | 2.97M | int err; |
2382 | 2.97M | AACDecContext *ac = avctx->priv_data; |
2383 | | |
2384 | 2.97M | ac->frame = frame; |
2385 | 2.97M | *got_frame_ptr = 0; |
2386 | | |
2387 | 2.97M | if (show_bits(gb, 12) == 0xfff) { |
2388 | 319k | if ((err = parse_adts_frame_header(ac, gb)) < 0) { |
2389 | 9.24k | av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n"); |
2390 | 9.24k | goto fail; |
2391 | 9.24k | } |
2392 | 309k | if (ac->oc[1].m4ac.sampling_index > 12) { |
2393 | 0 | av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index); |
2394 | 0 | err = AVERROR_INVALIDDATA; |
2395 | 0 | goto fail; |
2396 | 0 | } |
2397 | 309k | } |
2398 | | |
2399 | 2.96M | if ((err = frame_configure_elements(avctx)) < 0) |
2400 | 0 | goto fail; |
2401 | | |
2402 | | // The AV_PROFILE_AAC_* defines are all object_type - 1 |
2403 | | // This may lead to an undefined profile being signaled |
2404 | 2.96M | ac->avctx->profile = ac->oc[1].m4ac.object_type - 1; |
2405 | | |
2406 | 2.96M | ac->tags_mapped = 0; |
2407 | | |
2408 | 2.96M | if (ac->oc[1].m4ac.object_type == AOT_USAC) { |
2409 | 221k | if (ac->is_fixed) { |
2410 | 1.92k | avpriv_report_missing_feature(ac->avctx, |
2411 | 1.92k | "AAC USAC fixed-point decoding"); |
2412 | 1.92k | return AVERROR_PATCHWELCOME; |
2413 | 1.92k | } |
2414 | 219k | #if CONFIG_AAC_DECODER |
2415 | 219k | err = ff_aac_usac_decode_frame(avctx, ac, gb, got_frame_ptr); |
2416 | 219k | if (err < 0) |
2417 | 214k | goto fail; |
2418 | 219k | #endif |
2419 | 2.74M | } else { |
2420 | 2.74M | err = decode_frame_ga(avctx, ac, gb, got_frame_ptr); |
2421 | 2.74M | if (err < 0) |
2422 | 978k | goto fail; |
2423 | 2.74M | } |
2424 | | |
2425 | 1.76M | return err; |
2426 | | |
2427 | 1.20M | fail: |
2428 | 1.20M | pop_output_configuration(ac); |
2429 | 1.20M | return err; |
2430 | 2.96M | } |
2431 | | |
2432 | | static int aac_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
2433 | | int *got_frame_ptr, AVPacket *avpkt) |
2434 | 2.58M | { |
2435 | 2.58M | AACDecContext *ac = avctx->priv_data; |
2436 | 2.58M | const uint8_t *buf = avpkt->data; |
2437 | 2.58M | int buf_size = avpkt->size; |
2438 | 2.58M | GetBitContext gb; |
2439 | 2.58M | int buf_consumed; |
2440 | 2.58M | int buf_offset; |
2441 | 2.58M | int err; |
2442 | 2.58M | size_t new_extradata_size; |
2443 | 2.58M | const uint8_t *new_extradata = av_packet_get_side_data(avpkt, |
2444 | 2.58M | AV_PKT_DATA_NEW_EXTRADATA, |
2445 | 2.58M | &new_extradata_size); |
2446 | 2.58M | size_t jp_dualmono_size; |
2447 | 2.58M | const uint8_t *jp_dualmono = av_packet_get_side_data(avpkt, |
2448 | 2.58M | AV_PKT_DATA_JP_DUALMONO, |
2449 | 2.58M | &jp_dualmono_size); |
2450 | | |
2451 | 2.58M | if (new_extradata) { |
2452 | | /* discard previous configuration */ |
2453 | 0 | ac->oc[1].status = OC_NONE; |
2454 | 0 | err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1], |
2455 | 0 | new_extradata, |
2456 | 0 | new_extradata_size * 8LL, 1); |
2457 | 0 | if (err < 0) { |
2458 | 0 | return err; |
2459 | 0 | } |
2460 | 0 | } |
2461 | | |
2462 | 2.58M | ac->dmono_mode = 0; |
2463 | 2.58M | if (jp_dualmono && jp_dualmono_size > 0) |
2464 | 0 | ac->dmono_mode = 1 + *jp_dualmono; |
2465 | 2.58M | if (ac->force_dmono_mode >= 0) |
2466 | 0 | ac->dmono_mode = ac->force_dmono_mode; |
2467 | | |
2468 | 2.58M | if (INT_MAX / 8 <= buf_size) |
2469 | 0 | return AVERROR_INVALIDDATA; |
2470 | | |
2471 | 2.58M | if ((err = init_get_bits8(&gb, buf, buf_size)) < 0) |
2472 | 0 | return err; |
2473 | | |
2474 | 2.58M | switch (ac->oc[1].m4ac.object_type) { |
2475 | 1.52k | case AOT_ER_AAC_LC: |
2476 | 1.52k | case AOT_ER_AAC_LTP: |
2477 | 17.6k | case AOT_ER_AAC_LD: |
2478 | 20.9k | case AOT_ER_AAC_ELD: |
2479 | 20.9k | err = aac_decode_er_frame(avctx, frame, got_frame_ptr, &gb); |
2480 | 20.9k | break; |
2481 | 2.56M | default: |
2482 | 2.56M | err = aac_decode_frame_int(avctx, frame, got_frame_ptr, &gb, avpkt); |
2483 | 2.58M | } |
2484 | 2.58M | if (err < 0) |
2485 | 895k | return err; |
2486 | | |
2487 | 1.69M | buf_consumed = (get_bits_count(&gb) + 7) >> 3; |
2488 | 1.92M | for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++) |
2489 | 1.79M | if (buf[buf_offset]) |
2490 | 1.56M | break; |
2491 | | |
2492 | 1.69M | return buf_size > buf_offset ? buf_consumed : buf_size; |
2493 | 2.58M | } |
2494 | | |
2495 | | #if CONFIG_AAC_LATM_DECODER |
2496 | | #include "aacdec_latm.h" |
2497 | | #endif |
2498 | | |
2499 | | #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM |
2500 | | #define OFF(field) offsetof(AACDecContext, field) |
2501 | | static const AVOption options[] = { |
2502 | | /** |
2503 | | * AVOptions for Japanese DTV specific extensions (ADTS only) |
2504 | | */ |
2505 | | {"dual_mono_mode", "Select the channel to decode for dual mono", |
2506 | | OFF(force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2, |
2507 | | AACDEC_FLAGS, .unit = "dual_mono_mode"}, |
2508 | | |
2509 | | {"auto", "autoselection", 0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, .unit = "dual_mono_mode"}, |
2510 | | {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, .unit = "dual_mono_mode"}, |
2511 | | {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, .unit = "dual_mono_mode"}, |
2512 | | {"both", "Select both channels", 0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, .unit = "dual_mono_mode"}, |
2513 | | |
2514 | | { "channel_order", "Order in which the channels are to be exported", |
2515 | | OFF(output_channel_order), AV_OPT_TYPE_INT, |
2516 | | { .i64 = CHANNEL_ORDER_DEFAULT }, 0, 1, AACDEC_FLAGS, .unit = "channel_order" }, |
2517 | | { "default", "normal libavcodec channel order", 0, AV_OPT_TYPE_CONST, |
2518 | | { .i64 = CHANNEL_ORDER_DEFAULT }, .flags = AACDEC_FLAGS, .unit = "channel_order" }, |
2519 | | { "coded", "order in which the channels are coded in the bitstream", |
2520 | | 0, AV_OPT_TYPE_CONST, { .i64 = CHANNEL_ORDER_CODED }, .flags = AACDEC_FLAGS, .unit = "channel_order" }, |
2521 | | |
2522 | | {NULL}, |
2523 | | }; |
2524 | | |
2525 | | static const AVClass decoder_class = { |
2526 | | .class_name = "AAC decoder", |
2527 | | .item_name = av_default_item_name, |
2528 | | .option = options, |
2529 | | .version = LIBAVUTIL_VERSION_INT, |
2530 | | }; |
2531 | | |
#if CONFIG_AAC_DECODER
/* Codec entry for the "aac" decoder: initialised through
 * ff_aac_decode_init_float and producing AV_SAMPLE_FMT_FLTP samples. */
const FFCodec ff_aac_decoder = {
    /* public AVCodec fields */
    .p.name         = "aac",
    CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_AAC,
    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .p.sample_fmts  = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .p.ch_layouts   = ff_aac_ch_layout,
    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
    .p.priv_class   = &decoder_class,

    /* private context and internal capabilities */
    .priv_data_size = sizeof(AACDecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,

    /* callbacks */
    .init           = ff_aac_decode_init_float,
    FF_CODEC_DECODE_CB(aac_decode_frame),
    .flush          = flush,
    .close          = decode_close,
};
#endif
2553 | | |
#if CONFIG_AAC_FIXED_DECODER
/* Codec entry for the "aac_fixed" decoder: initialised through
 * ff_aac_decode_init_fixed and producing AV_SAMPLE_FMT_S32P samples. */
const FFCodec ff_aac_fixed_decoder = {
    /* public AVCodec fields */
    .p.name         = "aac_fixed",
    CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_AAC,
    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .p.sample_fmts  = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE
    },
    .p.ch_layouts   = ff_aac_ch_layout,
    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
    .p.priv_class   = &decoder_class,

    /* private context and internal capabilities */
    .priv_data_size = sizeof(AACDecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,

    /* callbacks */
    .init           = ff_aac_decode_init_fixed,
    FF_CODEC_DECODE_CB(aac_decode_frame),
    .flush          = flush,
    .close          = decode_close,
};
#endif