/src/ffmpeg/libavcodec/aac/aacdec_latm.h
Line | Count | Source |
1 | | /* |
2 | | * AAC decoder |
3 | | * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) |
4 | | * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) |
5 | | * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com> |
6 | | * |
7 | | * AAC LATM decoder |
8 | | * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz> |
9 | | * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net> |
10 | | * |
11 | | * AAC decoder fixed-point implementation |
12 | | * Copyright (c) 2013 |
13 | | * MIPS Technologies, Inc., California. |
14 | | * |
15 | | * This file is part of FFmpeg. |
16 | | * |
17 | | * FFmpeg is free software; you can redistribute it and/or |
18 | | * modify it under the terms of the GNU Lesser General Public |
19 | | * License as published by the Free Software Foundation; either |
20 | | * version 2.1 of the License, or (at your option) any later version. |
21 | | * |
22 | | * FFmpeg is distributed in the hope that it will be useful, |
23 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
24 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
25 | | * Lesser General Public License for more details. |
26 | | * |
27 | | * You should have received a copy of the GNU Lesser General Public |
28 | | * License along with FFmpeg; if not, write to the Free Software |
29 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
30 | | */ |
31 | | |
32 | | #ifndef AVCODEC_AAC_AACDEC_LATM_H |
33 | | #define AVCODEC_AAC_AACDEC_LATM_H |
34 | | |
/* Value of the 11-bit field that must open every packet handed to the
 * LATM frame decoder; packets starting with anything else are rejected. */
#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word

/**
 * Private state of the AAC LATM decoder: the wrapped AAC decoder context
 * plus the stream mux configuration fields that are remembered from one
 * packet to the next.
 */
struct LATMContext {
    AACDecContext aac_ctx; ///< wrapped AAC decoder context (NOTE(review): presumably has to stay the first member so shared AAC callbacks can use priv_data directly - confirm)
    int initialized; ///< non-zero once the AAC decoder was configured from extradata; cleared when a new config is stored

    // parser data
    int audio_mux_version_A; ///< audioMuxVersionA bit; only the value 0 is parsed
    int frame_length_type; ///< 3-bit frameLengthType: 0 = length signalled per payload, 1 = fixed frame_length
    int frame_length; ///< 9-bit frame length, only read when frame_length_type == 1
};
46 | | |
47 | | static inline uint32_t latm_get_value(GetBitContext *b) |
48 | 27.1k | { |
49 | 27.1k | int length = get_bits(b, 2); |
50 | | |
51 | 27.1k | return get_bits_long(b, (length+1)*8); |
52 | 27.1k | } |
53 | | |
54 | | static int latm_decode_audio_specific_config(struct LATMContext *latmctx, |
55 | | GetBitContext *gb, int asclen) |
56 | 79.9k | { |
57 | 79.9k | AACDecContext *ac = &latmctx->aac_ctx; |
58 | 79.9k | AVCodecContext *avctx = ac->avctx; |
59 | 79.9k | OutputConfiguration oc = { 0 }; |
60 | 79.9k | MPEG4AudioConfig *m4ac = &oc.m4ac; |
61 | 79.9k | GetBitContext gbc; |
62 | 79.9k | int config_start_bit = get_bits_count(gb); |
63 | 79.9k | int sync_extension = 0; |
64 | 79.9k | int bits_consumed, esize, i; |
65 | | |
66 | 79.9k | if (asclen > 0) { |
67 | 7.63k | sync_extension = 1; |
68 | 7.63k | asclen = FFMIN(asclen, get_bits_left(gb)); |
69 | 7.63k | init_get_bits(&gbc, gb->buffer, config_start_bit + asclen); |
70 | 7.63k | skip_bits_long(&gbc, config_start_bit); |
71 | 72.2k | } else if (asclen == 0) { |
72 | 71.0k | gbc = *gb; |
73 | 71.0k | } else { |
74 | 1.18k | return AVERROR_INVALIDDATA; |
75 | 1.18k | } |
76 | | |
77 | 78.7k | if (get_bits_left(gb) <= 0) |
78 | 2.91k | return AVERROR_INVALIDDATA; |
79 | | |
80 | 75.8k | bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc, |
81 | 75.8k | &gbc, config_start_bit, |
82 | 75.8k | sync_extension); |
83 | | |
84 | 75.8k | if (bits_consumed < config_start_bit) |
85 | 20.7k | return AVERROR_INVALIDDATA; |
86 | 55.1k | bits_consumed -= config_start_bit; |
87 | | |
88 | 55.1k | if (asclen == 0) |
89 | 50.3k | asclen = bits_consumed; |
90 | | |
91 | 55.1k | if (!latmctx->initialized || |
92 | 27.9k | ac->oc[1].m4ac.sample_rate != m4ac->sample_rate || |
93 | 40.9k | ac->oc[1].m4ac.chan_config != m4ac->chan_config) { |
94 | | |
95 | 40.9k | if (latmctx->initialized) { |
96 | 13.8k | av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", |
97 | 13.8k | m4ac->sample_rate, m4ac->chan_config); |
98 | 27.1k | } else { |
99 | 27.1k | av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n"); |
100 | 27.1k | } |
101 | 40.9k | latmctx->initialized = 0; |
102 | | |
103 | 40.9k | esize = (asclen + 7) / 8; |
104 | | |
105 | 40.9k | if (avctx->extradata_size < esize) { |
106 | 11.5k | av_free(avctx->extradata); |
107 | 11.5k | avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE); |
108 | 11.5k | if (!avctx->extradata) |
109 | 0 | return AVERROR(ENOMEM); |
110 | 11.5k | } |
111 | | |
112 | 40.9k | avctx->extradata_size = esize; |
113 | 40.9k | gbc = *gb; |
114 | 15.6M | for (i = 0; i < esize; i++) { |
115 | 15.6M | avctx->extradata[i] = get_bits(&gbc, 8); |
116 | 15.6M | } |
117 | 40.9k | memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE); |
118 | 40.9k | } |
119 | 55.1k | skip_bits_long(gb, asclen); |
120 | | |
121 | 55.1k | return 0; |
122 | 55.1k | } |
123 | | |
124 | | static int read_stream_mux_config(struct LATMContext *latmctx, |
125 | | GetBitContext *gb) |
126 | 378k | { |
127 | 378k | int ret, audio_mux_version = get_bits(gb, 1); |
128 | | |
129 | 378k | latmctx->audio_mux_version_A = 0; |
130 | 378k | if (audio_mux_version) |
131 | 303k | latmctx->audio_mux_version_A = get_bits(gb, 1); |
132 | | |
133 | 378k | if (!latmctx->audio_mux_version_A) { |
134 | | |
135 | 90.9k | if (audio_mux_version) |
136 | 15.7k | latm_get_value(gb); // taraFullness |
137 | | |
138 | 90.9k | skip_bits(gb, 1); // allStreamSameTimeFraming |
139 | 90.9k | skip_bits(gb, 6); // numSubFrames |
140 | | // numPrograms |
141 | 90.9k | if (get_bits(gb, 4)) { // numPrograms |
142 | 8.66k | avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs"); |
143 | 8.66k | return AVERROR_PATCHWELCOME; |
144 | 8.66k | } |
145 | | |
146 | | // for each program (which there is only one in DVB) |
147 | | |
148 | | // for each layer (which there is only one in DVB) |
149 | 82.3k | if (get_bits(gb, 3)) { // numLayer |
150 | 2.41k | avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers"); |
151 | 2.41k | return AVERROR_PATCHWELCOME; |
152 | 2.41k | } |
153 | | |
154 | | // for all but first stream: use_same_config = get_bits(gb, 1); |
155 | 79.9k | if (!audio_mux_version) { |
156 | 70.3k | if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0) |
157 | 20.3k | return ret; |
158 | 70.3k | } else { |
159 | 9.56k | int ascLen = latm_get_value(gb); |
160 | 9.56k | if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0) |
161 | 4.51k | return ret; |
162 | 9.56k | } |
163 | | |
164 | 55.1k | latmctx->frame_length_type = get_bits(gb, 3); |
165 | 55.1k | switch (latmctx->frame_length_type) { |
166 | 10.5k | case 0: |
167 | 10.5k | skip_bits(gb, 8); // latmBufferFullness |
168 | 10.5k | break; |
169 | 1.22k | case 1: |
170 | 1.22k | latmctx->frame_length = get_bits(gb, 9); |
171 | 1.22k | break; |
172 | 3.03k | case 3: |
173 | 6.80k | case 4: |
174 | 8.48k | case 5: |
175 | 8.48k | skip_bits(gb, 6); // CELP frame length table index |
176 | 8.48k | break; |
177 | 4.14k | case 6: |
178 | 24.3k | case 7: |
179 | 24.3k | skip_bits(gb, 1); // HVXC frame length table index |
180 | 24.3k | break; |
181 | 55.1k | } |
182 | | |
183 | 55.1k | if (get_bits(gb, 1)) { // other data |
184 | 25.8k | if (audio_mux_version) { |
185 | 1.87k | latm_get_value(gb); // other_data_bits |
186 | 23.9k | } else { |
187 | 23.9k | int esc; |
188 | 60.5k | do { |
189 | 60.5k | if (get_bits_left(gb) < 9) |
190 | 12.8k | return AVERROR_INVALIDDATA; |
191 | 47.7k | esc = get_bits(gb, 1); |
192 | 47.7k | skip_bits(gb, 8); |
193 | 47.7k | } while (esc); |
194 | 23.9k | } |
195 | 25.8k | } |
196 | | |
197 | 42.2k | if (get_bits(gb, 1)) // crc present |
198 | 12.4k | skip_bits(gb, 8); // config_crc |
199 | 42.2k | } |
200 | | |
201 | 329k | return 0; |
202 | 378k | } |
203 | | |
204 | | static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb) |
205 | 113k | { |
206 | 113k | uint8_t tmp; |
207 | | |
208 | 113k | if (ctx->frame_length_type == 0) { |
209 | 13.8k | int mux_slot_length = 0; |
210 | 1.36M | do { |
211 | 1.36M | if (get_bits_left(gb) < 8) |
212 | 6.64k | return AVERROR_INVALIDDATA; |
213 | 1.35M | tmp = get_bits(gb, 8); |
214 | 1.35M | mux_slot_length += tmp; |
215 | 1.35M | } while (tmp == 255); |
216 | 7.21k | return mux_slot_length; |
217 | 99.9k | } else if (ctx->frame_length_type == 1) { |
218 | 1.82k | return ctx->frame_length; |
219 | 98.1k | } else if (ctx->frame_length_type == 3 || |
220 | 94.2k | ctx->frame_length_type == 5 || |
221 | 91.2k | ctx->frame_length_type == 7) { |
222 | 70.2k | skip_bits(gb, 2); // mux_slot_length_coded |
223 | 70.2k | } |
224 | 98.1k | return 0; |
225 | 113k | } |
226 | | |
227 | | static int read_audio_mux_element(struct LATMContext *latmctx, |
228 | | GetBitContext *gb) |
229 | 536k | { |
230 | 536k | int err; |
231 | 536k | uint8_t use_same_mux = get_bits(gb, 1); |
232 | 536k | if (!use_same_mux) { |
233 | 378k | if ((err = read_stream_mux_config(latmctx, gb)) < 0) |
234 | 48.7k | return err; |
235 | 378k | } else if (!latmctx->aac_ctx.avctx->extradata) { |
236 | 20.3k | av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG, |
237 | 20.3k | "no decoder config found\n"); |
238 | 20.3k | return 1; |
239 | 20.3k | } |
240 | 467k | if (latmctx->audio_mux_version_A == 0) { |
241 | 113k | int mux_slot_length_bytes = read_payload_length_info(latmctx, gb); |
242 | 113k | if (mux_slot_length_bytes < 0 || mux_slot_length_bytes * 8LL > get_bits_left(gb)) { |
243 | 12.8k | av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n"); |
244 | 12.8k | return AVERROR_INVALIDDATA; |
245 | 100k | } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) { |
246 | 4.21k | av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, |
247 | 4.21k | "frame length mismatch %d << %d\n", |
248 | 4.21k | mux_slot_length_bytes * 8, get_bits_left(gb)); |
249 | 4.21k | return AVERROR_INVALIDDATA; |
250 | 4.21k | } |
251 | 113k | } |
252 | 450k | return 0; |
253 | 467k | } |
254 | | |
255 | | |
256 | | static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out, |
257 | | int *got_frame_ptr, AVPacket *avpkt) |
258 | 792k | { |
259 | 792k | struct LATMContext *latmctx = avctx->priv_data; |
260 | 792k | int muxlength, err; |
261 | 792k | GetBitContext gb; |
262 | | |
263 | 792k | if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0) |
264 | 0 | return err; |
265 | | |
266 | | // check for LOAS sync word |
267 | 792k | if (get_bits(&gb, 11) != LOAS_SYNC_WORD) |
268 | 246k | return AVERROR_INVALIDDATA; |
269 | | |
270 | 546k | muxlength = get_bits(&gb, 13) + 3; |
271 | | // not enough data, the parser should have sorted this out |
272 | 546k | if (muxlength > avpkt->size) |
273 | 9.87k | return AVERROR_INVALIDDATA; |
274 | | |
275 | 536k | if ((err = read_audio_mux_element(latmctx, &gb))) |
276 | 86.1k | return (err < 0) ? err : avpkt->size; |
277 | | |
278 | 450k | if (!latmctx->initialized) { |
279 | 34.9k | if (!avctx->extradata) { |
280 | 2.64k | *got_frame_ptr = 0; |
281 | 2.64k | return avpkt->size; |
282 | 32.2k | } else { |
283 | 32.2k | push_output_configuration(&latmctx->aac_ctx); |
284 | 32.2k | if ((err = decode_audio_specific_config( |
285 | 32.2k | &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1], |
286 | 32.2k | avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) { |
287 | 10.1k | pop_output_configuration(&latmctx->aac_ctx); |
288 | 10.1k | return err; |
289 | 10.1k | } |
290 | 22.0k | latmctx->initialized = 1; |
291 | 22.0k | } |
292 | 34.9k | } |
293 | | |
294 | 437k | if (show_bits(&gb, 12) == 0xfff) { |
295 | 690 | av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, |
296 | 690 | "ADTS header detected, probably as result of configuration " |
297 | 690 | "misparsing\n"); |
298 | 690 | return AVERROR_INVALIDDATA; |
299 | 690 | } |
300 | | |
301 | 436k | switch (latmctx->aac_ctx.oc[1].m4ac.object_type) { |
302 | 17.4k | case AOT_ER_AAC_LC: |
303 | 17.4k | case AOT_ER_AAC_LTP: |
304 | 20.8k | case AOT_ER_AAC_LD: |
305 | 25.7k | case AOT_ER_AAC_ELD: |
306 | 25.7k | err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb); |
307 | 25.7k | break; |
308 | 410k | default: |
309 | 410k | err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt); |
310 | 436k | } |
311 | 436k | if (err < 0) |
312 | 332k | return err; |
313 | | |
314 | 104k | return muxlength; |
315 | 436k | } |
316 | | |
317 | | static av_cold int latm_decode_init(AVCodecContext *avctx) |
318 | 12.3k | { |
319 | 12.3k | struct LATMContext *latmctx = avctx->priv_data; |
320 | 12.3k | int ret = ff_aac_decode_init_float(avctx); |
321 | | |
322 | 12.3k | if (avctx->extradata_size > 0) |
323 | 1.84k | latmctx->initialized = !ret; |
324 | | |
325 | 12.3k | return ret; |
326 | 12.3k | } |
327 | | |
/*
 * Note: this decoder is intended for LATM streams carried in MPEG transport
 * streams that contain a single program only; streams signalling several
 * programs or layers are rejected by the stream mux config parser.
 * More complex LATM demuxing should be done by a separate LATM demuxer.
 */
const FFCodec ff_aac_latm_decoder = {
    .p.name = "aac_latm",
    CODEC_LONG_NAME("AAC LATM (Advanced Audio Coding LATM syntax)"),
    .p.type = AVMEDIA_TYPE_AUDIO,
    .p.id = AV_CODEC_ID_AAC_LATM,
    .priv_data_size = sizeof(struct LATMContext),
    .init = latm_decode_init,
    .close = decode_close,
    FF_CODEC_DECODE_CB(latm_decode_frame),
    CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP),
    .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
    CODEC_CH_LAYOUTS_ARRAY(ff_aac_ch_layout),
    .flush = flush,
    .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
};
349 | | |
350 | | #endif /* AVCODEC_AAC_AACDEC_LATM_H */ |