/src/ffmpeg/libavformat/iamf_reader.c
Line | Count | Source |
1 | | /* |
2 | | * Immersive Audio Model and Formats demuxing utils |
3 | | * Copyright (c) 2024 James Almer <jamrial@gmail.com> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include "libavutil/avassert.h" |
23 | | #include "libavutil/intreadwrite.h" |
24 | | #include "libavutil/log.h" |
25 | | #include "libavutil/mem.h" |
26 | | #include "libavcodec/mathops.h" |
27 | | #include "libavcodec/packet.h" |
28 | | #include "avformat.h" |
29 | | #include "avio_internal.h" |
30 | | #include "iamf.h" |
31 | | #include "iamf_parse.h" |
32 | | #include "iamf_reader.h" |
33 | | |
34 | | static AVStream *find_stream_by_id(AVFormatContext *s, int id, int stream_id_offset) |
35 | 58.8k | { |
36 | 86.7k | for (int i = 0; i < s->nb_streams; i++) |
37 | 86.4k | if (s->streams[i]->id == id + stream_id_offset) |
38 | 58.6k | return s->streams[i]; |
39 | | |
40 | 253 | av_log(s, AV_LOG_ERROR, "Invalid stream id %d\n", id); |
41 | 253 | return NULL; |
42 | 58.8k | } |
43 | | |
44 | | static int audio_frame_obu(AVFormatContext *s, const IAMFDemuxContext *c, |
45 | | AVIOContext *pb, AVPacket *pkt, |
46 | | int len, enum IAMF_OBU_Type type, |
47 | | unsigned skip_samples, unsigned discard_padding, |
48 | | int stream_id_offset, int id_in_bitstream) |
49 | 58.8k | { |
50 | 58.8k | AVStream *st; |
51 | 58.8k | int ret, audio_substream_id; |
52 | | |
53 | 58.8k | if (id_in_bitstream) { |
54 | 3.68k | unsigned explicit_audio_substream_id; |
55 | 3.68k | int64_t pos = avio_tell(pb); |
56 | 3.68k | explicit_audio_substream_id = ffio_read_leb(pb); |
57 | 3.68k | len -= avio_tell(pb) - pos; |
58 | 3.68k | audio_substream_id = explicit_audio_substream_id; |
59 | 3.68k | } else |
60 | 55.1k | audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0; |
61 | | |
62 | 58.8k | st = find_stream_by_id(s, audio_substream_id, stream_id_offset); |
63 | 58.8k | if (!st) |
64 | 253 | return AVERROR_INVALIDDATA; |
65 | | |
66 | 58.6k | ret = av_get_packet(pb, pkt, len); |
67 | 58.6k | if (ret < 0) |
68 | 6 | return ret; |
69 | 58.6k | if (ret != len) |
70 | 479 | return AVERROR_INVALIDDATA; |
71 | | |
72 | 58.1k | if (skip_samples || discard_padding) { |
73 | 835 | uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10); |
74 | 835 | if (!side_data) |
75 | 0 | return AVERROR(ENOMEM); |
76 | 835 | AV_WL32A(side_data, skip_samples); |
77 | 835 | AV_WL32A(side_data + 4, discard_padding); |
78 | 835 | } |
79 | 58.1k | if (c->mix) { |
80 | 21.2k | uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix_size); |
81 | 21.2k | if (!side_data) |
82 | 0 | return AVERROR(ENOMEM); |
83 | 21.2k | memcpy(side_data, c->mix, c->mix_size); |
84 | 21.2k | } |
85 | 58.1k | if (c->demix) { |
86 | 241 | uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix_size); |
87 | 241 | if (!side_data) |
88 | 0 | return AVERROR(ENOMEM); |
89 | 241 | memcpy(side_data, c->demix, c->demix_size); |
90 | 241 | } |
91 | 58.1k | if (c->recon) { |
92 | 21.3k | uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon_size); |
93 | 21.3k | if (!side_data) |
94 | 0 | return AVERROR(ENOMEM); |
95 | 21.3k | memcpy(side_data, c->recon, c->recon_size); |
96 | 21.3k | } |
97 | | |
98 | 58.1k | if (st->discard == AVDISCARD_ALL) |
99 | 0 | pkt->flags |= AV_PKT_FLAG_DISCARD; |
100 | | |
101 | 58.1k | pkt->stream_index = st->index; |
102 | 58.1k | return 0; |
103 | 58.1k | } |
104 | | |
105 | | static int parameter_block_obu(AVFormatContext *s, IAMFDemuxContext *c, |
106 | | AVIOContext *pbc, int len) |
107 | 4.66k | { |
108 | 4.66k | const IAMFAudioElement *audio_element; |
109 | 4.66k | const IAMFParamDefinition *param_definition; |
110 | 4.66k | const AVIAMFParamDefinition *param; |
111 | 4.66k | AVIAMFParamDefinition *out_param = NULL; |
112 | 4.66k | FFIOContext b; |
113 | 4.66k | AVIOContext *pb; |
114 | 4.66k | uint8_t *buf; |
115 | 4.66k | unsigned int duration, constant_subblock_duration; |
116 | 4.66k | unsigned int total_duration = 0; |
117 | 4.66k | unsigned int nb_subblocks; |
118 | 4.66k | unsigned int parameter_id; |
119 | 4.66k | size_t out_param_size; |
120 | 4.66k | int ret; |
121 | | |
122 | 4.66k | buf = av_malloc(len); |
123 | 4.66k | if (!buf) |
124 | 0 | return AVERROR(ENOMEM); |
125 | | |
126 | 4.66k | ret = avio_read(pbc, buf, len); |
127 | 4.66k | if (ret != len) { |
128 | 54 | if (ret >= 0) |
129 | 53 | ret = AVERROR_INVALIDDATA; |
130 | 54 | goto fail; |
131 | 54 | } |
132 | | |
133 | 4.61k | ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL); |
134 | 4.61k | pb = &b.pub; |
135 | | |
136 | 4.61k | parameter_id = ffio_read_leb(pb); |
137 | 4.61k | param_definition = ff_iamf_get_param_definition(&c->iamf, parameter_id); |
138 | 4.61k | if (!param_definition) { |
139 | 1.79k | av_log(s, AV_LOG_VERBOSE, "Non existent parameter_id %d referenced in a parameter block. Ignoring\n", |
140 | 1.79k | parameter_id); |
141 | 1.79k | ret = 0; |
142 | 1.79k | goto fail; |
143 | 1.79k | } |
144 | | |
145 | 2.82k | audio_element = param_definition->audio_element; |
146 | 2.82k | param = param_definition->param; |
147 | 2.82k | if (!param_definition->mode) { |
148 | 1.72k | duration = ffio_read_leb(pb); |
149 | 1.72k | if (!duration) { |
150 | 11 | ret = AVERROR_INVALIDDATA; |
151 | 11 | goto fail; |
152 | 11 | } |
153 | 1.71k | if (audio_element) { |
154 | 1.02k | const IAMFCodecConfig *codec_config = ff_iamf_get_codec_config(&c->iamf, audio_element->codec_config_id); |
155 | 1.02k | if (duration > av_rescale(codec_config->nb_samples, codec_config->sample_rate, param->parameter_rate)) { |
156 | 73 | av_log(s, AV_LOG_ERROR, "Invalid block duration in parameter_id %u\n", parameter_id); |
157 | 73 | ret = AVERROR_INVALIDDATA; |
158 | 73 | goto fail; |
159 | 73 | } |
160 | 1.02k | } |
161 | 1.64k | constant_subblock_duration = ffio_read_leb(pb); |
162 | 1.64k | if (constant_subblock_duration == 0) |
163 | 71 | nb_subblocks = ffio_read_leb(pb); |
164 | 1.57k | else { |
165 | 1.57k | if (constant_subblock_duration > duration) { |
166 | 40 | av_log(s, AV_LOG_ERROR, "Invalid block duration in parameter_id %u\n", parameter_id); |
167 | 40 | ret = AVERROR_INVALIDDATA; |
168 | 40 | goto fail; |
169 | 40 | } |
170 | 1.53k | nb_subblocks = duration / constant_subblock_duration; |
171 | 1.53k | total_duration = duration; |
172 | 1.53k | } |
173 | 1.64k | } else { |
174 | 1.09k | duration = param->duration; |
175 | 1.09k | constant_subblock_duration = param->constant_subblock_duration; |
176 | 1.09k | nb_subblocks = param->nb_subblocks; |
177 | 1.09k | } |
178 | | |
179 | 2.69k | if (nb_subblocks > duration) { |
180 | 12 | av_log(s, AV_LOG_ERROR, "Invalid duration or subblock count in parameter_id %u\n", parameter_id); |
181 | 12 | ret = AVERROR_INVALIDDATA; |
182 | 12 | goto fail; |
183 | 12 | } |
184 | | |
185 | 2.68k | out_param = av_iamf_param_definition_alloc(param->type, nb_subblocks, &out_param_size); |
186 | 2.68k | if (!out_param) { |
187 | 8 | ret = AVERROR(ENOMEM); |
188 | 8 | goto fail; |
189 | 8 | } |
190 | | |
191 | 2.67k | out_param->parameter_id = param->parameter_id; |
192 | 2.67k | out_param->type = param->type; |
193 | 2.67k | out_param->parameter_rate = param->parameter_rate; |
194 | 2.67k | out_param->duration = duration; |
195 | 2.67k | out_param->constant_subblock_duration = constant_subblock_duration; |
196 | 2.67k | out_param->nb_subblocks = nb_subblocks; |
197 | | |
198 | 345k | for (int i = 0; i < nb_subblocks; i++) { |
199 | 344k | void *subblock = av_iamf_param_definition_get_subblock(out_param, i); |
200 | 344k | unsigned int subblock_duration = constant_subblock_duration; |
201 | | |
202 | 344k | if (!param_definition->mode && !constant_subblock_duration) { |
203 | 1.07k | subblock_duration = ffio_read_leb(pb); |
204 | 1.07k | if (duration - total_duration > subblock_duration) { |
205 | 12 | av_log(s, AV_LOG_ERROR, "Invalid subblock durations in parameter_id %u\n", parameter_id); |
206 | 12 | ret = AVERROR_INVALIDDATA; |
207 | 12 | goto fail; |
208 | 12 | } |
209 | 1.06k | total_duration += subblock_duration; |
210 | 342k | } else if (i == nb_subblocks - 1) |
211 | 2.55k | subblock_duration = duration - i * constant_subblock_duration; |
212 | | |
213 | 344k | switch (param->type) { |
214 | 323k | case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: { |
215 | 323k | AVIAMFMixGain *mix = subblock; |
216 | | |
217 | 323k | mix->animation_type = ffio_read_leb(pb); |
218 | 323k | if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) { |
219 | 1.59k | ret = 0; |
220 | 1.59k | av_free(out_param); |
221 | 1.59k | goto fail; |
222 | 1.59k | } |
223 | | |
224 | 322k | mix->start_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8); |
225 | 322k | if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR) |
226 | 151 | mix->end_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8); |
227 | 322k | if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) { |
228 | 138 | mix->control_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8); |
229 | 138 | mix->control_point_relative_time = av_make_q(avio_r8(pb), 1 << 8); |
230 | 138 | } |
231 | 322k | mix->subblock_duration = subblock_duration; |
232 | 322k | break; |
233 | 323k | } |
234 | 423 | case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: { |
235 | 423 | AVIAMFDemixingInfo *demix = subblock; |
236 | | |
237 | 423 | demix->dmixp_mode = avio_r8(pb) >> 5; |
238 | 423 | demix->subblock_duration = subblock_duration; |
239 | 423 | break; |
240 | 323k | } |
241 | 19.9k | case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: { |
242 | 19.9k | AVIAMFReconGain *recon = subblock; |
243 | 19.9k | const AVIAMFAudioElement *element; |
244 | | |
245 | 19.9k | av_assert0(audio_element && audio_element->celement); |
246 | 19.9k | element = audio_element->celement; |
247 | 41.0k | for (int i = 0; i < element->nb_layers; i++) { |
248 | 21.0k | const AVIAMFLayer *layer = element->layers[i]; |
249 | 21.0k | if (layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN) { |
250 | 19.9k | unsigned int recon_gain_flags = ffio_read_leb(pb); |
251 | 19.9k | unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80); |
252 | 19.9k | recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1); |
253 | 159k | for (int j = 0; j < bitcount; j++) { |
254 | 139k | if (recon_gain_flags & (1 << j)) |
255 | 5.85k | recon->recon_gain[i][j] = avio_r8(pb); |
256 | 139k | } |
257 | 19.9k | } |
258 | 21.0k | } |
259 | 19.9k | recon->subblock_duration = subblock_duration; |
260 | 19.9k | break; |
261 | 19.9k | } |
262 | 0 | default: |
263 | 0 | av_unreachable("param_definition_type should have been checked in descriptor"); |
264 | 344k | } |
265 | 344k | } |
266 | | |
267 | 1.06k | len -= avio_tell(pb); |
268 | 1.06k | if (len) { |
269 | 138 | int level = (s->error_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING; |
270 | 138 | av_log(s, level, "Underread in parameter_block_obu. %d bytes left at the end\n", len); |
271 | 138 | } |
272 | | |
273 | 1.06k | if (!param_definition->mode && !constant_subblock_duration && total_duration != duration) { |
274 | 11 | av_log(s, AV_LOG_ERROR, "Invalid duration in parameter block\n"); |
275 | 11 | ret = AVERROR_INVALIDDATA; |
276 | 11 | goto fail; |
277 | 11 | } |
278 | | |
279 | 1.05k | switch (param->type) { |
280 | 361 | case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: |
281 | 361 | av_free(c->mix); |
282 | 361 | c->mix = out_param; |
283 | 361 | c->mix_size = out_param_size; |
284 | 361 | break; |
285 | 52 | case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: |
286 | 52 | av_free(c->demix); |
287 | 52 | c->demix = out_param; |
288 | 52 | c->demix_size = out_param_size; |
289 | 52 | break; |
290 | 642 | case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: |
291 | 642 | av_free(c->recon); |
292 | 642 | c->recon = out_param; |
293 | 642 | c->recon_size = out_param_size; |
294 | 642 | break; |
295 | 0 | default: |
296 | 0 | av_unreachable("param_definition_type should have been checked in descriptor"); |
297 | 1.05k | } |
298 | | |
299 | 1.05k | ret = 0; |
300 | 4.66k | fail: |
301 | 4.66k | if (ret < 0) |
302 | 221 | av_free(out_param); |
303 | 4.66k | av_free(buf); |
304 | | |
305 | 4.66k | return ret; |
306 | 1.05k | } |
307 | | |
308 | | int ff_iamf_read_packet(AVFormatContext *s, IAMFDemuxContext *c, |
309 | | AVIOContext *pb, int max_size, int stream_id_offset, AVPacket *pkt) |
310 | 61.2k | { |
311 | 61.2k | int read = 0; |
312 | | |
313 | 459k | while (1) { |
314 | 459k | uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE] = {0}; |
315 | 459k | enum IAMF_OBU_Type type; |
316 | 459k | unsigned obu_size; |
317 | 459k | unsigned skip_samples, discard_padding; |
318 | 459k | int ret, len, size, start_pos; |
319 | | |
320 | 459k | ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size)); |
321 | 459k | if (ret < 0) |
322 | 0 | return ret; |
323 | 459k | size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size)); |
324 | 459k | if (size < 0) |
325 | 643 | return size; |
326 | 459k | if (size != FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size)) |
327 | 492 | return AVERROR_INVALIDDATA; |
328 | | |
329 | 458k | len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, |
330 | 458k | &skip_samples, &discard_padding); |
331 | 458k | if (len < 0 || obu_size > max_size || len > INT_MAX - read) { |
332 | 540 | av_log(s, AV_LOG_ERROR, "Failed to read obu\n"); |
333 | 540 | return len < 0 ? len : AVERROR_INVALIDDATA; |
334 | 540 | } |
335 | 458k | avio_seek(pb, -(size - start_pos), SEEK_CUR); |
336 | | |
337 | 458k | read += len; |
338 | 458k | if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17) { |
339 | 58.8k | ret = audio_frame_obu(s, c, pb, pkt, obu_size, type, |
340 | 58.8k | skip_samples, discard_padding, stream_id_offset, |
341 | 58.8k | type == IAMF_OBU_IA_AUDIO_FRAME); |
342 | 58.8k | if (ret < 0) |
343 | 738 | return ret; |
344 | 58.1k | return read; |
345 | 399k | } else if (type == IAMF_OBU_IA_PARAMETER_BLOCK) { |
346 | 4.66k | ret = parameter_block_obu(s, c, pb, obu_size); |
347 | 4.66k | if (ret < 0) |
348 | 221 | return ret; |
349 | 394k | } else if (type == IAMF_OBU_IA_TEMPORAL_DELIMITER) { |
350 | 1.86k | av_freep(&c->mix); |
351 | 1.86k | c->mix_size = 0; |
352 | 1.86k | av_freep(&c->demix); |
353 | 1.86k | c->demix_size = 0; |
354 | 1.86k | av_freep(&c->recon); |
355 | 1.86k | c->recon_size = 0; |
356 | 392k | } else { |
357 | 392k | int64_t offset = avio_skip(pb, obu_size); |
358 | 392k | if (offset < 0) |
359 | 107 | return offset; |
360 | 392k | } |
361 | 399k | max_size -= len; |
362 | 399k | if (max_size < 0) |
363 | 3 | return AVERROR_INVALIDDATA; |
364 | 399k | if (!max_size) |
365 | 344 | break; |
366 | 399k | } |
367 | | |
368 | 344 | return read; |
369 | 61.2k | } |
370 | | |
371 | | void ff_iamf_read_deinit(IAMFDemuxContext *c) |
372 | 5.16k | { |
373 | 5.16k | IAMFContext *const iamf = &c->iamf; |
374 | | |
375 | 5.16k | ff_iamf_uninit_context(iamf); |
376 | | |
377 | 5.16k | av_freep(&c->mix); |
378 | 5.16k | c->mix_size = 0; |
379 | 5.16k | av_freep(&c->demix); |
380 | 5.16k | c->demix_size = 0; |
381 | 5.16k | av_freep(&c->recon); |
382 | 5.16k | c->recon_size = 0; |
383 | 5.16k | } |