/src/ffmpeg/libavformat/xwma.c
Line | Count | Source |
1 | | /* |
2 | | * xWMA demuxer |
3 | | * Copyright (c) 2011 Max Horn |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include <inttypes.h> |
23 | | #include <stdint.h> |
24 | | |
25 | | #include "libavutil/mem.h" |
26 | | #include "avformat.h" |
27 | | #include "demux.h" |
28 | | #include "internal.h" |
29 | | #include "riff.h" |
30 | | |
31 | | /* |
32 | | * Demuxer for xWMA, a Microsoft audio container used by XAudio 2. |
33 | | */ |
34 | | |
/**
 * Private demuxer state for xWMA.
 */
typedef struct XWMAContext {
    /* File position at which the payload of the data chunk ends. The header
     * reader stores INT64_MAX here when the chunk declares a size of 0; the
     * packet reader stops once the current position reaches this value. */
    int64_t data_end;
} XWMAContext;
38 | | |
39 | | static int xwma_probe(const AVProbeData *p) |
40 | 964k | { |
41 | 964k | if (!memcmp(p->buf, "RIFF", 4) && !memcmp(p->buf + 8, "XWMA", 4)) |
42 | 660 | return AVPROBE_SCORE_MAX; |
43 | 964k | return 0; |
44 | 964k | } |
45 | | |
46 | | static int xwma_read_header(AVFormatContext *s) |
47 | 4.08k | { |
48 | 4.08k | int64_t size; |
49 | 4.08k | int ret = 0; |
50 | 4.08k | uint32_t dpds_table_size = 0; |
51 | 4.08k | uint32_t *dpds_table = NULL; |
52 | 4.08k | unsigned int tag; |
53 | 4.08k | AVIOContext *pb = s->pb; |
54 | 4.08k | AVStream *st; |
55 | 4.08k | XWMAContext *xwma = s->priv_data; |
56 | 4.08k | int i; |
57 | | |
58 | | /* The following code is mostly copied from wav.c, with some |
59 | | * minor alterations. |
60 | | */ |
61 | | |
62 | | /* check RIFF header */ |
63 | 4.08k | tag = avio_rl32(pb); |
64 | 4.08k | if (tag != MKTAG('R', 'I', 'F', 'F')) |
65 | 180 | return AVERROR_INVALIDDATA; |
66 | 3.90k | avio_rl32(pb); /* file size */ |
67 | 3.90k | tag = avio_rl32(pb); |
68 | 3.90k | if (tag != MKTAG('X', 'W', 'M', 'A')) |
69 | 69 | return AVERROR_INVALIDDATA; |
70 | | |
71 | | /* parse fmt header */ |
72 | 3.83k | tag = avio_rl32(pb); |
73 | 3.83k | if (tag != MKTAG('f', 'm', 't', ' ')) |
74 | 75 | return AVERROR_INVALIDDATA; |
75 | 3.75k | size = avio_rl32(pb); |
76 | 3.75k | st = avformat_new_stream(s, NULL); |
77 | 3.75k | if (!st) |
78 | 0 | return AVERROR(ENOMEM); |
79 | | |
80 | 3.75k | ret = ff_get_wav_header(s, pb, st->codecpar, size, 0); |
81 | 3.75k | if (ret < 0) |
82 | 554 | return ret; |
83 | 3.20k | ffstream(st)->need_parsing = AVSTREAM_PARSE_NONE; |
84 | | |
85 | | /* XWMA encoder only allows a few channel/sample rate/bitrate combinations, |
86 | | * but some create identical files with fake bitrate (1ch 22050hz at |
87 | | * 20/48/192kbps are all 20kbps, with the exact same codec data). |
88 | | * Decoder needs correct bitrate to work, so it's normalized here. */ |
89 | 3.20k | if (st->codecpar->codec_id == AV_CODEC_ID_WMAV2) { |
90 | 910 | int ch = st->codecpar->ch_layout.nb_channels; |
91 | 910 | int sr = st->codecpar->sample_rate; |
92 | 910 | int br = st->codecpar->bit_rate; |
93 | | |
94 | 910 | if (ch == 1) { |
95 | 264 | if (sr == 22050 && (br==48000 || br==192000)) |
96 | 4 | br = 20000; |
97 | 260 | else if (sr == 32000 && (br==48000 || br==192000)) |
98 | 4 | br = 20000; |
99 | 256 | else if (sr == 44100 && (br==96000 || br==192000)) |
100 | 4 | br = 48000; |
101 | 264 | } |
102 | 646 | else if (ch == 2) { |
103 | 192 | if (sr == 22050 && (br==48000 || br==192000)) |
104 | 4 | br = 32000; |
105 | 188 | else if (sr == 32000 && (br==192000)) |
106 | 2 | br = 48000; |
107 | 192 | } |
108 | | |
109 | 910 | st->codecpar->bit_rate = br; |
110 | 910 | } |
111 | | |
112 | | /* Normally xWMA can only contain WMAv2 with 1/2 channels, |
113 | | * and WMAPRO with 6 channels. */ |
114 | 3.20k | if (st->codecpar->codec_id != AV_CODEC_ID_WMAV2 && |
115 | 2.29k | st->codecpar->codec_id != AV_CODEC_ID_WMAPRO) { |
116 | 2.28k | avpriv_request_sample(s, "Unexpected codec (tag %s; id %d)", |
117 | 2.28k | av_fourcc2str(st->codecpar->codec_tag), |
118 | 2.28k | st->codecpar->codec_id); |
119 | 2.28k | } else { |
120 | | /* xWMA shouldn't have extradata. But the WMA codecs require it, |
121 | | * so we provide our own fake extradata. |
122 | | * |
123 | | * First, check that there really was no extradata in the header. If |
124 | | * there was, then try to use it, after asking the user to provide a |
125 | | * sample of this unusual file. |
126 | | */ |
127 | 918 | if (st->codecpar->extradata_size != 0) { |
128 | | /* Surprise, surprise: We *did* get some extradata. No idea |
129 | | * if it will work, but just go on and try it, after asking |
130 | | * the user for a sample. |
131 | | */ |
132 | 17 | avpriv_request_sample(s, "Unexpected extradata (%d bytes)", |
133 | 17 | st->codecpar->extradata_size); |
134 | 901 | } else if (st->codecpar->codec_id == AV_CODEC_ID_WMAPRO) { |
135 | 8 | if ((ret = ff_alloc_extradata(st->codecpar, 18)) < 0) |
136 | 0 | return ret; |
137 | | |
138 | 8 | memset(st->codecpar->extradata, 0, st->codecpar->extradata_size); |
139 | 8 | st->codecpar->extradata[ 0] = st->codecpar->bits_per_coded_sample; |
140 | 8 | st->codecpar->extradata[14] = 224; |
141 | 893 | } else { |
142 | 893 | if ((ret = ff_alloc_extradata(st->codecpar, 6)) < 0) |
143 | 0 | return ret; |
144 | | |
145 | 893 | memset(st->codecpar->extradata, 0, st->codecpar->extradata_size); |
146 | | /* setup extradata with our experimentally obtained value */ |
147 | 893 | st->codecpar->extradata[4] = 31; |
148 | 893 | } |
149 | 918 | } |
150 | | |
151 | 3.20k | if (!av_channel_layout_check(&st->codecpar->ch_layout)) { |
152 | 24 | av_log(s, AV_LOG_WARNING, "Invalid channel count: %d\n", |
153 | 24 | st->codecpar->ch_layout.nb_channels); |
154 | 24 | return AVERROR_INVALIDDATA; |
155 | 24 | } |
156 | 3.17k | if (!st->codecpar->bits_per_coded_sample || st->codecpar->bits_per_coded_sample > 64) { |
157 | 523 | av_log(s, AV_LOG_WARNING, "Invalid bits_per_coded_sample: %d\n", |
158 | 523 | st->codecpar->bits_per_coded_sample); |
159 | 523 | return AVERROR_INVALIDDATA; |
160 | 523 | } |
161 | | |
162 | | /* set the sample rate */ |
163 | 2.65k | avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); |
164 | | |
165 | | /* parse the remaining RIFF chunks */ |
166 | 212k | for (;;) { |
167 | 212k | if (pb->eof_reached) { |
168 | 464 | ret = AVERROR_EOF; |
169 | 464 | goto fail; |
170 | 464 | } |
171 | | /* read next chunk tag */ |
172 | 211k | tag = avio_rl32(pb); |
173 | 211k | size = avio_rl32(pb); |
174 | 211k | if (tag == MKTAG('d', 'a', 't', 'a')) { |
175 | | /* We assume that the data chunk comes last. */ |
176 | 2.08k | break; |
177 | 209k | } else if (tag == MKTAG('d','p','d','s')) { |
178 | | /* Quoting the MSDN xWMA docs on the dpds chunk: "Contains the |
179 | | * decoded packet cumulative data size array, each element is the |
180 | | * number of bytes accumulated after the corresponding xWMA packet |
181 | | * is decoded in order." |
182 | | * |
183 | | * Each packet has size equal to st->codecpar->block_align, which in |
184 | | * all cases I saw so far was always 2230. Thus, we can use the |
185 | | * dpds data to compute a seeking index. |
186 | | */ |
187 | | |
188 | | /* Error out if there is more than one dpds chunk. */ |
189 | 295 | if (dpds_table) { |
190 | 2 | av_log(s, AV_LOG_ERROR, "two dpds chunks present\n"); |
191 | 2 | ret = AVERROR_INVALIDDATA; |
192 | 2 | goto fail; |
193 | 2 | } |
194 | | |
195 | | /* Compute the number of entries in the dpds chunk. */ |
196 | 293 | if (size & 3) { /* Size should be divisible by four */ |
197 | 93 | av_log(s, AV_LOG_WARNING, |
198 | 93 | "dpds chunk size %"PRId64" not divisible by 4\n", size); |
199 | 93 | } |
200 | 293 | dpds_table_size = size / 4; |
201 | 293 | if (dpds_table_size == 0 || dpds_table_size >= INT_MAX / 4) { |
202 | 34 | av_log(s, AV_LOG_ERROR, |
203 | 34 | "dpds chunk size %"PRId64" invalid\n", size); |
204 | 34 | return AVERROR_INVALIDDATA; |
205 | 34 | } |
206 | | |
207 | | /* Allocate some temporary storage to keep the dpds data around. |
208 | | * for processing later on. |
209 | | */ |
210 | 259 | dpds_table = av_malloc_array(dpds_table_size, sizeof(uint32_t)); |
211 | 259 | if (!dpds_table) { |
212 | 0 | return AVERROR(ENOMEM); |
213 | 0 | } |
214 | | |
215 | 58.2k | for (i = 0; i < dpds_table_size; ++i) { |
216 | 58.0k | if (avio_feof(pb)) { |
217 | 75 | ret = AVERROR_INVALIDDATA; |
218 | 75 | goto fail; |
219 | 75 | } |
220 | 57.9k | dpds_table[i] = avio_rl32(pb); |
221 | 57.9k | size -= 4; |
222 | 57.9k | } |
223 | 259 | } |
224 | 209k | avio_skip(pb, size); |
225 | 209k | } |
226 | | |
227 | | /* Determine overall data length */ |
228 | 2.08k | if (size < 0) { |
229 | 0 | ret = AVERROR_INVALIDDATA; |
230 | 0 | goto fail; |
231 | 0 | } |
232 | 2.08k | if (!size) { |
233 | 415 | xwma->data_end = INT64_MAX; |
234 | 415 | } else |
235 | 1.66k | xwma->data_end = avio_tell(pb) + size; |
236 | | |
237 | | |
238 | 2.08k | if (dpds_table && dpds_table_size) { |
239 | 174 | int64_t cur_pos; |
240 | 174 | const uint32_t bytes_per_sample |
241 | 174 | = (st->codecpar->ch_layout.nb_channels * st->codecpar->bits_per_coded_sample) >> 3; |
242 | | |
243 | | /* Estimate the duration from the total number of output bytes. */ |
244 | 174 | const uint64_t total_decoded_bytes = dpds_table[dpds_table_size - 1]; |
245 | | |
246 | 174 | if (!bytes_per_sample) { |
247 | 2 | av_log(s, AV_LOG_ERROR, |
248 | 2 | "Invalid bits_per_coded_sample %d for %d channels\n", |
249 | 2 | st->codecpar->bits_per_coded_sample, st->codecpar->ch_layout.nb_channels); |
250 | 2 | ret = AVERROR_INVALIDDATA; |
251 | 2 | goto fail; |
252 | 2 | } |
253 | | |
254 | 172 | st->duration = total_decoded_bytes / bytes_per_sample; |
255 | | |
256 | | /* Use the dpds data to build a seek table. We can only do this after |
257 | | * we know the offset to the data chunk, as we need that to determine |
258 | | * the actual offset to each input block. |
259 | | * Note: If we allowed ourselves to assume that the data chunk always |
260 | | * follows immediately after the dpds block, we could of course guess |
261 | | * the data block's start offset already while reading the dpds chunk. |
262 | | * I decided against that, just in case other chunks ever are |
263 | | * discovered. |
264 | | */ |
265 | 172 | cur_pos = avio_tell(pb); |
266 | 53.0k | for (i = 0; i < dpds_table_size; ++i) { |
267 | | /* From the number of output bytes that would accumulate in the |
268 | | * output buffer after decoding the first (i+1) packets, we compute |
269 | | * an offset / timestamp pair. |
270 | | */ |
271 | 52.8k | av_add_index_entry(st, |
272 | 52.8k | cur_pos + (i+1) * st->codecpar->block_align, /* pos */ |
273 | 52.8k | dpds_table[i] / bytes_per_sample, /* timestamp */ |
274 | 52.8k | st->codecpar->block_align, /* size */ |
275 | 52.8k | 0, /* duration */ |
276 | 52.8k | AVINDEX_KEYFRAME); |
277 | 52.8k | } |
278 | 1.90k | } else if (st->codecpar->bit_rate) { |
279 | | /* No dpds chunk was present (or only an empty one), so estimate |
280 | | * the total duration using the average bits per sample and the |
281 | | * total data length. |
282 | | */ |
283 | 1.80k | st->duration = av_rescale((size<<3), st->codecpar->sample_rate, st->codecpar->bit_rate); |
284 | 1.80k | } |
285 | | |
286 | 2.62k | fail: |
287 | 2.62k | av_free(dpds_table); |
288 | | |
289 | 2.62k | return ret; |
290 | 2.08k | } |
291 | | |
292 | | static int xwma_read_packet(AVFormatContext *s, AVPacket *pkt) |
293 | 881k | { |
294 | 881k | int ret, size; |
295 | 881k | int64_t left; |
296 | 881k | AVStream *st; |
297 | 881k | XWMAContext *xwma = s->priv_data; |
298 | | |
299 | 881k | st = s->streams[0]; |
300 | | |
301 | 881k | left = xwma->data_end - avio_tell(s->pb); |
302 | 881k | if (left <= 0) { |
303 | 48 | return AVERROR_EOF; |
304 | 48 | } |
305 | | |
306 | | /* read a single block; the default block size is 2230. */ |
307 | 881k | size = (st->codecpar->block_align > 1) ? st->codecpar->block_align : 2230; |
308 | 881k | size = FFMIN(size, left); |
309 | | |
310 | 881k | ret = av_get_packet(s->pb, pkt, size); |
311 | 881k | if (ret < 0) |
312 | 3.75k | return ret; |
313 | | |
314 | 877k | pkt->stream_index = 0; |
315 | 877k | return ret; |
316 | 881k | } |
317 | | |
318 | | const FFInputFormat ff_xwma_demuxer = { |
319 | | .p.name = "xwma", |
320 | | .p.long_name = NULL_IF_CONFIG_SMALL("Microsoft xWMA"), |
321 | | .priv_data_size = sizeof(XWMAContext), |
322 | | .read_probe = xwma_probe, |
323 | | .read_header = xwma_read_header, |
324 | | .read_packet = xwma_read_packet, |
325 | | }; |