/src/ffmpeg/libavformat/subtitles.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2012-2013 Clément Bœsch <u pkh me> |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "avformat.h" |
22 | | #include "subtitles.h" |
23 | | #include "avio_internal.h" |
24 | | #include "libavutil/avassert.h" |
25 | | #include "libavutil/avstring.h" |
26 | | #include "libavutil/mem.h" |
27 | | |
28 | | void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb) |
29 | 5.58M | { |
30 | 5.58M | int i; |
31 | 5.58M | r->pb = pb; |
32 | 5.58M | r->buf_pos = r->buf_len = 0; |
33 | 5.58M | r->type = FF_UTF_8; |
34 | 16.7M | for (i = 0; i < 2; i++) |
35 | 11.1M | r->buf[r->buf_len++] = avio_r8(r->pb); |
36 | 5.58M | if (strncmp("\xFF\xFE", r->buf, 2) == 0) { |
37 | 57.5k | r->type = FF_UTF16LE; |
38 | 57.5k | r->buf_pos += 2; |
39 | 5.53M | } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) { |
40 | 94.9k | r->type = FF_UTF16BE; |
41 | 94.9k | r->buf_pos += 2; |
42 | 5.43M | } else { |
43 | 5.43M | r->buf[r->buf_len++] = avio_r8(r->pb); |
44 | 5.43M | if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) { |
45 | | // UTF8 |
46 | 4.91k | r->buf_pos += 3; |
47 | 4.91k | } |
48 | 5.43M | } |
49 | 5.58M | if (s && (r->type == FF_UTF16LE || r->type == FF_UTF16BE)) |
50 | 1.96k | av_log(s, AV_LOG_INFO, |
51 | 1.96k | "UTF16 is automatically converted to UTF8, do not specify a character encoding\n"); |
52 | 5.58M | } |
53 | | |
54 | | void ff_text_init_buf(FFTextReader *r, const void *buf, size_t size) |
55 | 5.54M | { |
56 | 5.54M | ffio_init_read_context(&r->buf_pb, buf, size); |
57 | 5.54M | ff_text_init_avio(NULL, r, &r->buf_pb.pub); |
58 | 5.54M | } |
59 | | |
60 | | int64_t ff_text_pos(FFTextReader *r) |
61 | 18.9M | { |
62 | 18.9M | return avio_tell(r->pb) - r->buf_len + r->buf_pos; |
63 | 18.9M | } |
64 | | |
65 | | int ff_text_r8(FFTextReader *r) |
66 | 339M | { |
67 | 339M | uint32_t val; |
68 | 339M | uint8_t tmp; |
69 | 339M | if (r->buf_pos < r->buf_len) |
70 | 68.1M | return r->buf[r->buf_pos++]; |
71 | 270M | if (r->type == FF_UTF16LE) { |
72 | 8.90M | GET_UTF16(val, avio_rl16(r->pb), return 0;) |
73 | 262M | } else if (r->type == FF_UTF16BE) { |
74 | 11.8M | GET_UTF16(val, avio_rb16(r->pb), return 0;) |
75 | 250M | } else { |
76 | 250M | return avio_r8(r->pb); |
77 | 250M | } |
78 | 20.6M | if (!val) |
79 | 211k | return 0; |
80 | 20.4M | r->buf_pos = 0; |
81 | 20.4M | r->buf_len = 0; |
82 | 20.4M | PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;) |
83 | 20.4M | return r->buf[r->buf_pos++]; // buf_len is at least 1 |
84 | 20.6M | } |
85 | | |
86 | | void ff_text_read(FFTextReader *r, char *buf, size_t size) |
87 | 4.62M | { |
88 | 71.1M | for ( ; size > 0; size--) |
89 | 66.5M | *buf++ = ff_text_r8(r); |
90 | 4.62M | } |
91 | | |
92 | | int ff_text_eof(FFTextReader *r) |
93 | 32.4M | { |
94 | 32.4M | return r->buf_pos >= r->buf_len && avio_feof(r->pb); |
95 | 32.4M | } |
96 | | |
97 | | int ff_text_peek_r8(FFTextReader *r) |
98 | 29.2M | { |
99 | 29.2M | int c; |
100 | 29.2M | if (r->buf_pos < r->buf_len) |
101 | 16.7M | return r->buf[r->buf_pos]; |
102 | 12.4M | c = ff_text_r8(r); |
103 | 12.4M | if (!avio_feof(r->pb)) { |
104 | 12.4M | r->buf_pos = 0; |
105 | 12.4M | r->buf_len = 1; |
106 | 12.4M | r->buf[0] = c; |
107 | 12.4M | } |
108 | 12.4M | return c; |
109 | 29.2M | } |
110 | | |
111 | | AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, |
112 | | const uint8_t *event, size_t len, int merge) |
113 | 19.4M | { |
114 | 19.4M | AVPacket **subs, *sub; |
115 | | |
116 | 19.4M | av_assert1(event || len == 0); |
117 | | |
118 | 19.4M | if (merge && q->nb_subs > 0) { |
119 | | /* merge with previous event */ |
120 | | |
121 | 2.56M | int old_len; |
122 | 2.56M | sub = q->subs[q->nb_subs - 1]; |
123 | 2.56M | old_len = sub->size; |
124 | 2.56M | if (event) { |
125 | 2.56M | if (av_grow_packet(sub, len) < 0) |
126 | 0 | return NULL; |
127 | 2.56M | memcpy(sub->data + old_len, event, len); |
128 | 2.56M | } |
129 | 16.8M | } else { |
130 | | /* new event */ |
131 | | |
132 | 16.8M | if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1) |
133 | 0 | return NULL; |
134 | 16.8M | subs = av_fast_realloc(q->subs, &q->allocated_size, |
135 | 16.8M | (q->nb_subs + 1) * sizeof(*q->subs)); |
136 | 16.8M | if (!subs) |
137 | 0 | return NULL; |
138 | 16.8M | q->subs = subs; |
139 | 16.8M | sub = av_packet_alloc(); |
140 | 16.8M | if (!sub) |
141 | 0 | return NULL; |
142 | 16.8M | if (event) { |
143 | 16.4M | if (av_new_packet(sub, len) < 0) { |
144 | 0 | av_packet_free(&sub); |
145 | 0 | return NULL; |
146 | 0 | } |
147 | 16.4M | memcpy(sub->data, event, len); |
148 | 16.4M | } |
149 | 16.8M | sub->flags |= AV_PKT_FLAG_KEY; |
150 | 16.8M | sub->pts = sub->dts = 0; |
151 | 16.8M | subs[q->nb_subs++] = sub; |
152 | 16.8M | } |
153 | 19.4M | return sub; |
154 | 19.4M | } |
155 | | |
156 | | AVPacket *ff_subtitles_queue_insert_bprint(FFDemuxSubtitlesQueue *q, |
157 | | const AVBPrint *event, int merge) |
158 | 4.91M | { |
159 | 4.91M | if (!av_bprint_is_complete(event)) |
160 | 0 | return NULL; |
161 | 4.91M | return ff_subtitles_queue_insert(q, event->str, event->len, merge); |
162 | 4.91M | } |
163 | | |
164 | | static int cmp_pkt_sub_ts_pos(const void *a, const void *b) |
165 | 176M | { |
166 | 176M | const AVPacket *s1 = *(const AVPacket **)a; |
167 | 176M | const AVPacket *s2 = *(const AVPacket **)b; |
168 | 176M | if (s1->pts == s2->pts) |
169 | 121M | return FFDIFFSIGN(s1->pos, s2->pos); |
170 | 54.5M | return FFDIFFSIGN(s1->pts , s2->pts); |
171 | 176M | } |
172 | | |
173 | | static int cmp_pkt_sub_pos_ts(const void *a, const void *b) |
174 | 0 | { |
175 | 0 | const AVPacket *s1 = *(const AVPacket **)a; |
176 | 0 | const AVPacket *s2 = *(const AVPacket **)b; |
177 | 0 | if (s1->pos == s2->pos) { |
178 | 0 | if (s1->pts == s2->pts) |
179 | 0 | return 0; |
180 | 0 | return s1->pts > s2->pts ? 1 : -1; |
181 | 0 | } |
182 | 0 | return s1->pos > s2->pos ? 1 : -1; |
183 | 0 | } |
184 | | |
185 | | static void drop_dups(void *log_ctx, FFDemuxSubtitlesQueue *q) |
186 | 22.6k | { |
187 | 22.6k | int i, drop = 0; |
188 | | |
189 | 16.5M | for (i = 1; i < q->nb_subs; i++) { |
190 | 16.4M | const int last_id = i - 1 - drop; |
191 | 16.4M | const AVPacket *last = q->subs[last_id]; |
192 | | |
193 | 16.4M | if (q->subs[i]->pts == last->pts && |
194 | 16.4M | q->subs[i]->duration == last->duration && |
195 | 16.4M | q->subs[i]->stream_index == last->stream_index && |
196 | 16.4M | !strcmp(q->subs[i]->data, last->data)) { |
197 | | |
198 | 7.11M | av_packet_free(&q->subs[i]); |
199 | 7.11M | drop++; |
200 | 9.38M | } else if (drop) { |
201 | 8.42M | q->subs[last_id + 1] = q->subs[i]; |
202 | 8.42M | q->subs[i] = NULL; |
203 | 8.42M | } |
204 | 16.4M | } |
205 | | |
206 | 22.6k | if (drop) { |
207 | 2.94k | q->nb_subs -= drop; |
208 | 2.94k | av_log(log_ctx, AV_LOG_WARNING, "Dropping %d duplicated subtitle events\n", drop); |
209 | 2.94k | } |
210 | 22.6k | } |
211 | | |
212 | | void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q) |
213 | 123k | { |
214 | 123k | int i; |
215 | | |
216 | 123k | if (!q->nb_subs) |
217 | 100k | return; |
218 | | |
219 | 23.3k | qsort(q->subs, q->nb_subs, sizeof(*q->subs), |
220 | 23.3k | q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos |
221 | 23.3k | : cmp_pkt_sub_pos_ts); |
222 | 16.8M | for (i = 0; i < q->nb_subs; i++) |
223 | 16.7M | if (q->subs[i]->duration < 0 && i < q->nb_subs - 1 && q->subs[i + 1]->pts - (uint64_t)q->subs[i]->pts <= INT64_MAX) |
224 | 8.22M | q->subs[i]->duration = q->subs[i + 1]->pts - q->subs[i]->pts; |
225 | | |
226 | 23.3k | if (!q->keep_duplicates) |
227 | 22.6k | drop_dups(log_ctx, q); |
228 | 23.3k | } |
229 | | |
230 | | int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt) |
231 | 2.82M | { |
232 | 2.82M | AVPacket *sub; |
233 | 2.82M | int ret; |
234 | | |
235 | 2.82M | if (q->current_sub_idx == q->nb_subs) |
236 | 121k | return AVERROR_EOF; |
237 | 2.69M | sub = q->subs[q->current_sub_idx]; |
238 | 2.69M | if ((ret = av_packet_ref(pkt, sub)) < 0) { |
239 | 0 | return ret; |
240 | 0 | } |
241 | | |
242 | 2.69M | pkt->dts = pkt->pts; |
243 | 2.69M | q->current_sub_idx++; |
244 | 2.69M | return 0; |
245 | 2.69M | } |
246 | | |
247 | | static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts) |
248 | 0 | { |
249 | 0 | int s1 = 0, s2 = q->nb_subs - 1; |
250 | |
|
251 | 0 | if (s2 < s1) |
252 | 0 | return AVERROR(ERANGE); |
253 | | |
254 | 0 | for (;;) { |
255 | 0 | int mid; |
256 | |
|
257 | 0 | if (s1 == s2) |
258 | 0 | return s1; |
259 | 0 | if (s1 == s2 - 1) |
260 | 0 | return q->subs[s1]->pts <= q->subs[s2]->pts ? s1 : s2; |
261 | 0 | mid = (s1 + s2) / 2; |
262 | 0 | if (q->subs[mid]->pts <= ts) |
263 | 0 | s1 = mid; |
264 | 0 | else |
265 | 0 | s2 = mid; |
266 | 0 | } |
267 | 0 | } |
268 | | |
269 | | int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index, |
270 | | int64_t min_ts, int64_t ts, int64_t max_ts, int flags) |
271 | 0 | { |
272 | 0 | if (flags & AVSEEK_FLAG_BYTE) { |
273 | 0 | return AVERROR(ENOSYS); |
274 | 0 | } else if (flags & AVSEEK_FLAG_FRAME) { |
275 | 0 | if (ts < 0 || ts >= q->nb_subs) |
276 | 0 | return AVERROR(ERANGE); |
277 | 0 | q->current_sub_idx = ts; |
278 | 0 | } else { |
279 | 0 | int i, idx = search_sub_ts(q, ts); |
280 | 0 | int64_t ts_selected; |
281 | |
|
282 | 0 | if (idx < 0) |
283 | 0 | return idx; |
284 | 0 | for (i = idx; i < q->nb_subs && q->subs[i]->pts < min_ts; i++) |
285 | 0 | if (stream_index == -1 || q->subs[i]->stream_index == stream_index) |
286 | 0 | idx = i; |
287 | 0 | for (i = idx; i > 0 && q->subs[i]->pts > max_ts; i--) |
288 | 0 | if (stream_index == -1 || q->subs[i]->stream_index == stream_index) |
289 | 0 | idx = i; |
290 | |
|
291 | 0 | ts_selected = q->subs[idx]->pts; |
292 | 0 | if (ts_selected < min_ts || ts_selected > max_ts) |
293 | 0 | return AVERROR(ERANGE); |
294 | | |
295 | | /* look back in the latest subtitles for overlapping subtitles */ |
296 | 0 | for (i = idx - 1; i >= 0; i--) { |
297 | 0 | int64_t pts = q->subs[i]->pts; |
298 | 0 | if (q->subs[i]->duration <= 0 || |
299 | 0 | (stream_index != -1 && q->subs[i]->stream_index != stream_index)) |
300 | 0 | continue; |
301 | 0 | if (pts >= min_ts && pts > ts_selected - q->subs[i]->duration) |
302 | 0 | idx = i; |
303 | 0 | else |
304 | 0 | break; |
305 | 0 | } |
306 | | |
307 | | /* If the queue is used to store multiple subtitles streams (like with |
308 | | * VobSub) and the stream index is not specified, we need to make sure |
309 | | * to focus on the smallest file position offset for a same timestamp; |
310 | | * queue is ordered by pts and then filepos, so we can take the first |
311 | | * entry for a given timestamp. */ |
312 | 0 | if (stream_index == -1) |
313 | 0 | while (idx > 0 && q->subs[idx - 1]->pts == q->subs[idx]->pts) |
314 | 0 | idx--; |
315 | |
|
316 | 0 | q->current_sub_idx = idx; |
317 | 0 | } |
318 | 0 | return 0; |
319 | 0 | } |
320 | | |
321 | | void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q) |
322 | 137k | { |
323 | 137k | int i; |
324 | | |
325 | 9.85M | for (i = 0; i < q->nb_subs; i++) |
326 | 9.72M | av_packet_free(&q->subs[i]); |
327 | 137k | av_freep(&q->subs); |
328 | 137k | q->nb_subs = q->allocated_size = q->current_sub_idx = 0; |
329 | 137k | } |
330 | | |
331 | | int ff_subtitles_read_packet(AVFormatContext *s, AVPacket *pkt) |
332 | 1.93M | { |
333 | 1.93M | FFDemuxSubtitlesQueue *q = s->priv_data; |
334 | 1.93M | return ff_subtitles_queue_read_packet(q, pkt); |
335 | 1.93M | } |
336 | | |
337 | | int ff_subtitles_read_seek(AVFormatContext *s, int stream_index, |
338 | | int64_t min_ts, int64_t ts, int64_t max_ts, int flags) |
339 | 0 | { |
340 | 0 | FFDemuxSubtitlesQueue *q = s->priv_data; |
341 | 0 | return ff_subtitles_queue_seek(q, s, stream_index, |
342 | 0 | min_ts, ts, max_ts, flags); |
343 | 0 | } |
344 | | |
345 | | int ff_subtitles_read_close(AVFormatContext *s) |
346 | 102k | { |
347 | 102k | FFDemuxSubtitlesQueue *q = s->priv_data; |
348 | 102k | ff_subtitles_queue_clean(q); |
349 | 102k | return 0; |
350 | 102k | } |
351 | | |
352 | | int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c) |
353 | 3.10M | { |
354 | 3.10M | int i = 0; |
355 | 3.10M | char end_chr; |
356 | | |
357 | 3.10M | if (!*c) // cached char? |
358 | 3.02M | *c = ff_text_r8(tr); |
359 | 3.10M | if (!*c) |
360 | 10.4k | return 0; |
361 | | |
362 | 3.09M | end_chr = *c == '<' ? '>' : '<'; |
363 | 70.0M | do { |
364 | 70.0M | av_bprint_chars(buf, *c, 1); |
365 | 70.0M | *c = ff_text_r8(tr); |
366 | 70.0M | if (i == INT_MAX) |
367 | 0 | return AVERROR_INVALIDDATA; |
368 | 70.0M | i++; |
369 | 70.0M | } while (*c != end_chr && *c); |
370 | 3.09M | if (end_chr == '>') { |
371 | 2.99M | av_bprint_chars(buf, '>', 1); |
372 | 2.99M | *c = 0; |
373 | 2.99M | } |
374 | 3.09M | return av_bprint_is_complete(buf) ? i : AVERROR(ENOMEM); |
375 | 3.09M | } |
376 | | |
/**
 * Find the value of an attribute inside a tag string.
 *
 * The string is scanned one whitespace-separated token at a time (spaces
 * inside double quotes do not split tokens); the token that follows each
 * run of whitespace is compared case-insensitively against attr and must be
 * followed by '='.
 *
 * @param s    tag text to scan
 * @param attr attribute name to look for
 * @return pointer to the first character of the value, past "=" and past an
 *         opening double quote if there is one, or NULL when not found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    const size_t attr_len = strlen(attr);
    int quoted = 0;

    while (*s) {
        /* skip the current token, tracking the quote state */
        for (; *s && (quoted || !av_isspace(*s)); s++)
            quoted ^= *s == '"'; // XXX: support escaping?

        while (av_isspace(*s))
            s++;

        /* the name must match before s[attr_len] may be inspected */
        if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=')
            return s + attr_len + 1 + (s[attr_len + 1] == '"');
    }
    return NULL;
}
396 | | |
/**
 * Tell whether a character is a line break.
 *
 * @return 1 for '\r' or '\n', 0 for anything else
 */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
401 | | |
402 | | int ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf) |
403 | 1.51M | { |
404 | 1.51M | char eol_buf[5], last_was_cr = 0; |
405 | 1.51M | int n = 0, i = 0, nb_eol = 0; |
406 | | |
407 | 1.51M | av_bprint_clear(buf); |
408 | | |
409 | 43.5M | for (;;) { |
410 | 43.5M | char c = ff_text_r8(tr); |
411 | | |
412 | 43.5M | if (!c) |
413 | 1.51M | break; |
414 | | |
415 | | /* ignore all initial line breaks */ |
416 | 42.0M | if (n == 0 && is_eol(c)) |
417 | 8.47k | continue; |
418 | | |
419 | | /* line break buffering: we don't want to add the trailing \r\n */ |
420 | 42.0M | if (is_eol(c)) { |
421 | 1.59M | nb_eol += c == '\n' || last_was_cr; |
422 | 1.59M | if (nb_eol == 2) |
423 | 3.34k | break; |
424 | 1.59M | eol_buf[i++] = c; |
425 | 1.59M | if (i == sizeof(eol_buf) - 1) |
426 | 0 | break; |
427 | 1.59M | last_was_cr = c == '\r'; |
428 | 1.59M | continue; |
429 | 1.59M | } |
430 | | |
431 | | /* only one line break followed by data: we flush the line breaks |
432 | | * buffer */ |
433 | 40.4M | if (i) { |
434 | 1.58M | eol_buf[i] = 0; |
435 | 1.58M | av_bprintf(buf, "%s", eol_buf); |
436 | 1.58M | i = nb_eol = 0; |
437 | 1.58M | } |
438 | | |
439 | 40.4M | av_bprint_chars(buf, c, 1); |
440 | 40.4M | n++; |
441 | 40.4M | } |
442 | 1.51M | return av_bprint_is_complete(buf) ? 0 : AVERROR(ENOMEM); |
443 | 1.51M | } |
444 | | |
445 | | int ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) |
446 | 1.51M | { |
447 | 1.51M | FFTextReader tr; |
448 | 1.51M | tr.buf_pos = tr.buf_len = 0; |
449 | 1.51M | tr.type = 0; |
450 | 1.51M | tr.pb = pb; |
451 | 1.51M | return ff_subtitles_read_text_chunk(&tr, buf); |
452 | 1.51M | } |
453 | | |
454 | | ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size) |
455 | 16.3M | { |
456 | 16.3M | size_t cur = 0; |
457 | 16.3M | if (!size) |
458 | 0 | return 0; |
459 | 16.3M | buf[0] = '\0'; |
460 | 95.5M | while (cur + 1 < size) { |
461 | 95.5M | unsigned char c = ff_text_r8(tr); |
462 | 95.5M | if (!c) |
463 | 14.4M | return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA; |
464 | 81.0M | if (c == '\r' || c == '\n') |
465 | 1.86M | break; |
466 | 79.1M | buf[cur++] = c; |
467 | 79.1M | buf[cur] = '\0'; |
468 | 79.1M | } |
469 | 4.47M | while (ff_text_peek_r8(tr) == '\r') |
470 | 2.56M | ff_text_r8(tr); |
471 | 1.90M | if (ff_text_peek_r8(tr) == '\n') |
472 | 72.7k | ff_text_r8(tr); |
473 | 1.90M | return cur; |
474 | 16.3M | } |