/src/ffmpeg/libavcodec/movtextdec.c
Line | Count | Source |
1 | | /* |
2 | | * 3GPP TS 26.245 Timed Text decoder |
3 | | * Copyright (c) 2012 Philip Langdale <philipl@overt.org> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include "avcodec.h" |
23 | | #include "ass.h" |
24 | | #include "libavutil/opt.h" |
25 | | #include "libavutil/avstring.h" |
26 | | #include "libavutil/common.h" |
27 | | #include "libavutil/bprint.h" |
28 | | #include "libavutil/intreadwrite.h" |
29 | | #include "libavutil/mem.h" |
30 | | #include "bytestream.h" |
31 | | #include "codec_internal.h" |
32 | | |
/* Face-style bits found in the flags byte of a style record. */
#define STYLE_FLAG_BOLD      0x01
#define STYLE_FLAG_ITALIC    0x02
#define STYLE_FLAG_UNDERLINE 0x04

/* Number of extradata bytes consumed by the fixed part of the tx3g header;
 * subtracted from extradata_size before the font table entries are read. */
#define BOX_SIZE_INITIAL 40

/* Bits of MovTextContext.box_flags: which modifier boxes were decoded. */
#define STYL_BOX 0x01
#define HLIT_BOX 0x02
#define HCLR_BOX 0x04
#define TWRP_BOX 0x08

/* Alignment codes stored in MovTextDefault.alignment and handed to
 * ff_ass_subtitle_header_full(). */
enum {
    BOTTOM_LEFT   = 1,
    BOTTOM_CENTER = 2,
    BOTTOM_RIGHT  = 3,
    MIDDLE_LEFT   = 4,
    MIDDLE_CENTER = 5,
    MIDDLE_RIGHT  = 6,
    TOP_LEFT      = 7,
    TOP_CENTER    = 8,
    TOP_RIGHT     = 9
};

/* Swap the first and third byte of a 24-bit colour (0xRRGGBB <-> 0xBBGGRR). */
#define RGB_TO_BGR(c) ((((c) & 0x0000ff) << 16) | ((c) & 0x00ff00) | (((c) >> 16) & 0x0000ff))
55 | | |
/* One entry of the font table read from the tx3g extradata. */
typedef struct FontRecord {
    uint16_t font_id; /* identifier that style records refer to */
    char    *font;    /* heap-allocated, NUL-terminated font name */
} FontRecord;
60 | | |
/* One style record: the range it covers plus the decoded style fields. */
typedef struct StyleBox {
    uint16_t start;     /* first character position the style applies to */
    uint16_t end;       /* character position at which the style stops */
    uint8_t  flags;     /* raw face-style flags byte */
    uint8_t  bold;      /* 1 if STYLE_FLAG_BOLD is set in flags, else 0 */
    uint8_t  italic;    /* 1 if STYLE_FLAG_ITALIC is set in flags, else 0 */
    uint8_t  underline; /* 1 if STYLE_FLAG_UNDERLINE is set in flags, else 0 */
    int      color;     /* 24-bit text colour after RGB_TO_BGR() */
    uint8_t  alpha;     /* alpha byte as read from the record */
    uint8_t  fontsize;  /* font size byte as read from the record */
    uint16_t font_id;   /* font identifier, matched against FontRecord.font_id */
} StyleBox;
73 | | |
74 | | typedef struct { |
75 | | StyleBox style; |
76 | | const char *font; |
77 | | int back_color; |
78 | | uint8_t back_alpha; |
79 | | int alignment; |
80 | | } MovTextDefault; |
81 | | |
/* Payload of an 'hlit' box: the highlighted character range. */
typedef struct HighlightBox {
    uint16_t hlit_start; /* character position where highlighting begins */
    uint16_t hlit_end;   /* character position where highlighting ends */
} HighlightBox;
86 | | |
/* Payload of an 'hclr' box: four colour bytes copied verbatim from the box. */
typedef struct HilightcolorBox {
    uint8_t hlit_color[4];
} HilightcolorBox;
90 | | |
/* Payload of a 'twrp' box: a single wrap-mode byte. */
typedef struct TextWrapBox {
    uint8_t wrap_flag; /* 1 selects "{\q1}", any other value "{\q2}" */
} TextWrapBox;
94 | | |
95 | | typedef struct { |
96 | | AVClass *class; |
97 | | StyleBox *s; |
98 | | HighlightBox h; |
99 | | HilightcolorBox c; |
100 | | FontRecord *ftab; |
101 | | TextWrapBox w; |
102 | | MovTextDefault d; |
103 | | uint8_t box_flags; |
104 | | uint16_t style_entries, ftab_entries; |
105 | | int readorder; |
106 | | int frame_width; |
107 | | int frame_height; |
108 | | } MovTextContext; |
109 | | |
110 | | typedef struct { |
111 | | uint32_t type; |
112 | | unsigned base_size; |
113 | | int (*decode)(const uint8_t *tsmb, MovTextContext *m, uint64_t size); |
114 | | } Box; |
115 | | |
116 | | static void mov_text_cleanup(MovTextContext *m) |
117 | 51.3k | { |
118 | 51.3k | if (m->box_flags & STYL_BOX) { |
119 | 15.8k | av_freep(&m->s); |
120 | 15.8k | m->style_entries = 0; |
121 | 15.8k | } |
122 | 51.3k | } |
123 | | |
124 | | static void mov_text_cleanup_ftab(MovTextContext *m) |
125 | 2.18k | { |
126 | 1.57M | for (unsigned i = 0; i < m->ftab_entries; i++) |
127 | 1.56M | av_freep(&m->ftab[i].font); |
128 | 2.18k | av_freep(&m->ftab); |
129 | 2.18k | m->ftab_entries = 0; |
130 | 2.18k | } |
131 | | |
132 | | static void mov_text_parse_style_record(StyleBox *style, const uint8_t **ptr) |
133 | 16.6k | { |
134 | | // fontID |
135 | 16.6k | style->font_id = bytestream_get_be16(ptr); |
136 | | // face-style-flags |
137 | 16.6k | style->flags = bytestream_get_byte(ptr); |
138 | 16.6k | style->bold = !!(style->flags & STYLE_FLAG_BOLD); |
139 | 16.6k | style->italic = !!(style->flags & STYLE_FLAG_ITALIC); |
140 | 16.6k | style->underline = !!(style->flags & STYLE_FLAG_UNDERLINE); |
141 | | // fontsize |
142 | 16.6k | style->fontsize = bytestream_get_byte(ptr); |
143 | | // Primary color |
144 | 16.6k | style->color = bytestream_get_be24(ptr); |
145 | 16.6k | style->color = RGB_TO_BGR(style->color); |
146 | 16.6k | style->alpha = bytestream_get_byte(ptr); |
147 | 16.6k | } |
148 | | |
149 | | static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m) |
150 | 2.13k | { |
151 | 2.13k | const uint8_t *tx3g_ptr = avctx->extradata; |
152 | 2.13k | int i, j = -1, font_length, remaining = avctx->extradata_size - BOX_SIZE_INITIAL; |
153 | 2.13k | int8_t v_align, h_align; |
154 | 2.13k | unsigned ftab_entries; |
155 | | |
156 | 2.13k | m->ftab_entries = 0; |
157 | 2.13k | if (remaining < 0) |
158 | 1.85k | return -1; |
159 | | |
160 | | // Display Flags |
161 | 282 | tx3g_ptr += 4; |
162 | | // Alignment |
163 | 282 | h_align = bytestream_get_byte(&tx3g_ptr); |
164 | 282 | v_align = bytestream_get_byte(&tx3g_ptr); |
165 | 282 | if (h_align == 0) { |
166 | 81 | if (v_align == 0) |
167 | 43 | m->d.alignment = TOP_LEFT; |
168 | 81 | if (v_align == 1) |
169 | 4 | m->d.alignment = MIDDLE_LEFT; |
170 | 81 | if (v_align == -1) |
171 | 3 | m->d.alignment = BOTTOM_LEFT; |
172 | 81 | } |
173 | 282 | if (h_align == 1) { |
174 | 25 | if (v_align == 0) |
175 | 5 | m->d.alignment = TOP_CENTER; |
176 | 25 | if (v_align == 1) |
177 | 2 | m->d.alignment = MIDDLE_CENTER; |
178 | 25 | if (v_align == -1) |
179 | 1 | m->d.alignment = BOTTOM_CENTER; |
180 | 25 | } |
181 | 282 | if (h_align == -1) { |
182 | 36 | if (v_align == 0) |
183 | 3 | m->d.alignment = TOP_RIGHT; |
184 | 36 | if (v_align == 1) |
185 | 3 | m->d.alignment = MIDDLE_RIGHT; |
186 | 36 | if (v_align == -1) |
187 | 11 | m->d.alignment = BOTTOM_RIGHT; |
188 | 36 | } |
189 | | // Background Color |
190 | 282 | m->d.back_color = bytestream_get_be24(&tx3g_ptr); |
191 | 282 | m->d.back_color = RGB_TO_BGR(m->d.back_color); |
192 | 282 | m->d.back_alpha = bytestream_get_byte(&tx3g_ptr); |
193 | | // BoxRecord |
194 | 282 | tx3g_ptr += 8; |
195 | | // StyleRecord |
196 | 282 | tx3g_ptr += 4; |
197 | 282 | mov_text_parse_style_record(&m->d.style, &tx3g_ptr); |
198 | | // FontRecord |
199 | | // FontRecord Size |
200 | 282 | tx3g_ptr += 4; |
201 | | // ftab |
202 | 282 | tx3g_ptr += 4; |
203 | | |
204 | | // In case of broken header, init default font |
205 | 282 | m->d.font = ASS_DEFAULT_FONT; |
206 | | |
207 | 282 | ftab_entries = bytestream_get_be16(&tx3g_ptr); |
208 | 282 | if (!ftab_entries) |
209 | 37 | return 0; |
210 | 245 | remaining -= 3 * ftab_entries; |
211 | 245 | if (remaining < 0) |
212 | 79 | return AVERROR_INVALIDDATA; |
213 | 166 | m->ftab = av_calloc(ftab_entries, sizeof(*m->ftab)); |
214 | 166 | if (!m->ftab) |
215 | 0 | return AVERROR(ENOMEM); |
216 | 166 | m->ftab_entries = ftab_entries; |
217 | | |
218 | 1.31M | for (i = 0; i < m->ftab_entries; i++) { |
219 | 1.31M | m->ftab[i].font_id = bytestream_get_be16(&tx3g_ptr); |
220 | 1.31M | if (m->ftab[i].font_id == m->d.style.font_id) |
221 | 203k | j = i; |
222 | 1.31M | font_length = bytestream_get_byte(&tx3g_ptr); |
223 | | |
224 | 1.31M | remaining -= font_length; |
225 | 1.31M | if (remaining < 0) { |
226 | 47 | mov_text_cleanup_ftab(m); |
227 | 47 | return -1; |
228 | 47 | } |
229 | 1.31M | m->ftab[i].font = av_malloc(font_length + 1); |
230 | 1.31M | if (!m->ftab[i].font) { |
231 | 0 | mov_text_cleanup_ftab(m); |
232 | 0 | return AVERROR(ENOMEM); |
233 | 0 | } |
234 | 1.31M | bytestream_get_buffer(&tx3g_ptr, m->ftab[i].font, font_length); |
235 | 1.31M | m->ftab[i].font[font_length] = '\0'; |
236 | 1.31M | } |
237 | 119 | if (j >= 0) |
238 | 60 | m->d.font = m->ftab[j].font; |
239 | 119 | return 0; |
240 | 166 | } |
241 | | |
242 | | static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, uint64_t size) |
243 | 613 | { |
244 | 613 | m->box_flags |= TWRP_BOX; |
245 | 613 | m->w.wrap_flag = bytestream_get_byte(&tsmb); |
246 | 613 | return 0; |
247 | 613 | } |
248 | | |
249 | | static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, uint64_t size) |
250 | 1.02k | { |
251 | 1.02k | m->box_flags |= HLIT_BOX; |
252 | 1.02k | m->h.hlit_start = bytestream_get_be16(&tsmb); |
253 | 1.02k | m->h.hlit_end = bytestream_get_be16(&tsmb); |
254 | 1.02k | return 0; |
255 | 1.02k | } |
256 | | |
257 | | static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, uint64_t size) |
258 | 603 | { |
259 | 603 | m->box_flags |= HCLR_BOX; |
260 | 603 | bytestream_get_buffer(&tsmb, m->c.hlit_color, 4); |
261 | 603 | return 0; |
262 | 603 | } |
263 | | |
264 | | static int styles_equivalent(const StyleBox *a, const StyleBox *b) |
265 | 21.3k | { |
266 | 198k | #define CMP(field) ((a)->field == (b)->field) |
267 | 21.3k | return CMP(bold) && CMP(italic) && CMP(underline) && CMP(color) && |
268 | 4.72k | CMP(alpha) && CMP(fontsize) && CMP(font_id); |
269 | 21.3k | #undef CMP |
270 | 21.3k | } |
271 | | |
272 | | static int decode_styl(const uint8_t *tsmb, MovTextContext *m, uint64_t size) |
273 | 11.1k | { |
274 | 11.1k | int i; |
275 | 11.1k | int style_entries = bytestream_get_be16(&tsmb); |
276 | 11.1k | StyleBox *tmp; |
277 | | |
278 | | // A single style record is of length 12 bytes. |
279 | 11.1k | if (2 + style_entries * 12 > size) |
280 | 458 | return -1; |
281 | | |
282 | 10.6k | tmp = av_realloc_array(m->s, style_entries, sizeof(*m->s)); |
283 | 10.6k | if (!tmp) |
284 | 0 | return AVERROR(ENOMEM); |
285 | 10.6k | m->s = tmp; |
286 | 10.6k | m->style_entries = style_entries; |
287 | | |
288 | 10.6k | m->box_flags |= STYL_BOX; |
289 | 27.7k | for(i = 0; i < m->style_entries; i++) { |
290 | 18.0k | StyleBox *style = &m->s[i]; |
291 | | |
292 | 18.0k | style->start = bytestream_get_be16(&tsmb); |
293 | 18.0k | style->end = bytestream_get_be16(&tsmb); |
294 | 18.0k | if (style->end < style->start || |
295 | 17.5k | (i && style->start < m->s[i - 1].end)) { |
296 | 955 | mov_text_cleanup(m); |
297 | 955 | return AVERROR_INVALIDDATA; |
298 | 955 | } |
299 | 17.1k | if (style->start == style->end) { |
300 | | /* Skip this style as it applies to no character */ |
301 | 782 | tsmb += 8; |
302 | 782 | m->style_entries--; |
303 | 782 | i--; |
304 | 782 | continue; |
305 | 782 | } |
306 | | |
307 | 16.3k | mov_text_parse_style_record(style, &tsmb); |
308 | 16.3k | if (styles_equivalent(style, &m->d.style)) { |
309 | | /* Skip this style as it is equivalent to the default style */ |
310 | 445 | m->style_entries--; |
311 | 445 | i--; |
312 | 445 | continue; |
313 | 15.8k | } else if (i && style->start == style[-1].end && |
314 | 5.02k | styles_equivalent(style, &style[-1])) { |
315 | | /* Merge the two adjacent styles */ |
316 | 780 | style[-1].end = style->end; |
317 | 780 | m->style_entries--; |
318 | 780 | i--; |
319 | 780 | continue; |
320 | 780 | } |
321 | 16.3k | } |
322 | 9.72k | return 0; |
323 | 10.6k | } |
324 | | |
325 | | static const Box box_types[] = { |
326 | | { MKBETAG('s','t','y','l'), 2, decode_styl }, |
327 | | { MKBETAG('h','l','i','t'), 4, decode_hlit }, |
328 | | { MKBETAG('h','c','l','r'), 4, decode_hclr }, |
329 | | { MKBETAG('t','w','r','p'), 1, decode_twrp } |
330 | | }; |
331 | | |
332 | | const static size_t box_count = FF_ARRAY_ELEMS(box_types); |
333 | | |
334 | | // Return byte length of the UTF-8 sequence starting at text[0]. 0 on error. |
335 | | static int get_utf8_length_at(const char *text, const char *text_end) |
336 | 3.89M | { |
337 | 3.89M | const char *start = text; |
338 | 3.89M | int err = 0; |
339 | 3.89M | uint32_t c; |
340 | 7.14M | GET_UTF8(c, text < text_end ? (uint8_t)*text++ : (err = 1, 0), goto error;); |
341 | 7.14M | if (err) |
342 | 0 | goto error; |
343 | 3.51M | return text - start; |
344 | 376k | error: |
345 | 376k | return 0; |
346 | 7.14M | } |
347 | | |
348 | | static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end, |
349 | | AVCodecContext *avctx) |
350 | 34.5k | { |
351 | 34.5k | MovTextContext *m = avctx->priv_data; |
352 | 34.5k | const StyleBox *const default_style = &m->d.style; |
353 | 34.5k | int i = 0; |
354 | 34.5k | int text_pos = 0; |
355 | 34.5k | int entry = 0; |
356 | 34.5k | int color = default_style->color; |
357 | | |
358 | 34.5k | if (text < text_end && m->box_flags & TWRP_BOX) { |
359 | 401 | if (m->w.wrap_flag == 1) { |
360 | 198 | av_bprintf(buf, "{\\q1}"); /* End of line wrap */ |
361 | 203 | } else { |
362 | 203 | av_bprintf(buf, "{\\q2}"); /* No wrap */ |
363 | 203 | } |
364 | 401 | } |
365 | | |
366 | 3.93M | while (text < text_end) { |
367 | 3.89M | int len; |
368 | | |
369 | 3.89M | if ((m->box_flags & STYL_BOX) && entry < m->style_entries) { |
370 | 53.7k | const StyleBox *style = &m->s[entry]; |
371 | 53.7k | if (text_pos == style->end) { |
372 | 2.44k | av_bprintf(buf, "{\\r}"); |
373 | 2.44k | color = default_style->color; |
374 | 2.44k | entry++; |
375 | 2.44k | style++; |
376 | 2.44k | } |
377 | 53.7k | if (entry < m->style_entries && text_pos == style->start) { |
378 | 6.94k | if (style->bold ^ default_style->bold) |
379 | 2.28k | av_bprintf(buf, "{\\b%d}", style->bold); |
380 | 6.94k | if (style->italic ^ default_style->italic) |
381 | 3.91k | av_bprintf(buf, "{\\i%d}", style->italic); |
382 | 6.94k | if (style->underline ^ default_style->underline) |
383 | 1.74k | av_bprintf(buf, "{\\u%d}", style->underline); |
384 | 6.94k | if (style->fontsize != default_style->fontsize) |
385 | 6.03k | av_bprintf(buf, "{\\fs%d}", style->fontsize); |
386 | 6.94k | if (style->font_id != default_style->font_id) |
387 | 50.8M | for (i = 0; i < m->ftab_entries; i++) { |
388 | 50.8M | if (style->font_id == m->ftab[i].font_id) |
389 | 17.8M | av_bprintf(buf, "{\\fn%s}", m->ftab[i].font); |
390 | 50.8M | } |
391 | 6.94k | if (default_style->color != style->color) { |
392 | 6.47k | color = style->color; |
393 | 6.47k | av_bprintf(buf, "{\\1c&H%X&}", color); |
394 | 6.47k | } |
395 | 6.94k | if (default_style->alpha != style->alpha) |
396 | 4.19k | av_bprintf(buf, "{\\1a&H%02X&}", 255 - style->alpha); |
397 | 6.94k | } |
398 | 53.7k | } |
399 | 3.89M | if (m->box_flags & HLIT_BOX) { |
400 | 115k | if (text_pos == m->h.hlit_start) { |
401 | | /* If hclr box is present, set the secondary color to the color |
402 | | * specified. Otherwise, set primary color to white and secondary |
403 | | * color to black. These colors will come from TextSampleModifier |
404 | | * boxes in future and inverse video technique for highlight will |
405 | | * be implemented. |
406 | | */ |
407 | 851 | if (m->box_flags & HCLR_BOX) { |
408 | 482 | av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2], |
409 | 482 | m->c.hlit_color[1], m->c.hlit_color[0]); |
410 | 482 | } else { |
411 | 369 | av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}"); |
412 | 369 | } |
413 | 851 | } |
414 | 115k | if (text_pos == m->h.hlit_end) { |
415 | 607 | if (m->box_flags & HCLR_BOX) { |
416 | 281 | av_bprintf(buf, "{\\2c&H%X&}", default_style->color); |
417 | 326 | } else { |
418 | 326 | av_bprintf(buf, "{\\1c&H%X&}{\\2c&H%X&}", |
419 | 326 | color, default_style->color); |
420 | 326 | } |
421 | 607 | } |
422 | 115k | } |
423 | | |
424 | 3.89M | len = get_utf8_length_at(text, text_end); |
425 | 3.89M | if (len < 1) { |
426 | 376k | av_log(avctx, AV_LOG_ERROR, "invalid UTF-8 byte in subtitle\n"); |
427 | 376k | len = 1; |
428 | 376k | } |
429 | 3.89M | switch (*text) { |
430 | 91.8k | case '\r': |
431 | 91.8k | break; |
432 | 1.92M | case '\n': |
433 | 1.92M | av_bprintf(buf, "\\N"); |
434 | 1.92M | break; |
435 | 1.87M | default: |
436 | 1.87M | av_bprint_append_data(buf, text, len); |
437 | 1.87M | break; |
438 | 3.89M | } |
439 | 3.89M | text += len; |
440 | 3.89M | text_pos++; |
441 | 3.89M | } |
442 | | |
443 | 34.5k | return 0; |
444 | 34.5k | } |
445 | | |
446 | 2.13k | static av_cold int mov_text_init(AVCodecContext *avctx) { |
447 | | /* |
448 | | * TODO: Handle the default text style. |
449 | | * NB: Most players ignore styles completely, with the result that |
450 | | * it's very common to find files where the default style is broken |
451 | | * and respecting it results in a worse experience than ignoring it. |
452 | | */ |
453 | 2.13k | int ret; |
454 | 2.13k | MovTextContext *m = avctx->priv_data; |
455 | 2.13k | ret = mov_text_tx3g(avctx, m); |
456 | 2.13k | if (ret == 0) { |
457 | 156 | const StyleBox *const default_style = &m->d.style; |
458 | 156 | if (!m->frame_width || !m->frame_height) { |
459 | 156 | m->frame_width = ASS_DEFAULT_PLAYRESX; |
460 | 156 | m->frame_height = ASS_DEFAULT_PLAYRESY; |
461 | 156 | } |
462 | 156 | return ff_ass_subtitle_header_full(avctx, |
463 | 156 | m->frame_width, m->frame_height, |
464 | 156 | m->d.font, default_style->fontsize, |
465 | 156 | (255U - default_style->alpha) << 24 | default_style->color, |
466 | 156 | (255U - default_style->alpha) << 24 | default_style->color, |
467 | 156 | (255U - m->d.back_alpha) << 24 | m->d.back_color, |
468 | 156 | (255U - m->d.back_alpha) << 24 | m->d.back_color, |
469 | 156 | default_style->bold, default_style->italic, default_style->underline, |
470 | 156 | ASS_DEFAULT_BORDERSTYLE, m->d.alignment); |
471 | 156 | } else |
472 | 1.98k | return ff_ass_subtitle_header_default(avctx); |
473 | 2.13k | } |
474 | | |
475 | | static int mov_text_decode_frame(AVCodecContext *avctx, AVSubtitle *sub, |
476 | | int *got_sub_ptr, const AVPacket *avpkt) |
477 | 200k | { |
478 | 200k | MovTextContext *m = avctx->priv_data; |
479 | 200k | int ret; |
480 | 200k | AVBPrint buf; |
481 | 200k | const char *ptr = avpkt->data, *end; |
482 | 200k | int text_length; |
483 | 200k | size_t i; |
484 | | |
485 | 200k | if (!ptr || avpkt->size < 2) |
486 | 160k | return AVERROR_INVALIDDATA; |
487 | | |
488 | | /* |
489 | | * A packet of size two with value zero is an empty subtitle |
490 | | * used to mark the end of the previous non-empty subtitle. |
491 | | * We can just drop them here as we have duration information |
492 | | * already. If the value is non-zero, then it's technically a |
493 | | * bad packet. |
494 | | */ |
495 | 39.5k | if (avpkt->size == 2) |
496 | 4.22k | return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA; |
497 | | |
498 | | /* |
499 | | * The first two bytes of the packet are the length of the text string |
500 | | * In complex cases, there are style descriptors appended to the string |
501 | | * so we can't just assume the packet size is the string size. |
502 | | */ |
503 | 35.3k | text_length = AV_RB16(ptr); |
504 | 35.3k | end = ptr + FFMIN(2 + text_length, avpkt->size); |
505 | 35.3k | ptr += 2; |
506 | | |
507 | 35.3k | mov_text_cleanup(m); |
508 | | |
509 | 35.3k | m->style_entries = 0; |
510 | 35.3k | m->box_flags = 0; |
511 | | // Note that the spec recommends lines be no longer than 2048 characters. |
512 | 35.3k | av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); |
513 | 35.3k | if (text_length + 2 < avpkt->size) { |
514 | 13.7k | const uint8_t *tsmb = end; |
515 | 13.7k | const uint8_t *const tsmb_end = avpkt->data + avpkt->size; |
516 | | // A box is a minimum of 8 bytes. |
517 | 28.3k | while (tsmb_end - tsmb >= 8) { |
518 | 22.8k | uint64_t tsmb_size = bytestream_get_be32(&tsmb); |
519 | 22.8k | uint32_t tsmb_type = bytestream_get_be32(&tsmb); |
520 | 22.8k | int size_var, ret_tsmb; |
521 | | |
522 | 22.8k | if (tsmb_size == 1) { |
523 | 1.04k | if (tsmb_end - tsmb < 8) |
524 | 286 | break; |
525 | 759 | tsmb_size = bytestream_get_be64(&tsmb); |
526 | 759 | size_var = 16; |
527 | 759 | } else |
528 | 21.8k | size_var = 8; |
529 | | //size_var is equal to 8 or 16 depending on the size of box |
530 | | |
531 | 22.5k | if (tsmb_size < size_var) { |
532 | 791 | av_log(avctx, AV_LOG_ERROR, "tsmb_size invalid\n"); |
533 | 791 | return AVERROR_INVALIDDATA; |
534 | 791 | } |
535 | 21.7k | tsmb_size -= size_var; |
536 | | |
537 | 21.7k | if (tsmb_end - tsmb < tsmb_size) |
538 | 7.20k | break; |
539 | | |
540 | 70.6k | for (i = 0; i < box_count; i++) { |
541 | 56.8k | if (tsmb_type == box_types[i].type) { |
542 | 13.6k | if (tsmb_size < box_types[i].base_size) |
543 | 304 | break; |
544 | 13.3k | ret_tsmb = box_types[i].decode(tsmb, m, tsmb_size); |
545 | 13.3k | if (ret_tsmb == -1) |
546 | 458 | break; |
547 | 13.3k | } |
548 | 56.8k | } |
549 | 14.5k | tsmb += tsmb_size; |
550 | 14.5k | } |
551 | 12.9k | text_to_ass(&buf, ptr, end, avctx); |
552 | 12.9k | mov_text_cleanup(m); |
553 | 12.9k | } else |
554 | 21.5k | text_to_ass(&buf, ptr, end, avctx); |
555 | | |
556 | 34.5k | ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL); |
557 | 34.5k | av_bprint_finalize(&buf, NULL); |
558 | 34.5k | if (ret < 0) |
559 | 0 | return ret; |
560 | 34.5k | *got_sub_ptr = sub->num_rects > 0; |
561 | 34.5k | return avpkt->size; |
562 | 34.5k | } |
563 | | |
564 | | static av_cold int mov_text_decode_close(AVCodecContext *avctx) |
565 | 2.13k | { |
566 | 2.13k | MovTextContext *m = avctx->priv_data; |
567 | 2.13k | mov_text_cleanup_ftab(m); |
568 | 2.13k | mov_text_cleanup(m); |
569 | 2.13k | return 0; |
570 | 2.13k | } |
571 | | |
572 | | static av_cold void mov_text_flush(AVCodecContext *avctx) |
573 | 40.9k | { |
574 | 40.9k | MovTextContext *m = avctx->priv_data; |
575 | 40.9k | if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)) |
576 | 40.9k | m->readorder = 0; |
577 | 40.9k | } |
578 | | |
579 | | #define OFFSET(x) offsetof(MovTextContext, x) |
580 | | #define FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_SUBTITLE_PARAM |
581 | | static const AVOption options[] = { |
582 | | { "width", "Frame width, usually video width", OFFSET(frame_width), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, |
583 | | { "height", "Frame height, usually video height", OFFSET(frame_height), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, |
584 | | { NULL }, |
585 | | }; |
586 | | |
587 | | static const AVClass mov_text_decoder_class = { |
588 | | .class_name = "MOV text decoder", |
589 | | .item_name = av_default_item_name, |
590 | | .option = options, |
591 | | .version = LIBAVUTIL_VERSION_INT, |
592 | | }; |
593 | | |
594 | | const FFCodec ff_movtext_decoder = { |
595 | | .p.name = "mov_text", |
596 | | CODEC_LONG_NAME("3GPP Timed Text subtitle"), |
597 | | .p.type = AVMEDIA_TYPE_SUBTITLE, |
598 | | .p.id = AV_CODEC_ID_MOV_TEXT, |
599 | | .priv_data_size = sizeof(MovTextContext), |
600 | | .p.priv_class = &mov_text_decoder_class, |
601 | | .init = mov_text_init, |
602 | | FF_CODEC_DECODE_SUB_CB(mov_text_decode_frame), |
603 | | .close = mov_text_decode_close, |
604 | | .flush = mov_text_flush, |
605 | | }; |