/src/vlc/modules/codec/subsdec.c
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * subsdec.c : text subtitle decoder |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2000-2006 VLC authors and VideoLAN |
5 | | * |
6 | | * Authors: Gildas Bazin <gbazin@videolan.org> |
7 | | * Samuel Hocevar <sam@zoy.org> |
8 | | * Derk-Jan Hartman <hartman at videolan dot org> |
9 | | * Bernie Purcell <bitmap@videolan.org> |
10 | | * |
11 | | * This program is free software; you can redistribute it and/or modify it |
12 | | * under the terms of the GNU Lesser General Public License as published by |
13 | | * the Free Software Foundation; either version 2.1 of the License, or |
14 | | * (at your option) any later version. |
15 | | * |
16 | | * This program is distributed in the hope that it will be useful, |
17 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | * GNU Lesser General Public License for more details. |
20 | | * |
21 | | * You should have received a copy of the GNU Lesser General Public License |
22 | | * along with this program; if not, write to the Free Software Foundation, |
23 | | * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. |
24 | | *****************************************************************************/ |
25 | | |
26 | | /***************************************************************************** |
27 | | * Preamble |
28 | | *****************************************************************************/ |
29 | | #ifdef HAVE_CONFIG_H |
30 | | # include "config.h" |
31 | | #endif |
32 | | |
33 | | #include <limits.h> |
34 | | #include <errno.h> |
35 | | #include <ctype.h> |
36 | | |
37 | | #include <vlc_common.h> |
38 | | #include <vlc_plugin.h> |
39 | | #include <vlc_codec.h> |
40 | | #include <vlc_charset.h> |
41 | | #include <vlc_xml.h> |
42 | | |
43 | | #include "substext.h" |
44 | | |
45 | | /***************************************************************************** |
46 | | * Module descriptor. |
47 | | *****************************************************************************/ |
/* Character set identifiers selectable through "subsdec-encoding".
 * This table is handed to change_string_list() together with
 * ppsz_encoding_names[], so both tables must keep the same entry order. */
static const char *const ppsz_encodings[] = {
    /* Default, system and Unicode */
    "", "system",
    "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
    "GB18030",

    /* Western European */
    "ISO-8859-15", "Windows-1252", "IBM850",
    /* Eastern European */
    "ISO-8859-2", "Windows-1250",
    /* Esperanto, Nordic */
    "ISO-8859-3", "ISO-8859-10",
    /* Cyrillic */
    "Windows-1251", "KOI8-R", "KOI8-U",
    /* Arabic */
    "ISO-8859-6", "Windows-1256",
    /* Greek */
    "ISO-8859-7", "Windows-1253",
    /* Hebrew */
    "ISO-8859-8", "Windows-1255",
    /* Turkish */
    "ISO-8859-9", "Windows-1254",
    /* Thai */
    "ISO-8859-11", "Windows-874",
    /* Baltic */
    "ISO-8859-13", "Windows-1257",
    /* Celtic, South-Eastern European */
    "ISO-8859-14", "ISO-8859-16",

    /* CJK families */
    "ISO-2022-CN-EXT", "EUC-CN",
    "ISO-2022-JP-2", "EUC-JP", "Shift_JIS",
    "CP949", "ISO-2022-KR",
    "Big5", "ISO-2022-TW", "Big5-HKSCS",

    /* Vietnamese */
    "VISCII", "Windows-1258",
};
93 | | |
94 | | static const char *const ppsz_encoding_names[] = { |
95 | | /* xgettext: |
96 | | The character encoding name in parenthesis corresponds to that used for |
97 | | the GetACP translation. "Windows-1252" applies to Western European |
98 | | languages using the Latin alphabet. */ |
99 | | N_("Default (Windows-1252)"), |
100 | | N_("System codeset"), |
101 | | N_("Universal (UTF-8)"), |
102 | | N_("Universal (UTF-16)"), |
103 | | N_("Universal (big endian UTF-16)"), |
104 | | N_("Universal (little endian UTF-16)"), |
105 | | N_("Universal, Chinese (GB18030)"), |
106 | | |
107 | | /* ISO 8859 and the likes */ |
108 | | /* 1 */ |
109 | | N_("Western European (Latin-9)"), /* mostly superset of Latin-1 */ |
110 | | N_("Western European (Windows-1252)"), |
111 | | N_("Western European (IBM 00850)"), |
112 | | /* 2 */ |
113 | | N_("Eastern European (Latin-2)"), |
114 | | N_("Eastern European (Windows-1250)"), |
115 | | /* 3 */ |
116 | | N_("Esperanto (Latin-3)"), |
117 | | /* 4 */ |
118 | | N_("Nordic (Latin-6)"), /* Latin 6 supersedes Latin 4 */ |
119 | | /* 5 */ |
120 | | N_("Cyrillic (Windows-1251)"), /* ISO 8859-5 is not practically used */ |
121 | | N_("Russian (KOI8-R)"), |
122 | | N_("Ukrainian (KOI8-U)"), |
123 | | /* 6 */ |
124 | | N_("Arabic (ISO 8859-6)"), |
125 | | N_("Arabic (Windows-1256)"), |
126 | | /* 7 */ |
127 | | N_("Greek (ISO 8859-7)"), |
128 | | N_("Greek (Windows-1253)"), |
129 | | /* 8 */ |
130 | | N_("Hebrew (ISO 8859-8)"), |
131 | | N_("Hebrew (Windows-1255)"), |
132 | | /* 9 */ |
133 | | N_("Turkish (ISO 8859-9)"), |
134 | | N_("Turkish (Windows-1254)"), |
135 | | /* 10 -> 4 */ |
136 | | /* 11 */ |
137 | | N_("Thai (TIS 620-2533/ISO 8859-11)"), |
138 | | N_("Thai (Windows-874)"), |
139 | | /* 13 */ |
140 | | N_("Baltic (Latin-7)"), |
141 | | N_("Baltic (Windows-1257)"), |
142 | | /* 12 -> /dev/null */ |
143 | | /* 14 */ |
144 | | N_("Celtic (Latin-8)"), |
145 | | /* 15 -> 1 */ |
146 | | /* 16 */ |
147 | | N_("South-Eastern European (Latin-10)"), |
148 | | /* CJK families */ |
149 | | N_("Simplified Chinese (ISO-2022-CN-EXT)"), |
150 | | N_("Simplified Chinese Unix (EUC-CN)"), |
151 | | N_("Japanese (7-bits JIS/ISO-2022-JP-2)"), |
152 | | N_("Japanese Unix (EUC-JP)"), |
153 | | N_("Japanese (Shift JIS)"), |
154 | | N_("Korean (EUC-KR/CP949)"), |
155 | | N_("Korean (ISO-2022-KR)"), |
156 | | N_("Traditional Chinese (Big5)"), |
157 | | N_("Traditional Chinese Unix (EUC-TW)"), |
158 | | N_("Hong-Kong Supplementary (HKSCS)"), |
159 | | /* Other */ |
160 | | N_("Vietnamese (VISCII)"), |
161 | | N_("Vietnamese (Windows-1258)"), |
162 | | }; |
163 | | |
164 | | static const int pi_justification[] = { -1, 0, 1, 2 }; |
165 | | static const char *const ppsz_justification_text[] = { |
166 | | N_("Auto"),N_("Center"),N_("Left"),N_("Right") |
167 | | }; |
168 | | |
/* Short labels (*_TEXT) and longer help strings (*_LONGTEXT) used when the
 * three module options are declared in the module descriptor:
 *   ENCODING_*         -> "subsdec-encoding"
 *   ALIGN_*            -> "subsdec-align"
 *   AUTODETECT_UTF8_*  -> "subsdec-autodetect-utf8"
 * Every string is wrapped in N_() (presumably the gettext no-op marker -
 * confirm against the VLC i18n headers). */
169 | | #define ENCODING_TEXT N_("Subtitle text encoding") |
170 | | #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles") |
171 | | #define ALIGN_TEXT N_("Subtitle justification") |
172 | | #define ALIGN_LONGTEXT N_("Set the justification of subtitles") |
173 | | #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitle autodetection") |
174 | | #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \ |
175 | | "UTF-8 encoding within subtitle files.") |
176 | | |
/* Entry points referenced by set_callbacks() in the descriptor below. */
177 | | static int OpenDecoder ( vlc_object_t * ); |
178 | | static void CloseDecoder ( vlc_object_t * ); |
179 | | |
/* Module descriptor: declares an "spu decoder" capability with score 50,
 * using OpenDecoder/CloseDecoder as callbacks, filed under
 * SUBCAT_INPUT_SCODEC. It also declares three options:
 *   - "subsdec-align"           integer, default -1, restricted to
 *                               pi_justification[] / ppsz_justification_text[]
 *   - "subsdec-encoding"        string, default "", restricted to
 *                               ppsz_encodings[] / ppsz_encoding_names[]
 *   - "subsdec-autodetect-utf8" boolean, default true */
180 | 0 | vlc_module_begin () |
181 | 0 | set_shortname( N_("Subtitles")) |
182 | 0 | set_description( N_("Text subtitle decoder") ) |
183 | 0 | set_capability( "spu decoder", 50 ) |
184 | 0 | set_callbacks( OpenDecoder, CloseDecoder ) |
185 | 0 | set_subcategory( SUBCAT_INPUT_SCODEC ) |
186 | |
|
187 | 0 | add_integer( "subsdec-align", -1, ALIGN_TEXT, ALIGN_LONGTEXT ) |
188 | 0 | change_integer_list( pi_justification, ppsz_justification_text ) |
189 | 0 | add_string( "subsdec-encoding", "", |
190 | 0 | ENCODING_TEXT, ENCODING_LONGTEXT ) |
191 | 0 | change_string_list( ppsz_encodings, ppsz_encoding_names ) |
192 | 0 | add_bool( "subsdec-autodetect-utf8", true, |
193 | 0 | AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT ) |
194 | 0 | vlc_module_end () |
195 | | |
196 | | /***************************************************************************** |
197 | | * Local prototypes |
198 | | *****************************************************************************/ |
199 | | #define NO_BREAKING_SPACE " " |
200 | | |
/* Per-decoder private state, allocated in OpenDecoder() and released in
 * CloseDecoder().
 *   i_align           value of the "subsdec-align" option (-1 until read)
 *   iconv_handle      converter to UTF-8, or (vlc_iconv_t)-1 when none is open
 *   b_autodetect_utf8 when true, ParseText() first checks whether a packet is
 *                     already valid UTF-8; cleared on the first invalid packet */
201 | | typedef struct |
202 | | { |
203 | | int i_align; /* Subtitles alignment on the vout */ |
204 | | |
205 | | vlc_iconv_t iconv_handle; /* handle to iconv instance */ |
206 | | bool b_autodetect_utf8; |
207 | | } decoder_sys_t; |
208 | | |
209 | | |
/* Forward declarations of the decoding pipeline:
 * DecodeBlock() -> ParseText() -> ParseSubtitles(). */
210 | | static int DecodeBlock ( decoder_t *, block_t * ); |
211 | | static subpicture_t *ParseText ( decoder_t *, block_t * ); |
212 | | static text_segment_t *ParseSubtitles(int *pi_align, const char * ); |
213 | | |
214 | | /***************************************************************************** |
215 | | * OpenDecoder: probe the decoder and return score |
216 | | ***************************************************************************** |
217 | | * Tries to launch a decoder and return score so that the interface is able |
218 | | * to chose. |
219 | | *****************************************************************************/ |
220 | | static int OpenDecoder( vlc_object_t *p_this ) |
221 | 0 | { |
222 | 0 | decoder_t *p_dec = (decoder_t*)p_this; |
223 | 0 | decoder_sys_t *p_sys; |
224 | |
|
225 | 0 | switch( p_dec->fmt_in->i_codec ) |
226 | 0 | { |
227 | 0 | case VLC_CODEC_SUBT: |
228 | 0 | case VLC_CODEC_ITU_T140: |
229 | 0 | break; |
230 | 0 | default: |
231 | 0 | return VLC_EGENERIC; |
232 | 0 | } |
233 | | |
234 | | /* Allocate the memory needed to store the decoder's structure */ |
235 | 0 | p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) ); |
236 | 0 | if( p_sys == NULL ) |
237 | 0 | return VLC_ENOMEM; |
238 | | |
239 | 0 | p_dec->pf_decode = DecodeBlock; |
240 | 0 | p_dec->fmt_out.i_codec = 0; |
241 | | |
242 | | /* init of p_sys */ |
243 | 0 | p_sys->i_align = -1; |
244 | 0 | p_sys->iconv_handle = (vlc_iconv_t)-1; |
245 | 0 | p_sys->b_autodetect_utf8 = false; |
246 | |
|
247 | 0 | const char *encoding; |
248 | 0 | char *var = NULL; |
249 | | |
250 | | /* First try demux-specified encoding */ |
251 | 0 | if( p_dec->fmt_in->i_codec == VLC_CODEC_ITU_T140 ) |
252 | 0 | encoding = "UTF-8"; /* IUT T.140 is always using UTF-8 */ |
253 | 0 | else |
254 | 0 | if( p_dec->fmt_in->subs.psz_encoding && *p_dec->fmt_in->subs.psz_encoding ) |
255 | 0 | { |
256 | 0 | encoding = p_dec->fmt_in->subs.psz_encoding; |
257 | 0 | msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s", |
258 | 0 | encoding); |
259 | 0 | } |
260 | 0 | else |
261 | 0 | { |
262 | | /* Second, try configured encoding */ |
263 | 0 | if ((var = var_InheritString (p_dec, "subsdec-encoding")) != NULL) |
264 | 0 | { |
265 | 0 | msg_Dbg (p_dec, "trying configured character encoding: %s", var); |
266 | 0 | if (!strcmp (var, "system")) |
267 | 0 | { |
268 | 0 | free (var); |
269 | 0 | var = NULL; |
270 | 0 | encoding = ""; |
271 | | /* ^ iconv() treats "" as nl_langinfo(CODESET) */ |
272 | 0 | } |
273 | 0 | else |
274 | 0 | encoding = var; |
275 | 0 | } |
276 | 0 | else |
277 | | /* Third, try "local" encoding */ |
278 | 0 | { |
279 | | /* xgettext: |
280 | | The Windows ANSI code page most commonly used for this language. |
281 | | VLC uses this as a guess of the subtitle files character set |
282 | | (if UTF-8 and UTF-16 autodetection fails). |
283 | | Western European languages normally use "CP1252", which is a |
284 | | Microsoft-variant of ISO 8859-1. That suits the Latin alphabet. |
285 | | Other scripts use other code pages. |
286 | | |
287 | | This MUST be a valid iconv character set. If unsure, please refer |
288 | | the VideoLAN translators mailing list. */ |
289 | 0 | encoding = vlc_pgettext("GetACP", "CP1252"); |
290 | 0 | msg_Dbg (p_dec, "trying default character encoding: %s", encoding); |
291 | 0 | } |
292 | | |
293 | | /* Check UTF-8 autodetection */ |
294 | 0 | if (var_InheritBool (p_dec, "subsdec-autodetect-utf8")) |
295 | 0 | { |
296 | 0 | msg_Dbg (p_dec, "using automatic UTF-8 detection"); |
297 | 0 | p_sys->b_autodetect_utf8 = true; |
298 | 0 | } |
299 | 0 | } |
300 | |
|
301 | 0 | if (strcasecmp (encoding, "UTF-8") && strcasecmp (encoding, "utf8")) |
302 | 0 | { |
303 | 0 | p_sys->iconv_handle = vlc_iconv_open ("UTF-8", encoding); |
304 | 0 | if (p_sys->iconv_handle == (vlc_iconv_t)(-1)) |
305 | 0 | msg_Err (p_dec, "cannot convert from %s: %s", encoding, |
306 | 0 | vlc_strerror_c(errno)); |
307 | 0 | } |
308 | 0 | free (var); |
309 | |
|
310 | 0 | p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" ); |
311 | |
|
312 | 0 | return VLC_SUCCESS; |
313 | 0 | } |
314 | | |
315 | | /**************************************************************************** |
316 | | * DecodeBlock: the whole thing |
317 | | **************************************************************************** |
318 | | * This function must be fed with complete subtitles units. |
319 | | ****************************************************************************/ |
320 | | static int DecodeBlock( decoder_t *p_dec, block_t *p_block ) |
321 | 0 | { |
322 | 0 | subpicture_t *p_spu; |
323 | |
|
324 | 0 | if( p_block == NULL ) /* No Drain */ |
325 | 0 | return VLCDEC_SUCCESS; |
326 | | |
327 | 0 | if( p_block->i_flags & BLOCK_FLAG_CORRUPTED ) |
328 | 0 | { |
329 | 0 | block_Release( p_block ); |
330 | 0 | return VLCDEC_SUCCESS; |
331 | 0 | } |
332 | | |
333 | 0 | p_spu = ParseText( p_dec, p_block ); |
334 | |
|
335 | 0 | block_Release( p_block ); |
336 | 0 | if( p_spu != NULL ) |
337 | 0 | decoder_QueueSub( p_dec, p_spu ); |
338 | 0 | return VLCDEC_SUCCESS; |
339 | 0 | } |
340 | | |
341 | | /***************************************************************************** |
342 | | * CloseDecoder: clean up the decoder |
343 | | *****************************************************************************/ |
344 | | static void CloseDecoder( vlc_object_t *p_this ) |
345 | 0 | { |
346 | 0 | decoder_t *p_dec = (decoder_t *)p_this; |
347 | 0 | decoder_sys_t *p_sys = p_dec->p_sys; |
348 | |
|
349 | 0 | if( p_sys->iconv_handle != (vlc_iconv_t)-1 ) |
350 | 0 | vlc_iconv_close( p_sys->iconv_handle ); |
351 | |
|
352 | 0 | free( p_sys ); |
353 | 0 | } |
354 | | |
355 | | /***************************************************************************** |
356 | | * ParseText: parse an text subtitle packet and send it to the video output |
357 | | *****************************************************************************/ |
358 | | static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) |
359 | 0 | { |
360 | 0 | decoder_sys_t *p_sys = p_dec->p_sys; |
361 | 0 | subpicture_t *p_spu = NULL; |
362 | |
|
363 | 0 | if( p_block->i_flags & BLOCK_FLAG_CORRUPTED ) |
364 | 0 | return NULL; |
365 | | |
366 | | /* We cannot display a subpicture with no date */ |
367 | 0 | if( p_block->i_pts == VLC_TICK_INVALID ) |
368 | 0 | { |
369 | 0 | msg_Warn( p_dec, "subtitle without a date" ); |
370 | 0 | return NULL; |
371 | 0 | } |
372 | | |
373 | | /* Check validity of packet data */ |
374 | | /* An "empty" line containing only \0 can be used to force |
375 | | and ephemer picture from the screen */ |
376 | 0 | if( p_block->i_buffer < 1 ) |
377 | 0 | { |
378 | 0 | msg_Warn( p_dec, "no subtitle data" ); |
379 | 0 | return NULL; |
380 | 0 | } |
381 | | |
382 | 0 | char *psz_subtitle = NULL; |
383 | | |
384 | | /* Should be resiliant against bad subtitles */ |
385 | 0 | if( p_sys->iconv_handle == (vlc_iconv_t)-1 || |
386 | 0 | p_sys->b_autodetect_utf8 ) |
387 | 0 | { |
388 | 0 | psz_subtitle = malloc( p_block->i_buffer + 1 ); |
389 | 0 | if( psz_subtitle == NULL ) |
390 | 0 | return NULL; |
391 | 0 | memcpy( psz_subtitle, p_block->p_buffer, p_block->i_buffer ); |
392 | 0 | psz_subtitle[p_block->i_buffer] = '\0'; |
393 | 0 | } |
394 | | |
395 | 0 | if( p_sys->iconv_handle == (vlc_iconv_t)-1 ) |
396 | 0 | { |
397 | 0 | if (EnsureUTF8( psz_subtitle ) == NULL) |
398 | 0 | { |
399 | 0 | msg_Err( p_dec, "failed to convert subtitle encoding.\n" |
400 | 0 | "Try manually setting a character-encoding " |
401 | 0 | "before you open the file." ); |
402 | 0 | } |
403 | 0 | } |
404 | 0 | else |
405 | 0 | { |
406 | 0 | if( p_sys->b_autodetect_utf8 ) |
407 | 0 | { |
408 | 0 | if( IsUTF8( psz_subtitle ) == NULL ) |
409 | 0 | { |
410 | 0 | msg_Dbg( p_dec, "invalid UTF-8 sequence: " |
411 | 0 | "disabling UTF-8 subtitles autodetection" ); |
412 | 0 | p_sys->b_autodetect_utf8 = false; |
413 | 0 | } |
414 | 0 | } |
415 | |
|
416 | 0 | if( !p_sys->b_autodetect_utf8 ) |
417 | 0 | { |
418 | 0 | size_t inbytes_left = p_block->i_buffer; |
419 | 0 | size_t outbytes_left = 6 * inbytes_left; |
420 | 0 | char *psz_new_subtitle = xmalloc( outbytes_left + 1 ); |
421 | 0 | char *psz_convert_buffer_out = psz_new_subtitle; |
422 | 0 | const char *psz_convert_buffer_in = |
423 | 0 | psz_subtitle ? psz_subtitle : (char *)p_block->p_buffer; |
424 | |
|
425 | 0 | size_t ret = vlc_iconv( p_sys->iconv_handle, |
426 | 0 | &psz_convert_buffer_in, &inbytes_left, |
427 | 0 | &psz_convert_buffer_out, &outbytes_left ); |
428 | |
|
429 | 0 | *psz_convert_buffer_out++ = '\0'; |
430 | 0 | free( psz_subtitle ); |
431 | |
|
432 | 0 | if( ( ret == (size_t)(-1) ) || inbytes_left ) |
433 | 0 | { |
434 | 0 | free( psz_new_subtitle ); |
435 | 0 | msg_Err( p_dec, "failed to convert subtitle encoding.\n" |
436 | 0 | "Try manually setting a character-encoding " |
437 | 0 | "before you open the file." ); |
438 | 0 | return NULL; |
439 | 0 | } |
440 | | |
441 | 0 | psz_subtitle = realloc( psz_new_subtitle, |
442 | 0 | psz_convert_buffer_out - psz_new_subtitle ); |
443 | 0 | if( !psz_subtitle ) |
444 | 0 | psz_subtitle = psz_new_subtitle; |
445 | 0 | } |
446 | 0 | } |
447 | | |
448 | | /* Create the subpicture unit */ |
449 | 0 | p_spu = decoder_NewSubpictureText( p_dec ); |
450 | 0 | if( !p_spu ) |
451 | 0 | { |
452 | 0 | free( psz_subtitle ); |
453 | 0 | return NULL; |
454 | 0 | } |
455 | 0 | p_spu->i_start = p_block->i_pts; |
456 | 0 | p_spu->i_stop = p_block->i_pts + p_block->i_length; |
457 | 0 | p_spu->b_ephemer = (p_block->i_length == VLC_TICK_INVALID); |
458 | 0 | p_spu->b_absolute = false; |
459 | |
|
460 | 0 | subtext_updater_sys_t *p_spu_sys = p_spu->updater.p_sys; |
461 | |
|
462 | 0 | int i_inline_align = -1; |
463 | 0 | p_spu_sys->region.p_segments = ParseSubtitles( &i_inline_align, psz_subtitle ); |
464 | 0 | free( psz_subtitle ); |
465 | 0 | if( p_sys->i_align >= 0 ) /* bottom ; left, right or centered */ |
466 | 0 | { |
467 | 0 | p_spu_sys->region.align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; |
468 | 0 | p_spu_sys->region.inner_align = p_sys->i_align; |
469 | 0 | } |
470 | 0 | else if( i_inline_align >= 0 ) |
471 | 0 | { |
472 | 0 | p_spu_sys->region.align = i_inline_align; |
473 | 0 | p_spu_sys->region.inner_align = i_inline_align; |
474 | 0 | } |
475 | 0 | else /* default, bottom ; centered */ |
476 | 0 | { |
477 | 0 | p_spu_sys->region.align = SUBPICTURE_ALIGN_BOTTOM; |
478 | 0 | p_spu_sys->region.inner_align = 0; |
479 | 0 | } |
480 | |
|
481 | 0 | return p_spu; |
482 | 0 | } |
483 | | |
484 | | static bool AppendCharacter( text_segment_t* p_segment, char c ) |
485 | 0 | { |
486 | 0 | char* tmp; |
487 | 0 | if ( asprintf( &tmp, "%s%c", p_segment->psz_text ? p_segment->psz_text : "", c ) < 0 ) |
488 | 0 | return false; |
489 | 0 | free( p_segment->psz_text ); |
490 | 0 | p_segment->psz_text = tmp; |
491 | 0 | return true; |
492 | 0 | } |
493 | | |
494 | | static bool AppendString( text_segment_t* p_segment, const char* psz_str ) |
495 | 0 | { |
496 | 0 | char* tmp; |
497 | 0 | if ( asprintf( &tmp, "%s%s", p_segment->psz_text ? p_segment->psz_text : "", psz_str ) < 0 ) |
498 | 0 | return false; |
499 | 0 | free( p_segment->psz_text ); |
500 | 0 | p_segment->psz_text = tmp; |
501 | 0 | return true; |
502 | 0 | } |
503 | | |
/*
 * Parses one `name=value` attribute at *ppsz_subtitle.
 *
 * Leading spaces are skipped. The name starts with a letter and continues
 * with letters or '-' (so that hyphenated names such as "outline-color" are
 * returned whole). The value may be wrapped in single or double quotes;
 * unquoted values end at the first whitespace or '>'.
 *
 * Returns a heap-allocated copy of the attribute name, or NULL when no
 * attribute starts here, when the name runs up to the end of the string, or
 * when an allocation fails. On NULL, *ppsz_subtitle is left unchanged.
 * *ppsz_attribute_value receives a heap-allocated copy of the value, or NULL
 * when the attribute has no (or an empty) value. The caller frees both.
 * On success *ppsz_subtitle is moved past what was consumed.
 */
static char* ConsumeAttribute( const char** ppsz_subtitle, char** ppsz_attribute_value )
{
    const char* psz_subtitle = *ppsz_subtitle;
    *ppsz_attribute_value = NULL;

    while ( *psz_subtitle == ' ' )
        psz_subtitle++;

    /* Attribute name. <ctype.h> functions require an unsigned char value:
     * passing a negative plain char (e.g. a UTF-8 byte) is undefined. */
    const char* psz_name_start = psz_subtitle;
    if ( isalpha( (unsigned char)*psz_subtitle ) )
    {
        psz_subtitle++;
        while ( isalpha( (unsigned char)*psz_subtitle ) || *psz_subtitle == '-' )
            psz_subtitle++;
    }
    size_t attr_len = (size_t)( psz_subtitle - psz_name_start );
    if ( !*psz_subtitle || attr_len == 0 )
        return NULL;

    char* psz_attribute_name = malloc( attr_len + 1 );
    if ( psz_attribute_name == NULL )
        return NULL;
    memcpy( psz_attribute_name, psz_name_start, attr_len );
    psz_attribute_name[attr_len] = '\0';

    // Skip over to the attribute value
    while ( *psz_subtitle && *psz_subtitle != '=' )
        psz_subtitle++;
    if ( !*psz_subtitle )
    {
        *ppsz_subtitle = psz_subtitle;
        return psz_attribute_name;
    }
    // Skip the '=' sign
    psz_subtitle++;

    // Acknowledge the delimiter, if any
    while ( isspace( (unsigned char)*psz_subtitle ) )
        psz_subtitle++;

    char delimiter = 0;
    if ( *psz_subtitle == '\'' || *psz_subtitle == '"' )
    {
        // Save the delimiter and skip it
        delimiter = *psz_subtitle;
        psz_subtitle++;
    }

    // Skip spaces, just in case
    while ( isspace( (unsigned char)*psz_subtitle ) )
        psz_subtitle++;

    /* Attribute value: up to the closing delimiter when quoted, otherwise
     * up to the first whitespace or '>'. */
    const char* psz_value_start = psz_subtitle;
    while ( *psz_subtitle )
    {
        if ( delimiter != 0 )
        {
            if ( *psz_subtitle == delimiter )
                break;
        }
        else if ( isspace( (unsigned char)*psz_subtitle ) || *psz_subtitle == '>' )
            break;
        psz_subtitle++;
    }
    attr_len = (size_t)( psz_subtitle - psz_value_start );
    if ( attr_len == 0 )
    {
        *ppsz_subtitle = psz_subtitle;
        return psz_attribute_name;
    }

    char* psz_value = malloc( attr_len + 1 );
    if ( psz_value == NULL )
    {
        free( psz_attribute_name );
        return NULL;
    }
    memcpy( psz_value, psz_value_start, attr_len );
    psz_value[attr_len] = '\0';
    *ppsz_attribute_value = psz_value;

    // Finally, skip over the final delimiter
    if ( delimiter != 0 && *psz_subtitle )
        psz_subtitle++;
    *ppsz_subtitle = psz_subtitle;
    return psz_attribute_name;
}
582 | | |
// Returns the name of the tag starting at *ppsz_subtitle and consumes the
// string up to just after the tag name, or returns NULL and doesn't advance
// if the angle bracket was not a tag opening (or if allocation failed).
// When b_closing is true, a '/' directly after the '<' is skipped as well.
// For instance, if *ppsz_subtitle == "<some_tag attribute=value>",
// GetTag will return "some_tag" and leave *ppsz_subtitle on " attribute=value>".
// The returned value must be freed.
static char* GetTag( const char** ppsz_subtitle, bool b_closing )
{
    const char* psz_subtitle = *ppsz_subtitle;
    if ( *psz_subtitle != '<' )
        return NULL;
    // Skip the '<'
    psz_subtitle++;
    if ( b_closing && *psz_subtitle == '/' )
        psz_subtitle++;
    // Skip potential spaces
    while ( *psz_subtitle == ' ' )
        psz_subtitle++;
    // Now we need to verify if what comes next is a valid tag.
    // <ctype.h> functions need an unsigned char value: a negative plain char
    // (e.g. a UTF-8 byte) would be undefined behavior.
    if ( !isalpha( (unsigned char)*psz_subtitle ) )
        return NULL;
    size_t tag_size = 1;
    while ( isalnum( (unsigned char)psz_subtitle[tag_size] ) ||
            psz_subtitle[tag_size] == '_' )
        tag_size++;
    char* psz_tagname = malloc( tag_size + 1 );
    if ( psz_tagname == NULL )
        return NULL;
    memcpy( psz_tagname, psz_subtitle, tag_size );
    psz_tagname[tag_size] = '\0';
    *ppsz_subtitle = psz_subtitle + tag_size;
    return psz_tagname;
}
615 | | |
/*
 * Tells whether psz_subtitle contains a closing tag for psz_tagname, i.e.
 * "</" followed by optional spaces, the tag name (case-insensitive),
 * optional spaces and '>'.
 *
 * Every "</" in the string is examined, so an earlier plain-text occurrence
 * of the tag name does not hide a later closing tag, and nothing before
 * psz_subtitle is ever read.
 */
static bool IsClosed( const char* psz_subtitle, const char* psz_tagname )
{
    const size_t i_taglen = strlen( psz_tagname );

    for ( const char* p_open = strchr( psz_subtitle, '<' );
          p_open != NULL;
          p_open = strchr( p_open + 1, '<' ) )
    {
        const char* p_cursor = p_open + 1;
        if ( *p_cursor != '/' )
            continue;
        p_cursor++;
        // Spaces are tolerated between the '/' and the tag name...
        while ( *p_cursor == ' ' )
            p_cursor++;
        if ( strncasecmp( p_cursor, psz_tagname, i_taglen ) != 0 )
            continue;
        p_cursor += i_taglen;
        // ...and between the tag name and the '>'
        while ( *p_cursor == ' ' )
            p_cursor++;
        if ( *p_cursor == '>' )
            return true;
    }
    return false;
}
637 | | |
/*
 * Singly linked stack of tag names. Each entry owns its psz_tagname, which
 * is freed when HasTag() removes the entry.
 */
typedef struct tag_stack tag_stack_t;
struct tag_stack
{
    char* psz_tagname;
    tag_stack_t *p_next;
};

/* Pushes psz_tagname on top of *pp_stack; the stack keeps the pointer, it
 * does not copy the string. If the entry cannot be allocated the stack is
 * left untouched and psz_tagname is neither stored nor freed. */
static void AppendTag( tag_stack_t **pp_stack, char* psz_tagname )
{
    tag_stack_t* p_entry = malloc( sizeof( *p_entry ) );
    if ( p_entry == NULL )
        return;
    p_entry->psz_tagname = psz_tagname;
    p_entry->p_next = *pp_stack;
    *pp_stack = p_entry;
}

/* Looks for psz_tagname (case-insensitive) starting from the top of the
 * stack. The first matching entry is unlinked and freed together with its
 * name, and true is returned; false means no entry matched. */
static bool HasTag( tag_stack_t **pp_stack, const char* psz_tagname )
{
    /* pp_link always points at the pointer that references the entry being
     * examined, so unlinking is the same for the head and for inner nodes. */
    for ( tag_stack_t **pp_link = pp_stack; *pp_link != NULL;
          pp_link = &(*pp_link)->p_next )
    {
        tag_stack_t *p_entry = *pp_link;
        if ( strcasecmp( psz_tagname, p_entry->psz_tagname ) != 0 )
            continue;

        *pp_link = p_entry->p_next;
        free( p_entry->psz_tagname );
        free( p_entry );
        return true;
    }
    return false;
}
678 | | |
679 | | /* |
680 | | * mini style stack implementation |
681 | | */ |
682 | | typedef struct style_stack style_stack_t; |
683 | | struct style_stack |
684 | | { |
685 | | text_style_t* p_style; |
686 | | style_stack_t* p_next; |
687 | | }; |
688 | | |
689 | | static text_style_t* DuplicateAndPushStyle(style_stack_t** pp_stack) |
690 | 0 | { |
691 | 0 | text_style_t* p_dup = ( *pp_stack ) ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_Create( STYLE_NO_DEFAULTS ); |
692 | 0 | if ( unlikely( !p_dup ) ) |
693 | 0 | return NULL; |
694 | 0 | style_stack_t* p_entry = malloc( sizeof( *p_entry ) ); |
695 | 0 | if ( unlikely( !p_entry ) ) |
696 | 0 | { |
697 | 0 | text_style_Delete( p_dup ); |
698 | 0 | return NULL; |
699 | 0 | } |
700 | | // Give the style ownership to the segment. |
701 | 0 | p_entry->p_style = p_dup; |
702 | 0 | p_entry->p_next = *pp_stack; |
703 | 0 | *pp_stack = p_entry; |
704 | 0 | return p_dup; |
705 | 0 | } |
706 | | |
707 | | static void PopStyle(style_stack_t** pp_stack) |
708 | 0 | { |
709 | 0 | style_stack_t* p_old = *pp_stack; |
710 | 0 | if ( !p_old ) |
711 | 0 | return; |
712 | 0 | *pp_stack = p_old->p_next; |
713 | | // Don't free the style, it is now owned by the text_segment_t |
714 | 0 | free( p_old ); |
715 | 0 | } |
716 | | |
717 | | static text_segment_t* NewTextSegmentPushStyle( text_segment_t* p_segment, style_stack_t** pp_stack ) |
718 | 0 | { |
719 | 0 | text_segment_t* p_new = text_segment_New( NULL ); |
720 | 0 | if ( unlikely( p_new == NULL ) ) |
721 | 0 | return NULL; |
722 | 0 | text_style_t* p_style = DuplicateAndPushStyle( pp_stack ); |
723 | 0 | p_new->style = p_style; |
724 | 0 | p_segment->p_next = p_new; |
725 | 0 | return p_new; |
726 | 0 | } |
727 | | |
728 | | static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_stack_t** pp_stack ) |
729 | 0 | { |
730 | 0 | text_segment_t* p_new = text_segment_New( NULL ); |
731 | 0 | if ( unlikely( p_new == NULL ) ) |
732 | 0 | return NULL; |
733 | | // We shouldn't have an empty stack since this happens when closing a tag, |
734 | | // but better be safe than sorry if (/when) we encounter a broken subtitle file. |
735 | 0 | PopStyle( pp_stack ); |
736 | 0 | text_style_t* p_dup = ( *pp_stack ) ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_Create( STYLE_NO_DEFAULTS ); |
737 | 0 | p_new->style = p_dup; |
738 | 0 | p_segment->p_next = p_new; |
739 | 0 | return p_new; |
740 | 0 | } |
741 | | |
742 | | static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle ) |
743 | 0 | { |
744 | 0 | text_segment_t* p_segment; |
745 | 0 | text_segment_t* p_first_segment; |
746 | 0 | style_stack_t* p_stack = NULL; |
747 | 0 | tag_stack_t* p_tag_stack = NULL; |
748 | | |
749 | | //FIXME: Remove initial allocation? Might make the below code more complicated |
750 | 0 | p_first_segment = p_segment = text_segment_New( "" ); |
751 | |
|
752 | 0 | *pi_align = -1; |
753 | | |
754 | | /* */ |
755 | 0 | while( *psz_subtitle ) |
756 | 0 | { |
757 | | /* HTML extensions */ |
758 | 0 | if( *psz_subtitle == '<' ) |
759 | 0 | { |
760 | 0 | char *psz_tagname = GetTag( &psz_subtitle, false ); |
761 | 0 | if ( psz_tagname != NULL ) |
762 | 0 | { |
763 | 0 | if( !strcasecmp( psz_tagname, "br" ) ) |
764 | 0 | { |
765 | 0 | if ( !AppendCharacter( p_segment, '\n' ) ) |
766 | 0 | { |
767 | 0 | free( psz_tagname ); |
768 | 0 | goto fail; |
769 | 0 | } |
770 | 0 | } |
771 | 0 | else if( !strcasecmp( psz_tagname, "b" ) ) |
772 | 0 | { |
773 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
774 | 0 | p_segment->style->i_style_flags |= STYLE_BOLD; |
775 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
776 | 0 | } |
777 | 0 | else if( !strcasecmp( psz_tagname, "i" ) ) |
778 | 0 | { |
779 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
780 | 0 | p_segment->style->i_style_flags |= STYLE_ITALIC; |
781 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
782 | 0 | } |
783 | 0 | else if( !strcasecmp( psz_tagname, "u" ) ) |
784 | 0 | { |
785 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
786 | 0 | p_segment->style->i_style_flags |= STYLE_UNDERLINE; |
787 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
788 | 0 | } |
789 | 0 | else if( !strcasecmp( psz_tagname, "s" ) ) |
790 | 0 | { |
791 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
792 | 0 | p_segment->style->i_style_flags |= STYLE_STRIKEOUT; |
793 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
794 | 0 | } |
795 | 0 | else if( !strcasecmp( psz_tagname, "font" ) ) |
796 | 0 | { |
797 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
798 | |
|
799 | 0 | char* psz_attribute_name; |
800 | 0 | char* psz_attribute_value; |
801 | |
|
802 | 0 | while( ( psz_attribute_name = ConsumeAttribute( &psz_subtitle, &psz_attribute_value ) ) ) |
803 | 0 | { |
804 | 0 | if ( !psz_attribute_value ) |
805 | 0 | { |
806 | 0 | free( psz_attribute_name ); |
807 | 0 | continue; |
808 | 0 | } |
809 | 0 | if ( !strcasecmp( psz_attribute_name, "face" ) ) |
810 | 0 | { |
811 | 0 | free(p_segment->style->psz_fontname); |
812 | 0 | p_segment->style->psz_fontname = psz_attribute_value; |
813 | | // We don't want to free the attribute value since it has become our fontname |
814 | 0 | psz_attribute_value = NULL; |
815 | 0 | } |
816 | 0 | else if ( !strcasecmp( psz_attribute_name, "family" ) ) |
817 | 0 | { |
818 | 0 | free(p_segment->style->psz_monofontname); |
819 | 0 | p_segment->style->psz_monofontname = psz_attribute_value; |
820 | 0 | psz_attribute_value = NULL; |
821 | 0 | } |
822 | 0 | else if ( !strcasecmp( psz_attribute_name, "size" ) ) |
823 | 0 | { |
824 | 0 | int size = atoi( psz_attribute_value ); |
825 | 0 | if( size ) |
826 | 0 | { |
827 | 0 | p_segment->style->i_font_size = size; |
828 | 0 | p_segment->style->f_font_relsize = STYLE_DEFAULT_REL_FONT_SIZE * |
829 | 0 | STYLE_DEFAULT_FONT_SIZE / p_segment->style->i_font_size; |
830 | 0 | } |
831 | 0 | } |
832 | 0 | else if ( !strcasecmp( psz_attribute_name, "color" ) ) |
833 | 0 | { |
834 | 0 | p_segment->style->i_font_color = vlc_html_color( psz_attribute_value, NULL ); |
835 | 0 | p_segment->style->i_features |= STYLE_HAS_FONT_COLOR; |
836 | 0 | } |
837 | 0 | else if ( !strcasecmp( psz_attribute_name, "outline-color" ) ) |
838 | 0 | { |
839 | 0 | p_segment->style->i_outline_color = vlc_html_color( psz_attribute_value, NULL ); |
840 | 0 | p_segment->style->i_features |= STYLE_HAS_OUTLINE_COLOR; |
841 | 0 | } |
842 | 0 | else if ( !strcasecmp( psz_attribute_name, "shadow-color" ) ) |
843 | 0 | { |
844 | 0 | p_segment->style->i_shadow_color = vlc_html_color( psz_attribute_value, NULL ); |
845 | 0 | p_segment->style->i_features |= STYLE_HAS_SHADOW_COLOR; |
846 | 0 | } |
847 | 0 | else if ( !strcasecmp( psz_attribute_name, "outline-level" ) ) |
848 | 0 | { |
849 | 0 | p_segment->style->i_outline_width = atoi( psz_attribute_value ); |
850 | 0 | } |
851 | 0 | else if ( !strcasecmp( psz_attribute_name, "shadow-level" ) ) |
852 | 0 | { |
853 | 0 | p_segment->style->i_shadow_width = atoi( psz_attribute_value ); |
854 | 0 | } |
855 | 0 | else if ( !strcasecmp( psz_attribute_name, "back-color" ) ) |
856 | 0 | { |
857 | 0 | p_segment->style->i_background_color = vlc_html_color( psz_attribute_value, NULL ); |
858 | 0 | p_segment->style->i_features |= STYLE_HAS_BACKGROUND_COLOR; |
859 | 0 | } |
860 | 0 | else if ( !strcasecmp( psz_attribute_name, "alpha" ) ) |
861 | 0 | { |
862 | 0 | p_segment->style->i_font_alpha = atoi( psz_attribute_value ); |
863 | 0 | p_segment->style->i_features |= STYLE_HAS_FONT_ALPHA; |
864 | 0 | } |
865 | |
|
866 | 0 | free( psz_attribute_name ); |
867 | 0 | free( psz_attribute_value ); |
868 | 0 | } |
869 | 0 | } |
870 | 0 | else |
871 | 0 | { |
872 | | // This is an unknown tag. We need to hide it if it's properly closed, and display it otherwise |
873 | 0 | if ( !IsClosed( psz_subtitle, psz_tagname ) ) |
874 | 0 | { |
875 | 0 | AppendCharacter( p_segment, '<' ); |
876 | 0 | AppendString( p_segment, psz_tagname ); |
877 | 0 | AppendCharacter( p_segment, '>' ); |
878 | 0 | } |
879 | 0 | else |
880 | 0 | { |
881 | 0 | AppendTag( &p_tag_stack, psz_tagname ); |
882 | | // We don't want to free the tagname now, it will be freed when the tag |
883 | | // gets popped from the stack. |
884 | 0 | psz_tagname = NULL; |
885 | 0 | } |
886 | | // In any case, fall through and skip to the closing tag. |
887 | 0 | } |
888 | | // Skip potential spaces & end tag |
889 | 0 | while ( *psz_subtitle && *psz_subtitle != '>' ) |
890 | 0 | psz_subtitle++; |
891 | 0 | if ( *psz_subtitle == '>' ) |
892 | 0 | psz_subtitle++; |
893 | |
|
894 | 0 | free( psz_tagname ); |
895 | 0 | } |
896 | 0 | else if( !strncmp( psz_subtitle, "</", 2 )) |
897 | 0 | { |
898 | 0 | char* psz_closetagname = GetTag( &psz_subtitle, true ); |
899 | 0 | if ( psz_closetagname != NULL ) |
900 | 0 | { |
901 | 0 | if ( !strcasecmp( psz_closetagname, "b" ) || |
902 | 0 | !strcasecmp( psz_closetagname, "i" ) || |
903 | 0 | !strcasecmp( psz_closetagname, "u" ) || |
904 | 0 | !strcasecmp( psz_closetagname, "s" ) || |
905 | 0 | !strcasecmp( psz_closetagname, "font" ) ) |
906 | 0 | { |
907 | | // A closing tag for one of the tags we handle, meaning |
908 | | // we pushed a style onto the stack earlier |
909 | 0 | p_segment = NewTextSegmentPopStyle( p_segment, &p_stack ); |
910 | 0 | } |
911 | 0 | else |
912 | 0 | { |
913 | | // Unknown closing tag. If it is closing an unknown tag, ignore it. Otherwise, display it |
914 | 0 | if ( !HasTag( &p_tag_stack, psz_closetagname ) ) |
915 | 0 | { |
916 | 0 | AppendString( p_segment, "</" ); |
917 | 0 | AppendString( p_segment, psz_closetagname ); |
918 | 0 | AppendCharacter( p_segment, '>' ); |
919 | 0 | } |
920 | 0 | } |
921 | 0 | while ( *psz_subtitle == ' ' ) |
922 | 0 | psz_subtitle++; |
923 | 0 | if ( *psz_subtitle == '>' ) |
924 | 0 | psz_subtitle++; |
925 | 0 | free( psz_closetagname ); |
926 | 0 | } |
927 | 0 | else |
928 | 0 | { |
929 | | /** |
930 | | * This doesn't appear to be a valid tag closing syntax. |
931 | | * Simply append the text |
932 | | */ |
933 | 0 | AppendString( p_segment, "</" ); |
934 | 0 | psz_subtitle += 2; |
935 | 0 | } |
936 | 0 | } |
937 | 0 | else |
938 | 0 | { |
939 | | /* We have an unknown tag, just append it, and move on. |
940 | | * The rest of the string won't be recognized as a tag, and |
941 | | * we will ignore unknown closing tag |
942 | | */ |
943 | 0 | AppendCharacter( p_segment, '<' ); |
944 | 0 | psz_subtitle++; |
945 | 0 | } |
946 | 0 | } |
947 | | /* SSA extensions */ |
948 | 0 | else if( psz_subtitle[0] == '{' && psz_subtitle[1] == '\\' && |
949 | 0 | strchr( psz_subtitle, '}' ) ) |
950 | 0 | { |
951 | | /* Check for forced alignment */ |
952 | 0 | if( *pi_align < 0 && |
953 | 0 | !strncmp( psz_subtitle, "{\\an", 4 ) && psz_subtitle[4] >= '1' && psz_subtitle[4] <= '9' && psz_subtitle[5] == '}' ) |
954 | 0 | { |
955 | 0 | static const int pi_vertical[3] = { SUBPICTURE_ALIGN_BOTTOM, 0, SUBPICTURE_ALIGN_TOP }; |
956 | 0 | static const int pi_horizontal[3] = { SUBPICTURE_ALIGN_LEFT, 0, SUBPICTURE_ALIGN_RIGHT }; |
957 | 0 | const int i_id = psz_subtitle[4] - '1'; |
958 | |
|
959 | 0 | *pi_align = pi_vertical[i_id/3] | pi_horizontal[i_id%3]; |
960 | 0 | } |
961 | | /* TODO fr -> rotation */ |
962 | | |
963 | | /* Hide {\stupidity} */ |
964 | 0 | psz_subtitle = strchr( psz_subtitle, '}' ) + 1; |
965 | 0 | } |
966 | | /* MicroDVD extensions */ |
967 | | /* FIXME: |
968 | | * - Currently, we don't distinguish between X and x, and we should: |
969 | | * Capital letters apply to the whole text and not just one line |
970 | | * - We don't support Position and Coordinates |
971 | | * - We don't support the DEFAULT flag (HEADER) |
972 | | */ |
973 | | |
974 | 0 | else if( psz_subtitle[0] == '{' && psz_subtitle[1] != 0 && |
975 | 0 | psz_subtitle[2] == ':' && strchr( &psz_subtitle[2], '}' ) ) |
976 | 0 | { |
977 | 0 | const char *psz_tag_end = strchr( &psz_subtitle[2], '}' ); |
978 | 0 | size_t i_len = psz_tag_end - &psz_subtitle[3]; |
979 | |
|
980 | 0 | if( psz_subtitle[1] == 'Y' || psz_subtitle[1] == 'y' ) |
981 | 0 | { |
982 | 0 | if( psz_subtitle[3] == 'i' ) |
983 | 0 | { |
984 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
985 | 0 | p_segment->style->i_style_flags |= STYLE_ITALIC; |
986 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
987 | 0 | psz_subtitle++; |
988 | 0 | } |
989 | 0 | if( psz_subtitle[3] == 'b' ) |
990 | 0 | { |
991 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
992 | 0 | p_segment->style->i_style_flags |= STYLE_BOLD; |
993 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
994 | 0 | psz_subtitle++; |
995 | 0 | } |
996 | 0 | if( psz_subtitle[3] == 'u' ) |
997 | 0 | { |
998 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
999 | 0 | p_segment->style->i_style_flags |= STYLE_UNDERLINE; |
1000 | 0 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
1001 | 0 | psz_subtitle++; |
1002 | 0 | } |
1003 | 0 | } |
1004 | 0 | else if( (psz_subtitle[1] == 'C' || psz_subtitle[1] == 'c' ) |
1005 | 0 | && psz_subtitle[3] == '$' && i_len >= 7 ) |
1006 | 0 | { |
1007 | | /* Yes, they use BBGGRR, instead of RRGGBB */ |
1008 | 0 | char psz_color[7]; |
1009 | 0 | psz_color[0] = psz_subtitle[8]; psz_color[1] = psz_subtitle[9]; |
1010 | 0 | psz_color[2] = psz_subtitle[6]; psz_color[3] = psz_subtitle[7]; |
1011 | 0 | psz_color[4] = psz_subtitle[4]; psz_color[5] = psz_subtitle[5]; |
1012 | 0 | psz_color[6] = '\0'; |
1013 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1014 | 0 | p_segment->style->i_font_color = vlc_html_color( psz_color, NULL ); |
1015 | 0 | p_segment->style->i_features |= STYLE_HAS_FONT_COLOR; |
1016 | 0 | } |
1017 | 0 | else if( psz_subtitle[1] == 'F' || psz_subtitle[1] == 'f' ) |
1018 | 0 | { |
1019 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1020 | 0 | free(p_segment->style->psz_fontname); |
1021 | 0 | p_segment->style->psz_fontname = strndup( &psz_subtitle[3], i_len ); |
1022 | 0 | } |
1023 | 0 | else if( psz_subtitle[1] == 'S' || psz_subtitle[1] == 's' ) |
1024 | 0 | { |
1025 | 0 | int size = atoi( &psz_subtitle[3] ); |
1026 | 0 | if( size ) |
1027 | 0 | { |
1028 | 0 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1029 | 0 | p_segment->style->i_font_size = size; |
1030 | 0 | p_segment->style->f_font_relsize = STYLE_DEFAULT_REL_FONT_SIZE * |
1031 | 0 | STYLE_DEFAULT_FONT_SIZE / p_segment->style->i_font_size; |
1032 | |
|
1033 | 0 | } |
1034 | 0 | } |
1035 | | /* Currently unsupported since we don't have access to the i_align flag here |
1036 | | else if( psz_subtitle[1] == 'P' ) |
1037 | | { |
1038 | | if( psz_subtitle[3] == "1" ) |
1039 | | i_align = SUBPICTURE_ALIGN_TOP; |
1040 | | else if( psz_subtitle[3] == "0" ) |
1041 | | i_align = SUBPICTURE_ALIGN_BOTTOM; |
1042 | | } */ |
1043 | | // Hide other {x:y} atrocities, notably {o:x} |
1044 | 0 | psz_subtitle = psz_tag_end + 1; |
1045 | 0 | } |
1046 | 0 | else |
1047 | 0 | { |
1048 | 0 | if( *psz_subtitle == '\n' || !strncasecmp( psz_subtitle, "\\n", 2 ) ) |
1049 | 0 | { |
1050 | 0 | if ( !AppendCharacter( p_segment, '\n' ) ) |
1051 | 0 | goto fail; |
1052 | 0 | if ( *psz_subtitle == '\n' ) |
1053 | 0 | psz_subtitle++; |
1054 | 0 | else |
1055 | 0 | psz_subtitle += 2; |
1056 | 0 | } |
1057 | 0 | else if( !strncasecmp( psz_subtitle, "\\h", 2 ) ) |
1058 | 0 | { |
1059 | 0 | if ( !AppendString( p_segment, "\xC2\xA0" ) ) |
1060 | 0 | goto fail; |
1061 | 0 | psz_subtitle += 2; |
1062 | 0 | } |
1063 | 0 | else |
1064 | 0 | { |
1065 | | //FIXME: Highly inefficient |
1066 | 0 | AppendCharacter( p_segment, *psz_subtitle ); |
1067 | 0 | psz_subtitle++; |
1068 | 0 | } |
1069 | 0 | } |
1070 | 0 | } |
1071 | 0 | while ( p_stack ) |
1072 | 0 | PopStyle( &p_stack ); |
1073 | 0 | while ( p_tag_stack ) |
1074 | 0 | { |
1075 | 0 | tag_stack_t *p_tag = p_tag_stack; |
1076 | 0 | p_tag_stack = p_tag_stack->p_next; |
1077 | 0 | free( p_tag->psz_tagname ); |
1078 | 0 | free( p_tag ); |
1079 | 0 | } |
1080 | |
|
1081 | 0 | return p_first_segment; |
1082 | | |
1083 | 0 | fail: |
1084 | 0 | text_segment_ChainDelete( p_first_segment ); |
1085 | 0 | return NULL; |
1086 | 0 | } |