/src/vlc/modules/codec/subsdec.c
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * subsdec.c : text subtitle decoder |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2000-2006 VLC authors and VideoLAN |
5 | | * |
6 | | * Authors: Gildas Bazin <gbazin@videolan.org> |
7 | | * Samuel Hocevar <sam@zoy.org> |
8 | | * Derk-Jan Hartman <hartman at videolan dot org> |
9 | | * Bernie Purcell <bitmap@videolan.org> |
10 | | * |
11 | | * This program is free software; you can redistribute it and/or modify it |
12 | | * under the terms of the GNU Lesser General Public License as published by |
13 | | * the Free Software Foundation; either version 2.1 of the License, or |
14 | | * (at your option) any later version. |
15 | | * |
16 | | * This program is distributed in the hope that it will be useful, |
17 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | * GNU Lesser General Public License for more details. |
20 | | * |
21 | | * You should have received a copy of the GNU Lesser General Public License |
22 | | * along with this program; if not, write to the Free Software Foundation, |
23 | | * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. |
24 | | *****************************************************************************/ |
25 | | |
26 | | /***************************************************************************** |
27 | | * Preamble |
28 | | *****************************************************************************/ |
29 | | #ifdef HAVE_CONFIG_H |
30 | | # include "config.h" |
31 | | #endif |
32 | | |
33 | | #include <limits.h> |
34 | | #include <errno.h> |
35 | | #include <ctype.h> |
36 | | |
37 | | #include <vlc_common.h> |
38 | | #include <vlc_plugin.h> |
39 | | #include <vlc_codec.h> |
40 | | #include <vlc_charset.h> |
41 | | #include <vlc_xml.h> |
42 | | |
43 | | #include "substext.h" |
44 | | |
45 | | /***************************************************************************** |
46 | | * Module descriptor. |
47 | | *****************************************************************************/ |
48 | | static const char *const ppsz_encodings[] = { |
49 | | "", |
50 | | "system", |
51 | | "UTF-8", |
52 | | "UTF-16", |
53 | | "UTF-16BE", |
54 | | "UTF-16LE", |
55 | | "GB18030", |
56 | | "ISO-8859-15", |
57 | | "Windows-1252", |
58 | | "IBM850", |
59 | | "ISO-8859-2", |
60 | | "Windows-1250", |
61 | | "ISO-8859-3", |
62 | | "ISO-8859-10", |
63 | | "Windows-1251", |
64 | | "KOI8-R", |
65 | | "KOI8-U", |
66 | | "ISO-8859-6", |
67 | | "Windows-1256", |
68 | | "ISO-8859-7", |
69 | | "Windows-1253", |
70 | | "ISO-8859-8", |
71 | | "Windows-1255", |
72 | | "ISO-8859-9", |
73 | | "Windows-1254", |
74 | | "ISO-8859-11", |
75 | | "Windows-874", |
76 | | "ISO-8859-13", |
77 | | "Windows-1257", |
78 | | "ISO-8859-14", |
79 | | "ISO-8859-16", |
80 | | "ISO-2022-CN-EXT", |
81 | | "EUC-CN", |
82 | | "ISO-2022-JP-2", |
83 | | "EUC-JP", |
84 | | "Shift_JIS", |
85 | | "CP949", |
86 | | "ISO-2022-KR", |
87 | | "Big5", |
88 | | "ISO-2022-TW", |
89 | | "Big5-HKSCS", |
90 | | "VISCII", |
91 | | "Windows-1258", |
92 | | }; |
93 | | |
94 | | static const char *const ppsz_encoding_names[] = { |
95 | | /* xgettext: |
96 | | The character encoding name in parenthesis corresponds to that used for |
97 | | the GetACP translation. "Windows-1252" applies to Western European |
98 | | languages using the Latin alphabet. */ |
99 | | N_("Default (Windows-1252)"), |
100 | | N_("System codeset"), |
101 | | N_("Universal (UTF-8)"), |
102 | | N_("Universal (UTF-16)"), |
103 | | N_("Universal (big endian UTF-16)"), |
104 | | N_("Universal (little endian UTF-16)"), |
105 | | N_("Universal, Chinese (GB18030)"), |
106 | | |
107 | | /* ISO 8859 and the likes */ |
108 | | /* 1 */ |
109 | | N_("Western European (Latin-9)"), /* mostly superset of Latin-1 */ |
110 | | N_("Western European (Windows-1252)"), |
111 | | N_("Western European (IBM 00850)"), |
112 | | /* 2 */ |
113 | | N_("Eastern European (Latin-2)"), |
114 | | N_("Eastern European (Windows-1250)"), |
115 | | /* 3 */ |
116 | | N_("Esperanto (Latin-3)"), |
117 | | /* 4 */ |
118 | | N_("Nordic (Latin-6)"), /* Latin 6 supersedes Latin 4 */ |
119 | | /* 5 */ |
120 | | N_("Cyrillic (Windows-1251)"), /* ISO 8859-5 is not practically used */ |
121 | | N_("Russian (KOI8-R)"), |
122 | | N_("Ukrainian (KOI8-U)"), |
123 | | /* 6 */ |
124 | | N_("Arabic (ISO 8859-6)"), |
125 | | N_("Arabic (Windows-1256)"), |
126 | | /* 7 */ |
127 | | N_("Greek (ISO 8859-7)"), |
128 | | N_("Greek (Windows-1253)"), |
129 | | /* 8 */ |
130 | | N_("Hebrew (ISO 8859-8)"), |
131 | | N_("Hebrew (Windows-1255)"), |
132 | | /* 9 */ |
133 | | N_("Turkish (ISO 8859-9)"), |
134 | | N_("Turkish (Windows-1254)"), |
135 | | /* 10 -> 4 */ |
136 | | /* 11 */ |
137 | | N_("Thai (TIS 620-2533/ISO 8859-11)"), |
138 | | N_("Thai (Windows-874)"), |
139 | | /* 13 */ |
140 | | N_("Baltic (Latin-7)"), |
141 | | N_("Baltic (Windows-1257)"), |
142 | | /* 12 -> /dev/null */ |
143 | | /* 14 */ |
144 | | N_("Celtic (Latin-8)"), |
145 | | /* 15 -> 1 */ |
146 | | /* 16 */ |
147 | | N_("South-Eastern European (Latin-10)"), |
148 | | /* CJK families */ |
149 | | N_("Simplified Chinese (ISO-2022-CN-EXT)"), |
150 | | N_("Simplified Chinese Unix (EUC-CN)"), |
151 | | N_("Japanese (7-bits JIS/ISO-2022-JP-2)"), |
152 | | N_("Japanese Unix (EUC-JP)"), |
153 | | N_("Japanese (Shift JIS)"), |
154 | | N_("Korean (EUC-KR/CP949)"), |
155 | | N_("Korean (ISO-2022-KR)"), |
156 | | N_("Traditional Chinese (Big5)"), |
157 | | N_("Traditional Chinese Unix (EUC-TW)"), |
158 | | N_("Hong-Kong Supplementary (HKSCS)"), |
159 | | /* Other */ |
160 | | N_("Vietnamese (VISCII)"), |
161 | | N_("Vietnamese (Windows-1258)"), |
162 | | }; |
163 | | |
164 | | static const int pi_justification[] = { -1, 0, SUBPICTURE_ALIGN_LEFT, SUBPICTURE_ALIGN_RIGHT }; |
165 | | static const char *const ppsz_justification_text[] = { |
166 | | N_("Auto"),N_("Center"),N_("Left"),N_("Right") |
167 | | }; |
168 | | |
169 | | #define ENCODING_TEXT N_("Subtitle text encoding") |
170 | | #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles") |
171 | | #define ALIGN_TEXT N_("Subtitle justification") |
172 | | #define ALIGN_LONGTEXT N_("Set the justification of subtitles") |
173 | | #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitle autodetection") |
174 | | #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \ |
175 | | "UTF-8 encoding within subtitle files.") |
176 | | |
177 | | static int OpenDecoder ( vlc_object_t * ); |
178 | | static void CloseDecoder ( vlc_object_t * ); |
179 | | |
180 | 108 | vlc_module_begin () |
181 | 54 | set_shortname( N_("Subtitles")) |
182 | 54 | set_description( N_("Text subtitle decoder") ) |
183 | 54 | set_capability( "spu decoder", 50 ) |
184 | 108 | set_callbacks( OpenDecoder, CloseDecoder ) |
185 | 54 | set_subcategory( SUBCAT_INPUT_SCODEC ) |
186 | | |
187 | 54 | add_integer( "subsdec-align", -1, ALIGN_TEXT, ALIGN_LONGTEXT ) |
188 | 54 | change_integer_list( pi_justification, ppsz_justification_text ) |
189 | 54 | add_string( "subsdec-encoding", "", |
190 | 54 | ENCODING_TEXT, ENCODING_LONGTEXT ) |
191 | 54 | change_string_list( ppsz_encodings, ppsz_encoding_names ) |
192 | 54 | add_bool( "subsdec-autodetect-utf8", true, |
193 | 54 | AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT ) |
194 | 54 | vlc_module_end () |
195 | | |
196 | | /***************************************************************************** |
197 | | * Local prototypes |
198 | | *****************************************************************************/ |
199 | | #define NO_BREAKING_SPACE " " |
200 | | |
201 | | typedef struct |
202 | | { |
203 | | int i_align; /* Subtitles alignment on the vout */ |
204 | | |
205 | | vlc_iconv_t iconv_handle; /* handle to iconv instance */ |
206 | | bool b_autodetect_utf8; |
207 | | } decoder_sys_t; |
208 | | |
209 | | |
210 | | static int DecodeBlock ( decoder_t *, block_t * ); |
211 | | static subpicture_t *ParseText ( decoder_t *, block_t * ); |
212 | | static text_segment_t *ParseSubtitles(int *pi_align, const char * ); |
213 | | |
214 | | /***************************************************************************** |
215 | | * OpenDecoder: probe the decoder and return score |
216 | | ***************************************************************************** |
217 | | * Tries to launch a decoder and return score so that the interface is able |
218 | | * to chose. |
219 | | *****************************************************************************/ |
220 | | static int OpenDecoder( vlc_object_t *p_this ) |
221 | 17.9k | { |
222 | 17.9k | decoder_t *p_dec = (decoder_t*)p_this; |
223 | 17.9k | decoder_sys_t *p_sys; |
224 | | |
225 | 17.9k | switch( p_dec->fmt_in->i_codec ) |
226 | 17.9k | { |
227 | 5.47k | case VLC_CODEC_SUBT: |
228 | 5.47k | case VLC_CODEC_ITU_T140: |
229 | 5.47k | break; |
230 | 12.4k | default: |
231 | 12.4k | return VLC_EGENERIC; |
232 | 17.9k | } |
233 | | |
234 | | /* Allocate the memory needed to store the decoder's structure */ |
235 | 5.47k | p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) ); |
236 | 5.47k | if( p_sys == NULL ) |
237 | 0 | return VLC_ENOMEM; |
238 | | |
239 | 5.47k | p_dec->pf_decode = DecodeBlock; |
240 | 5.47k | p_dec->fmt_out.i_codec = 0; |
241 | | |
242 | | /* init of p_sys */ |
243 | 5.47k | p_sys->i_align = -1; |
244 | 5.47k | p_sys->iconv_handle = (vlc_iconv_t)-1; |
245 | 5.47k | p_sys->b_autodetect_utf8 = false; |
246 | | |
247 | 5.47k | const char *encoding; |
248 | 5.47k | char *var = NULL; |
249 | | |
250 | | /* First try demux-specified encoding */ |
251 | 5.47k | if( p_dec->fmt_in->i_codec == VLC_CODEC_ITU_T140 ) |
252 | 0 | encoding = "UTF-8"; /* IUT T.140 is always using UTF-8 */ |
253 | 5.47k | else |
254 | 5.47k | if( p_dec->fmt_in->subs.psz_encoding && *p_dec->fmt_in->subs.psz_encoding ) |
255 | 150 | { |
256 | 150 | encoding = p_dec->fmt_in->subs.psz_encoding; |
257 | 150 | msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s", |
258 | 150 | encoding); |
259 | 150 | } |
260 | 5.32k | else |
261 | 5.32k | { |
262 | | /* Second, try configured encoding */ |
263 | 5.32k | if ((var = var_InheritString (p_dec, "subsdec-encoding")) != NULL) |
264 | 0 | { |
265 | 0 | msg_Dbg (p_dec, "trying configured character encoding: %s", var); |
266 | 0 | if (!strcmp (var, "system")) |
267 | 0 | { |
268 | 0 | free (var); |
269 | 0 | var = NULL; |
270 | 0 | encoding = ""; |
271 | | /* ^ iconv() treats "" as nl_langinfo(CODESET) */ |
272 | 0 | } |
273 | 0 | else |
274 | 0 | encoding = var; |
275 | 0 | } |
276 | 5.32k | else |
277 | | /* Third, try "local" encoding */ |
278 | 5.32k | { |
279 | | /* xgettext: |
280 | | The Windows ANSI code page most commonly used for this language. |
281 | | VLC uses this as a guess of the subtitle files character set |
282 | | (if UTF-8 and UTF-16 autodetection fails). |
283 | | Western European languages normally use "CP1252", which is a |
284 | | Microsoft-variant of ISO 8859-1. That suits the Latin alphabet. |
285 | | Other scripts use other code pages. |
286 | | |
287 | | This MUST be a valid iconv character set. If unsure, please refer |
288 | | the VideoLAN translators mailing list. */ |
289 | 5.32k | encoding = vlc_pgettext("GetACP", "CP1252"); |
290 | 5.32k | msg_Dbg (p_dec, "trying default character encoding: %s", encoding); |
291 | 5.32k | } |
292 | | |
293 | | /* Check UTF-8 autodetection */ |
294 | 5.32k | if (var_InheritBool (p_dec, "subsdec-autodetect-utf8")) |
295 | 5.32k | { |
296 | 5.32k | msg_Dbg (p_dec, "using automatic UTF-8 detection"); |
297 | 5.32k | p_sys->b_autodetect_utf8 = true; |
298 | 5.32k | } |
299 | 5.32k | } |
300 | | |
301 | 5.47k | if (strcasecmp (encoding, "UTF-8") && strcasecmp (encoding, "utf8")) |
302 | 5.32k | { |
303 | 5.32k | p_sys->iconv_handle = vlc_iconv_open ("UTF-8", encoding); |
304 | 5.32k | if (p_sys->iconv_handle == (vlc_iconv_t)(-1)) |
305 | 5.32k | msg_Err (p_dec, "cannot convert from %s: %s", encoding, |
306 | 5.32k | vlc_strerror_c(errno)); |
307 | 5.32k | } |
308 | 5.47k | free (var); |
309 | | |
310 | 5.47k | p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" ); |
311 | | |
312 | 5.47k | return VLC_SUCCESS; |
313 | 5.47k | } |
314 | | |
315 | | /**************************************************************************** |
316 | | * DecodeBlock: the whole thing |
317 | | **************************************************************************** |
318 | | * This function must be fed with complete subtitles units. |
319 | | ****************************************************************************/ |
320 | | static int DecodeBlock( decoder_t *p_dec, block_t *p_block ) |
321 | 445k | { |
322 | 445k | subpicture_t *p_spu; |
323 | | |
324 | 445k | if( p_block == NULL ) /* No Drain */ |
325 | 225k | return VLCDEC_SUCCESS; |
326 | | |
327 | 219k | if( p_block->i_flags & BLOCK_FLAG_CORRUPTED ) |
328 | 0 | { |
329 | 0 | block_Release( p_block ); |
330 | 0 | return VLCDEC_SUCCESS; |
331 | 0 | } |
332 | | |
333 | 219k | p_spu = ParseText( p_dec, p_block ); |
334 | | |
335 | 219k | block_Release( p_block ); |
336 | 219k | if( p_spu != NULL ) |
337 | 218k | decoder_QueueSub( p_dec, p_spu ); |
338 | 219k | return VLCDEC_SUCCESS; |
339 | 219k | } |
340 | | |
341 | | /***************************************************************************** |
342 | | * CloseDecoder: clean up the decoder |
343 | | *****************************************************************************/ |
344 | | static void CloseDecoder( vlc_object_t *p_this ) |
345 | 5.47k | { |
346 | 5.47k | decoder_t *p_dec = (decoder_t *)p_this; |
347 | 5.47k | decoder_sys_t *p_sys = p_dec->p_sys; |
348 | | |
349 | 5.47k | if( p_sys->iconv_handle != (vlc_iconv_t)-1 ) |
350 | 5.32k | vlc_iconv_close( p_sys->iconv_handle ); |
351 | | |
352 | 5.47k | free( p_sys ); |
353 | 5.47k | } |
354 | | |
355 | | /***************************************************************************** |
356 | | * ParseText: parse an text subtitle packet and send it to the video output |
357 | | *****************************************************************************/ |
358 | | static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) |
359 | 219k | { |
360 | 219k | decoder_sys_t *p_sys = p_dec->p_sys; |
361 | 219k | subpicture_t *p_spu = NULL; |
362 | | |
363 | 219k | if( p_block->i_flags & BLOCK_FLAG_CORRUPTED ) |
364 | 0 | return NULL; |
365 | | |
366 | | /* We cannot display a subpicture with no date */ |
367 | 219k | if( p_block->i_pts == VLC_TICK_INVALID ) |
368 | 0 | { |
369 | 0 | msg_Warn( p_dec, "subtitle without a date" ); |
370 | 0 | return NULL; |
371 | 0 | } |
372 | | |
373 | | /* Check validity of packet data */ |
374 | | /* An "empty" line containing only \0 can be used to force |
375 | | and ephemer picture from the screen */ |
376 | 219k | if( p_block->i_buffer < 1 ) |
377 | 71 | { |
378 | 71 | msg_Warn( p_dec, "no subtitle data" ); |
379 | 71 | return NULL; |
380 | 71 | } |
381 | | |
382 | 219k | char *psz_subtitle = NULL; |
383 | | |
384 | | /* Should be resiliant against bad subtitles */ |
385 | 219k | if( p_sys->iconv_handle == (vlc_iconv_t)-1 || |
386 | 219k | p_sys->b_autodetect_utf8 ) |
387 | 23.3k | { |
388 | 23.3k | psz_subtitle = malloc( p_block->i_buffer + 1 ); |
389 | 23.3k | if( psz_subtitle == NULL ) |
390 | 0 | return NULL; |
391 | 23.3k | memcpy( psz_subtitle, p_block->p_buffer, p_block->i_buffer ); |
392 | 23.3k | psz_subtitle[p_block->i_buffer] = '\0'; |
393 | 23.3k | } |
394 | | |
395 | 219k | if( p_sys->iconv_handle == (vlc_iconv_t)-1 ) |
396 | 98 | { |
397 | 98 | if (EnsureUTF8( psz_subtitle ) == NULL) |
398 | 5 | { |
399 | 5 | msg_Err( p_dec, "failed to convert subtitle encoding.\n" |
400 | 5 | "Try manually setting a character-encoding " |
401 | 5 | "before you open the file." ); |
402 | 5 | } |
403 | 98 | } |
404 | 219k | else |
405 | 219k | { |
406 | 219k | if( p_sys->b_autodetect_utf8 ) |
407 | 23.2k | { |
408 | 23.2k | if( IsUTF8( psz_subtitle ) == NULL ) |
409 | 1.89k | { |
410 | 1.89k | msg_Dbg( p_dec, "invalid UTF-8 sequence: " |
411 | 1.89k | "disabling UTF-8 subtitles autodetection" ); |
412 | 1.89k | p_sys->b_autodetect_utf8 = false; |
413 | 1.89k | } |
414 | 23.2k | } |
415 | | |
416 | 219k | if( !p_sys->b_autodetect_utf8 ) |
417 | 198k | { |
418 | 198k | size_t inbytes_left = p_block->i_buffer; |
419 | 198k | size_t outbytes_left = 6 * inbytes_left; |
420 | 198k | char *psz_new_subtitle = xmalloc( outbytes_left + 1 ); |
421 | 198k | char *psz_convert_buffer_out = psz_new_subtitle; |
422 | 198k | const char *psz_convert_buffer_in = |
423 | 198k | psz_subtitle ? psz_subtitle : (char *)p_block->p_buffer; |
424 | | |
425 | 198k | size_t ret = vlc_iconv( p_sys->iconv_handle, |
426 | 198k | &psz_convert_buffer_in, &inbytes_left, |
427 | 198k | &psz_convert_buffer_out, &outbytes_left ); |
428 | | |
429 | 198k | *psz_convert_buffer_out++ = '\0'; |
430 | 198k | free( psz_subtitle ); |
431 | | |
432 | 198k | if( ( ret == (size_t)(-1) ) || inbytes_left ) |
433 | 1.06k | { |
434 | 1.06k | free( psz_new_subtitle ); |
435 | 1.06k | msg_Err( p_dec, "failed to convert subtitle encoding.\n" |
436 | 1.06k | "Try manually setting a character-encoding " |
437 | 1.06k | "before you open the file." ); |
438 | 1.06k | return NULL; |
439 | 1.06k | } |
440 | | |
441 | 197k | psz_subtitle = realloc( psz_new_subtitle, |
442 | 197k | psz_convert_buffer_out - psz_new_subtitle ); |
443 | 197k | if( !psz_subtitle ) |
444 | 0 | psz_subtitle = psz_new_subtitle; |
445 | 197k | } |
446 | 219k | } |
447 | | |
448 | | /* Create the subpicture unit */ |
449 | 218k | p_spu = decoder_NewSubpictureText( p_dec ); |
450 | 218k | if( !p_spu ) |
451 | 0 | { |
452 | 0 | free( psz_subtitle ); |
453 | 0 | return NULL; |
454 | 0 | } |
455 | 218k | p_spu->i_start = p_block->i_pts; |
456 | 218k | p_spu->i_stop = p_block->i_pts + p_block->i_length; |
457 | 218k | p_spu->b_ephemer = (p_block->i_length == 0); |
458 | | |
459 | 218k | subtext_updater_sys_t *p_spu_sys = p_spu->updater.sys; |
460 | | |
461 | 218k | int i_inline_align = -1; |
462 | 218k | p_spu_sys->region.p_segments = ParseSubtitles( &i_inline_align, psz_subtitle ); |
463 | 218k | free( psz_subtitle ); |
464 | 218k | p_spu_sys->region.b_absolute = false; |
465 | 218k | p_spu_sys->region.b_in_window = true; |
466 | 218k | if( p_sys->i_align >= 0 ) /* bottom ; left, right or centered */ |
467 | 0 | { |
468 | 0 | p_spu_sys->region.align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; |
469 | 0 | p_spu_sys->region.inner_align = p_sys->i_align; |
470 | 0 | } |
471 | 218k | else if( i_inline_align >= 0 ) |
472 | 225 | { |
473 | 225 | p_spu_sys->region.align = i_inline_align; |
474 | 225 | p_spu_sys->region.inner_align = i_inline_align; |
475 | 225 | } |
476 | 218k | else /* default, bottom ; centered */ |
477 | 218k | { |
478 | 218k | p_spu_sys->region.align = SUBPICTURE_ALIGN_BOTTOM; |
479 | 218k | p_spu_sys->region.inner_align = 0; |
480 | 218k | } |
481 | | |
482 | 218k | return p_spu; |
483 | 218k | } |
484 | | |
485 | | static bool AppendCharacter( text_segment_t* p_segment, char c ) |
486 | 3.60M | { |
487 | 3.60M | char* tmp; |
488 | 3.60M | if ( asprintf( &tmp, "%s%c", p_segment->psz_text ? p_segment->psz_text : "", c ) < 0 ) |
489 | 0 | return false; |
490 | 3.60M | free( p_segment->psz_text ); |
491 | 3.60M | p_segment->psz_text = tmp; |
492 | 3.60M | return true; |
493 | 3.60M | } |
494 | | |
495 | | static bool AppendString( text_segment_t* p_segment, const char* psz_str ) |
496 | 53.6k | { |
497 | 53.6k | char* tmp; |
498 | 53.6k | if ( asprintf( &tmp, "%s%s", p_segment->psz_text ? p_segment->psz_text : "", psz_str ) < 0 ) |
499 | 0 | return false; |
500 | 53.6k | free( p_segment->psz_text ); |
501 | 53.6k | p_segment->psz_text = tmp; |
502 | 53.6k | return true; |
503 | 53.6k | } |
504 | | |
505 | | static char* ConsumeAttribute( const char** ppsz_subtitle, char** ppsz_attribute_value ) |
506 | 17.0k | { |
507 | 17.0k | const char* psz_subtitle = *ppsz_subtitle; |
508 | 17.0k | char* psz_attribute_name; |
509 | 17.0k | *ppsz_attribute_value = NULL; |
510 | | |
511 | 26.8k | while (*psz_subtitle == ' ') |
512 | 9.77k | psz_subtitle++; |
513 | | |
514 | 17.0k | size_t attr_len = 0; |
515 | 17.0k | char delimiter; |
516 | | |
517 | 116k | while ( *psz_subtitle && isalpha( *psz_subtitle ) ) |
518 | 99.4k | { |
519 | 99.4k | psz_subtitle++; |
520 | 99.4k | attr_len++; |
521 | 99.4k | } |
522 | 17.0k | if ( !*psz_subtitle || attr_len == 0 ) |
523 | 6.77k | return NULL; |
524 | 10.3k | psz_attribute_name = malloc( attr_len + 1 ); |
525 | 10.3k | if ( unlikely( !psz_attribute_name ) ) |
526 | 0 | return NULL; |
527 | 10.3k | strncpy( psz_attribute_name, psz_subtitle - attr_len, attr_len ); |
528 | 10.3k | psz_attribute_name[attr_len] = 0; |
529 | | |
530 | | // Skip over to the attribute value |
531 | 317k | while ( *psz_subtitle && *psz_subtitle != '=' ) |
532 | 307k | psz_subtitle++; |
533 | 10.3k | if ( !*psz_subtitle ) |
534 | 361 | { |
535 | 361 | *ppsz_subtitle = psz_subtitle; |
536 | 361 | return psz_attribute_name; |
537 | 361 | } |
538 | | // Skip the '=' sign |
539 | 9.94k | psz_subtitle++; |
540 | | |
541 | | // Aknoledge the delimiter if any |
542 | 10.3k | while ( *psz_subtitle && isspace( *psz_subtitle) ) |
543 | 427 | psz_subtitle++; |
544 | | |
545 | 9.94k | if ( *psz_subtitle == '\'' || *psz_subtitle == '"' ) |
546 | 5.73k | { |
547 | | // Save the delimiter and skip it |
548 | 5.73k | delimiter = *psz_subtitle; |
549 | 5.73k | psz_subtitle++; |
550 | 5.73k | } |
551 | 4.20k | else |
552 | 4.20k | delimiter = 0; |
553 | | |
554 | | // Skip spaces, just in case |
555 | 10.7k | while ( *psz_subtitle && isspace( *psz_subtitle ) ) |
556 | 790 | psz_subtitle++; |
557 | | |
558 | 9.94k | attr_len = 0; |
559 | 305k | while ( *psz_subtitle && ( ( delimiter != 0 && *psz_subtitle != delimiter ) || |
560 | 131k | ( delimiter == 0 && ( !isspace(*psz_subtitle) && *psz_subtitle != '>' ) ) ) ) |
561 | 295k | { |
562 | 295k | psz_subtitle++; |
563 | 295k | attr_len++; |
564 | 295k | } |
565 | 9.94k | if ( attr_len == 0 ) |
566 | 788 | { |
567 | 788 | *ppsz_subtitle = psz_subtitle; |
568 | 788 | return psz_attribute_name; |
569 | 788 | } |
570 | 9.15k | if ( unlikely( !( *ppsz_attribute_value = malloc( attr_len + 1 ) ) ) ) |
571 | 0 | { |
572 | 0 | free( psz_attribute_name ); |
573 | 0 | return NULL; |
574 | 0 | } |
575 | 9.15k | strncpy( *ppsz_attribute_value, psz_subtitle - attr_len, attr_len ); |
576 | 9.15k | (*ppsz_attribute_value)[attr_len] = 0; |
577 | | // Finally, skip over the final delimiter |
578 | 9.15k | if (delimiter != 0 && *psz_subtitle) |
579 | 4.53k | psz_subtitle++; |
580 | 9.15k | *ppsz_subtitle = psz_subtitle; |
581 | 9.15k | return psz_attribute_name; |
582 | 9.15k | } |
583 | | |
584 | | // Returns the next tag and consume the string up to after the tag name, or |
585 | | // returns NULL and doesn't advance if the angle bracket was not a tag opening |
586 | | // For instance, if psz_subtitle == "<some_tag attribute=value>" |
587 | | // GetTag will return "some_tag", and will advance up to the first 'a' in "attribute" |
588 | | // The returned value must be freed. |
589 | | static char* GetTag( const char** ppsz_subtitle, bool b_closing ) |
590 | 146k | { |
591 | 146k | const char* psz_subtitle = *ppsz_subtitle; |
592 | 146k | if ( *psz_subtitle != '<' ) |
593 | 0 | return NULL; |
594 | | // Skip the '<' |
595 | 146k | psz_subtitle++; |
596 | 146k | if ( b_closing && *psz_subtitle == '/' ) |
597 | 48.3k | psz_subtitle++; |
598 | | // Skip potential spaces |
599 | 147k | while ( *psz_subtitle == ' ' ) |
600 | 1.32k | psz_subtitle++; |
601 | | // Now we need to verify if what comes next is a valid tag: |
602 | 146k | if ( !isalpha( *psz_subtitle ) ) |
603 | 98.3k | return NULL; |
604 | 47.7k | size_t tag_size = 1; |
605 | 330k | while ( isalnum( psz_subtitle[tag_size] ) || psz_subtitle[tag_size] == '_' ) |
606 | 283k | tag_size++; |
607 | 47.7k | char* psz_tagname = vlc_alloc( tag_size + 1, sizeof( *psz_tagname ) ); |
608 | 47.7k | if ( unlikely( !psz_tagname ) ) |
609 | 0 | return NULL; |
610 | 47.7k | strncpy( psz_tagname, psz_subtitle, tag_size ); |
611 | 47.7k | psz_tagname[tag_size] = 0; |
612 | 47.7k | psz_subtitle += tag_size; |
613 | 47.7k | *ppsz_subtitle = psz_subtitle; |
614 | 47.7k | return psz_tagname; |
615 | 47.7k | } |
616 | | |
617 | | static bool IsClosed( const char* psz_subtitle, const char* psz_tagname ) |
618 | 13.2k | { |
619 | 13.2k | const char* psz_tagpos = strcasestr( psz_subtitle, psz_tagname ); |
620 | 13.2k | if ( !psz_tagpos ) |
621 | 4.64k | return false; |
622 | | // Search for '</' and '>' immediately before & after (minding the potential spaces) |
623 | 8.58k | const char* psz_endtag = psz_tagpos + strlen( psz_tagname ); |
624 | 9.44k | while ( *psz_endtag == ' ' ) |
625 | 860 | psz_endtag++; |
626 | 8.58k | if ( *psz_endtag != '>' ) |
627 | 2.36k | return false; |
628 | | // Skip back before the tag itself |
629 | 6.22k | psz_tagpos--; |
630 | 6.56k | while ( *psz_tagpos == ' ' && psz_tagpos > psz_subtitle ) |
631 | 336 | psz_tagpos--; |
632 | 6.22k | if ( *psz_tagpos-- != '/' ) |
633 | 4.44k | return false; |
634 | 1.77k | if ( *psz_tagpos != '<' ) |
635 | 235 | return false; |
636 | 1.54k | return true; |
637 | 1.77k | } |
638 | | |
639 | | typedef struct tag_stack tag_stack_t; |
640 | | struct tag_stack |
641 | | { |
642 | | char* psz_tagname; |
643 | | tag_stack_t *p_next; |
644 | | }; |
645 | | |
646 | | static void AppendTag( tag_stack_t **pp_stack, char* psz_tagname ) |
647 | 1.54k | { |
648 | 1.54k | tag_stack_t* p_elem = malloc( sizeof( *p_elem ) ); |
649 | 1.54k | if ( unlikely( !p_elem ) ) |
650 | 0 | return; |
651 | 1.54k | p_elem->p_next = *pp_stack; |
652 | 1.54k | p_elem->psz_tagname = psz_tagname; |
653 | 1.54k | *pp_stack = p_elem; |
654 | 1.54k | } |
655 | | |
656 | | static bool HasTag( tag_stack_t **pp_stack, const char* psz_tagname ) |
657 | 5.37k | { |
658 | 5.37k | tag_stack_t *p_prev = NULL; |
659 | 11.6k | for ( tag_stack_t* p_current = *pp_stack; p_current; p_current = p_current->p_next ) |
660 | 6.96k | { |
661 | 6.96k | if ( !strcasecmp( psz_tagname, p_current->psz_tagname ) ) |
662 | 698 | { |
663 | 698 | if ( p_current == *pp_stack ) |
664 | 460 | { |
665 | 460 | *pp_stack = p_current->p_next; |
666 | 460 | } |
667 | 238 | else |
668 | 238 | { |
669 | 238 | p_prev->p_next = p_current->p_next; |
670 | 238 | } |
671 | 698 | free( p_current->psz_tagname ); |
672 | 698 | free( p_current ); |
673 | 698 | return true; |
674 | 698 | } |
675 | 6.27k | p_prev = p_current; |
676 | 6.27k | } |
677 | 4.67k | return false; |
678 | 5.37k | } |
679 | | |
680 | | /* |
681 | | * mini style stack implementation |
682 | | */ |
683 | | typedef struct style_stack style_stack_t; |
684 | | struct style_stack |
685 | | { |
686 | | text_style_t* p_style; |
687 | | style_stack_t* p_next; |
688 | | }; |
689 | | |
690 | | static text_style_t* DuplicateAndPushStyle(style_stack_t** pp_stack) |
691 | 20.9k | { |
692 | 20.9k | text_style_t* p_dup = ( *pp_stack ) ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_Create( STYLE_NO_DEFAULTS ); |
693 | 20.9k | if ( unlikely( !p_dup ) ) |
694 | 0 | return NULL; |
695 | 20.9k | style_stack_t* p_entry = malloc( sizeof( *p_entry ) ); |
696 | 20.9k | if ( unlikely( !p_entry ) ) |
697 | 0 | { |
698 | 0 | text_style_Delete( p_dup ); |
699 | 0 | return NULL; |
700 | 0 | } |
701 | | // Give the style ownership to the segment. |
702 | 20.9k | p_entry->p_style = p_dup; |
703 | 20.9k | p_entry->p_next = *pp_stack; |
704 | 20.9k | *pp_stack = p_entry; |
705 | 20.9k | return p_dup; |
706 | 20.9k | } |
707 | | |
708 | | static void PopStyle(style_stack_t** pp_stack) |
709 | 27.7k | { |
710 | 27.7k | style_stack_t* p_old = *pp_stack; |
711 | 27.7k | if ( !p_old ) |
712 | 6.74k | return; |
713 | 20.9k | *pp_stack = p_old->p_next; |
714 | | // Don't free the style, it is now owned by the text_segment_t |
715 | 20.9k | free( p_old ); |
716 | 20.9k | } |
717 | | |
718 | | static text_segment_t* NewTextSegmentPushStyle( text_segment_t* p_segment, style_stack_t** pp_stack ) |
719 | 20.9k | { |
720 | 20.9k | text_segment_t* p_new = text_segment_New( NULL ); |
721 | 20.9k | if ( unlikely( p_new == NULL ) ) |
722 | 0 | return NULL; |
723 | 20.9k | text_style_t* p_style = DuplicateAndPushStyle( pp_stack ); |
724 | 20.9k | p_new->style = p_style; |
725 | 20.9k | p_segment->p_next = p_new; |
726 | 20.9k | return p_new; |
727 | 20.9k | } |
728 | | |
729 | | static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_stack_t** pp_stack ) |
730 | 11.0k | { |
731 | 11.0k | text_segment_t* p_new = text_segment_New( NULL ); |
732 | 11.0k | if ( unlikely( p_new == NULL ) ) |
733 | 0 | return NULL; |
734 | | // We shouldn't have an empty stack since this happens when closing a tag, |
735 | | // but better be safe than sorry if (/when) we encounter a broken subtitle file. |
736 | 11.0k | PopStyle( pp_stack ); |
737 | 11.0k | text_style_t* p_dup = ( *pp_stack ) ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_Create( STYLE_NO_DEFAULTS ); |
738 | 11.0k | p_new->style = p_dup; |
739 | 11.0k | p_segment->p_next = p_new; |
740 | 11.0k | return p_new; |
741 | 11.0k | } |
742 | | |
743 | | static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle ) |
744 | 218k | { |
745 | 218k | text_segment_t* p_segment; |
746 | 218k | text_segment_t* p_first_segment; |
747 | 218k | style_stack_t* p_stack = NULL; |
748 | 218k | tag_stack_t* p_tag_stack = NULL; |
749 | | |
750 | | //FIXME: Remove initial allocation? Might make the below code more complicated |
751 | 218k | p_first_segment = p_segment = text_segment_New( "" ); |
752 | | |
753 | 218k | *pi_align = -1; |
754 | | |
755 | | /* */ |
756 | 3.87M | while( *psz_subtitle ) |
757 | 3.66M | { |
758 | | /* HTML extensions */ |
759 | 3.66M | if( *psz_subtitle == '<' ) |
760 | 97.7k | { |
761 | 97.7k | char *psz_tagname = GetTag( &psz_subtitle, false ); |
762 | 97.7k | if ( psz_tagname != NULL ) |
763 | 31.3k | { |
764 | 31.3k | if( !strcasecmp( psz_tagname, "br" ) ) |
765 | 1.51k | { |
766 | 1.51k | if ( !AppendCharacter( p_segment, '\n' ) ) |
767 | 0 | { |
768 | 0 | free( psz_tagname ); |
769 | 0 | goto fail; |
770 | 0 | } |
771 | 1.51k | } |
772 | 29.8k | else if( !strcasecmp( psz_tagname, "b" ) ) |
773 | 1.45k | { |
774 | 1.45k | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
775 | 1.45k | p_segment->style->i_style_flags |= STYLE_BOLD; |
776 | 1.45k | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
777 | 1.45k | } |
778 | 28.3k | else if( !strcasecmp( psz_tagname, "i" ) ) |
779 | 6.28k | { |
780 | 6.28k | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
781 | 6.28k | p_segment->style->i_style_flags |= STYLE_ITALIC; |
782 | 6.28k | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
783 | 6.28k | } |
784 | 22.0k | else if( !strcasecmp( psz_tagname, "u" ) ) |
785 | 1.36k | { |
786 | 1.36k | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
787 | 1.36k | p_segment->style->i_style_flags |= STYLE_UNDERLINE; |
788 | 1.36k | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
789 | 1.36k | } |
790 | 20.6k | else if( !strcasecmp( psz_tagname, "s" ) ) |
791 | 682 | { |
792 | 682 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
793 | 682 | p_segment->style->i_style_flags |= STYLE_STRIKEOUT; |
794 | 682 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
795 | 682 | } |
796 | 20.0k | else if( !strcasecmp( psz_tagname, "font" ) ) |
797 | 6.77k | { |
798 | 6.77k | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
799 | | |
800 | 6.77k | char* psz_attribute_name; |
801 | 6.77k | char* psz_attribute_value; |
802 | | |
803 | 17.0k | while( ( psz_attribute_name = ConsumeAttribute( &psz_subtitle, &psz_attribute_value ) ) ) |
804 | 10.3k | { |
805 | 10.3k | if ( !psz_attribute_value ) |
806 | 1.14k | { |
807 | 1.14k | free( psz_attribute_name ); |
808 | 1.14k | continue; |
809 | 1.14k | } |
810 | 9.15k | if ( !strcasecmp( psz_attribute_name, "face" ) ) |
811 | 238 | { |
812 | 238 | free(p_segment->style->psz_fontname); |
813 | 238 | p_segment->style->psz_fontname = psz_attribute_value; |
814 | | // We don't want to free the attribute value since it has become our fontname |
815 | 238 | psz_attribute_value = NULL; |
816 | 238 | } |
817 | 8.91k | else if ( !strcasecmp( psz_attribute_name, "family" ) ) |
818 | 226 | { |
819 | 226 | free(p_segment->style->psz_monofontname); |
820 | 226 | p_segment->style->psz_monofontname = psz_attribute_value; |
821 | 226 | psz_attribute_value = NULL; |
822 | 226 | } |
823 | 8.68k | else if ( !strcasecmp( psz_attribute_name, "size" ) ) |
824 | 697 | { |
825 | 697 | int size = atoi( psz_attribute_value ); |
826 | 697 | if( size ) |
827 | 392 | { |
828 | 392 | p_segment->style->i_font_size = size; |
829 | 392 | p_segment->style->f_font_relsize = STYLE_DEFAULT_REL_FONT_SIZE * |
830 | 392 | STYLE_DEFAULT_FONT_SIZE / p_segment->style->i_font_size; |
831 | 392 | } |
832 | 697 | } |
833 | 7.99k | else if ( !strcasecmp( psz_attribute_name, "color" ) ) |
834 | 4.65k | { |
835 | 4.65k | p_segment->style->i_font_color = vlc_html_color( psz_attribute_value, NULL ); |
836 | 4.65k | p_segment->style->i_features |= STYLE_HAS_FONT_COLOR; |
837 | 4.65k | } |
838 | 3.33k | else if ( !strcasecmp( psz_attribute_name, "outline-color" ) ) |
839 | 0 | { |
840 | 0 | p_segment->style->i_outline_color = vlc_html_color( psz_attribute_value, NULL ); |
841 | 0 | p_segment->style->i_features |= STYLE_HAS_OUTLINE_COLOR; |
842 | 0 | } |
843 | 3.33k | else if ( !strcasecmp( psz_attribute_name, "shadow-color" ) ) |
844 | 0 | { |
845 | 0 | p_segment->style->i_shadow_color = vlc_html_color( psz_attribute_value, NULL ); |
846 | 0 | p_segment->style->i_features |= STYLE_HAS_SHADOW_COLOR; |
847 | 0 | } |
848 | 3.33k | else if ( !strcasecmp( psz_attribute_name, "outline-level" ) ) |
849 | 0 | { |
850 | 0 | p_segment->style->i_outline_width = atoi( psz_attribute_value ); |
851 | 0 | } |
852 | 3.33k | else if ( !strcasecmp( psz_attribute_name, "shadow-level" ) ) |
853 | 0 | { |
854 | 0 | p_segment->style->i_shadow_width = atoi( psz_attribute_value ); |
855 | 0 | } |
856 | 3.33k | else if ( !strcasecmp( psz_attribute_name, "back-color" ) ) |
857 | 0 | { |
858 | 0 | p_segment->style->i_background_color = vlc_html_color( psz_attribute_value, NULL ); |
859 | 0 | p_segment->style->i_features |= STYLE_HAS_BACKGROUND_COLOR; |
860 | 0 | } |
861 | 3.33k | else if ( !strcasecmp( psz_attribute_name, "alpha" ) ) |
862 | 268 | { |
863 | 268 | p_segment->style->i_font_alpha = atoi( psz_attribute_value ); |
864 | 268 | p_segment->style->i_features |= STYLE_HAS_FONT_ALPHA; |
865 | 268 | } |
866 | | |
867 | 9.15k | free( psz_attribute_name ); |
868 | 9.15k | free( psz_attribute_value ); |
869 | 9.15k | } |
870 | 6.77k | } |
871 | 13.2k | else |
872 | 13.2k | { |
873 | | // This is an unknown tag. We need to hide it if it's properly closed, and display it otherwise |
874 | 13.2k | if ( !IsClosed( psz_subtitle, psz_tagname ) ) |
875 | 11.6k | { |
876 | 11.6k | AppendCharacter( p_segment, '<' ); |
877 | 11.6k | AppendString( p_segment, psz_tagname ); |
878 | 11.6k | AppendCharacter( p_segment, '>' ); |
879 | 11.6k | } |
880 | 1.54k | else |
881 | 1.54k | { |
882 | 1.54k | AppendTag( &p_tag_stack, psz_tagname ); |
883 | | // We don't want to free the tagname now, it will be freed when the tag |
884 | | // gets poped from the stack. |
885 | 1.54k | psz_tagname = NULL; |
886 | 1.54k | } |
887 | | // In any case, fall through and skip to the closing tag. |
888 | 13.2k | } |
889 | | // Skip potential spaces & end tag |
890 | 433k | while ( *psz_subtitle && *psz_subtitle != '>' ) |
891 | 402k | psz_subtitle++; |
892 | 31.3k | if ( *psz_subtitle == '>' ) |
893 | 27.1k | psz_subtitle++; |
894 | | |
895 | 31.3k | free( psz_tagname ); |
896 | 31.3k | } |
897 | 66.3k | else if( !strncmp( psz_subtitle, "</", 2 )) |
898 | 48.3k | { |
899 | 48.3k | char* psz_closetagname = GetTag( &psz_subtitle, true ); |
900 | 48.3k | if ( psz_closetagname != NULL ) |
901 | 16.4k | { |
902 | 16.4k | if ( !strcasecmp( psz_closetagname, "b" ) || |
903 | 15.1k | !strcasecmp( psz_closetagname, "i" ) || |
904 | 12.7k | !strcasecmp( psz_closetagname, "u" ) || |
905 | 9.92k | !strcasecmp( psz_closetagname, "s" ) || |
906 | 9.52k | !strcasecmp( psz_closetagname, "font" ) ) |
907 | 11.0k | { |
908 | | // A closing tag for one of the tags we handle, meaning |
909 | | // we pushed a style onto the stack earlier |
910 | 11.0k | p_segment = NewTextSegmentPopStyle( p_segment, &p_stack ); |
911 | 11.0k | } |
912 | 5.37k | else |
913 | 5.37k | { |
914 | | // Unknown closing tag. If it is closing an unknown tag, ignore it. Otherwise, display it |
915 | 5.37k | if ( !HasTag( &p_tag_stack, psz_closetagname ) ) |
916 | 4.67k | { |
917 | 4.67k | AppendString( p_segment, "</" ); |
918 | 4.67k | AppendString( p_segment, psz_closetagname ); |
919 | 4.67k | AppendCharacter( p_segment, '>' ); |
920 | 4.67k | } |
921 | 5.37k | } |
922 | 17.2k | while ( *psz_subtitle == ' ' ) |
923 | 832 | psz_subtitle++; |
924 | 16.4k | if ( *psz_subtitle == '>' ) |
925 | 11.5k | psz_subtitle++; |
926 | 16.4k | free( psz_closetagname ); |
927 | 16.4k | } |
928 | 31.9k | else |
929 | 31.9k | { |
930 | | /** |
931 | | * This doesn't appear to be a valid tag closing syntax. |
932 | | * Simply append the text |
933 | | */ |
934 | 31.9k | AppendString( p_segment, "</" ); |
935 | 31.9k | psz_subtitle += 2; |
936 | 31.9k | } |
937 | 48.3k | } |
938 | 18.0k | else |
939 | 18.0k | { |
940 | | /* We have an unknown tag, just append it, and move on. |
941 | | * The rest of the string won't be recognized as a tag, and |
942 | | * we will ignore unknown closing tag |
943 | | */ |
944 | 18.0k | AppendCharacter( p_segment, '<' ); |
945 | 18.0k | psz_subtitle++; |
946 | 18.0k | } |
947 | 97.7k | } |
948 | | /* SSA extensions */ |
949 | 3.56M | else if( psz_subtitle[0] == '{' && psz_subtitle[1] == '\\' && |
950 | 1.64k | strchr( psz_subtitle, '}' ) ) |
951 | 1.43k | { |
952 | | /* Check for forced alignment */ |
953 | 1.43k | if( *pi_align < 0 && |
954 | 1.22k | !strncmp( psz_subtitle, "{\\an", 4 ) && psz_subtitle[4] >= '1' && psz_subtitle[4] <= '9' && psz_subtitle[5] == '}' ) |
955 | 225 | { |
956 | 225 | static const int pi_vertical[3] = { SUBPICTURE_ALIGN_BOTTOM, 0, SUBPICTURE_ALIGN_TOP }; |
957 | 225 | static const int pi_horizontal[3] = { SUBPICTURE_ALIGN_LEFT, 0, SUBPICTURE_ALIGN_RIGHT }; |
958 | 225 | const int i_id = psz_subtitle[4] - '1'; |
959 | | |
960 | 225 | *pi_align = pi_vertical[i_id/3] | pi_horizontal[i_id%3]; |
961 | 225 | } |
962 | | /* TODO fr -> rotation */ |
963 | | |
964 | | /* Hide {\stupidity} */ |
965 | 1.43k | psz_subtitle = strchr( psz_subtitle, '}' ) + 1; |
966 | 1.43k | } |
967 | | /* MicroDVD extensions */ |
968 | | /* FIXME: |
969 | | * - Currently, we don't do difference between X and x, and we should: |
970 | | * Capital Letters applies to the whole text and not one line |
971 | | * - We don't support Position and Coordinates |
972 | | * - We don't support the DEFAULT flag (HEADER) |
973 | | */ |
974 | | |
975 | 3.56M | else if( psz_subtitle[0] == '{' && psz_subtitle[1] != 0 && |
976 | 14.4k | psz_subtitle[2] == ':' && strchr( &psz_subtitle[2], '}' ) ) |
977 | 6.31k | { |
978 | 6.31k | const char *psz_tag_end = strchr( &psz_subtitle[2], '}' ); |
979 | 6.31k | size_t i_len = psz_tag_end - &psz_subtitle[3]; |
980 | | |
981 | 6.31k | if( psz_subtitle[1] == 'Y' || psz_subtitle[1] == 'y' ) |
982 | 2.15k | { |
983 | 2.15k | if( psz_subtitle[3] == 'i' ) |
984 | 489 | { |
985 | 489 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
986 | 489 | p_segment->style->i_style_flags |= STYLE_ITALIC; |
987 | 489 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
988 | 489 | psz_subtitle++; |
989 | 489 | } |
990 | 2.15k | if( psz_subtitle[3] == 'b' ) |
991 | 491 | { |
992 | 491 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
993 | 491 | p_segment->style->i_style_flags |= STYLE_BOLD; |
994 | 491 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
995 | 491 | psz_subtitle++; |
996 | 491 | } |
997 | 2.15k | if( psz_subtitle[3] == 'u' ) |
998 | 729 | { |
999 | 729 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1000 | 729 | p_segment->style->i_style_flags |= STYLE_UNDERLINE; |
1001 | 729 | p_segment->style->i_features |= STYLE_HAS_FLAGS; |
1002 | 729 | psz_subtitle++; |
1003 | 729 | } |
1004 | 2.15k | } |
1005 | 4.16k | else if( (psz_subtitle[1] == 'C' || psz_subtitle[1] == 'c' ) |
1006 | 2.24k | && psz_subtitle[3] == '$' && i_len >= 7 ) |
1007 | 1.29k | { |
1008 | | /* Yes, they use BBGGRR, instead of RRGGBB */ |
1009 | 1.29k | char psz_color[7]; |
1010 | 1.29k | psz_color[0] = psz_subtitle[8]; psz_color[1] = psz_subtitle[9]; |
1011 | 1.29k | psz_color[2] = psz_subtitle[6]; psz_color[3] = psz_subtitle[7]; |
1012 | 1.29k | psz_color[4] = psz_subtitle[4]; psz_color[5] = psz_subtitle[5]; |
1013 | 1.29k | psz_color[6] = '\0'; |
1014 | 1.29k | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1015 | 1.29k | p_segment->style->i_font_color = vlc_html_color( psz_color, NULL ); |
1016 | 1.29k | p_segment->style->i_features |= STYLE_HAS_FONT_COLOR; |
1017 | 1.29k | } |
1018 | 2.86k | else if( psz_subtitle[1] == 'F' || psz_subtitle[1] == 'f' ) |
1019 | 664 | { |
1020 | 664 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1021 | 664 | free(p_segment->style->psz_fontname); |
1022 | 664 | p_segment->style->psz_fontname = strndup( &psz_subtitle[3], i_len ); |
1023 | 664 | } |
1024 | 2.19k | else if( psz_subtitle[1] == 'S' || psz_subtitle[1] == 's' ) |
1025 | 1.07k | { |
1026 | 1.07k | int size = atoi( &psz_subtitle[3] ); |
1027 | 1.07k | if( size ) |
1028 | 733 | { |
1029 | 733 | p_segment = NewTextSegmentPushStyle( p_segment, &p_stack ); |
1030 | 733 | p_segment->style->i_font_size = size; |
1031 | 733 | p_segment->style->f_font_relsize = STYLE_DEFAULT_REL_FONT_SIZE * |
1032 | 733 | STYLE_DEFAULT_FONT_SIZE / p_segment->style->i_font_size; |
1033 | | |
1034 | 733 | } |
1035 | 1.07k | } |
1036 | | /* Currently unsupported since we don't have access to the i_align flag here |
1037 | | else if( psz_subtitle[1] == 'P' ) |
1038 | | { |
1039 | | if( psz_subtitle[3] == "1" ) |
1040 | | i_align = SUBPICTURE_ALIGN_TOP; |
1041 | | else if( psz_subtitle[3] == "0" ) |
1042 | | i_align = SUBPICTURE_ALIGN_BOTTOM; |
1043 | | } */ |
1044 | | // Hide other {x:y} atrocities, notably {o:x} |
1045 | 6.31k | psz_subtitle = psz_tag_end + 1; |
1046 | 6.31k | } |
1047 | 3.55M | else |
1048 | 3.55M | { |
1049 | 3.55M | if( *psz_subtitle == '\n' || !strncasecmp( psz_subtitle, "\\n", 2 ) ) |
1050 | 87.2k | { |
1051 | 87.2k | if ( !AppendCharacter( p_segment, '\n' ) ) |
1052 | 0 | goto fail; |
1053 | 87.2k | if ( *psz_subtitle == '\n' ) |
1054 | 86.8k | psz_subtitle++; |
1055 | 398 | else |
1056 | 398 | psz_subtitle += 2; |
1057 | 87.2k | } |
1058 | 3.46M | else if( !strncasecmp( psz_subtitle, "\\h", 2 ) ) |
1059 | 639 | { |
1060 | 639 | if ( !AppendString( p_segment, "\xC2\xA0" ) ) |
1061 | 0 | goto fail; |
1062 | 639 | psz_subtitle += 2; |
1063 | 639 | } |
1064 | 3.46M | else |
1065 | 3.46M | { |
1066 | | //FIXME: Highly inneficient |
1067 | 3.46M | AppendCharacter( p_segment, *psz_subtitle ); |
1068 | 3.46M | psz_subtitle++; |
1069 | 3.46M | } |
1070 | 3.55M | } |
1071 | 3.66M | } |
1072 | 235k | while ( p_stack ) |
1073 | 16.6k | PopStyle( &p_stack ); |
1074 | 219k | while ( p_tag_stack ) |
1075 | 844 | { |
1076 | 844 | tag_stack_t *p_tag = p_tag_stack; |
1077 | 844 | p_tag_stack = p_tag_stack->p_next; |
1078 | 844 | free( p_tag->psz_tagname ); |
1079 | 844 | free( p_tag ); |
1080 | 844 | } |
1081 | | |
1082 | 218k | return p_first_segment; |
1083 | | |
1084 | 0 | fail: |
1085 | 0 | text_segment_ChainDelete( p_first_segment ); |
1086 | | return NULL; |
1087 | 218k | } |