Coverage Report

Created: 2025-11-14 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/modules/codec/subsdec.c
Line
Count
Source
1
/*****************************************************************************
2
 * subsdec.c : text subtitle decoder
3
 *****************************************************************************
4
 * Copyright (C) 2000-2006 VLC authors and VideoLAN
5
 *
6
 * Authors: Gildas Bazin <gbazin@videolan.org>
7
 *          Samuel Hocevar <sam@zoy.org>
8
 *          Derk-Jan Hartman <hartman at videolan dot org>
9
 *          Bernie Purcell <bitmap@videolan.org>
10
 *
11
 * This program is free software; you can redistribute it and/or modify it
12
 * under the terms of the GNU Lesser General Public License as published by
13
 * the Free Software Foundation; either version 2.1 of the License, or
14
 * (at your option) any later version.
15
 *
16
 * This program is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
 * GNU Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public License
22
 * along with this program; if not, write to the Free Software Foundation,
23
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24
 *****************************************************************************/
25
26
/*****************************************************************************
27
 * Preamble
28
 *****************************************************************************/
29
#ifdef HAVE_CONFIG_H
30
# include "config.h"
31
#endif
32
33
#include <limits.h>
34
#include <errno.h>
35
#include <ctype.h>
36
37
#include <vlc_common.h>
38
#include <vlc_plugin.h>
39
#include <vlc_codec.h>
40
#include <vlc_charset.h>
41
#include <vlc_xml.h>
42
43
#include "substext.h"
44
45
/*****************************************************************************
46
 * Module descriptor.
47
 *****************************************************************************/
48
static const char *const ppsz_encodings[] = {
49
    "",
50
    "system",
51
    "UTF-8",
52
    "UTF-16",
53
    "UTF-16BE",
54
    "UTF-16LE",
55
    "GB18030",
56
    "ISO-8859-15",
57
    "Windows-1252",
58
    "IBM850",
59
    "ISO-8859-2",
60
    "Windows-1250",
61
    "ISO-8859-3",
62
    "ISO-8859-10",
63
    "Windows-1251",
64
    "KOI8-R",
65
    "KOI8-U",
66
    "ISO-8859-6",
67
    "Windows-1256",
68
    "ISO-8859-7",
69
    "Windows-1253",
70
    "ISO-8859-8",
71
    "Windows-1255",
72
    "ISO-8859-9",
73
    "Windows-1254",
74
    "ISO-8859-11",
75
    "Windows-874",
76
    "ISO-8859-13",
77
    "Windows-1257",
78
    "ISO-8859-14",
79
    "ISO-8859-16",
80
    "ISO-2022-CN-EXT",
81
    "EUC-CN",
82
    "ISO-2022-JP-2",
83
    "EUC-JP",
84
    "Shift_JIS",
85
    "CP949",
86
    "ISO-2022-KR",
87
    "Big5",
88
    "ISO-2022-TW",
89
    "Big5-HKSCS",
90
    "VISCII",
91
    "Windows-1258",
92
};
93
94
static const char *const ppsz_encoding_names[] = {
95
    /* xgettext:
96
      The character encoding name in parenthesis corresponds to that used for
97
      the GetACP translation. "Windows-1252" applies to Western European
98
      languages using the Latin alphabet. */
99
    N_("Default (Windows-1252)"),
100
    N_("System codeset"),
101
    N_("Universal (UTF-8)"),
102
    N_("Universal (UTF-16)"),
103
    N_("Universal (big endian UTF-16)"),
104
    N_("Universal (little endian UTF-16)"),
105
    N_("Universal, Chinese (GB18030)"),
106
107
  /* ISO 8859 and the likes */
108
    /* 1 */
109
    N_("Western European (Latin-9)"), /* mostly superset of Latin-1 */
110
    N_("Western European (Windows-1252)"),
111
    N_("Western European (IBM 00850)"),
112
    /* 2 */
113
    N_("Eastern European (Latin-2)"),
114
    N_("Eastern European (Windows-1250)"),
115
    /* 3 */
116
    N_("Esperanto (Latin-3)"),
117
    /* 4 */
118
    N_("Nordic (Latin-6)"), /* Latin 6 supersedes Latin 4 */
119
    /* 5 */
120
    N_("Cyrillic (Windows-1251)"), /* ISO 8859-5 is not practically used */
121
    N_("Russian (KOI8-R)"),
122
    N_("Ukrainian (KOI8-U)"),
123
    /* 6 */
124
    N_("Arabic (ISO 8859-6)"),
125
    N_("Arabic (Windows-1256)"),
126
    /* 7 */
127
    N_("Greek (ISO 8859-7)"),
128
    N_("Greek (Windows-1253)"),
129
    /* 8 */
130
    N_("Hebrew (ISO 8859-8)"),
131
    N_("Hebrew (Windows-1255)"),
132
    /* 9 */
133
    N_("Turkish (ISO 8859-9)"),
134
    N_("Turkish (Windows-1254)"),
135
    /* 10 -> 4 */
136
    /* 11 */
137
    N_("Thai (TIS 620-2533/ISO 8859-11)"),
138
    N_("Thai (Windows-874)"),
139
    /* 13 */
140
    N_("Baltic (Latin-7)"),
141
    N_("Baltic (Windows-1257)"),
142
    /* 12 -> /dev/null */
143
    /* 14 */
144
    N_("Celtic (Latin-8)"),
145
    /* 15 -> 1 */
146
    /* 16 */
147
    N_("South-Eastern European (Latin-10)"),
148
  /* CJK families */
149
    N_("Simplified Chinese (ISO-2022-CN-EXT)"),
150
    N_("Simplified Chinese Unix (EUC-CN)"),
151
    N_("Japanese (7-bits JIS/ISO-2022-JP-2)"),
152
    N_("Japanese Unix (EUC-JP)"),
153
    N_("Japanese (Shift JIS)"),
154
    N_("Korean (EUC-KR/CP949)"),
155
    N_("Korean (ISO-2022-KR)"),
156
    N_("Traditional Chinese (Big5)"),
157
    N_("Traditional Chinese Unix (EUC-TW)"),
158
    N_("Hong-Kong Supplementary (HKSCS)"),
159
  /* Other */
160
    N_("Vietnamese (VISCII)"),
161
    N_("Vietnamese (Windows-1258)"),
162
};
163
164
static const int  pi_justification[] = { -1, 0, SUBPICTURE_ALIGN_LEFT, SUBPICTURE_ALIGN_RIGHT };
165
static const char *const ppsz_justification_text[] = {
166
    N_("Auto"),N_("Center"),N_("Left"),N_("Right")
167
};
168
169
#define ENCODING_TEXT N_("Subtitle text encoding")
170
#define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
171
#define ALIGN_TEXT N_("Subtitle justification")
172
#define ALIGN_LONGTEXT N_("Set the justification of subtitles")
173
#define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitle autodetection")
174
#define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
175
            "UTF-8 encoding within subtitle files.")
176
177
static int  OpenDecoder   ( vlc_object_t * );
178
static void CloseDecoder  ( vlc_object_t * );
179
180
108
vlc_module_begin ()
181
54
    set_shortname( N_("Subtitles"))
182
54
    set_description( N_("Text subtitle decoder") )
183
54
    set_capability( "spu decoder", 50 )
184
108
    set_callbacks( OpenDecoder, CloseDecoder )
185
54
    set_subcategory( SUBCAT_INPUT_SCODEC )
186
187
54
    add_integer( "subsdec-align", -1, ALIGN_TEXT, ALIGN_LONGTEXT )
188
54
        change_integer_list( pi_justification, ppsz_justification_text )
189
54
    add_string( "subsdec-encoding", "",
190
54
                ENCODING_TEXT, ENCODING_LONGTEXT )
191
54
        change_string_list( ppsz_encodings, ppsz_encoding_names )
192
54
    add_bool( "subsdec-autodetect-utf8", true,
193
54
              AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT )
194
54
vlc_module_end ()
195
196
/*****************************************************************************
197
 * Local prototypes
198
 *****************************************************************************/
199
#define NO_BREAKING_SPACE  "&#160;"
200
201
/* Per-decoder private state, allocated in OpenDecoder and freed in
 * CloseDecoder. */
typedef struct
202
{
203
    int                 i_align;          /* Subtitles alignment on the vout;
                                           * read from "subsdec-align", a
                                           * negative value means "not forced" */
204
205
    vlc_iconv_t         iconv_handle;            /* handle to iconv instance,
                                                  * (vlc_iconv_t)-1 when no
                                                  * conversion is set up */
206
    bool                b_autodetect_utf8;       /* true while incoming text is
                                                  * still checked with IsUTF8()
                                                  * before any conversion */
207
} decoder_sys_t;
208
209
210
static int             DecodeBlock   ( decoder_t *, block_t * );
211
static subpicture_t   *ParseText     ( decoder_t *, block_t * );
212
static text_segment_t *ParseSubtitles(int *pi_align, const char * );
213
214
/*****************************************************************************
215
 * OpenDecoder: probe the decoder and return score
216
 *****************************************************************************
217
 * Tries to launch a decoder and return score so that the interface is able
218
 * to chose.
219
 *****************************************************************************/
220
static int OpenDecoder( vlc_object_t *p_this )
221
17.9k
{
222
17.9k
    decoder_t     *p_dec = (decoder_t*)p_this;
223
17.9k
    decoder_sys_t *p_sys;
224
225
17.9k
    switch( p_dec->fmt_in->i_codec )
226
17.9k
    {
227
5.47k
        case VLC_CODEC_SUBT:
228
5.47k
        case VLC_CODEC_ITU_T140:
229
5.47k
            break;
230
12.4k
        default:
231
12.4k
            return VLC_EGENERIC;
232
17.9k
    }
233
234
    /* Allocate the memory needed to store the decoder's structure */
235
5.47k
    p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
236
5.47k
    if( p_sys == NULL )
237
0
        return VLC_ENOMEM;
238
239
5.47k
    p_dec->pf_decode = DecodeBlock;
240
5.47k
    p_dec->fmt_out.i_codec = 0;
241
242
    /* init of p_sys */
243
5.47k
    p_sys->i_align = -1;
244
5.47k
    p_sys->iconv_handle = (vlc_iconv_t)-1;
245
5.47k
    p_sys->b_autodetect_utf8 = false;
246
247
5.47k
    const char *encoding;
248
5.47k
    char *var = NULL;
249
250
    /* First try demux-specified encoding */
251
5.47k
    if( p_dec->fmt_in->i_codec == VLC_CODEC_ITU_T140 )
252
0
        encoding = "UTF-8"; /* IUT T.140 is always using UTF-8 */
253
5.47k
    else
254
5.47k
    if( p_dec->fmt_in->subs.psz_encoding && *p_dec->fmt_in->subs.psz_encoding )
255
150
    {
256
150
        encoding = p_dec->fmt_in->subs.psz_encoding;
257
150
        msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
258
150
                 encoding);
259
150
    }
260
5.32k
    else
261
5.32k
    {
262
        /* Second, try configured encoding */
263
5.32k
        if ((var = var_InheritString (p_dec, "subsdec-encoding")) != NULL)
264
0
        {
265
0
            msg_Dbg (p_dec, "trying configured character encoding: %s", var);
266
0
            if (!strcmp (var, "system"))
267
0
            {
268
0
                free (var);
269
0
                var = NULL;
270
0
                encoding = "";
271
                /* ^ iconv() treats "" as nl_langinfo(CODESET) */
272
0
            }
273
0
            else
274
0
                encoding = var;
275
0
        }
276
5.32k
        else
277
        /* Third, try "local" encoding */
278
5.32k
        {
279
        /* xgettext:
280
           The Windows ANSI code page most commonly used for this language.
281
           VLC uses this as a guess of the subtitle files character set
282
           (if UTF-8 and UTF-16 autodetection fails).
283
           Western European languages normally use "CP1252", which is a
284
           Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
285
           Other scripts use other code pages.
286
287
           This MUST be a valid iconv character set. If unsure, please refer
288
           the VideoLAN translators mailing list. */
289
5.32k
            encoding = vlc_pgettext("GetACP", "CP1252");
290
5.32k
            msg_Dbg (p_dec, "trying default character encoding: %s", encoding);
291
5.32k
        }
292
293
        /* Check UTF-8 autodetection */
294
5.32k
        if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
295
5.32k
        {
296
5.32k
            msg_Dbg (p_dec, "using automatic UTF-8 detection");
297
5.32k
            p_sys->b_autodetect_utf8 = true;
298
5.32k
        }
299
5.32k
    }
300
301
5.47k
    if (strcasecmp (encoding, "UTF-8") && strcasecmp (encoding, "utf8"))
302
5.32k
    {
303
5.32k
        p_sys->iconv_handle = vlc_iconv_open ("UTF-8", encoding);
304
5.32k
        if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
305
5.32k
            msg_Err (p_dec, "cannot convert from %s: %s", encoding,
306
5.32k
                     vlc_strerror_c(errno));
307
5.32k
    }
308
5.47k
    free (var);
309
310
5.47k
    p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );
311
312
5.47k
    return VLC_SUCCESS;
313
5.47k
}
314
315
/****************************************************************************
316
 * DecodeBlock: the whole thing
317
 ****************************************************************************
318
 * This function must be fed with complete subtitles units.
319
 ****************************************************************************/
320
static int DecodeBlock( decoder_t *p_dec, block_t *p_block )
321
445k
{
322
445k
    subpicture_t *p_spu;
323
324
445k
    if( p_block == NULL ) /* No Drain */
325
225k
        return VLCDEC_SUCCESS;
326
327
219k
    if( p_block->i_flags & BLOCK_FLAG_CORRUPTED )
328
0
    {
329
0
        block_Release( p_block );
330
0
        return VLCDEC_SUCCESS;
331
0
    }
332
333
219k
    p_spu = ParseText( p_dec, p_block );
334
335
219k
    block_Release( p_block );
336
219k
    if( p_spu != NULL )
337
218k
        decoder_QueueSub( p_dec, p_spu );
338
219k
    return VLCDEC_SUCCESS;
339
219k
}
340
341
/*****************************************************************************
342
 * CloseDecoder: clean up the decoder
343
 *****************************************************************************/
344
static void CloseDecoder( vlc_object_t *p_this )
345
5.47k
{
346
5.47k
    decoder_t *p_dec = (decoder_t *)p_this;
347
5.47k
    decoder_sys_t *p_sys = p_dec->p_sys;
348
349
5.47k
    if( p_sys->iconv_handle != (vlc_iconv_t)-1 )
350
5.32k
        vlc_iconv_close( p_sys->iconv_handle );
351
352
5.47k
    free( p_sys );
353
5.47k
}
354
355
/*****************************************************************************
356
 * ParseText: parse an text subtitle packet and send it to the video output
357
 *****************************************************************************/
358
static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
359
219k
{
360
219k
    decoder_sys_t *p_sys = p_dec->p_sys;
361
219k
    subpicture_t *p_spu = NULL;
362
363
219k
    if( p_block->i_flags & BLOCK_FLAG_CORRUPTED )
364
0
        return NULL;
365
366
    /* We cannot display a subpicture with no date */
367
219k
    if( p_block->i_pts == VLC_TICK_INVALID )
368
0
    {
369
0
        msg_Warn( p_dec, "subtitle without a date" );
370
0
        return NULL;
371
0
    }
372
373
    /* Check validity of packet data */
374
    /* An "empty" line containing only \0 can be used to force
375
       and ephemer picture from the screen */
376
219k
    if( p_block->i_buffer < 1 )
377
71
    {
378
71
        msg_Warn( p_dec, "no subtitle data" );
379
71
        return NULL;
380
71
    }
381
382
219k
    char *psz_subtitle = NULL;
383
384
    /* Should be resiliant against bad subtitles */
385
219k
    if( p_sys->iconv_handle == (vlc_iconv_t)-1 ||
386
219k
        p_sys->b_autodetect_utf8 )
387
23.3k
    {
388
23.3k
        psz_subtitle = malloc( p_block->i_buffer + 1 );
389
23.3k
        if( psz_subtitle == NULL )
390
0
            return NULL;
391
23.3k
        memcpy( psz_subtitle, p_block->p_buffer, p_block->i_buffer );
392
23.3k
        psz_subtitle[p_block->i_buffer] = '\0';
393
23.3k
    }
394
395
219k
    if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
396
98
    {
397
98
        if (EnsureUTF8( psz_subtitle ) == NULL)
398
5
        {
399
5
            msg_Err( p_dec, "failed to convert subtitle encoding.\n"
400
5
                     "Try manually setting a character-encoding "
401
5
                     "before you open the file." );
402
5
        }
403
98
    }
404
219k
    else
405
219k
    {
406
219k
        if( p_sys->b_autodetect_utf8 )
407
23.2k
        {
408
23.2k
            if( IsUTF8( psz_subtitle ) == NULL )
409
1.89k
            {
410
1.89k
                msg_Dbg( p_dec, "invalid UTF-8 sequence: "
411
1.89k
                         "disabling UTF-8 subtitles autodetection" );
412
1.89k
                p_sys->b_autodetect_utf8 = false;
413
1.89k
            }
414
23.2k
        }
415
416
219k
        if( !p_sys->b_autodetect_utf8 )
417
198k
        {
418
198k
            size_t inbytes_left = p_block->i_buffer;
419
198k
            size_t outbytes_left = 6 * inbytes_left;
420
198k
            char *psz_new_subtitle = xmalloc( outbytes_left + 1 );
421
198k
            char *psz_convert_buffer_out = psz_new_subtitle;
422
198k
            const char *psz_convert_buffer_in =
423
198k
                    psz_subtitle ? psz_subtitle : (char *)p_block->p_buffer;
424
425
198k
            size_t ret = vlc_iconv( p_sys->iconv_handle,
426
198k
                                    &psz_convert_buffer_in, &inbytes_left,
427
198k
                                    &psz_convert_buffer_out, &outbytes_left );
428
429
198k
            *psz_convert_buffer_out++ = '\0';
430
198k
            free( psz_subtitle );
431
432
198k
            if( ( ret == (size_t)(-1) ) || inbytes_left )
433
1.06k
            {
434
1.06k
                free( psz_new_subtitle );
435
1.06k
                msg_Err( p_dec, "failed to convert subtitle encoding.\n"
436
1.06k
                        "Try manually setting a character-encoding "
437
1.06k
                                "before you open the file." );
438
1.06k
                return NULL;
439
1.06k
            }
440
441
197k
            psz_subtitle = realloc( psz_new_subtitle,
442
197k
                                    psz_convert_buffer_out - psz_new_subtitle );
443
197k
            if( !psz_subtitle )
444
0
                psz_subtitle = psz_new_subtitle;
445
197k
        }
446
219k
    }
447
448
    /* Create the subpicture unit */
449
218k
    p_spu = decoder_NewSubpictureText( p_dec );
450
218k
    if( !p_spu )
451
0
    {
452
0
        free( psz_subtitle );
453
0
        return NULL;
454
0
    }
455
218k
    p_spu->i_start    = p_block->i_pts;
456
218k
    p_spu->i_stop     = p_block->i_pts + p_block->i_length;
457
218k
    p_spu->b_ephemer  = (p_block->i_length == 0);
458
459
218k
    subtext_updater_sys_t *p_spu_sys = p_spu->updater.sys;
460
461
218k
    int i_inline_align = -1;
462
218k
    p_spu_sys->region.p_segments = ParseSubtitles( &i_inline_align, psz_subtitle );
463
218k
    free( psz_subtitle );
464
218k
    p_spu_sys->region.b_absolute = false;
465
218k
    p_spu_sys->region.b_in_window = true;
466
218k
    if( p_sys->i_align >= 0 ) /* bottom ; left, right or centered */
467
0
    {
468
0
        p_spu_sys->region.align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
469
0
        p_spu_sys->region.inner_align = p_sys->i_align;
470
0
    }
471
218k
    else if( i_inline_align >= 0 )
472
225
    {
473
225
        p_spu_sys->region.align = i_inline_align;
474
225
        p_spu_sys->region.inner_align = i_inline_align;
475
225
    }
476
218k
    else /* default, bottom ; centered */
477
218k
    {
478
218k
        p_spu_sys->region.align = SUBPICTURE_ALIGN_BOTTOM;
479
218k
        p_spu_sys->region.inner_align = 0;
480
218k
    }
481
482
218k
    return p_spu;
483
218k
}
484
485
/* Appends the character c to the text of p_segment (a NULL text is treated
 * as empty). Returns false and leaves the segment untouched when the new
 * string cannot be built, true otherwise. */
static bool AppendCharacter( text_segment_t* p_segment, char c )
{
    const char *psz_current = p_segment->psz_text;
    if ( psz_current == NULL )
        psz_current = "";

    char *psz_joined;
    if ( asprintf( &psz_joined, "%s%c", psz_current, c ) < 0 )
        return false;

    /* Swap in the freshly built string */
    free( p_segment->psz_text );
    p_segment->psz_text = psz_joined;
    return true;
}
494
495
/* Appends psz_str to the text of p_segment (a NULL text is treated as
 * empty). Returns false and leaves the segment untouched when the new string
 * cannot be built, true otherwise. */
static bool AppendString( text_segment_t* p_segment, const char* psz_str )
{
    const char *psz_current = p_segment->psz_text;
    if ( psz_current == NULL )
        psz_current = "";

    char *psz_joined;
    if ( asprintf( &psz_joined, "%s%s", psz_current, psz_str ) < 0 )
        return false;

    /* Swap in the freshly built string */
    free( p_segment->psz_text );
    p_segment->psz_text = psz_joined;
    return true;
}
504
505
/*
 * Parses one `name=value` attribute starting at *ppsz_subtitle.
 *
 * Leading spaces are skipped. The name is a run of alphabetic characters.
 * The value may be enclosed in single or double quotes; an unquoted value
 * ends at the first whitespace or '>'.
 *
 * Returns a heap-allocated copy of the attribute name (to be freed by the
 * caller), or NULL when no name is found, when the string ends right after
 * the name, or when an allocation fails. *ppsz_subtitle is only advanced
 * when a name is returned.
 *
 * *ppsz_attribute_value is always reset to NULL first and receives a
 * heap-allocated copy of the value (to be freed by the caller) when a
 * non-empty value is present.
 *
 * Characters are cast to unsigned char before being handed to the <ctype.h>
 * classifiers: passing a negative plain char (any non-ASCII UTF-8 byte) is
 * undefined behaviour.
 */
static char* ConsumeAttribute( const char** ppsz_subtitle, char** ppsz_attribute_value )
{
    const char* psz_subtitle = *ppsz_subtitle;
    *ppsz_attribute_value = NULL;

    while ( *psz_subtitle == ' ' )
        psz_subtitle++;

    // Attribute name: a run of alphabetic characters
    const char* psz_name_start = psz_subtitle;
    while ( isalpha( (unsigned char)*psz_subtitle ) )
        psz_subtitle++;
    size_t attr_len = (size_t)( psz_subtitle - psz_name_start );
    if ( !*psz_subtitle || attr_len == 0 )
        return NULL;

    char* psz_attribute_name = malloc( attr_len + 1 );
    if ( psz_attribute_name == NULL )
        return NULL;
    memcpy( psz_attribute_name, psz_name_start, attr_len );
    psz_attribute_name[attr_len] = '\0';

    // Skip over to the attribute value
    while ( *psz_subtitle && *psz_subtitle != '=' )
        psz_subtitle++;
    if ( !*psz_subtitle )
    {
        // Name without any value
        *ppsz_subtitle = psz_subtitle;
        return psz_attribute_name;
    }
    // Skip the '=' sign
    psz_subtitle++;

    // Acknowledge the delimiter if any
    while ( isspace( (unsigned char)*psz_subtitle ) )
        psz_subtitle++;

    char delimiter = 0;
    if ( *psz_subtitle == '\'' || *psz_subtitle == '"' )
    {
        // Save the delimiter and skip it
        delimiter = *psz_subtitle;
        psz_subtitle++;
    }

    // Skip spaces, just in case
    while ( isspace( (unsigned char)*psz_subtitle ) )
        psz_subtitle++;

    // Quoted values run up to the matching quote, bare values up to the
    // first whitespace or '>'
    const char* psz_value_start = psz_subtitle;
    while ( *psz_subtitle &&
            ( delimiter != 0
                ? *psz_subtitle != delimiter
                : ( !isspace( (unsigned char)*psz_subtitle ) && *psz_subtitle != '>' ) ) )
        psz_subtitle++;
    attr_len = (size_t)( psz_subtitle - psz_value_start );
    if ( attr_len == 0 )
    {
        // Empty value: the cursor is left where the scan stopped
        *ppsz_subtitle = psz_subtitle;
        return psz_attribute_name;
    }

    *ppsz_attribute_value = malloc( attr_len + 1 );
    if ( *ppsz_attribute_value == NULL )
    {
        free( psz_attribute_name );
        return NULL;
    }
    memcpy( *ppsz_attribute_value, psz_value_start, attr_len );
    (*ppsz_attribute_value)[attr_len] = '\0';

    // Finally, skip over the final delimiter
    if ( delimiter != 0 && *psz_subtitle )
        psz_subtitle++;
    *ppsz_subtitle = psz_subtitle;
    return psz_attribute_name;
}
583
584
// Returns the name of the tag starting at *ppsz_subtitle and consumes the
// string up to just after the tag name, or returns NULL and doesn't advance
// if the angle bracket was not a tag opening (or if the allocation failed).
// When b_closing is true, a '/' right after the '<' is skipped as well.
// For instance, if psz_subtitle == "<some_tag attribute=value>"
// GetTag will return "some_tag", and will advance up to the space that
// follows "some_tag".
// A tag name starts with a letter and continues with letters, digits or '_'.
// The returned value must be freed.
static char* GetTag( const char** ppsz_subtitle, bool b_closing )
{
    const char* psz_subtitle = *ppsz_subtitle;
    if ( *psz_subtitle != '<' )
        return NULL;
    // Skip the '<'
    psz_subtitle++;
    if ( b_closing && *psz_subtitle == '/' )
        psz_subtitle++;
    // Skip potential spaces
    while ( *psz_subtitle == ' ' )
        psz_subtitle++;
    // Now we need to verify if what comes next is a valid tag.
    // The <ctype.h> classifiers require an unsigned char value: a negative
    // plain char (any non-ASCII UTF-8 byte) would be undefined behaviour.
    if ( !isalpha( (unsigned char)*psz_subtitle ) )
        return NULL;
    size_t tag_size = 1;
    while ( isalnum( (unsigned char)psz_subtitle[tag_size] ) ||
            psz_subtitle[tag_size] == '_' )
        tag_size++;
    char* psz_tagname = vlc_alloc( tag_size + 1, sizeof( *psz_tagname ) );
    if ( unlikely( !psz_tagname ) )
        return NULL;
    // The scanned range contains no NUL, so a plain copy is enough
    memcpy( psz_tagname, psz_subtitle, tag_size );
    psz_tagname[tag_size] = '\0';
    *ppsz_subtitle = psz_subtitle + tag_size;
    return psz_tagname;
}
616
617
// Tells whether psz_subtitle contains a closing tag for psz_tagname, i.e.
// the tag name (compared case-insensitively) preceded by "</" and followed
// by '>', with optional spaces allowed around the name ("</ name >").
// Every occurrence of the tag name is examined, so a closing tag is still
// found when the name also appears earlier as plain text ("bold</b>").
// No character located before psz_subtitle is ever read.
static bool IsClosed( const char* psz_subtitle, const char* psz_tagname )
{
    const size_t i_taglen = strlen( psz_tagname );
    if ( i_taglen == 0 )
        return false;

    for ( const char* psz_tagpos = strcasestr( psz_subtitle, psz_tagname );
          psz_tagpos != NULL;
          psz_tagpos = strcasestr( psz_tagpos + 1, psz_tagname ) )
    {
        // Search for '>' immediately after (minding the potential spaces)
        const char* psz_endtag = psz_tagpos + i_taglen;
        while ( *psz_endtag == ' ' )
            psz_endtag++;
        if ( *psz_endtag != '>' )
            continue;

        // Skip back over the spaces before the tag name, staying in bounds
        const char* psz_before = psz_tagpos;
        while ( psz_before > psz_subtitle && psz_before[-1] == ' ' )
            psz_before--;

        // "</" needs two characters in front of the name
        if ( psz_before - psz_subtitle >= 2 &&
             psz_before[-1] == '/' && psz_before[-2] == '<' )
            return true;
    }
    return false;
}
638
639
/* Singly linked stack of tag names. Entries are pushed by AppendTag and
 * looked up / removed by HasTag. */
typedef struct tag_stack tag_stack_t;
640
struct tag_stack
641
{
642
    char* psz_tagname;   /* heap-allocated tag name; freed by HasTag when the
                          * entry is removed */
643
    tag_stack_t *p_next; /* next (older) entry, NULL at the bottom */
644
};
645
646
/* Pushes psz_tagname on top of the tag stack *pp_stack.
 *
 * The stack takes ownership of psz_tagname: HasTag frees it when the entry
 * is removed. As this function cannot report a failure, the name is released
 * here when the stack entry cannot be allocated, so that it is not leaked.
 * NOTE(review): this assumes callers never touch psz_tagname after the
 * call -- confirm against the call site. */
static void AppendTag( tag_stack_t **pp_stack, char* psz_tagname )
{
    tag_stack_t* p_elem = malloc( sizeof( *p_elem ) );
    if ( unlikely( !p_elem ) )
    {
        free( psz_tagname );
        return;
    }
    p_elem->p_next = *pp_stack;
    p_elem->psz_tagname = psz_tagname;
    *pp_stack = p_elem;
}
655
656
/* Looks for psz_tagname (case-insensitive) in the tag stack, starting from
 * the top. The first matching entry is unlinked and freed together with its
 * name, and true is returned. Returns false, leaving the stack untouched,
 * when no entry matches. */
static bool HasTag( tag_stack_t **pp_stack, const char* psz_tagname )
{
    /* pp_link always designates the pointer that leads to the entry being
     * examined, so unlinking works the same for the top and for the rest */
    for ( tag_stack_t **pp_link = pp_stack; *pp_link != NULL;
          pp_link = &(*pp_link)->p_next )
    {
        tag_stack_t *p_match = *pp_link;
        if ( strcasecmp( psz_tagname, p_match->psz_tagname ) != 0 )
            continue;

        *pp_link = p_match->p_next;
        free( p_match->psz_tagname );
        free( p_match );
        return true;
    }
    return false;
}
679
680
/*
681
 * mini style stack implementation
682
 */
683
/* Singly linked stack of text styles. Entries are pushed by
 * DuplicateAndPushStyle and removed by PopStyle. */
typedef struct style_stack style_stack_t;
684
struct  style_stack
685
{
686
    text_style_t* p_style; /* style of this nesting level; not freed by
                            * PopStyle */
687
    style_stack_t* p_next; /* next (outer) entry, NULL at the bottom */
688
};
689
690
/* Creates a new style and pushes it on top of *pp_stack. The new style is a
 * duplicate of the current top of the stack, or a style created with
 * STYLE_NO_DEFAULTS when the stack is empty. Returns the new style, or NULL
 * (stack unchanged) when an allocation fails. */
static text_style_t* DuplicateAndPushStyle(style_stack_t** pp_stack)
{
    text_style_t* p_copy;
    if ( *pp_stack != NULL )
        p_copy = text_style_Duplicate( (*pp_stack)->p_style );
    else
        p_copy = text_style_Create( STYLE_NO_DEFAULTS );
    if ( unlikely( p_copy == NULL ) )
        return NULL;

    style_stack_t* p_top = malloc( sizeof( *p_top ) );
    if ( unlikely( p_top == NULL ) )
    {
        text_style_Delete( p_copy );
        return NULL;
    }

    // The stack entry only references the style: PopStyle does not free it,
    // the text segment receiving the returned pointer is its owner.
    p_top->p_next = *pp_stack;
    p_top->p_style = p_copy;
    *pp_stack = p_top;
    return p_copy;
}
707
708
/* Removes the top entry of the style stack, if any. Only the stack entry is
 * released; the style it points to is left alone. */
static void PopStyle(style_stack_t** pp_stack)
{
    style_stack_t* p_top = *pp_stack;
    if ( p_top != NULL )
    {
        *pp_stack = p_top->p_next;
        // Don't free the style, it is now owned by the text_segment_t
        free( p_top );
    }
}
717
718
/* Creates an empty text segment, links it after p_segment and gives it a
 * new style pushed on the style stack (see DuplicateAndPushStyle). Returns
 * the new segment, or NULL when the segment cannot be allocated.
 * NOTE(review): the style of the returned segment is NULL when
 * DuplicateAndPushStyle fails -- callers should be checked for this. */
static text_segment_t* NewTextSegmentPushStyle( text_segment_t* p_segment, style_stack_t** pp_stack )
{
    text_segment_t* p_created = text_segment_New( NULL );
    if ( unlikely( p_created == NULL ) )
        return NULL;

    p_created->style = DuplicateAndPushStyle( pp_stack );
    p_segment->p_next = p_created;
    return p_created;
}
728
729
/* Creates an empty text segment, pops the top of the style stack and links
 * the new segment after p_segment. The segment gets a duplicate of the style
 * that is now on top of the stack, or a style created with STYLE_NO_DEFAULTS
 * when the stack is empty. Returns the new segment, or NULL when the segment
 * cannot be allocated (the stack is then left untouched). */
static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_stack_t** pp_stack )
{
    text_segment_t* p_created = text_segment_New( NULL );
    if ( unlikely( p_created == NULL ) )
        return NULL;

    // We shouldn't have an empty stack since this happens when closing a tag,
    // but PopStyle tolerates it in case the subtitle file is broken.
    PopStyle( pp_stack );

    if ( *pp_stack != NULL )
        p_created->style = text_style_Duplicate( (*pp_stack)->p_style );
    else
        p_created->style = text_style_Create( STYLE_NO_DEFAULTS );

    p_segment->p_next = p_created;
    return p_created;
}
742
743
static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
744
218k
{
745
218k
    text_segment_t* p_segment;
746
218k
    text_segment_t* p_first_segment;
747
218k
    style_stack_t* p_stack = NULL;
748
218k
    tag_stack_t* p_tag_stack = NULL;
749
750
    //FIXME: Remove initial allocation? Might make the below code more complicated
751
218k
    p_first_segment = p_segment = text_segment_New( "" );
752
753
218k
    *pi_align = -1;
754
755
    /* */
756
3.87M
    while( *psz_subtitle )
757
3.66M
    {
758
        /* HTML extensions */
759
3.66M
        if( *psz_subtitle == '<' )
760
97.7k
        {
761
97.7k
            char *psz_tagname = GetTag( &psz_subtitle, false );
762
97.7k
            if ( psz_tagname != NULL )
763
31.3k
            {
764
31.3k
                if( !strcasecmp( psz_tagname, "br" ) )
765
1.51k
                {
766
1.51k
                    if ( !AppendCharacter( p_segment, '\n' ) )
767
0
                    {
768
0
                        free( psz_tagname );
769
0
                        goto fail;
770
0
                    }
771
1.51k
                }
772
29.8k
                else if( !strcasecmp( psz_tagname, "b" ) )
773
1.45k
                {
774
1.45k
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
775
1.45k
                    p_segment->style->i_style_flags |= STYLE_BOLD;
776
1.45k
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
777
1.45k
                }
778
28.3k
                else if( !strcasecmp( psz_tagname, "i" ) )
779
6.28k
                {
780
6.28k
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
781
6.28k
                    p_segment->style->i_style_flags |= STYLE_ITALIC;
782
6.28k
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
783
6.28k
                }
784
22.0k
                else if( !strcasecmp( psz_tagname, "u" ) )
785
1.36k
                {
786
1.36k
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
787
1.36k
                    p_segment->style->i_style_flags |= STYLE_UNDERLINE;
788
1.36k
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
789
1.36k
                }
790
20.6k
                else if( !strcasecmp( psz_tagname, "s" ) )
791
682
                {
792
682
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
793
682
                    p_segment->style->i_style_flags |= STYLE_STRIKEOUT;
794
682
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
795
682
                }
796
20.0k
                else if( !strcasecmp( psz_tagname, "font" ) )
797
6.77k
                {
798
6.77k
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
799
800
6.77k
                    char* psz_attribute_name;
801
6.77k
                    char* psz_attribute_value;
802
803
17.0k
                    while( ( psz_attribute_name = ConsumeAttribute( &psz_subtitle, &psz_attribute_value ) ) )
804
10.3k
                    {
805
10.3k
                        if ( !psz_attribute_value )
806
1.14k
                        {
807
1.14k
                            free( psz_attribute_name );
808
1.14k
                            continue;
809
1.14k
                        }
810
9.15k
                        if ( !strcasecmp( psz_attribute_name, "face" ) )
811
238
                        {
812
238
                            free(p_segment->style->psz_fontname);
813
238
                            p_segment->style->psz_fontname = psz_attribute_value;
814
                            // We don't want to free the attribute value since it has become our fontname
815
238
                            psz_attribute_value = NULL;
816
238
                        }
817
8.91k
                        else if ( !strcasecmp( psz_attribute_name, "family" ) )
818
226
                        {
819
226
                            free(p_segment->style->psz_monofontname);
820
226
                            p_segment->style->psz_monofontname = psz_attribute_value;
821
226
                            psz_attribute_value = NULL;
822
226
                        }
823
8.68k
                        else if ( !strcasecmp( psz_attribute_name, "size" ) )
824
697
                        {
825
697
                            int size = atoi( psz_attribute_value );
826
697
                            if( size )
827
392
                            {
828
392
                                p_segment->style->i_font_size = size;
829
392
                                p_segment->style->f_font_relsize = STYLE_DEFAULT_REL_FONT_SIZE *
830
392
                                        STYLE_DEFAULT_FONT_SIZE / p_segment->style->i_font_size;
831
392
                            }
832
697
                        }
833
7.99k
                        else if ( !strcasecmp( psz_attribute_name, "color" ) )
834
4.65k
                        {
835
4.65k
                            p_segment->style->i_font_color = vlc_html_color( psz_attribute_value, NULL );
836
4.65k
                            p_segment->style->i_features |= STYLE_HAS_FONT_COLOR;
837
4.65k
                        }
838
3.33k
                        else if ( !strcasecmp( psz_attribute_name, "outline-color" ) )
839
0
                        {
840
0
                            p_segment->style->i_outline_color = vlc_html_color( psz_attribute_value, NULL );
841
0
                            p_segment->style->i_features |= STYLE_HAS_OUTLINE_COLOR;
842
0
                        }
843
3.33k
                        else if ( !strcasecmp( psz_attribute_name, "shadow-color" ) )
844
0
                        {
845
0
                            p_segment->style->i_shadow_color = vlc_html_color( psz_attribute_value, NULL );
846
0
                            p_segment->style->i_features |= STYLE_HAS_SHADOW_COLOR;
847
0
                        }
848
3.33k
                        else if ( !strcasecmp( psz_attribute_name, "outline-level" ) )
849
0
                        {
850
0
                            p_segment->style->i_outline_width = atoi( psz_attribute_value );
851
0
                        }
852
3.33k
                        else if ( !strcasecmp( psz_attribute_name, "shadow-level" ) )
853
0
                        {
854
0
                            p_segment->style->i_shadow_width = atoi( psz_attribute_value );
855
0
                        }
856
3.33k
                        else if ( !strcasecmp( psz_attribute_name, "back-color" ) )
857
0
                        {
858
0
                            p_segment->style->i_background_color = vlc_html_color( psz_attribute_value, NULL );
859
0
                            p_segment->style->i_features |= STYLE_HAS_BACKGROUND_COLOR;
860
0
                        }
861
3.33k
                        else if ( !strcasecmp( psz_attribute_name, "alpha" ) )
862
268
                        {
863
268
                            p_segment->style->i_font_alpha = atoi( psz_attribute_value );
864
268
                            p_segment->style->i_features |= STYLE_HAS_FONT_ALPHA;
865
268
                        }
866
867
9.15k
                        free( psz_attribute_name );
868
9.15k
                        free( psz_attribute_value );
869
9.15k
                    }
870
6.77k
                }
871
13.2k
                else
872
13.2k
                {
873
                    // This is an unknown tag. We need to hide it if it's properly closed, and display it otherwise
874
13.2k
                    if ( !IsClosed( psz_subtitle, psz_tagname ) )
875
11.6k
                    {
876
11.6k
                        AppendCharacter( p_segment, '<' );
877
11.6k
                        AppendString( p_segment, psz_tagname );
878
11.6k
                        AppendCharacter( p_segment, '>' );
879
11.6k
                    }
880
1.54k
                    else
881
1.54k
                    {
882
1.54k
                        AppendTag( &p_tag_stack, psz_tagname );
883
                        // We don't want to free the tagname now, it will be freed when the tag
884
                        // gets poped from the stack.
885
1.54k
                        psz_tagname = NULL;
886
1.54k
                    }
887
                    // In any case, fall through and skip to the closing tag.
888
13.2k
                }
889
                // Skip potential spaces & end tag
890
433k
                while ( *psz_subtitle && *psz_subtitle != '>' )
891
402k
                    psz_subtitle++;
892
31.3k
                if ( *psz_subtitle == '>' )
893
27.1k
                    psz_subtitle++;
894
895
31.3k
                free( psz_tagname );
896
31.3k
            }
897
66.3k
            else if( !strncmp( psz_subtitle, "</", 2 ))
898
48.3k
            {
899
48.3k
                char* psz_closetagname = GetTag( &psz_subtitle, true );
900
48.3k
                if ( psz_closetagname != NULL )
901
16.4k
                {
902
16.4k
                    if ( !strcasecmp( psz_closetagname, "b" ) ||
903
15.1k
                         !strcasecmp( psz_closetagname, "i" ) ||
904
12.7k
                         !strcasecmp( psz_closetagname, "u" ) ||
905
9.92k
                         !strcasecmp( psz_closetagname, "s" ) ||
906
9.52k
                         !strcasecmp( psz_closetagname, "font" ) )
907
11.0k
                    {
908
                        // A closing tag for one of the tags we handle, meaning
909
                        // we pushed a style onto the stack earlier
910
11.0k
                        p_segment = NewTextSegmentPopStyle( p_segment, &p_stack );
911
11.0k
                    }
912
5.37k
                    else
913
5.37k
                    {
914
                        // Unknown closing tag. If it is closing an unknown tag, ignore it. Otherwise, display it
915
5.37k
                        if ( !HasTag( &p_tag_stack, psz_closetagname ) )
916
4.67k
                        {
917
4.67k
                            AppendString( p_segment, "</" );
918
4.67k
                            AppendString( p_segment, psz_closetagname );
919
4.67k
                            AppendCharacter( p_segment, '>' );
920
4.67k
                        }
921
5.37k
                    }
922
17.2k
                    while ( *psz_subtitle == ' ' )
923
832
                        psz_subtitle++;
924
16.4k
                    if ( *psz_subtitle == '>' )
925
11.5k
                        psz_subtitle++;
926
16.4k
                    free( psz_closetagname );
927
16.4k
                }
928
31.9k
                else
929
31.9k
                {
930
                    /**
931
                      * This doesn't appear to be a valid tag closing syntax.
932
                      * Simply append the text
933
                      */
934
31.9k
                    AppendString( p_segment, "</" );
935
31.9k
                    psz_subtitle += 2;
936
31.9k
                }
937
48.3k
            }
938
18.0k
            else
939
18.0k
            {
940
                /* We have an unknown tag, just append it, and move on.
941
                 * The rest of the string won't be recognized as a tag, and
942
                 * we will ignore unknown closing tag
943
                 */
944
18.0k
                AppendCharacter( p_segment, '<' );
945
18.0k
                psz_subtitle++;
946
18.0k
            }
947
97.7k
        }
948
        /* SSA extensions */
949
3.56M
        else if( psz_subtitle[0] == '{' && psz_subtitle[1] == '\\' &&
950
1.64k
                 strchr( psz_subtitle, '}' ) )
951
1.43k
        {
952
            /* Check for forced alignment */
953
1.43k
            if( *pi_align < 0 &&
954
1.22k
                !strncmp( psz_subtitle, "{\\an", 4 ) && psz_subtitle[4] >= '1' && psz_subtitle[4] <= '9' && psz_subtitle[5] == '}' )
955
225
            {
956
225
                static const int pi_vertical[3] = { SUBPICTURE_ALIGN_BOTTOM, 0, SUBPICTURE_ALIGN_TOP };
957
225
                static const int pi_horizontal[3] = { SUBPICTURE_ALIGN_LEFT, 0, SUBPICTURE_ALIGN_RIGHT };
958
225
                const int i_id = psz_subtitle[4] - '1';
959
960
225
                *pi_align = pi_vertical[i_id/3] | pi_horizontal[i_id%3];
961
225
            }
962
            /* TODO fr -> rotation */
963
964
            /* Hide {\stupidity} */
965
1.43k
            psz_subtitle = strchr( psz_subtitle, '}' ) + 1;
966
1.43k
        }
967
        /* MicroDVD extensions */
968
        /* FIXME:
969
         *  - Currently, we don't do difference between X and x, and we should:
970
         *    Capital Letters applies to the whole text and not one line
971
         *  - We don't support Position and Coordinates
972
         *  - We don't support the DEFAULT flag (HEADER)
973
         */
974
975
3.56M
        else if( psz_subtitle[0] == '{' && psz_subtitle[1] != 0 &&
976
14.4k
                 psz_subtitle[2] == ':' && strchr( &psz_subtitle[2], '}' ) )
977
6.31k
        {
978
6.31k
            const char *psz_tag_end = strchr( &psz_subtitle[2], '}' );
979
6.31k
            size_t i_len = psz_tag_end - &psz_subtitle[3];
980
981
6.31k
            if( psz_subtitle[1] == 'Y' || psz_subtitle[1] == 'y' )
982
2.15k
            {
983
2.15k
                if( psz_subtitle[3] == 'i' )
984
489
                {
985
489
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
986
489
                    p_segment->style->i_style_flags |= STYLE_ITALIC;
987
489
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
988
489
                    psz_subtitle++;
989
489
                }
990
2.15k
                if( psz_subtitle[3] == 'b' )
991
491
                {
992
491
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
993
491
                    p_segment->style->i_style_flags |= STYLE_BOLD;
994
491
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
995
491
                    psz_subtitle++;
996
491
                }
997
2.15k
                if( psz_subtitle[3] == 'u' )
998
729
                {
999
729
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
1000
729
                    p_segment->style->i_style_flags |= STYLE_UNDERLINE;
1001
729
                    p_segment->style->i_features |= STYLE_HAS_FLAGS;
1002
729
                    psz_subtitle++;
1003
729
                }
1004
2.15k
            }
1005
4.16k
            else if( (psz_subtitle[1] == 'C' || psz_subtitle[1] == 'c' )
1006
2.24k
                    && psz_subtitle[3] == '$' && i_len >= 7 )
1007
1.29k
            {
1008
                /* Yes, they use BBGGRR, instead of RRGGBB */
1009
1.29k
                char psz_color[7];
1010
1.29k
                psz_color[0] = psz_subtitle[8]; psz_color[1] = psz_subtitle[9];
1011
1.29k
                psz_color[2] = psz_subtitle[6]; psz_color[3] = psz_subtitle[7];
1012
1.29k
                psz_color[4] = psz_subtitle[4]; psz_color[5] = psz_subtitle[5];
1013
1.29k
                psz_color[6] = '\0';
1014
1.29k
                p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
1015
1.29k
                p_segment->style->i_font_color = vlc_html_color( psz_color, NULL );
1016
1.29k
                p_segment->style->i_features |= STYLE_HAS_FONT_COLOR;
1017
1.29k
            }
1018
2.86k
            else if( psz_subtitle[1] == 'F' || psz_subtitle[1] == 'f' )
1019
664
            {
1020
664
                p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
1021
664
                free(p_segment->style->psz_fontname);
1022
664
                p_segment->style->psz_fontname = strndup( &psz_subtitle[3], i_len );
1023
664
            }
1024
2.19k
            else if( psz_subtitle[1] == 'S' || psz_subtitle[1] == 's' )
1025
1.07k
            {
1026
1.07k
                int size = atoi( &psz_subtitle[3] );
1027
1.07k
                if( size )
1028
733
                {
1029
733
                    p_segment = NewTextSegmentPushStyle( p_segment, &p_stack );
1030
733
                    p_segment->style->i_font_size = size;
1031
733
                    p_segment->style->f_font_relsize = STYLE_DEFAULT_REL_FONT_SIZE *
1032
733
                                STYLE_DEFAULT_FONT_SIZE / p_segment->style->i_font_size;
1033
1034
733
                }
1035
1.07k
            }
1036
            /* Currently unsupported since we don't have access to the i_align flag here
1037
            else if( psz_subtitle[1] == 'P' )
1038
            {
1039
                if( psz_subtitle[3] == "1" )
1040
                    i_align = SUBPICTURE_ALIGN_TOP;
1041
                else if( psz_subtitle[3] == "0" )
1042
                    i_align = SUBPICTURE_ALIGN_BOTTOM;
1043
            } */
1044
            // Hide other {x:y} atrocities, notably {o:x}
1045
6.31k
            psz_subtitle = psz_tag_end + 1;
1046
6.31k
        }
1047
3.55M
        else
1048
3.55M
        {
1049
3.55M
            if( *psz_subtitle == '\n' || !strncasecmp( psz_subtitle, "\\n", 2 ) )
1050
87.2k
            {
1051
87.2k
                if ( !AppendCharacter( p_segment, '\n' ) )
1052
0
                    goto fail;
1053
87.2k
                if ( *psz_subtitle == '\n' )
1054
86.8k
                    psz_subtitle++;
1055
398
                else
1056
398
                    psz_subtitle += 2;
1057
87.2k
            }
1058
3.46M
            else if( !strncasecmp( psz_subtitle, "\\h", 2 ) )
1059
639
            {
1060
639
                if ( !AppendString( p_segment, "\xC2\xA0" ) )
1061
0
                    goto fail;
1062
639
                psz_subtitle += 2;
1063
639
            }
1064
3.46M
            else
1065
3.46M
            {
1066
                //FIXME: Highly inneficient
1067
3.46M
                AppendCharacter( p_segment, *psz_subtitle );
1068
3.46M
                psz_subtitle++;
1069
3.46M
            }
1070
3.55M
        }
1071
3.66M
    }
1072
235k
    while ( p_stack )
1073
16.6k
        PopStyle( &p_stack );
1074
219k
    while ( p_tag_stack )
1075
844
    {
1076
844
        tag_stack_t *p_tag = p_tag_stack;
1077
844
        p_tag_stack = p_tag_stack->p_next;
1078
844
        free( p_tag->psz_tagname );
1079
844
        free( p_tag );
1080
844
    }
1081
1082
218k
    return p_first_segment;
1083
1084
0
fail:
1085
0
    text_segment_ChainDelete( p_first_segment );
1086
    return NULL;
1087
218k
}