Coverage Report

Created: 2026-02-14 08:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/modules/demux/subtitle.c
Line
Count
Source
1
/*****************************************************************************
2
 * subtitle.c: Demux for subtitle text files.
3
 *****************************************************************************
4
 * Copyright (C) 1999-2007 VLC authors and VideoLAN
5
 * Copyright (C) 2023      Videolabs
6
 *
7
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Derk-Jan Hartman <hartman at videolan dot org>
9
 *          Jean-Baptiste Kempf <jb@videolan.org>
10
 *          Alexandre Janniaux <ajanni@videolabs.io>
11
 *
12
 * This program is free software; you can redistribute it and/or modify it
13
 * under the terms of the GNU Lesser General Public License as published by
14
 * the Free Software Foundation; either version 2.1 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
 * GNU Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public License
23
 * along with this program; if not, write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25
 *****************************************************************************/
26
27
/*****************************************************************************
28
 * Preamble
29
 *****************************************************************************/
30
31
#ifdef HAVE_CONFIG_H
32
# include "config.h"
33
#endif
34
35
#include <vlc_common.h>
36
#include <vlc_arrays.h>
37
#include <vlc_plugin.h>
38
#include <vlc_url.h>
39
40
#include <ctype.h>
41
#include <math.h>
42
#include <assert.h>
43
44
#include <vlc_demux.h>
45
#include <vlc_charset.h>
46
47
/*****************************************************************************
48
 * Module descriptor
49
 *****************************************************************************/
50
static int  Open ( vlc_object_t *p_this );
51
static void Close( vlc_object_t *p_this );
52
53
#define SUB_TYPE_LONGTEXT \
54
    N_("Force the subtitles format. Selecting \"auto\" means autodetection and should always work.")
55
#define SUB_DESCRIPTION_LONGTEXT \
56
    N_("Override the default track description.")
57
58
static const char *const ppsz_sub_type[] =
59
{
60
    "auto", "microdvd", "subrip", "subviewer", "ssa1",
61
    "ssa2-4", "ass", "vplayer", "sami", "dvdsubtitle", "mpl2",
62
    "aqt", "pjs", "mpsub", "jacosub", "psb", "realtext", "dks",
63
    "subviewer1", "sbv"
64
};
65
66
104
vlc_module_begin ()
67
52
    set_shortname( N_("Subtitles"))
68
52
    set_description( N_("Text subtitle parser") )
69
52
    set_capability( "demux", 0 )
70
52
    set_subcategory( SUBCAT_INPUT_DEMUX )
71
52
    add_string( "sub-type", "auto", N_("Subtitle format"),
72
52
                SUB_TYPE_LONGTEXT )
73
52
        change_string_list( ppsz_sub_type, ppsz_sub_type )
74
52
    add_string( "sub-description", NULL, N_("Subtitle description"),
75
52
                SUB_DESCRIPTION_LONGTEXT )
76
52
    set_callbacks( Open, Close )
77
78
52
    add_shortcut( "subtitle" )
79
52
vlc_module_end ()
80
81
/*****************************************************************************
82
 * Prototypes:
83
 *****************************************************************************/
84
/* Identifiers of the subtitle formats known to this demuxer.
 * SUB_TYPE_UNKNOWN is the "not detected / not forced" marker; the other
 * values are matched against the i_type column of the parser table. */
enum subtitle_type_e
{
    SUB_TYPE_UNKNOWN = -1,

    SUB_TYPE_MICRODVD,
    SUB_TYPE_SUBRIP,

    /* Sub Station Alpha family */
    SUB_TYPE_SSA1,
    SUB_TYPE_SSA2_4,
    SUB_TYPE_ASS,

    SUB_TYPE_VPLAYER,
    SUB_TYPE_SAMI,
    SUB_TYPE_SUBVIEWER,   /* SUBVIEWER 2 */
    SUB_TYPE_DVDSUBTITLE, /* Mplayer calls it subviewer2 */
    SUB_TYPE_MPL2,
    SUB_TYPE_AQT,
    SUB_TYPE_PJS,
    SUB_TYPE_MPSUB,
    SUB_TYPE_JACOSUB,
    SUB_TYPE_PSB,
    SUB_TYPE_RT,
    SUB_TYPE_DKS,
    SUB_TYPE_SUBVIEW1,    /* SUBVIEWER 1 - mplayer calls it subrip09,
                             and Gnome subtitles SubViewer 1.0 */
    SUB_TYPE_SBV,
    SUB_TYPE_SCC,         /* Scenarist Closed Caption */
};
109
110
/* Set of text lines handed to the format parsers.
 * Filled by TextLoad() and released by TextUnload(). */
typedef struct
{
    size_t  i_line_count; /* presumably the number of entries in line[] - confirm in TextLoad() */
    size_t  i_line;       /* presumably the index of the next line to hand out - confirm */
    char    **line;       /* array of line strings */
} text_t;
116
117
static int  TextLoad( text_t *, stream_t *s );
118
static void TextUnload( text_t * );
119
120
typedef struct
121
{
122
    vlc_tick_t i_start;
123
    vlc_tick_t i_stop;
124
125
    char    *psz_text;
126
} subtitle_t;
127
128
typedef struct
129
{
130
    enum subtitle_type_e i_type;
131
    vlc_tick_t  i_microsecperframe;
132
133
    char        *psz_header; /* SSA */
134
    char        *psz_lang;
135
136
    struct
137
    {
138
        bool b_inited;
139
140
        int i_comment;
141
        int i_time_resolution;
142
        int i_time_shift;
143
    } jss;
144
145
    struct
146
    {
147
        bool  b_inited;
148
149
        float f_total;
150
        int i_factor;
151
    } mpsub;
152
153
    struct
154
    {
155
        const char *psz_start;
156
    } sami;
157
158
} subs_properties_t;
159
160
typedef struct
161
{
162
    es_out_id_t *es;
163
    bool        b_slave;
164
    bool        b_first_time;
165
    bool        b_sorted;
166
167
    double      f_rate;
168
    vlc_tick_t  i_next_demux_date;
169
170
    struct
171
    {
172
        subtitle_t *p_array;
173
        size_t      i_count;
174
        size_t      i_current;
175
    } subtitles;
176
177
    vlc_tick_t  i_length;
178
179
    /* */
180
    subs_properties_t props;
181
182
    block_t * (*pf_convert)( const subtitle_t * );
183
} demux_sys_t;
184
185
static int  ParseMicroDvd   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
186
static int  ParseSubRip     ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
187
static int  ParseSubViewer  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
188
static int  ParseSSA        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
189
static int  ParseVplayer    ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
190
static int  ParseSami       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
191
static int  ParseDVDSubtitle( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
192
static int  ParseMPL2       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
193
static int  ParseAQT        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
194
static int  ParsePJS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
195
static int  ParseMPSub      ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
196
static int  ParseJSS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
197
static int  ParsePSB        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
198
static int  ParseRealText   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
199
static int  ParseDKS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
200
static int  ParseSubViewer1 ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
201
static int  ParseCommonSBV  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
202
static int  ParseSCC        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
203
204
static const struct
205
{
206
    const char *psz_type_name;
207
    int  i_type;
208
    const char *psz_name;
209
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
210
} sub_read_subtitle_function [] =
211
{
212
    { "microdvd",   SUB_TYPE_MICRODVD,    "MicroDVD",    ParseMicroDvd },
213
    { "subrip",     SUB_TYPE_SUBRIP,      "SubRIP",      ParseSubRip },
214
    { "subviewer",  SUB_TYPE_SUBVIEWER,   "SubViewer",   ParseSubViewer },
215
    { "ssa1",       SUB_TYPE_SSA1,        "SSA-1",       ParseSSA },
216
    { "ssa2-4",     SUB_TYPE_SSA2_4,      "SSA-2/3/4",   ParseSSA },
217
    { "ass",        SUB_TYPE_ASS,         "SSA/ASS",     ParseSSA },
218
    { "vplayer",    SUB_TYPE_VPLAYER,     "VPlayer",     ParseVplayer },
219
    { "sami",       SUB_TYPE_SAMI,        "SAMI",        ParseSami },
220
    { "dvdsubtitle",SUB_TYPE_DVDSUBTITLE, "DVDSubtitle", ParseDVDSubtitle },
221
    { "mpl2",       SUB_TYPE_MPL2,        "MPL2",        ParseMPL2 },
222
    { "aqt",        SUB_TYPE_AQT,         "AQTitle",     ParseAQT },
223
    { "pjs",        SUB_TYPE_PJS,         "PhoenixSub",  ParsePJS },
224
    { "mpsub",      SUB_TYPE_MPSUB,       "MPSub",       ParseMPSub },
225
    { "jacosub",    SUB_TYPE_JACOSUB,     "JacoSub",     ParseJSS },
226
    { "psb",        SUB_TYPE_PSB,         "PowerDivx",   ParsePSB },
227
    { "realtext",   SUB_TYPE_RT,          "RealText",    ParseRealText },
228
    { "dks",        SUB_TYPE_DKS,         "DKS",         ParseDKS },
229
    { "subviewer1", SUB_TYPE_SUBVIEW1,    "Subviewer 1", ParseSubViewer1 },
230
    { "sbv",        SUB_TYPE_SBV,         "SBV",         ParseCommonSBV },
231
    { "scc",        SUB_TYPE_SCC,         "SCC",         ParseSCC },
232
    { NULL,         SUB_TYPE_UNKNOWN,     "Unknown",     NULL }
233
};
234
/* When adding support for more formats, be sure to add their file extension
235
 * to src/input/subtitles.c to enable auto-detection.
236
 */
237
238
static int Demux( demux_t * );
239
static int Control( demux_t *, int, va_list );
240
241
static void Fix( demux_t * );
242
static char *get_language_from_url(const char *);
243
244
/* Converts an hours/minutes/seconds triplet into a vlc_tick_t.
 * The sum is computed in 64 bits so that large hour counts do not
 * overflow int arithmetic. */
static vlc_tick_t vlc_tick_from_HMS( int h, int m, int s )
{
    const int64_t i_hours_as_sec   = h * INT64_C(3600);
    const int64_t i_minutes_as_sec = m * INT64_C(60);

    return vlc_tick_from_sec(i_hours_as_sec + i_minutes_as_sec + s);
}
248
249
/*****************************************************************************
250
 * Decoder format output function
251
 *****************************************************************************/
252
253
static block_t *ToTextBlock( const subtitle_t *p_subtitle )
254
330k
{
255
330k
    if ( p_subtitle->psz_text == NULL )
256
1.81k
        return NULL;
257
258
328k
    block_t *p_block;
259
328k
    size_t i_len = strlen( p_subtitle->psz_text ) + 1;
260
261
328k
    if( i_len <= 1 || !(p_block = block_Alloc( i_len )) )
262
39.9k
        return NULL;
263
264
288k
    memcpy( p_block->p_buffer, p_subtitle->psz_text, i_len );
265
266
288k
    return p_block;
267
328k
}
268
269
static block_t *ToEIA608Block( const subtitle_t *p_subtitle )
270
3.29k
{
271
3.29k
    if ( p_subtitle->psz_text == NULL )
272
0
        return NULL;
273
274
3.29k
    block_t *p_block;
275
3.29k
    const size_t i_len = strlen( p_subtitle->psz_text );
276
3.29k
    const size_t i_block = (1 + i_len / 5) * 3;
277
278
3.29k
    if( i_len < 4 || !(p_block = block_Alloc( i_block )) )
279
38
        return NULL;
280
281
3.25k
    p_block->i_buffer = 0;
282
283
3.25k
    char *saveptr = NULL;
284
3.25k
    char *psz_tok = strtok_r( p_subtitle->psz_text, " ", &saveptr );
285
3.25k
    unsigned a, b;
286
39.1k
    while( psz_tok &&
287
37.7k
           sscanf( psz_tok, "%2x%2x", &a, &b ) == 2 &&
288
35.9k
           i_block - p_block->i_buffer >= 3 )
289
35.9k
    {
290
35.9k
        uint8_t *p_data = &p_block->p_buffer[p_block->i_buffer];
291
35.9k
        p_data[0] = 0xFC;
292
35.9k
        p_data[1] = a;
293
35.9k
        p_data[2] = b;
294
35.9k
        p_block->i_buffer += 3;
295
35.9k
        psz_tok = strtok_r( NULL, " ", &saveptr );
296
35.9k
    }
297
298
3.25k
    return p_block;
299
3.29k
}
300
301
/*****************************************************************************
302
 * Module initializer
303
 *****************************************************************************/
304
static int Open ( vlc_object_t *p_this )
305
499
{
306
499
    demux_t        *p_demux = (demux_t*)p_this;
307
499
    demux_sys_t    *p_sys;
308
499
    es_format_t    fmt;
309
499
    float          f_fps;
310
499
    char           *psz_type;
311
499
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
312
313
499
    if( !p_demux->obj.force )
314
0
    {
315
0
        msg_Dbg( p_demux, "subtitle demux discarded" );
316
0
        return VLC_EGENERIC;
317
0
    }
318
319
499
    p_demux->pf_demux = Demux;
320
499
    p_demux->pf_control = Control;
321
499
    p_demux->p_sys = p_sys = malloc( sizeof( demux_sys_t ) );
322
499
    if( p_sys == NULL )
323
0
        return VLC_ENOMEM;
324
325
499
    p_sys->b_slave = false;
326
499
    p_sys->b_first_time = true;
327
499
    p_sys->b_sorted = false;
328
499
    p_sys->i_next_demux_date = 0;
329
499
    p_sys->f_rate = 1.0;
330
331
499
    p_sys->pf_convert = ToTextBlock;
332
333
499
    p_sys->subtitles.i_current= 0;
334
499
    p_sys->subtitles.i_count  = 0;
335
499
    p_sys->subtitles.p_array  = NULL;
336
337
499
    p_sys->props.psz_header         = NULL;
338
499
    p_sys->props.psz_lang           = NULL;
339
499
    p_sys->props.i_microsecperframe = VLC_TICK_FROM_MS(40);
340
499
    p_sys->props.jss.b_inited       = false;
341
499
    p_sys->props.mpsub.b_inited     = false;
342
499
    p_sys->props.sami.psz_start     = NULL;
343
344
    /* Get the FPS */
345
499
    f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" );
346
499
    if( f_fps >= 1.f )
347
0
    {
348
0
        p_sys->props.i_microsecperframe = llroundf( (float)CLOCK_FREQ / f_fps );
349
0
        msg_Dbg( p_demux, "Override subtitle fps %f", (double) f_fps );
350
0
    }
351
352
    /* Get or probe the type */
353
499
    p_sys->props.i_type = SUB_TYPE_UNKNOWN;
354
499
    psz_type = var_CreateGetString( p_demux, "sub-type" );
355
499
    if( psz_type && *psz_type )
356
499
    {
357
9.98k
        for( int i = 0; ; i++ )
358
10.4k
        {
359
10.4k
            if( sub_read_subtitle_function[i].psz_type_name == NULL )
360
499
                break;
361
362
9.98k
            if( !strcmp( sub_read_subtitle_function[i].psz_type_name,
363
9.98k
                         psz_type ) )
364
0
            {
365
0
                p_sys->props.i_type = sub_read_subtitle_function[i].i_type;
366
0
                break;
367
0
            }
368
9.98k
        }
369
499
    }
370
499
    free( psz_type );
371
372
499
#ifndef NDEBUG
373
499
    const uint64_t i_start_pos = vlc_stream_Tell( p_demux->s );
374
499
#endif
375
376
499
    ssize_t i_peek;
377
499
    const uint8_t *p_peek;
378
499
    if( vlc_stream_Peek( p_demux->s, &p_peek, 16 ) < 16 )
379
0
    {
380
0
        free( p_sys );
381
0
        return VLC_EGENERIC;
382
0
    }
383
384
499
    enum
385
499
    {
386
499
        UTF8BOM,
387
499
        UTF16LE,
388
499
        UTF16BE,
389
499
        NOBOM,
390
499
    } e_bom = NOBOM;
391
499
    const char *psz_bom = NULL;
392
393
499
    i_peek = 4096;
394
    /* Detect Unicode while skipping the UTF-8 Byte Order Mark */
395
499
    if( !memcmp( p_peek, "\xEF\xBB\xBF", 3 ) )
396
23
    {
397
23
        e_bom = UTF8BOM;
398
23
        psz_bom = "UTF-8";
399
23
    }
400
476
    else if( !memcmp( p_peek, "\xFF\xFE", 2 ) )
401
16
    {
402
16
        e_bom = UTF16LE;
403
16
        psz_bom = "UTF-16LE";
404
16
        i_peek *= 2;
405
16
    }
406
460
    else if( !memcmp( p_peek, "\xFE\xFF", 2 ) )
407
2
    {
408
2
        e_bom = UTF16BE;
409
2
        psz_bom = "UTF-16BE";
410
2
        i_peek *= 2;
411
2
    }
412
413
499
    if( e_bom != NOBOM )
414
499
        msg_Dbg( p_demux, "detected %s Byte Order Mark", psz_bom );
415
416
499
    i_peek = vlc_stream_Peek( p_demux->s, &p_peek, i_peek );
417
499
    if( unlikely(i_peek < 16) )
418
0
    {
419
0
        free( p_sys );
420
0
        return VLC_EGENERIC;
421
0
    }
422
423
499
    stream_t *p_probestream = NULL;
424
499
    if( e_bom != UTF8BOM && e_bom != NOBOM )
425
18
    {
426
18
        if( i_peek > 16 )
427
18
        {
428
18
            char *p_outbuf = FromCharset( psz_bom, p_peek, i_peek );
429
18
            if( p_outbuf != NULL )
430
16
                p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *)p_outbuf,
431
18
                                                      strlen( p_outbuf ),
432
18
                                                      false ); /* free p_outbuf on release */
433
18
        }
434
18
    }
435
481
    else
436
481
    {
437
481
        const size_t i_skip = (e_bom == UTF8BOM) ? 3 : 0;
438
481
        p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) &p_peek[i_skip],
439
481
                                              i_peek - i_skip, true );
440
481
    }
441
442
499
    if( p_probestream == NULL )
443
2
    {
444
2
        free( p_sys );
445
2
        return VLC_EGENERIC;
446
2
    }
447
448
    /* Probe if unknown type */
449
497
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
450
497
    {
451
497
        int     i_try;
452
497
        char    *s = NULL;
453
454
497
        msg_Dbg( p_demux, "autodetecting subtitle format" );
455
2.50k
        for( i_try = 0; i_try < 256; i_try++ )
456
2.50k
        {
457
2.50k
            int i_dummy;
458
2.50k
            char p_dummy;
459
460
2.50k
            if( (s = vlc_stream_ReadLine( p_probestream ) ) == NULL )
461
9
                break;
462
463
2.50k
            if( strcasestr( s, "<SAMI>" ) )
464
63
            {
465
63
                p_sys->props.i_type = SUB_TYPE_SAMI;
466
63
                break;
467
63
            }
468
2.43k
            else if( sscanf( s, "{%d}{%d}", &i_dummy, &i_dummy ) == 2 ||
469
2.43k
                     sscanf( s, "{%d}{}", &i_dummy ) == 1)
470
9
            {
471
9
                p_sys->props.i_type = SUB_TYPE_MICRODVD;
472
9
                break;
473
9
            }
474
2.42k
            else if( sscanf( s, "%d:%d:%d,%d --> %d:%d:%d,%d",
475
2.42k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
476
2.42k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
477
2.41k
                     sscanf( s, "%d:%d:%d --> %d:%d:%d,%d",
478
2.41k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
479
2.41k
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
480
2.41k
                     sscanf( s, "%d:%d:%d,%d --> %d:%d:%d",
481
2.41k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
482
2.41k
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
483
2.39k
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d.%d",
484
2.39k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
485
2.39k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
486
2.39k
                     sscanf( s, "%d:%d:%d --> %d:%d:%d.%d",
487
2.39k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
488
2.39k
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
489
2.39k
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d",
490
2.39k
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
491
2.39k
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
492
2.39k
                     sscanf( s, "%d:%d:%d --> %d:%d:%d",
493
2.39k
                             &i_dummy,&i_dummy,&i_dummy,
494
2.39k
                             &i_dummy,&i_dummy,&i_dummy ) == 6 )
495
42
            {
496
42
                p_sys->props.i_type = SUB_TYPE_SUBRIP;
497
42
                break;
498
42
            }
499
2.38k
            else if( !strncasecmp( s, "!: This is a Sub Station Alpha v1", 33 ) )
500
1
            {
501
1
                p_sys->props.i_type = SUB_TYPE_SSA1;
502
1
                break;
503
1
            }
504
2.38k
            else if( !strncasecmp( s, "ScriptType: v4.00+", 18 ) )
505
2
            {
506
2
                p_sys->props.i_type = SUB_TYPE_ASS;
507
2
                break;
508
2
            }
509
2.38k
            else if( !strncasecmp( s, "ScriptType: v4.00", 17 ) )
510
4
            {
511
4
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
512
4
                break;
513
4
            }
514
2.37k
            else if( !strncasecmp( s, "Dialogue: Marked", 16  ) )
515
2
            {
516
2
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
517
2
                break;
518
2
            }
519
2.37k
            else if( !strncasecmp( s, "Dialogue:", 9  ) )
520
20
            {
521
20
                p_sys->props.i_type = SUB_TYPE_ASS;
522
20
                break;
523
20
            }
524
2.35k
            else if( strcasestr( s, "[INFORMATION]" ) )
525
6
            {
526
6
                p_sys->props.i_type = SUB_TYPE_SUBVIEWER; /* I hope this will work */
527
6
                break;
528
6
            }
529
2.35k
            else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d",
530
2.35k
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
531
2.35k
                                 &i_dummy, &i_dummy, &i_dummy ) == 7 ||
532
2.35k
                     sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2)
533
64
            {
534
64
                p_sys->props.i_type = SUB_TYPE_JACOSUB;
535
64
                break;
536
64
            }
537
2.28k
            else if( sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
538
2.28k
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
539
2.28k
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy ) == 8 )
540
5
            {
541
5
                p_sys->props.i_type = SUB_TYPE_SBV;
542
5
                break;
543
5
            }
544
2.28k
            else if( sscanf( s, "%d:%d:%d:", &i_dummy, &i_dummy, &i_dummy ) == 3 ||
545
2.26k
                     sscanf( s, "%d:%d:%d ", &i_dummy, &i_dummy, &i_dummy ) == 3 )
546
22
            {
547
22
                p_sys->props.i_type = SUB_TYPE_VPLAYER;
548
22
                break;
549
22
            }
550
2.26k
            else if( sscanf( s, "{T %d:%d:%d:%d", &i_dummy, &i_dummy,
551
2.26k
                             &i_dummy, &i_dummy ) == 4 )
552
6
            {
553
6
                p_sys->props.i_type = SUB_TYPE_DVDSUBTITLE;
554
6
                break;
555
6
            }
556
2.25k
            else if( sscanf( s, "[%d:%d:%d]%c",
557
2.25k
                     &i_dummy, &i_dummy, &i_dummy, &p_dummy ) == 4 )
558
16
            {
559
16
                p_sys->props.i_type = SUB_TYPE_DKS;
560
16
                break;
561
16
            }
562
2.23k
            else if( strstr( s, "*** START SCRIPT" ) )
563
16
            {
564
16
                p_sys->props.i_type = SUB_TYPE_SUBVIEW1;
565
16
                break;
566
16
            }
567
2.22k
            else if( sscanf( s, "[%d][%d]", &i_dummy, &i_dummy ) == 2 ||
568
2.21k
                     sscanf( s, "[%d][]", &i_dummy ) == 1)
569
21
            {
570
21
                p_sys->props.i_type = SUB_TYPE_MPL2;
571
21
                break;
572
21
            }
573
2.20k
            else if( sscanf (s, "FORMAT=%d", &i_dummy) == 1 ||
574
2.18k
                     ( sscanf (s, "FORMAT=TIM%c", &p_dummy) == 1
575
2
                       && p_dummy =='E' ) )
576
19
            {
577
19
                p_sys->props.i_type = SUB_TYPE_MPSUB;
578
19
                break;
579
19
            }
580
2.18k
            else if( sscanf( s, "-->> %d", &i_dummy) == 1 )
581
58
            {
582
58
                p_sys->props.i_type = SUB_TYPE_AQT;
583
58
                break;
584
58
            }
585
2.12k
            else if( sscanf( s, "%d,%d,", &i_dummy, &i_dummy ) == 2 )
586
4
            {
587
4
                p_sys->props.i_type = SUB_TYPE_PJS;
588
4
                break;
589
4
            }
590
2.12k
            else if( sscanf( s, "{%d:%d:%d}",
591
2.12k
                                &i_dummy, &i_dummy, &i_dummy ) == 3 )
592
0
            {
593
0
                p_sys->props.i_type = SUB_TYPE_PSB;
594
0
                break;
595
0
            }
596
2.12k
            else if( strcasestr( s, "<time" ) )
597
33
            {
598
33
                p_sys->props.i_type = SUB_TYPE_RT;
599
33
                break;
600
33
            }
601
2.08k
            else if( !strncasecmp( s, "WEBVTT",6 ) )
602
0
            {
603
                /* FAIL */
604
0
                break;
605
0
            }
606
2.08k
            else if( !strncasecmp( s, "Scenarist_SCC V1.0", 18 ) )
607
75
            {
608
75
                p_sys->props.i_type = SUB_TYPE_SCC;
609
75
                p_sys->pf_convert = ToEIA608Block;
610
75
                break;
611
75
            }
612
613
2.01k
            free( s );
614
2.01k
            s = NULL;
615
2.01k
        }
616
617
497
        free( s );
618
497
    }
619
620
497
    vlc_stream_Delete( p_probestream );
621
622
    /* Quit on unknown subtitles */
623
497
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
624
9
    {
625
9
#ifndef NDEBUG
626
        /* Ensure it will work with non seekable streams */
627
9
        assert( i_start_pos == vlc_stream_Tell( p_demux->s ) );
628
9
#endif
629
9
        msg_Warn( p_demux, "failed to recognize subtitle type" );
630
9
        free( p_sys );
631
9
        return VLC_EGENERIC;
632
9
    }
633
634
5.22k
    for( int i = 0; ; i++ )
635
5.71k
    {
636
5.71k
        if( sub_read_subtitle_function[i].i_type == p_sys->props.i_type )
637
488
        {
638
488
            msg_Dbg( p_demux, "detected %s format",
639
488
                     sub_read_subtitle_function[i].psz_name );
640
488
            pf_read = sub_read_subtitle_function[i].pf_read;
641
488
            break;
642
488
        }
643
5.71k
    }
644
645
488
    msg_Dbg( p_demux, "loading all subtitles..." );
646
647
488
    if( e_bom == UTF8BOM && /* skip BOM */
648
22
        vlc_stream_Read( p_demux->s, NULL, 3 ) != 3 )
649
0
    {
650
0
        Close( p_this );
651
0
        return VLC_EGENERIC;
652
0
    }
653
654
    /* Load the whole file */
655
488
    text_t txtlines;
656
488
    TextLoad( &txtlines, p_demux->s );
657
658
    /* Parse it */
659
334k
    for( size_t i_max = 0; i_max < SIZE_MAX - 500 * sizeof(subtitle_t); )
660
334k
    {
661
334k
        if( p_sys->subtitles.i_count >= i_max )
662
1.12k
        {
663
1.12k
            i_max += 500;
664
1.12k
            subtitle_t *p_realloc = realloc( p_sys->subtitles.p_array, sizeof(subtitle_t) * i_max );
665
1.12k
            if( p_realloc == NULL )
666
0
            {
667
0
                TextUnload( &txtlines );
668
0
                Close( p_this );
669
0
                return VLC_ENOMEM;
670
0
            }
671
1.12k
            p_sys->subtitles.p_array = p_realloc;
672
1.12k
        }
673
674
334k
        if( pf_read( VLC_OBJECT(p_demux), &p_sys->props, &txtlines,
675
334k
                     &p_sys->subtitles.p_array[p_sys->subtitles.i_count],
676
334k
                     p_sys->subtitles.i_count ) )
677
488
            break;
678
679
333k
        p_sys->subtitles.i_count++;
680
333k
    }
681
    /* Unload */
682
488
    TextUnload( &txtlines );
683
684
488
    msg_Dbg(p_demux, "loaded %zu subtitles", p_sys->subtitles.i_count );
685
686
    /* *** add subtitle ES *** */
687
488
    if( p_sys->props.i_type == SUB_TYPE_SSA1 ||
688
487
             p_sys->props.i_type == SUB_TYPE_SSA2_4 ||
689
481
             p_sys->props.i_type == SUB_TYPE_ASS )
690
29
    {
691
29
        Fix( p_demux );
692
29
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SSA );
693
29
    }
694
459
    else if( p_sys->props.i_type == SUB_TYPE_SCC )
695
75
    {
696
75
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_CEA608 );
697
75
        fmt.subs.cc.i_reorder_depth = -1;
698
75
    }
699
384
    else
700
384
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SUBT );
701
702
488
    p_sys->subtitles.i_current = 0;
703
488
    p_sys->i_length = 0;
704
488
    if( p_sys->subtitles.i_count > 0 )
705
387
        p_sys->i_length = p_sys->subtitles.p_array[p_sys->subtitles.i_count-1].i_stop;
706
707
488
    if( p_sys->props.psz_lang )
708
0
    {
709
0
        fmt.psz_language = p_sys->props.psz_lang;
710
0
        p_sys->props.psz_lang = NULL;
711
0
        msg_Dbg( p_demux, "detected language '%s' of subtitle: %s", fmt.psz_language,
712
0
                 p_demux->psz_location );
713
0
    }
714
488
    else
715
488
    {
716
488
        fmt.psz_language = get_language_from_url( p_demux->psz_url );
717
488
        if( fmt.psz_language )
718
488
            msg_Dbg( p_demux, "selected '%s' as possible filename language substring of subtitle: %s",
719
488
                     fmt.psz_language, p_demux->psz_location );
720
488
    }
721
722
488
    char *psz_description = var_InheritString( p_demux, "sub-description" );
723
488
    if( psz_description && *psz_description )
724
0
        fmt.psz_description = psz_description;
725
488
    else
726
488
        free( psz_description );
727
488
    if( p_sys->props.psz_header != NULL &&
728
29
       (fmt.p_extra = strdup( p_sys->props.psz_header )) )
729
29
    {
730
29
        fmt.i_extra = strlen( p_sys->props.psz_header ) + 1;
731
29
    }
732
733
488
    fmt.i_id = 0;
734
488
    p_sys->es = es_out_Add( p_demux->out, &fmt );
735
488
    es_format_Clean( &fmt );
736
488
    if( p_sys->es == NULL )
737
0
    {
738
0
        Close( p_this );
739
0
        return VLC_EGENERIC;
740
0
    }
741
742
488
    return VLC_SUCCESS;
743
488
}
744
745
/*****************************************************************************
746
 * Close: Close subtitle demux
747
 *****************************************************************************/
748
/* Releases everything owned by the demuxer: the text of each loaded
 * subtitle, the subtitle array, the format header and the private state. */
static void Close( vlc_object_t *p_this )
{
    demux_sys_t *p_sys = ((demux_t *)p_this)->p_sys;
    subtitle_t *p_entries = p_sys->subtitles.p_array;
    const size_t i_entries = p_sys->subtitles.i_count;

    size_t i_idx = 0;
    while( i_idx < i_entries )
    {
        free( p_entries[i_idx].psz_text );
        i_idx++;
    }

    free( p_entries );
    free( p_sys->props.psz_header );
    free( p_sys );
}
760
761
static void
762
ResetCurrentIndex( demux_t *p_demux )
763
0
{
764
0
    demux_sys_t *p_sys = p_demux->p_sys;
765
0
    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
766
0
    {
767
0
        if( p_sys->subtitles.p_array[i].i_start * p_sys->f_rate >
768
0
            p_sys->i_next_demux_date && i > 0 )
769
0
            break;
770
0
        p_sys->subtitles.i_current = i;
771
0
    }
772
0
}
773
774
/*****************************************************************************
775
 * Control:
776
 *****************************************************************************/
777
/* Control: answer demuxer queries (seek capability, length, time, position,
 * rate, slave scheduling, pause/pace).
 * Returns VLC_SUCCESS for the queries handled here, the result of the
 * delegated helper for pause/pace queries, and VLC_EGENERIC otherwise. */
static int Control( demux_t *p_demux, int i_query, va_list args )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    switch( i_query )
    {
        /* Both capabilities are unconditionally reported as available. */
        case DEMUX_CAN_SEEK:
        case DEMUX_CAN_CONTROL_RATE:
        {
            bool *pb_capable = va_arg( args, bool * );
            *pb_capable = true;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_LENGTH:
        {
            vlc_tick_t *pi_length = va_arg( args, vlc_tick_t * );
            *pi_length = p_sys->i_length;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_TIME:
        {
            vlc_tick_t *pi_time = va_arg( args, vlc_tick_t * );
            *pi_time = p_sys->i_next_demux_date;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_TIME:
        {
            /* Force a new PCR on the next Demux() and reposition the index. */
            p_sys->b_first_time = true;
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t );
            ResetCurrentIndex( p_demux );
            return VLC_SUCCESS;
        }

        case DEMUX_GET_POSITION:
        {
            double *pf_pos = va_arg( args, double * );

            if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
                *pf_pos = 1.0; /* everything has been sent */
            else if( p_sys->subtitles.i_count > 0 && p_sys->i_length )
                *pf_pos = (double)p_sys->i_next_demux_date / p_sys->i_length;
            else
                *pf_pos = 0.0;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_POSITION:
        {
            const double f_pos = va_arg( args, double );

            /* Without subtitles or a known length the request fails. */
            if( !p_sys->subtitles.i_count || !p_sys->i_length )
                break;

            vlc_tick_t i_target = VLC_TICK_0 + f_pos * p_sys->i_length;
            return demux_Control( p_demux, DEMUX_SET_TIME, i_target );
        }

        case DEMUX_SET_RATE:
        {
            const float *pf_rate = va_arg( args, float * );
            p_sys->f_rate = *pf_rate;
            ResetCurrentIndex( p_demux );
            return VLC_SUCCESS;
        }

        case DEMUX_SET_NEXT_DEMUX_TIME:
            /* From now on the demux date is driven by the caller. */
            p_sys->b_slave = true;
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t ) - VLC_TICK_0;
            return VLC_SUCCESS;

        case DEMUX_CAN_PAUSE:
        case DEMUX_SET_PAUSE_STATE:
        case DEMUX_CAN_CONTROL_PACE:
            return demux_vaControlHelper( p_demux->s, 0, -1, 0, 1, i_query, args );

        /* Explicitly unsupported queries share the generic failure path. */
        case DEMUX_GET_PTS_DELAY:
        case DEMUX_GET_FPS:
        case DEMUX_GET_META:
        case DEMUX_GET_ATTACHMENTS:
        case DEMUX_GET_TITLE_INFO:
        case DEMUX_HAS_UNSUPPORTED_META:
        case DEMUX_CAN_RECORD:
        default:
            break;
    }

    return VLC_EGENERIC;
}
860
861
/*****************************************************************************
862
 * Demux: Send subtitle to decoder
863
 *****************************************************************************/
864
/* Demux: send every subtitle whose rate-scaled start time is not after the
 * current demux date, then (when not in slave mode) publish the PCR and
 * advance the demux date by 125 ms.
 * Returns VLC_DEMUXER_EOF once all subtitles have been consumed, otherwise
 * VLC_DEMUXER_SUCCESS. */
static int Demux( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    /* In master mode make sure the array is sorted before walking it. */
    if( !p_sys->b_slave )
        Fix( p_demux );

    const vlc_tick_t i_barrier = p_sys->i_next_demux_date;

    for( ;; )
    {
        if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
            break;

        const subtitle_t *p_subtitle =
            &p_sys->subtitles.p_array[p_sys->subtitles.i_current];

        /* Stop at the first subtitle that is not due yet. */
        if( !( ( p_subtitle->i_start * p_sys->f_rate ) <= i_barrier ) )
            break;

        if( p_sys->b_first_time && !p_sys->b_slave )
        {
            es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
            p_sys->b_first_time = false;
        }

        /* Entries with a negative start time are skipped, not sent. */
        block_t *p_block = NULL;
        if( p_subtitle->i_start >= 0 )
            p_block = p_sys->pf_convert( p_subtitle );

        if( p_block != NULL )
        {
            p_block->i_pts = VLC_TICK_0 + p_subtitle->i_start * p_sys->f_rate;
            p_block->i_dts = p_block->i_pts;

            /* A length is only set for a valid stop that is not before start. */
            if( p_subtitle->i_stop != VLC_TICK_INVALID &&
                p_subtitle->i_stop >= p_subtitle->i_start )
            {
                p_block->i_length =
                    (p_subtitle->i_stop - p_subtitle->i_start) * p_sys->f_rate;
            }

            es_out_Send( p_demux->out, p_sys->es, p_block );
        }

        p_sys->subtitles.i_current++;
    }

    if( !p_sys->b_slave )
    {
        es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
        p_sys->i_next_demux_date += VLC_TICK_FROM_MS(125);
    }

    return ( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
         ? VLC_DEMUXER_EOF : VLC_DEMUXER_SUCCESS;
}
913
914
915
static int subtitle_cmp( const void *first, const void *second )
916
2.80M
{
917
2.80M
    vlc_tick_t result = ((subtitle_t *)(first))->i_start - ((subtitle_t *)(second))->i_start;
918
    /* Return -1, 0 ,1, and not directly subtraction
919
     * as result can be > INT_MAX */
920
2.80M
    return result == 0 ? 0 : result > 0 ? 1 : -1;
921
2.80M
}
922
/*****************************************************************************
923
 * Fix: fix time stamp and order of subtitle
924
 *****************************************************************************/
925
/* Fix: sort the subtitle array by start time, once. Later calls are no-ops
 * because b_sorted is set after the first sort. */
static void Fix( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    if( !p_sys->b_sorted )
    {
        /* *** fix order (to be sure...) *** */
        qsort( p_sys->subtitles.p_array,
               p_sys->subtitles.i_count,
               sizeof( p_sys->subtitles.p_array[0] ),
               subtitle_cmp );
        p_sys->b_sorted = true;
    }
}
935
936
/* TextLoad: read every line of stream s into txt.
 *
 * txt is (re)initialised by this function. On return:
 *  - VLC_SUCCESS:  txt holds at least one line;
 *  - VLC_EGENERIC: the stream produced no line; the line table is released
 *                  and txt->line is NULL;
 *  - VLC_ENOMEM:   allocation failed; the lines read so far (if any) are
 *                  still owned by txt and are released by TextUnload().
 */
static int TextLoad( text_t *txt, stream_t *s )
{
    size_t i_line_max = 500;

    /* init txt */
    txt->i_line_count   = 0;
    txt->i_line         = 0;
    txt->line           = calloc( i_line_max, sizeof( char * ) );
    if( !txt->line )
        return VLC_ENOMEM;

    /* load the complete file */
    for( ;; )
    {
        char *psz = vlc_stream_ReadLine( s );

        if( psz == NULL )
            break;

        /* Count the line as soon as it is stored: if growing the table fails
         * below, the line is still reachable and gets freed by TextUnload()
         * instead of being leaked. */
        txt->line[txt->i_line_count++] = psz;

        /* Keep at least one free slot after the last stored line. */
        if( txt->i_line_count >= i_line_max )
        {
            i_line_max += 100;
            char **p_realloc = realloc( txt->line, i_line_max * sizeof( char * ) );
            if( p_realloc == NULL )
                return VLC_ENOMEM;
            txt->line = p_realloc;
        }
    }

    if( txt->i_line_count == 0 )
    {
        free( txt->line );
        txt->line = NULL; /* do not leave a dangling pointer behind */
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
976
/* TextUnload: free every stored line and the line table, then reset the
 * counters. The table is only touched when at least one line is stored. */
static void TextUnload( text_t *txt )
{
    if( txt->i_line_count )
    {
        /* Release the lines from last to first, then the table itself. */
        for( size_t i_left = txt->i_line_count; i_left > 0; i_left-- )
            free( txt->line[i_left - 1] );
        free( txt->line );
    }

    txt->i_line_count = 0;
    txt->i_line       = 0;
}
987
988
/* TextGetLine: return the line at the read cursor and advance the cursor,
 * or NULL when the cursor is already past the last line. The returned string
 * stays owned by txt. */
static char *TextGetLine( text_t *txt )
{
    return ( txt->i_line < txt->i_line_count ) ? txt->line[txt->i_line++]
                                               : NULL;
}
995
/* TextPreviousLine: move the read cursor back by one line; does nothing when
 * the cursor is already on the first line. */
static void TextPreviousLine( text_t *txt )
{
    if( txt->i_line <= 0 )
        return;

    txt->i_line--;
}
1000
1001
/*****************************************************************************
1002
 * Specific Subtitle function
1003
 *****************************************************************************/
1004
/* ParseMicroDvd:
1005
 *  Format:
1006
 *      {n1}{n2}Line1|Line2|Line3....
1007
 *  where n1 and n2 are the video frame number (n2 can be empty)
1008
 */
1009
static int ParseMicroDvd( vlc_object_t *p_obj, subs_properties_t *p_props,
1010
                          text_t *txt, subtitle_t *p_subtitle,
1011
                          size_t i_idx )
1012
20
{
1013
20
    VLC_UNUSED( i_idx );
1014
20
    char *psz_text;
1015
20
    int  i_start;
1016
20
    int  i_stop;
1017
20
    int  i;
1018
1019
20
    for( ;; )
1020
521k
    {
1021
521k
        const char *s = TextGetLine( txt );
1022
521k
        if( !s )
1023
9
            return VLC_EGENERIC;
1024
1025
521k
        psz_text = malloc( strlen(s) + 1 );
1026
521k
        if( !psz_text )
1027
0
            return VLC_ENOMEM;
1028
1029
521k
        i_start = 0;
1030
521k
        i_stop  = -1;
1031
521k
        if( sscanf( s, "{%d}{}%[^\r\n]", &i_start, psz_text ) == 2 ||
1032
521k
            sscanf( s, "{%d}{%d}%[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1033
31
        {
1034
31
            if( i_start != 1 || i_stop != 1 )
1035
11
                break;
1036
1037
            /* We found a possible setting of the framerate "{1}{1}23.976" */
1038
            /* Check if it's usable, and if the sub-original-fps is not set */
1039
20
            float f_fps = vlc_strtof_c( psz_text, NULL );
1040
20
            if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1041
1
                p_props->i_microsecperframe = llroundf((float)CLOCK_FREQ / f_fps);
1042
20
        }
1043
521k
        free( psz_text );
1044
521k
    }
1045
1046
    /* replace | by \n */
1047
434
    for( i = 0; psz_text[i] != '\0'; i++ )
1048
423
    {
1049
423
        if( psz_text[i] == '|' )
1050
8
            psz_text[i] = '\n';
1051
423
    }
1052
1053
    /* */
1054
11
    p_subtitle->i_start  =  VLC_TICK_0 + i_start * p_props->i_microsecperframe;
1055
11
    p_subtitle->i_stop   = i_stop >= 0 ? (VLC_TICK_0 + i_stop  * p_props->i_microsecperframe) : VLC_TICK_INVALID;
1056
11
    p_subtitle->psz_text = psz_text;
1057
11
    return VLC_SUCCESS;
1058
20
}
1059
1060
/* ParseSubRipSubViewer
1061
 *  Format SubRip
1062
 *      n
1063
 *      h1:m1:s1,d1 --> h2:m2:s2,d2
1064
 *      Line1
1065
 *      Line2
1066
 *      ....
1067
 *      [Empty line]
1068
 *  Format SubViewer v1/v2
1069
 *      h1:m1:s1.d1,h2:m2:s2.d2
1070
 *      Line1[br]Line2
1071
 *      Line3
1072
 *      ...
1073
 *      [empty line]
1074
 *  We ignore line number for SubRip
1075
 */
1076
static int ParseSubRipSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1077
                                 text_t *txt, subtitle_t *p_subtitle,
1078
                                 int (* pf_parse_timing)(subtitle_t *, const char *),
1079
                                 bool b_replace_br )
1080
503
{
1081
503
    VLC_UNUSED(p_obj);
1082
503
    VLC_UNUSED(p_props);
1083
503
    char    *psz_text;
1084
1085
503
    for( ;; )
1086
1.13M
    {
1087
1.13M
        const char *s = TextGetLine( txt );
1088
1089
1.13M
        if( !s )
1090
48
            return VLC_EGENERIC;
1091
1092
1.13M
        if( pf_parse_timing( p_subtitle, s) == VLC_SUCCESS &&
1093
1.25k
            p_subtitle->i_start < p_subtitle->i_stop )
1094
455
        {
1095
455
            break;
1096
455
        }
1097
1.13M
    }
1098
1099
    /* Now read text until an empty line */
1100
455
    size_t i_old = 0;
1101
455
    psz_text = NULL;
1102
455
    for( ;; )
1103
2.02k
    {
1104
2.02k
        const char *s = TextGetLine( txt );
1105
2.02k
        size_t i_len;
1106
1107
2.02k
        i_len = s ? strlen( s ) : 0;
1108
2.02k
        if( i_len == 0 )
1109
455
        {
1110
455
            p_subtitle->psz_text = psz_text;
1111
455
            return VLC_SUCCESS;
1112
455
        }
1113
1114
1.57k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1115
1.57k
        if( !psz_text )
1116
0
            return VLC_ENOMEM;
1117
1118
1.57k
        memcpy( &psz_text[i_old], s, i_len );
1119
1.57k
        psz_text[i_old + i_len + 0] = '\n';
1120
1.57k
        psz_text[i_old + i_len + 1] = '\0';
1121
1.57k
        i_old += i_len + 1;
1122
1123
        /* replace [br] by \n */
1124
1.57k
        if( b_replace_br )
1125
393
        {
1126
393
            char *p;
1127
1128
684
            while( ( p = strstr( psz_text, "[br]" ) ) )
1129
291
            {
1130
291
                *p++ = '\n';
1131
291
                memmove( p, &p[3], strlen(&p[3])+1 );
1132
291
                i_old -= 3;
1133
291
            }
1134
393
        }
1135
1.57k
    }
1136
455
}
1137
1138
/* subtitle_ParseSubRipTimingValue
1139
 * Parses SubRip timing value.
1140
 */
1141
/* Parses one timestamp at the start of s, trying in order "h:m:s,d",
 * "h:m:s.d" and "h:m:s". A form is accepted only if the characters it
 * consumed fit within the first `length` bytes of s. The fractional field d
 * is taken as a number of milliseconds (0 when absent).
 * Returns VLC_SUCCESS and stores the tick value in *timing_value, or
 * VLC_EGENERIC when no form matches. */
static int subtitle_ParseSubRipTimingValue(vlc_tick_t *timing_value,
                                           const char *s, size_t length)
{
    int h1, m1, s1, d1 = 0;
    int count;

    bool b_parsed =
        sscanf(s, "%d:%d:%d,%d%n", &h1, &m1, &s1, &d1, &count) == 4
        && (size_t)count <= length;

    if (!b_parsed)
    {
        b_parsed =
            sscanf(s, "%d:%d:%d.%d%n", &h1, &m1, &s1, &d1, &count) == 4
            && (size_t)count <= length;
    }

    if (!b_parsed)
    {
        /* No fractional part: discard whatever an earlier attempt stored. */
        d1 = 0;
        b_parsed =
            sscanf(s, "%d:%d:%d%n", &h1, &m1, &s1, &count) == 3
            && (size_t)count <= length;
    }

    if (!b_parsed)
        return VLC_EGENERIC;

    (*timing_value) = VLC_TICK_0
        + vlc_tick_from_HMS(h1, m1, s1)
        + VLC_TICK_FROM_MS(d1);

    return VLC_SUCCESS;
}
1170
1171
/* subtitle_ParseSubRipTiming
1172
 * Parses SubRip timing.
1173
 */
1174
/* Parses a "start --> stop" line into p_subtitle->i_start and i_stop.
 * The " --> " separator must be present and must not be at the very start of
 * the line. Returns VLC_SUCCESS, or the error of the first timestamp that
 * fails to parse (i_start may already have been written in that case). */
static int subtitle_ParseSubRipTiming( subtitle_t *p_subtitle,
                                       const char *s )
{
    static const char psz_arrow[] = " --> ";

    const char *delimiter = strstr(s, psz_arrow);
    if (delimiter == NULL || delimiter == s)
        return VLC_EGENERIC;

    /* Left-hand value: bounded by the position of the separator. */
    int ret = subtitle_ParseSubRipTimingValue(&p_subtitle->i_start, s,
                                              (size_t)(delimiter - s));
    if (ret == VLC_SUCCESS)
    {
        /* Right-hand value: everything after the separator. */
        const char *right = delimiter + (sizeof(psz_arrow) - 1);
        ret = subtitle_ParseSubRipTimingValue(&p_subtitle->i_stop, right,
                                              strlen(right));
    }
    return ret;
}
1188
1189
/* ParseSubRip
1190
 */
1191
static int  ParseSubRip( vlc_object_t *p_obj, subs_properties_t *p_props,
1192
                         text_t *txt, subtitle_t *p_subtitle,
1193
                         size_t i_idx )
1194
331
{
1195
331
    VLC_UNUSED( i_idx );
1196
331
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1197
331
                                 &subtitle_ParseSubRipTiming,
1198
331
                                 false );
1199
331
}
1200
1201
/* subtitle_ParseSubViewerTiming
1202
 * Parses SubViewer timing.
1203
 */
1204
/* Parses a "h1:m1:s1.d1,h2:m2:s2.d2" line into p_subtitle->i_start and
 * i_stop; d1 and d2 are taken as milliseconds. Returns VLC_EGENERIC unless
 * all eight fields are read. */
static int subtitle_ParseSubViewerTiming( subtitle_t *p_subtitle,
                                   const char *s )
{
    int start[4]; /* h, m, s, d of the start time */
    int stop[4];  /* h, m, s, d of the stop time */

    const int i_fields = sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &start[0], &start[1], &start[2], &start[3],
                                 &stop[0], &stop[1], &stop[2], &stop[3] );
    if( i_fields != 8 )
        return VLC_EGENERIC;

    p_subtitle->i_start = vlc_tick_from_HMS( start[0], start[1], start[2] ) +
                          VLC_TICK_FROM_MS( start[3] ) + VLC_TICK_0;

    p_subtitle->i_stop  = vlc_tick_from_HMS( stop[0], stop[1], stop[2] ) +
                          VLC_TICK_FROM_MS( stop[3] ) + VLC_TICK_0;
    return VLC_SUCCESS;
}
1220
1221
/* ParseSubViewer
1222
 */
1223
static int  ParseSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1224
                            text_t *txt, subtitle_t *p_subtitle,
1225
                            size_t i_idx )
1226
172
{
1227
172
    VLC_UNUSED( i_idx );
1228
1229
172
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1230
172
                                 &subtitle_ParseSubViewerTiming,
1231
172
                                 true );
1232
172
}
1233
1234
/* ParseSSA
1235
 */
1236
static int  ParseSSA( vlc_object_t *p_obj, subs_properties_t *p_props,
1237
                      text_t *txt, subtitle_t *p_subtitle,
1238
                      size_t i_idx )
1239
562
{
1240
562
    VLC_UNUSED(p_obj);
1241
562
    size_t header_len = 0;
1242
1243
562
    for( ;; )
1244
569k
    {
1245
569k
        const char *s = TextGetLine( txt );
1246
569k
        int h1, m1, s1, c1, h2, m2, s2, c2;
1247
569k
        char *psz_text, *psz_temp;
1248
569k
        char temp[16];
1249
1250
569k
        if( !s )
1251
29
            return VLC_EGENERIC;
1252
1253
        /* We expect (SSA2-4):
1254
         * Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1255
         * Dialogue: Marked=0,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1256
         *
1257
         * SSA-1 is similar but only has 8 commas up until the subtitle text. Probably the Effect field is no present, but not 100 % sure.
1258
         */
1259
1260
        /* For ASS:
1261
         * Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1262
         * Dialogue: Layer#,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1263
         */
1264
1265
569k
        psz_text = NULL;
1266
569k
        if( s[0] == 'D' || s[0] == 'L' )
1267
974
        {
1268
            /* The output text is always shorter than the input text. */
1269
974
            psz_text = malloc( strlen(s) );
1270
974
            if( !psz_text )
1271
0
                return VLC_ENOMEM;
1272
974
        }
1273
1274
        /* Try to capture the language property */
1275
569k
        if( s[0] == 'L' &&
1276
2
            sscanf( s, "Language: %[^\r\n]", psz_text ) == 1 )
1277
0
        {
1278
0
            free( p_props->psz_lang ); /* just in case of multiple instances */
1279
0
            p_props->psz_lang = psz_text;
1280
0
            psz_text = NULL;
1281
0
        }
1282
569k
        else if( s[0] == 'D' &&
1283
972
            sscanf( s,
1284
972
                    "Dialogue: %15[^,],%d:%d:%d.%d,%d:%d:%d.%d,%[^\r\n]",
1285
972
                    temp,
1286
972
                    &h1, &m1, &s1, &c1,
1287
972
                    &h2, &m2, &s2, &c2,
1288
972
                    psz_text ) == 10 )
1289
533
        {
1290
            /* The dec expects: ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text */
1291
            /* (Layer comes from ASS specs ... it's empty for SSA.) */
1292
533
            if( p_props->i_type == SUB_TYPE_SSA1 )
1293
128
            {
1294
                /* SSA1 has only 8 commas before the text starts, not 9 */
1295
128
                memmove( &psz_text[1], psz_text, strlen(psz_text)+1 );
1296
128
                psz_text[0] = ',';
1297
128
            }
1298
405
            else
1299
405
            {
1300
405
                int i_layer = ( p_props->i_type == SUB_TYPE_ASS ) ? atoi( temp ) : 0;
1301
1302
                /* ReadOrder, Layer, %s(rest of fields) */
1303
405
                if( asprintf( &psz_temp, "%zu,%d,%s", i_idx, i_layer, psz_text ) == -1 )
1304
0
                {
1305
0
                    free( psz_text );
1306
0
                    return VLC_ENOMEM;
1307
0
                }
1308
1309
405
                free( psz_text );
1310
405
                psz_text = psz_temp;
1311
405
            }
1312
1313
533
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1314
533
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1315
533
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
1316
533
                                  VLC_TICK_FROM_MS( c2 * 10 ) + VLC_TICK_0;
1317
533
            p_subtitle->psz_text = psz_text;
1318
533
            return VLC_SUCCESS;
1319
533
        }
1320
568k
        free( psz_text );
1321
1322
        /* All the other stuff we add to the header field */
1323
568k
        if( header_len == 0 && p_props->psz_header )
1324
196
            header_len = strlen( p_props->psz_header );
1325
1326
568k
        size_t s_len = strlen( s );
1327
568k
        p_props->psz_header = realloc_or_free( p_props->psz_header, header_len + s_len + 2 );
1328
568k
        if( !p_props->psz_header )
1329
0
            return VLC_ENOMEM;
1330
568k
        snprintf( p_props->psz_header + header_len, s_len + 2, "%s\n", s );
1331
568k
        header_len += s_len + 1;
1332
568k
    }
1333
562
}
1334
1335
/* ParseVplayer
1336
 *  Format
1337
 *      h:m:s:Line1|Line2|Line3....
1338
 *  or
1339
 *      h:m:s Line1|Line2|Line3....
1340
 */
1341
static int ParseVplayer( vlc_object_t *p_obj, subs_properties_t *p_props,
1342
                         text_t *txt, subtitle_t *p_subtitle,
1343
                         size_t i_idx )
1344
741
{
1345
741
    VLC_UNUSED(p_obj);
1346
741
    VLC_UNUSED(p_props);
1347
741
    VLC_UNUSED( i_idx );
1348
741
    char *psz_text;
1349
1350
741
    for( ;; )
1351
298k
    {
1352
298k
        const char *s = TextGetLine( txt );
1353
298k
        int h1, m1, s1;
1354
1355
298k
        if( !s )
1356
22
            return VLC_EGENERIC;
1357
1358
298k
        psz_text = malloc( strlen( s ) + 1 );
1359
298k
        if( !psz_text )
1360
0
            return VLC_ENOMEM;
1361
1362
298k
        if( sscanf( s, "%d:%d:%d%*c%[^\r\n]",
1363
298k
                    &h1, &m1, &s1, psz_text ) == 4 )
1364
719
        {
1365
719
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
1366
719
            p_subtitle->i_stop  = -1;
1367
719
            break;
1368
719
        }
1369
297k
        free( psz_text );
1370
297k
    }
1371
1372
    /* replace | by \n */
1373
791k
    for( size_t i = 0; psz_text[i] != '\0'; i++ )
1374
790k
    {
1375
790k
        if( psz_text[i] == '|' )
1376
6
            psz_text[i] = '\n';
1377
790k
    }
1378
719
    p_subtitle->psz_text = psz_text;
1379
719
    return VLC_SUCCESS;
1380
741
}
1381
1382
/* ParseSami
1383
 */
1384
/* Case-insensitive search for psz_str, first in psz_start (when not NULL),
 * then in the following lines of txt. Returns a pointer just past the match,
 * or NULL when the end of the text is reached without a match. */
static const char *ParseSamiSearch( text_t *txt,
                                    const char *psz_start, const char *psz_str )
{
    const char *psz_hit = NULL;

    /* Try the remainder of the current line first. */
    if( psz_start != NULL )
        psz_hit = strcasestr( psz_start, psz_str );

    /* Then consume lines until one contains the pattern. */
    while( psz_hit == NULL )
    {
        const char *psz_line = TextGetLine( txt );
        if( psz_line == NULL )
            return NULL;

        psz_hit = strcasestr( psz_line, psz_str );
    }

    return &psz_hit[strlen( psz_str )];
}
1404
static int ParseSami( vlc_object_t *p_obj, subs_properties_t *p_props,
1405
                      text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1406
272
{
1407
272
    VLC_UNUSED(p_obj);
1408
272
    VLC_UNUSED(p_props);
1409
272
    VLC_UNUSED( i_idx );
1410
272
    const char *s;
1411
272
    int64_t i_start;
1412
1413
272
    unsigned int i_text;
1414
272
    char text[8192]; /* Arbitrary but should be long enough */
1415
1416
    /* search "Start=" */
1417
272
    s = ParseSamiSearch( txt, p_props->sami.psz_start, "Start=" );
1418
272
    p_props->sami.psz_start = NULL;
1419
272
    if( !s )
1420
58
        return VLC_EGENERIC;
1421
1422
    /* get start value */
1423
214
    char *psz_end;
1424
214
    i_start = strtol( s, &psz_end, 0 );
1425
214
    s = psz_end;
1426
1427
    /* search <P */
1428
214
    if( !( s = ParseSamiSearch( txt, s, "<P" ) ) )
1429
3
        return VLC_EGENERIC;
1430
1431
    /* search > */
1432
211
    if( !( s = ParseSamiSearch( txt, s, ">" ) ) )
1433
2
        return VLC_EGENERIC;
1434
1435
209
    i_text = 0;
1436
209
    text[0] = '\0';
1437
    /* now get all txt until  a "Start=" line */
1438
209
    for( ;; )
1439
150k
    {
1440
150k
        char c = '\0';
1441
        /* Search non empty line */
1442
411k
        while( s && *s == '\0' )
1443
260k
            s = TextGetLine( txt );
1444
150k
        if( !s )
1445
38
            break;
1446
1447
150k
        if( *s == '<' )
1448
3.47k
        {
1449
3.47k
            if( !strncasecmp( s, "<br", 3 ) )
1450
83
            {
1451
83
                c = '\n';
1452
83
            }
1453
3.39k
            else if( strcasestr( s, "Start=" ) )
1454
171
            {
1455
171
                p_props->sami.psz_start = s;
1456
171
                break;
1457
171
            }
1458
3.30k
            s = ParseSamiSearch( txt, s, ">" );
1459
3.30k
        }
1460
147k
        else if( !strncmp( s, "&nbsp;", 6 ) )
1461
309
        {
1462
309
            c = ' ';
1463
309
            s += 6;
1464
309
        }
1465
146k
        else if( *s == '\t' )
1466
652
        {
1467
652
            c = ' ';
1468
652
            s++;
1469
652
        }
1470
146k
        else
1471
146k
        {
1472
146k
            c = *s;
1473
146k
            s++;
1474
146k
        }
1475
150k
        if( c != '\0' && i_text+1 < sizeof(text) )
1476
147k
        {
1477
147k
            text[i_text++] = c;
1478
147k
            text[i_text] = '\0';
1479
147k
        }
1480
150k
    }
1481
1482
209
    p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start);
1483
209
    p_subtitle->i_stop  = -1;
1484
209
    p_subtitle->psz_text = strdup( text );
1485
1486
209
    return VLC_SUCCESS;
1487
211
}
1488
1489
/* ParseDVDSubtitle
1490
 *  Format
1491
 *      {T h1:m1:s1:c1
1492
 *      Line1
1493
 *      Line2
1494
 *      ...
1495
 *      }
1496
 * TODO it can have a header
1497
 *      { HEAD
1498
 *          ...
1499
 *          CODEPAGE=...
1500
 *          FORMAT=...
1501
 *          LANG=English
1502
 *      }
1503
 *      LANG support would be cool
1504
 *      CODEPAGE is probably mandatory FIXME
1505
 */
1506
static int ParseDVDSubtitle(vlc_object_t *p_obj, subs_properties_t *p_props,
1507
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1508
438
{
1509
438
    VLC_UNUSED(p_obj);
1510
438
    VLC_UNUSED(p_props);
1511
438
    VLC_UNUSED( i_idx );
1512
438
    char *psz_text;
1513
1514
438
    for( ;; )
1515
5.20k
    {
1516
5.20k
        const char *s = TextGetLine( txt );
1517
5.20k
        int h1, m1, s1, c1;
1518
1519
5.20k
        if( !s )
1520
3
            return VLC_EGENERIC;
1521
1522
5.20k
        if( sscanf( s,
1523
5.20k
                    "{T %d:%d:%d:%d",
1524
5.20k
                    &h1, &m1, &s1, &c1 ) == 4 )
1525
435
        {
1526
435
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1527
435
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1528
435
            p_subtitle->i_stop = -1;
1529
435
            break;
1530
435
        }
1531
5.20k
    }
1532
1533
    /* Now read text until a line containing "}" */
1534
435
    size_t i_old = 0;
1535
435
    psz_text = NULL;
1536
435
    for( ;; )
1537
12.5k
    {
1538
12.5k
        const char *s = TextGetLine( txt );
1539
12.5k
        size_t i_len;
1540
1541
12.5k
        if( !s )
1542
3
        {
1543
3
            free( psz_text );
1544
3
            return VLC_EGENERIC;
1545
3
        }
1546
1547
12.5k
        i_len = strlen( s );
1548
12.5k
        if( i_len == 1 && s[0] == '}')
1549
432
        {
1550
432
            if (psz_text)
1551
160
                psz_text[i_old] = '\0';
1552
432
            p_subtitle->psz_text = psz_text;
1553
432
            return VLC_SUCCESS;
1554
432
        }
1555
1556
12.0k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1557
12.0k
        if( !psz_text )
1558
0
            return VLC_ENOMEM;
1559
1560
12.0k
        memcpy( &psz_text[i_old], s, i_len );
1561
12.0k
        psz_text[i_old + i_len + 0] = '\n';
1562
12.0k
        i_old += i_len + 1;
1563
12.0k
    }
1564
435
}
1565
1566
/* ParseMPL2
1567
 *  Format
1568
 *     [n1][n2]Line1|Line2|Line3...
1569
 *  where n1 and n2 are times in tenths of a second (n2 can be empty)
1570
 */
1571
static int ParseMPL2(vlc_object_t *p_obj, subs_properties_t *p_props,
1572
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1573
230k
{
1574
230k
    VLC_UNUSED(p_obj);
1575
230k
    VLC_UNUSED(p_props);
1576
230k
    VLC_UNUSED( i_idx );
1577
230k
    char *psz_text;
1578
230k
    int i;
1579
1580
230k
    for( ;; )
1581
255k
    {
1582
255k
        const char *s = TextGetLine( txt );
1583
255k
        int i_start;
1584
255k
        int i_stop;
1585
1586
255k
        if( !s )
1587
21
            return VLC_EGENERIC;
1588
1589
255k
        psz_text = malloc( strlen(s) + 1 );
1590
255k
        if( !psz_text )
1591
0
            return VLC_ENOMEM;
1592
1593
255k
        i_start = 0;
1594
255k
        i_stop  = -1;
1595
255k
        if( sscanf( s, "[%d][] %[^\r\n]", &i_start, psz_text ) == 2 ||
1596
25.6k
            sscanf( s, "[%d][%d] %[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1597
230k
        {
1598
230k
            p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start * 100);
1599
230k
            p_subtitle->i_stop  = i_stop >= 0 ? VLC_TICK_0 + VLC_TICK_FROM_MS(i_stop  * 100) : VLC_TICK_INVALID;
1600
230k
            break;
1601
230k
        }
1602
25.5k
        free( psz_text );
1603
25.5k
    }
1604
1605
560k
    for( i = 0; psz_text[i] != '\0'; )
1606
330k
    {
1607
        /* replace | by \n */
1608
330k
        if( psz_text[i] == '|' )
1609
0
            psz_text[i] = '\n';
1610
1611
        /* Remove italic */
1612
330k
        if( psz_text[i] == '/' && ( i == 0 || psz_text[i-1] == '\n' ) )
1613
9
            memmove( &psz_text[i], &psz_text[i+1], strlen(&psz_text[i+1])+1 );
1614
330k
        else
1615
330k
            i++;
1616
330k
    }
1617
230k
    p_subtitle->psz_text = psz_text;
1618
230k
    return VLC_SUCCESS;
1619
230k
}
1620
1621
/* Reads one AQT entry. The first "-->> n" line gives the start frame, the
 * next one gives the stop frame and ends the entry; every other line is
 * appended to the text followed by '\n'. The entry also ends after a text
 * line that is the last line of the input.
 * Frame numbers are converted with p_props->i_microsecperframe.
 * Returns VLC_SUCCESS with psz_text set, VLC_EGENERIC when the input ends on
 * a call to TextGetLine(), or VLC_ENOMEM.
 * NOTE(review): when the entry ends on the last text line, i_stop (and
 * i_start if no "-->>" line was seen) is not written here - confirm that the
 * caller initialises these fields. */
static int ParseAQT(vlc_object_t *p_obj, subs_properties_t *p_props, text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
{
    VLC_UNUSED(p_obj);
    VLC_UNUSED(p_props);
    VLC_UNUSED( i_idx );

    char  *psz_text = NULL;
    size_t i_used = 0; /* bytes of psz_text in use */
    bool   b_have_start = false;

    for( ;; )
    {
        int t; /* Time */

        const char *psz_line = TextGetLine( txt );
        if( psz_line == NULL )
        {
            free( psz_text );
            return VLC_EGENERIC;
        }

        if( sscanf (psz_line, "-->> %d", &t) != 1 )
        {
            /* Text Lines */
            const size_t i_len = strlen( psz_line );

            /* Room for the line, the added '\n' and the final terminator. */
            psz_text = realloc_or_free( psz_text, i_used + i_len + 1 + 1 );
            if( psz_text == NULL )
                 return VLC_ENOMEM;

            memcpy( &psz_text[i_used], psz_line, i_len );
            psz_text[i_used + i_len] = '\n';
            i_used += i_len + 1;

            /* That was the last line of the input. */
            if( txt->i_line == txt->i_line_count )
                break;
            continue;
        }

        /* Data Lines */
        if( b_have_start )
        {
            /* We have been too far: end of the subtitle, begin of next */
            p_subtitle->i_stop  = VLC_TICK_0 + t * p_props->i_microsecperframe;
            break;
        }

        /* Starting of a subtitle */
        p_subtitle->i_start = VLC_TICK_0 + t * p_props->i_microsecperframe;
        b_have_start = true;
    }

    if( psz_text != NULL )
        psz_text[i_used] = '\0';
    p_subtitle->psz_text = psz_text;
    return VLC_SUCCESS;
}
1680
1681
static int ParsePJS(vlc_object_t *p_obj, subs_properties_t *p_props,
1682
                    text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1683
4
{
1684
4
    VLC_UNUSED(p_obj);
1685
4
    VLC_UNUSED(p_props);
1686
4
    VLC_UNUSED( i_idx );
1687
1688
4
    char *psz_text;
1689
4
    int i;
1690
1691
4
    for( ;; )
1692
40.0k
    {
1693
40.0k
        const char *s = TextGetLine( txt );
1694
40.0k
        int t1, t2;
1695
1696
40.0k
        if( !s )
1697
4
            return VLC_EGENERIC;
1698
1699
40.0k
        psz_text = malloc( strlen(s) + 1 );
1700
40.0k
        if( !psz_text )
1701
0
            return VLC_ENOMEM;
1702
1703
        /* Data Lines */
1704
40.0k
        if( sscanf (s, "%d,%d,\"%[^\n\r]", &t1, &t2, psz_text ) == 3 )
1705
0
        {
1706
            /* 1/10th of second ? Frame based ? FIXME */
1707
0
            p_subtitle->i_start = VLC_TICK_0 + INT64_C(10) * t1;
1708
0
            p_subtitle->i_stop = VLC_TICK_0 + INT64_C(10) * t2;
1709
            /* Remove latest " */
1710
0
            psz_text[ strlen(psz_text) - 1 ] = '\0';
1711
1712
0
            break;
1713
0
        }
1714
40.0k
        free( psz_text );
1715
40.0k
    }
1716
1717
    /* replace | by \n */
1718
0
    for( i = 0; psz_text[i] != '\0'; i++ )
1719
0
    {
1720
0
        if( psz_text[i] == '|' )
1721
0
            psz_text[i] = '\n';
1722
0
    }
1723
1724
0
    p_subtitle->psz_text = psz_text;
1725
0
    msg_Dbg( p_obj, "%s", psz_text );
1726
0
    return VLC_SUCCESS;
1727
4
}
1728
1729
static int ParseMPSub( vlc_object_t *p_obj, subs_properties_t *p_props,
1730
                       text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1731
1.80k
{
1732
1.80k
    VLC_UNUSED( i_idx );
1733
1734
1.80k
    if( !p_props->mpsub.b_inited )
1735
19
    {
1736
19
        p_props->mpsub.f_total = 0.0;
1737
19
        p_props->mpsub.i_factor = 0;
1738
1739
19
        p_props->mpsub.b_inited = true;
1740
19
    }
1741
1742
1.80k
    for( ;; )
1743
823k
    {
1744
823k
        const char *s = TextGetLine( txt );
1745
823k
        if( !s )
1746
16
        {
1747
16
            return VLC_EGENERIC;
1748
16
        }
1749
1750
823k
        if ( *s =='#' || *s == '\0' )
1751
783k
            continue;
1752
1753
        /* Data Lines */
1754
39.9k
        float wait, duration;
1755
39.9k
        if( sscanf( s, "%f %f", &wait, &duration ) == 2 )
1756
1.79k
        {
1757
1.79k
            float f1 = wait;
1758
1.79k
            float f2 = duration;
1759
1.79k
            p_props->mpsub.f_total += f1 * p_props->mpsub.i_factor;
1760
1.79k
            p_subtitle->i_start = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1761
1.79k
            p_props->mpsub.f_total += f2 * p_props->mpsub.i_factor;
1762
1.79k
            p_subtitle->i_stop = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1763
1.79k
            break;
1764
1.79k
        }
1765
1766
38.1k
        if( !strncmp( s, "FORMAT=", strlen("FORMAT=") ) )
1767
489
        {
1768
489
            const char *psz_format = s + strlen( "FORMAT=" );
1769
489
            if( !strncmp( psz_format, "TIME", strlen("TIME") ) && (psz_format[4] == '\0' || psz_format[4] == ' ') )
1770
64
            {
1771
                // FORMAT=TIME may be followed by a comment
1772
64
                p_props->mpsub.i_factor = 100;
1773
64
            }
1774
425
            else
1775
425
            {
1776
425
                float f_fps;
1777
425
                if( sscanf( psz_format, "%f", &f_fps ) == 1 )
1778
318
                {
1779
318
                    if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1780
13
                        var_SetFloat( p_obj, "sub-original-fps", f_fps );
1781
1782
318
                    p_props->mpsub.i_factor = 1;
1783
318
                }
1784
425
            }
1785
489
        }
1786
38.1k
    }
1787
1788
1.79k
    char *psz_text = NULL;
1789
1.79k
    size_t i_old = 0;
1790
1.79k
    for( ;; )
1791
2.45k
    {
1792
2.45k
        const char *s = TextGetLine( txt );
1793
1794
2.45k
        if( !s )
1795
3
        {
1796
3
            free( psz_text );
1797
3
            return VLC_EGENERIC;
1798
3
        }
1799
1800
2.45k
        size_t i_len = strlen( s );
1801
2.45k
        if( i_len == 0 )
1802
1.78k
            break;
1803
1804
662
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1805
662
        if( !psz_text )
1806
0
             return VLC_ENOMEM;
1807
1808
662
        memcpy( &psz_text[i_old], s, i_len );
1809
662
        psz_text[i_old + i_len + 0] = '\n';
1810
662
        i_old += i_len + 1;
1811
662
    }
1812
1813
1.78k
    if (psz_text)
1814
354
        psz_text[i_old] = '\0';
1815
1.78k
    p_subtitle->psz_text = psz_text;
1816
1.78k
    return VLC_SUCCESS;
1817
1.79k
}
1818
1819
static int ParseJSS( vlc_object_t *p_obj, subs_properties_t *p_props,
1820
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1821
88.9k
{
1822
88.9k
    VLC_UNUSED( i_idx );
1823
88.9k
    char         *psz_text, *psz_orig;
1824
88.9k
    char         *psz_text2, *psz_orig2;
1825
1826
88.9k
    if( !p_props->jss.b_inited )
1827
64
    {
1828
64
        p_props->jss.i_comment = 0;
1829
64
        p_props->jss.i_time_resolution = 30;
1830
64
        p_props->jss.i_time_shift = 0;
1831
1832
64
        p_props->jss.b_inited = true;
1833
64
    }
1834
1835
    /* Parse the main lines */
1836
88.9k
    for( ;; )
1837
237k
    {
1838
237k
        const char *s = TextGetLine( txt );
1839
237k
        if( !s )
1840
64
            return VLC_EGENERIC;
1841
1842
237k
        size_t line_length = strlen( s );
1843
237k
        psz_orig = malloc( line_length + 1 );
1844
237k
        if( !psz_orig )
1845
0
            return VLC_ENOMEM;
1846
237k
        psz_text = psz_orig;
1847
1848
        /* Complete time lines */
1849
237k
        int h1, h2, m1, m2, s1, s2, f1, f2;
1850
237k
        if( sscanf( s, "%d:%d:%d.%d %d:%d:%d.%d %[^\n\r]",
1851
237k
                    &h1, &m1, &s1, &f1, &h2, &m2, &s2, &f2, psz_text ) == 9 )
1852
128
        {
1853
128
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 ) +
1854
128
                vlc_tick_from_sec( ( f1 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1855
128
            p_subtitle->i_stop = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 ) +
1856
128
                vlc_tick_from_sec( ( f2 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1857
128
            break;
1858
128
        }
1859
        /* Short time lines */
1860
237k
        else if( sscanf( s, "@%d @%d %[^\n\r]", &f1, &f2, psz_text ) == 3 )
1861
88.7k
        {
1862
88.7k
            p_subtitle->i_start = VLC_TICK_0 +
1863
88.7k
                    vlc_tick_from_sec( (f1 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1864
88.7k
            p_subtitle->i_stop = VLC_TICK_0 +
1865
88.7k
                    vlc_tick_from_sec( (f2 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1866
88.7k
            break;
1867
88.7k
        }
1868
        /* General Directive lines */
1869
        /* Only TIME and SHIFT are supported so far */
1870
148k
        else if( s[0] == '#' )
1871
97
        {
1872
97
            int h = 0, m =0, sec = 1, f = 1;
1873
97
            unsigned shift = 1;
1874
97
            int inv = 1;
1875
1876
97
            strcpy( psz_text, s );
1877
1878
97
            switch( toupper( (unsigned char)psz_text[1] ) )
1879
97
            {
1880
85
            case 'S':
1881
85
                 shift = isalpha( (unsigned char)psz_text[2] ) ? 6 : 2 ;
1882
85
                 if ( shift > line_length )
1883
13
                     break;
1884
1885
72
                 if( sscanf( &psz_text[shift], "%d", &h ) )
1886
60
                 {
1887
                     /* Negative shifting */
1888
60
                     if( h < 0 )
1889
0
                     {
1890
0
                         h *= -1;
1891
0
                         inv = -1;
1892
0
                     }
1893
1894
60
                     if( sscanf( &psz_text[shift], "%*d:%d", &m ) )
1895
56
                     {
1896
56
                         if( sscanf( &psz_text[shift], "%*d:%*d:%d", &sec ) )
1897
0
                         {
1898
0
                             sscanf( &psz_text[shift], "%*d:%*d:%*d.%d", &f );
1899
0
                         }
1900
56
                         else
1901
56
                         {
1902
56
                             h = 0;
1903
56
                             sscanf( &psz_text[shift], "%d:%d.%d",
1904
56
                                     &m, &sec, &f );
1905
56
                             m *= inv;
1906
56
                         }
1907
56
                     }
1908
4
                     else
1909
4
                     {
1910
4
                         h = m = 0;
1911
4
                         sscanf( &psz_text[shift], "%d.%d", &sec, &f);
1912
4
                         sec *= inv;
1913
4
                     }
1914
60
                     p_props->jss.i_time_shift = ( ( h * INT64_C(3600) + m * INT64_C(60) + sec )
1915
60
                         * p_props->jss.i_time_resolution + f ) * inv;
1916
60
                 }
1917
72
                 break;
1918
1919
0
            case 'T':
1920
0
                shift = isalpha( (unsigned char)psz_text[2] ) ? 8 : 2 ;
1921
0
                if ( shift > line_length )
1922
0
                    break;
1923
1924
0
                sscanf( &psz_text[shift], "%d", &p_props->jss.i_time_resolution );
1925
0
                if( !p_props->jss.i_time_resolution || p_props->jss.i_time_resolution < 0 )
1926
0
                    p_props->jss.i_time_resolution = 30;
1927
0
                break;
1928
97
            }
1929
97
            free( psz_orig );
1930
97
            continue;
1931
97
        }
1932
148k
        else
1933
            /* Unknown type line, probably a comment */
1934
148k
        {
1935
148k
            free( psz_orig );
1936
148k
            continue;
1937
148k
        }
1938
237k
    }
1939
1940
88.8k
    while( psz_text[ strlen( psz_text ) - 1 ] == '\\' )
1941
0
    {
1942
0
        const char *s2 = TextGetLine( txt );
1943
1944
0
        if( !s2 )
1945
0
        {
1946
0
            free( psz_orig );
1947
0
            return VLC_EGENERIC;
1948
0
        }
1949
1950
0
        size_t i_len = strlen( s2 );
1951
0
        if( i_len == 0 )
1952
0
            break;
1953
1954
0
        size_t i_old = strlen( psz_text );
1955
1956
0
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 );
1957
0
        if( !psz_text )
1958
0
             return VLC_ENOMEM;
1959
1960
0
        psz_orig = psz_text;
1961
0
        strcat( psz_text, s2 );
1962
0
    }
1963
1964
    /* Skip the blanks */
1965
88.8k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1966
1967
    /* Parse the directives */
1968
88.8k
    if( isalpha( (unsigned char)*psz_text ) || *psz_text == '[' )
1969
2.56k
    {
1970
74.5k
        while( *psz_text && *psz_text != ' ' )
1971
71.9k
            ++psz_text;
1972
1973
        /* Directives are NOT parsed yet */
1974
        /* This has probably a better place in a decoder ? */
1975
        /* directive = malloc( strlen( psz_text ) + 1 );
1976
           if( sscanf( psz_text, "%s %[^\n\r]", directive, psz_text2 ) == 2 )*/
1977
2.56k
    }
1978
1979
    /* Skip the blanks after directives */
1980
90.3k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1981
1982
    /* Clean all the lines from inline comments and other stuffs */
1983
88.8k
    psz_orig2 = calloc( strlen( psz_text) + 1, 1 );
1984
88.8k
    psz_text2 = psz_orig2;
1985
1986
1.69M
    for( ; *psz_text != '\0' && *psz_text != '\n' && *psz_text != '\r'; )
1987
1.60M
    {
1988
1.60M
        switch( *psz_text )
1989
1.60M
        {
1990
488k
        case '{':
1991
488k
            p_props->jss.i_comment++;
1992
488k
            break;
1993
237
        case '}':
1994
237
            if( p_props->jss.i_comment )
1995
229
            {
1996
229
                p_props->jss.i_comment = 0;
1997
229
                if( (*(psz_text + 1 ) ) == ' ' ) psz_text++;
1998
229
            }
1999
237
            break;
2000
3.56k
        case '~':
2001
3.56k
            if( !p_props->jss.i_comment )
2002
23
            {
2003
23
                *psz_text2 = ' ';
2004
23
                psz_text2++;
2005
23
            }
2006
3.56k
            break;
2007
13.6k
        case ' ':
2008
13.8k
        case '\t':
2009
13.8k
            if( (*(psz_text + 1 ) ) == ' ' || (*(psz_text + 1 ) ) == '\t' )
2010
636
                break;
2011
13.2k
            if( !p_props->jss.i_comment )
2012
3.57k
            {
2013
3.57k
                *psz_text2 = ' ';
2014
3.57k
                psz_text2++;
2015
3.57k
            }
2016
13.2k
            break;
2017
11.0k
        case '\\':
2018
11.0k
            if( (*(psz_text + 1 ) ) == 'n' )
2019
1.78k
            {
2020
1.78k
                *psz_text2 = '\n';
2021
1.78k
                psz_text++;
2022
1.78k
                psz_text2++;
2023
1.78k
                break;
2024
1.78k
            }
2025
9.26k
            if( ( toupper((unsigned char)*(psz_text + 1 ) ) == 'C' ) ||
2026
9.19k
                    ( toupper((unsigned char)*(psz_text + 1 ) ) == 'F' ) )
2027
72
            {
2028
72
                psz_text++;
2029
72
                break;
2030
72
            }
2031
9.19k
            if( (*(psz_text + 1 ) ) == 'B' || (*(psz_text + 1 ) ) == 'b' ||
2032
9.19k
                (*(psz_text + 1 ) ) == 'I' || (*(psz_text + 1 ) ) == 'i' ||
2033
9.19k
                (*(psz_text + 1 ) ) == 'U' || (*(psz_text + 1 ) ) == 'u' ||
2034
9.19k
                (*(psz_text + 1 ) ) == 'D' || (*(psz_text + 1 ) ) == 'N' )
2035
4
            {
2036
4
                psz_text++;
2037
4
                break;
2038
4
            }
2039
9.19k
            if( (*(psz_text + 1 ) ) == '~' || (*(psz_text + 1 ) ) == '{' ||
2040
4.63k
                (*(psz_text + 1 ) ) == '\\' )
2041
4.67k
                psz_text++;
2042
4.51k
            else if( ( *(psz_text + 1 ) == '\r' ||  *(psz_text + 1 ) == '\n' ) &&
2043
0
                     *(psz_text + 1 ) != '\0' )
2044
0
            {
2045
0
                psz_text++;
2046
0
            }
2047
9.19k
            break;
2048
1.08M
        default:
2049
1.08M
            if( !p_props->jss.i_comment )
2050
550k
            {
2051
550k
                *psz_text2 = *psz_text;
2052
550k
                psz_text2++;
2053
550k
            }
2054
1.60M
        }
2055
1.60M
        psz_text++;
2056
1.60M
    }
2057
2058
88.8k
    p_subtitle->psz_text = psz_orig2;
2059
88.8k
    msg_Dbg( p_obj, "%s", p_subtitle->psz_text );
2060
88.8k
    free( psz_orig );
2061
88.8k
    return VLC_SUCCESS;
2062
88.8k
}
2063
2064
static int ParsePSB( vlc_object_t *p_obj, subs_properties_t *p_props,
2065
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2066
0
{
2067
0
    VLC_UNUSED(p_obj);
2068
0
    VLC_UNUSED(p_props);
2069
0
    VLC_UNUSED( i_idx );
2070
2071
0
    char *psz_text;
2072
0
    int i;
2073
2074
0
    for( ;; )
2075
0
    {
2076
0
        int h1, m1, s1;
2077
0
        int h2, m2, s2;
2078
0
        const char *s = TextGetLine( txt );
2079
2080
0
        if( !s )
2081
0
            return VLC_EGENERIC;
2082
2083
0
        psz_text = malloc( strlen( s ) + 1 );
2084
0
        if( !psz_text )
2085
0
            return VLC_ENOMEM;
2086
2087
0
        if( sscanf( s, "{%d:%d:%d}{%d:%d:%d}%[^\r\n]",
2088
0
                    &h1, &m1, &s1, &h2, &m2, &s2, psz_text ) == 7 )
2089
0
        {
2090
0
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2091
0
            p_subtitle->i_stop  = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 );
2092
0
            break;
2093
0
        }
2094
0
        free( psz_text );
2095
0
    }
2096
2097
    /* replace | by \n */
2098
0
    for( i = 0; psz_text[i] != '\0'; i++ )
2099
0
    {
2100
0
        if( psz_text[i] == '|' )
2101
0
            psz_text[i] = '\n';
2102
0
    }
2103
0
    p_subtitle->psz_text = psz_text;
2104
0
    return VLC_SUCCESS;
2105
0
}
2106
2107
static vlc_tick_t ParseRealTime( const char *psz )
2108
2.62k
{
2109
2.62k
    if( *psz == '\0' ) return VLC_TICK_0;
2110
2.62k
    int h, m, s, f;
2111
2.62k
    if( sscanf( psz, "%d:%d:%d.%d", &h, &m, &s, &f ) == 4 )
2112
128
    {
2113
128
        return vlc_tick_from_HMS( h, m, s )
2114
128
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2115
128
    }
2116
2.49k
    if( sscanf( psz, "%d:%d.%d", &m, &s, &f ) == 3 )
2117
134
    {
2118
134
        return vlc_tick_from_HMS( 0, m, s )
2119
134
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2120
134
    }
2121
2.36k
    if( sscanf( psz, "%d.%d", &s, &f ) == 2 )
2122
792
    {
2123
792
        return vlc_tick_from_sec( s )
2124
792
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2125
792
    }
2126
1.57k
    if( sscanf( psz, "%d:%d", &m, &s ) == 2 )
2127
630
    {
2128
630
        return vlc_tick_from_HMS( 0, m, s )
2129
630
               + VLC_TICK_0;
2130
630
    }
2131
943
    if( sscanf( psz, "%d", &s ) == 1 )
2132
639
    {
2133
639
        return vlc_tick_from_sec( s )
2134
639
               + VLC_TICK_0;
2135
639
    }
2136
304
    return VLC_TICK_MIN;
2137
943
}
2138
2139
static int ParseRealText( vlc_object_t *p_obj, subs_properties_t *p_props,
2140
                          text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2141
2.30k
{
2142
2.30k
    VLC_UNUSED(p_obj);
2143
2.30k
    VLC_UNUSED(p_props);
2144
2.30k
    VLC_UNUSED( i_idx );
2145
2.30k
    char *psz_text = NULL;
2146
2147
2.30k
    for( ;; )
2148
923k
    {
2149
923k
        const char *s = TextGetLine( txt );
2150
923k
        free( psz_text );
2151
2152
923k
        if( !s )
2153
27
            return VLC_EGENERIC;
2154
2155
923k
        psz_text = malloc( strlen( s ) + 1 );
2156
923k
        if( !psz_text )
2157
0
            return VLC_ENOMEM;
2158
2159
        /* Find the good beginning. This removes extra spaces at the beginning
2160
           of the line.*/
2161
923k
        char *psz_temp = strcasestr( s, "<time");
2162
923k
        if( psz_temp != NULL )
2163
2.81k
        {
2164
2.81k
            char psz_end[12], psz_begin[12];
2165
2.81k
            vlc_tick_t end = VLC_TICK_MIN;
2166
            /* Line has begin and end */
2167
2.81k
            if( sscanf( psz_temp,
2168
2.81k
                  "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\" %*[e|E]nd=\"%11[^\"]%*[^>]%[^\n\r]",
2169
2.81k
                            psz_begin, psz_end, psz_text) == 3 )
2170
349
            {
2171
349
                end = ParseRealTime( psz_end );
2172
349
            }
2173
2.46k
            else if ( sscanf( psz_temp,
2174
2.46k
                                "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\"%*[^>]%[^\n\r]",
2175
2.46k
                                psz_begin, psz_text ) != 2)
2176
                /* Line is not recognized */
2177
534
            {
2178
534
                continue;
2179
534
            }
2180
2181
            /* Get the times */
2182
2.27k
            vlc_tick_t i_time = ParseRealTime( psz_begin );
2183
2.27k
            if (i_time != VLC_TICK_MIN)
2184
2.11k
                p_subtitle->i_start = i_time;
2185
160
            else
2186
160
                p_subtitle->i_start = -1;
2187
2188
2.27k
            if (end != VLC_TICK_MIN)
2189
205
                p_subtitle->i_stop = end;
2190
2.07k
            else
2191
2.07k
                p_subtitle->i_stop = -1;
2192
2.27k
            break;
2193
2.81k
        }
2194
923k
    }
2195
2196
    /* Get the following Lines */
2197
2.27k
    size_t i_old = strlen( psz_text );
2198
2.27k
    for( ;; )
2199
3.26k
    {
2200
3.26k
        const char *s = TextGetLine( txt );
2201
2202
3.26k
        if( !s )
2203
6
        {
2204
6
            free( psz_text );
2205
6
            return VLC_EGENERIC;
2206
6
        }
2207
2208
3.25k
        size_t i_len = strlen( s );
2209
3.25k
        if( i_len == 0 ) break;
2210
2211
3.04k
        if( strcasestr( s, "<time" ) ||
2212
1.18k
            strcasestr( s, "<clear/") )
2213
2.05k
        {
2214
2.05k
            TextPreviousLine( txt );
2215
2.05k
            break;
2216
2.05k
        }
2217
2218
984
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2219
984
        if( !psz_text )
2220
0
            return VLC_ENOMEM;
2221
2222
984
        memcpy( &psz_text[i_old], s, i_len );
2223
984
        psz_text[i_old + i_len + 0] = '\n';
2224
984
        i_old += i_len + 1;
2225
984
    }
2226
2227
2.27k
    psz_text[i_old] = '\0';
2228
    /* Remove the starting ">" that remained after the sscanf */
2229
2.27k
    memmove( &psz_text[0], &psz_text[1], strlen( psz_text ) );
2230
2231
2.27k
    p_subtitle->psz_text = psz_text;
2232
2233
2.27k
    return VLC_SUCCESS;
2234
2.27k
}
2235
2236
static int ParseDKS( vlc_object_t *p_obj, subs_properties_t *p_props,
2237
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2238
2.61k
{
2239
2.61k
    VLC_UNUSED(p_obj);
2240
2.61k
    VLC_UNUSED(p_props);
2241
2.61k
    VLC_UNUSED( i_idx );
2242
2243
2.61k
    char *psz_text;
2244
2245
2.61k
    for( ;; )
2246
1.12M
    {
2247
1.12M
        int h1, m1, s1;
2248
1.12M
        int h2, m2, s2;
2249
1.12M
        char *s = TextGetLine( txt );
2250
2251
1.12M
        if( !s )
2252
16
            return VLC_EGENERIC;
2253
2254
1.12M
        psz_text = malloc( strlen( s ) + 1 );
2255
1.12M
        if( !psz_text )
2256
0
            return VLC_ENOMEM;
2257
2258
1.12M
        if( sscanf( s, "[%d:%d:%d]%[^\r\n]",
2259
1.12M
                    &h1, &m1, &s1, psz_text ) == 4 )
2260
2.60k
        {
2261
2.60k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2262
2263
2.60k
            s = TextGetLine( txt );
2264
2.60k
            if( !s )
2265
0
            {
2266
0
                free( psz_text );
2267
0
                return VLC_EGENERIC;
2268
0
            }
2269
2270
2.60k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2271
1.84k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2272
757
            else
2273
757
                p_subtitle->i_stop  = -1;
2274
2.60k
            break;
2275
2.60k
        }
2276
1.12M
        free( psz_text );
2277
1.12M
    }
2278
2279
    /* replace [br] by \n */
2280
2.60k
    char *p;
2281
2.74k
    while( ( p = strstr( psz_text, "[br]" ) ) )
2282
143
    {
2283
143
        *p++ = '\n';
2284
143
        memmove( p, &p[3], strlen(&p[3])+1 );
2285
143
    }
2286
2287
2.60k
    p_subtitle->psz_text = psz_text;
2288
2.60k
    return VLC_SUCCESS;
2289
2.61k
}
2290
2291
static int ParseSubViewer1( vlc_object_t *p_obj, subs_properties_t *p_props,
2292
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2293
2.12k
{
2294
2.12k
    VLC_UNUSED(p_obj);
2295
2.12k
    VLC_UNUSED(p_props);
2296
2.12k
    VLC_UNUSED( i_idx );
2297
2.12k
    char *psz_text;
2298
2299
2.12k
    for( ;; )
2300
1.13M
    {
2301
1.13M
        int h1, m1, s1;
2302
1.13M
        int h2, m2, s2;
2303
1.13M
        char *s = TextGetLine( txt );
2304
2305
1.13M
        if( !s )
2306
13
            return VLC_EGENERIC;
2307
2308
1.13M
        if( sscanf( s, "[%d:%d:%d]", &h1, &m1, &s1 ) == 3 )
2309
2.10k
        {
2310
2.10k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2311
2312
2.10k
            s = TextGetLine( txt );
2313
2.10k
            if( !s )
2314
0
                return VLC_EGENERIC;
2315
2316
2.10k
            psz_text = strdup( s );
2317
2.10k
            if( !psz_text )
2318
0
                return VLC_ENOMEM;
2319
2320
2.10k
            s = TextGetLine( txt );
2321
2.10k
            if( !s )
2322
3
            {
2323
3
                free( psz_text );
2324
3
                return VLC_EGENERIC;
2325
3
            }
2326
2327
2.10k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2328
1.84k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2329
264
            else
2330
264
                p_subtitle->i_stop  = -1;
2331
2332
2.10k
            break;
2333
2.10k
        }
2334
1.13M
    }
2335
2336
2.10k
    p_subtitle->psz_text = psz_text;
2337
2338
2.10k
    return VLC_SUCCESS;
2339
2.12k
}
2340
2341
static int ParseCommonSBV( vlc_object_t *p_obj, subs_properties_t *p_props,
2342
                           text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2343
270
{
2344
270
    VLC_UNUSED(p_obj);
2345
270
    VLC_UNUSED( i_idx );
2346
270
    VLC_UNUSED( p_props );
2347
270
    char        *psz_text;
2348
2349
270
    for( ;; )
2350
9.82k
    {
2351
9.82k
        const char *s = TextGetLine( txt );
2352
9.82k
        int h1 = 0, m1 = 0, s1 = 0, d1 = 0;
2353
9.82k
        int h2 = 0, m2 = 0, s2 = 0, d2 = 0;
2354
2355
9.82k
        if( !s )
2356
5
            return VLC_EGENERIC;
2357
2358
9.82k
        if( sscanf( s,"%d:%d:%d.%d,%d:%d:%d.%d",
2359
9.82k
                    &h1, &m1, &s1, &d1,
2360
9.82k
                    &h2, &m2, &s2, &d2 ) == 8 )
2361
398
        {
2362
398
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
2363
398
                                  VLC_TICK_FROM_MS( d1 ) + VLC_TICK_0;
2364
2365
398
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
2366
398
                                  VLC_TICK_FROM_MS( d2 ) + VLC_TICK_0;
2367
398
            if( p_subtitle->i_start < p_subtitle->i_stop )
2368
265
                break;
2369
398
        }
2370
9.82k
    }
2371
2372
    /* Now read text until an empty line */
2373
265
    size_t i_old = 0;
2374
265
    psz_text = NULL;
2375
265
    for( ;; )
2376
540
    {
2377
540
        const char *s = TextGetLine( txt );
2378
540
        size_t i_len;
2379
2380
540
        i_len = s ? strlen( s ) : 0;
2381
540
        if( i_len <= 0 )
2382
265
        {
2383
265
            if (psz_text)
2384
133
                psz_text[i_old] = '\0';
2385
265
            p_subtitle->psz_text = psz_text;
2386
265
            return VLC_SUCCESS;
2387
265
        }
2388
2389
275
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2390
275
        if( !psz_text )
2391
0
            return VLC_ENOMEM;
2392
2393
275
        memcpy( &psz_text[i_old], s, i_len );
2394
275
        psz_text[i_old + i_len + 0] = '\n';
2395
275
        i_old += i_len + 1;
2396
275
    }
2397
265
}
2398
2399
static int ParseSCC( vlc_object_t *p_obj, subs_properties_t *p_props,
2400
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2401
3.37k
{
2402
3.37k
    VLC_UNUSED(p_obj);
2403
3.37k
    VLC_UNUSED( i_idx );
2404
3.37k
    VLC_UNUSED( p_props );
2405
2406
3.37k
    static const struct rates
2407
3.37k
    {
2408
3.37k
        unsigned val;
2409
3.37k
        vlc_rational_t rate;
2410
3.37k
        bool b_drop_allowed;
2411
3.37k
    } framerates[] = {
2412
3.37k
        { 2398, { 24000, 1001 }, false },
2413
3.37k
        { 2400, { 24, 1 },       false },
2414
3.37k
        { 2500, { 25, 1 },       false },
2415
3.37k
        { 2997, { 30000, 1001 }, true }, /* encoding rate */
2416
3.37k
        { 3000, { 30, 1 },       false },
2417
3.37k
        { 5000, { 50, 1 },       false },
2418
3.37k
        { 5994, { 60000, 1001 }, true },
2419
3.37k
        { 6000, { 60, 1 },       false },
2420
3.37k
    };
2421
3.37k
    const struct rates *p_rate = &framerates[3];
2422
3.37k
    float f_fps = var_GetFloat( p_obj, "sub-original-fps" );
2423
3.37k
    if( f_fps > 1.0 )
2424
0
    {
2425
0
        for( size_t i=0; i<ARRAY_SIZE(framerates); i++ )
2426
0
        {
2427
0
            if( (unsigned)(f_fps * 100) == framerates[i].val )
2428
0
            {
2429
0
                p_rate = &framerates[i];
2430
0
                break;
2431
0
            }
2432
0
        }
2433
0
    }
2434
2435
3.37k
    for( ;; )
2436
576k
    {
2437
576k
        const char *psz_line = TextGetLine( txt );
2438
576k
        if( !psz_line )
2439
75
            return VLC_EGENERIC;
2440
2441
576k
        unsigned h, m, s, f;
2442
576k
        char c;
2443
576k
        if( sscanf( psz_line, "%u:%u:%u%c%u ", &h, &m, &s, &c, &f ) != 5 ||
2444
3.39k
                ( c != ':' && c != ';' ) )
2445
572k
            continue;
2446
2447
        /* convert everything to seconds */
2448
3.31k
        int64_t i_frames = h * INT64_C(3600) + m * INT64_C(60) + s;
2449
2450
3.31k
        if( c == ';' && p_rate->b_drop_allowed ) /* dropframe */
2451
1
        {
2452
            /* convert to frame # to be accurate between inter drop drift
2453
             * of 18 frames see http://andrewduncan.net/timecodes/ */
2454
1
            const unsigned i_mins = h * 60 + m;
2455
1
            i_frames = i_frames * p_rate[+1].rate.num + f
2456
1
                    - (p_rate[+1].rate.den * 2 * (i_mins - i_mins % 10));
2457
1
        }
2458
3.31k
        else
2459
3.31k
        {
2460
            /* convert to frame # at 29.97 */
2461
3.31k
            i_frames = i_frames * framerates[3].rate.num / framerates[3].rate.den + f;
2462
3.31k
        }
2463
3.31k
        p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_sec(i_frames)*
2464
3.31k
                                         p_rate->rate.den / p_rate->rate.num;
2465
3.31k
        p_subtitle->i_stop = -1;
2466
2467
3.31k
        const char *psz_text = strchr( psz_line, '\t' );
2468
3.31k
        if( !psz_text && !(psz_text = strchr( psz_line, ' ' )) )
2469
14
            continue;
2470
2471
3.30k
        if ( psz_text[1] == '\0' )
2472
3
            continue;
2473
2474
3.29k
        p_subtitle->psz_text = strdup( psz_text + 1 );
2475
3.29k
        if( !p_subtitle->psz_text )
2476
0
            return VLC_ENOMEM;
2477
2478
3.29k
        break;
2479
3.29k
    }
2480
2481
3.29k
    return VLC_SUCCESS;
2482
3.37k
}
2483
2484
/* Tries to extract language from common filename patterns PATH/filename.LANG.ext
2485
   and PATH/Subs/x_LANG.ext (where 'x' is an integer). */
2486
static char *get_language_from_url(const char *urlstr)
2487
488
{
2488
488
    vlc_url_t url;
2489
488
    const char *filename = NULL;
2490
488
    char *ret = NULL;
2491
2492
488
    assert(urlstr != NULL);
2493
2494
488
    if (vlc_UrlParse(&url, urlstr) != 0)
2495
0
    {
2496
0
        vlc_UrlClean(&url);
2497
0
        return NULL;
2498
0
    }
2499
488
    if (url.psz_path != NULL)
2500
0
        filename = strrchr(url.psz_path, '/');
2501
488
    if (filename != NULL) {
2502
0
        filename++; // skip forward slash
2503
2504
0
        const char *ext = strrchr(filename, '.');
2505
2506
0
        if (ext != NULL) {
2507
            /* Get string between last two periods, hopefully the language. */
2508
0
            const char *lang = memrchr(filename, '.', ext - filename);
2509
2510
            /* Otherwise try string after last underscore. */
2511
0
            if (lang == NULL)
2512
0
                lang = memrchr(filename, '_', ext - filename);
2513
2514
0
            if (lang != NULL) {
2515
0
                lang++; // skip period or underscore
2516
0
                ret = strndup(lang, ext - lang);
2517
0
            }
2518
0
       }
2519
0
    }
2520
2521
488
    vlc_UrlClean(&url);
2522
488
    return ret;
2523
488
}
2524
2525
#ifdef ENABLE_TEST
2526
static void test_subtitle_ParseSubRipTimingValue(void)
2527
{
2528
    fprintf(stderr, "\n# %s:\n", __func__);
2529
2530
    struct test_timing_value
2531
    {
2532
        const char *str;
2533
        vlc_tick_t value;
2534
    };
2535
2536
    static const struct test_timing_value timing_values_success[] =
2537
    {
2538
        { "0:0:0,0",        VLC_TICK_0 },
2539
        { "0:0:0.0",        VLC_TICK_0 },
2540
        { "0:0:0",          VLC_TICK_0 },
2541
    };
2542
2543
    struct test_sized_timing_value
2544
    {
2545
        const char *str;
2546
        vlc_tick_t value;
2547
        size_t length;
2548
    };
2549
2550
    static const struct test_sized_timing_value sized_timing_values_success[] =
2551
    {
2552
        { "0:0:0,1",        VLC_TICK_0, strlen("0:0:0") },
2553
        { "0:0:0.1",        VLC_TICK_0, strlen("0:0:0") },
2554
    };
2555
2556
    static const char *timing_values_fail[] =
2557
    {
2558
        "0:0",
2559
        "0",
2560
    };
2561
2562
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2563
    {
2564
        fprintf(stderr, "Checking that %s parses into %" PRId64 "\n",
2565
                timing_values_success[i].str, timing_values_success[i].value);
2566
2567
        vlc_tick_t value;
2568
        int ret = subtitle_ParseSubRipTimingValue(&value,
2569
                timing_values_success[i].str,
2570
                strlen(timing_values_success[i].str));
2571
        fprintf(stderr, " -> %" PRId64 "\n", value);
2572
        assert(ret == VLC_SUCCESS);
2573
        assert(value == timing_values_success[i].value);
2574
    }
2575
2576
    for (size_t i=0; i<ARRAY_SIZE(sized_timing_values_success); ++i)
2577
    {
2578
        fprintf(stderr, "Checking that %s (length=%zu) parses into %" PRId64 "\n",
2579
                sized_timing_values_success[i].str,
2580
                sized_timing_values_success[i].length,
2581
                sized_timing_values_success[i].value);
2582
2583
        vlc_tick_t value;
2584
        int ret = subtitle_ParseSubRipTimingValue(&value,
2585
                sized_timing_values_success[i].str,
2586
                sized_timing_values_success[i].length);
2587
        assert(ret == VLC_SUCCESS);
2588
        fprintf(stderr, " -> %" PRId64 "\n", value);
2589
        assert(value == sized_timing_values_success[i].value);
2590
    }
2591
2592
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2593
    {
2594
        fprintf(stderr, "Checking that %s fails to parse\n",
2595
                timing_values_fail[i]);
2596
        vlc_tick_t value;
2597
        int ret = subtitle_ParseSubRipTimingValue(&value,
2598
                timing_values_fail[i], strlen(timing_values_fail[i]));
2599
        (void)value;
2600
        assert(ret != VLC_SUCCESS);
2601
    }
2602
2603
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2604
    {
2605
        fprintf(stderr, "Checking that %s fails to parse\n",
2606
                timing_values_fail[i]);
2607
        vlc_tick_t value;
2608
        int ret = subtitle_ParseSubRipTimingValue(&value,
2609
                timing_values_fail[i], strlen(timing_values_fail[i]));
2610
        (void)value;
2611
        assert(ret != VLC_SUCCESS);
2612
    }
2613
}
2614
2615
static void test_subtitle_ParseSubRipTiming(void)
2616
{
2617
    fprintf(stderr, "\n# %s:\n", __func__);
2618
2619
    struct test_timing_value
2620
    {
2621
        const char *str;
2622
        vlc_tick_t left;
2623
        vlc_tick_t right;
2624
    };
2625
2626
    static const struct test_timing_value timing_values_success[] =
2627
    {
2628
        { "0:0:0,0 --> 0:0:0,0",        VLC_TICK_0,     VLC_TICK_0 },
2629
        { "0:0:0.0 --> 0:0:0.0",        VLC_TICK_0,     VLC_TICK_0 },
2630
        { "0:0:0   --> 0:0:0",          VLC_TICK_0,     VLC_TICK_0 },
2631
    };
2632
2633
    static const char *timing_values_fail[] =
2634
    {
2635
        "0:0 --> 0:0",
2636
        "0:0 --> 0:0:0,0",
2637
        "0:0:0,0 --> 0:0",
2638
        "0 -> 0",
2639
    };
2640
2641
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2642
    {
2643
        fprintf(stderr, "Checking that %s parses into %" PRId64 " --> %" PRId64 "\n",
2644
                timing_values_success[i].str,
2645
                timing_values_success[i].left,
2646
                timing_values_success[i].right);
2647
2648
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2649
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_success[i].str);
2650
        fprintf(stderr, " -> %" PRId64 " --> %" PRId64 "\n", sub.i_start, sub.i_stop);
2651
        assert(ret == VLC_SUCCESS);
2652
        assert(sub.i_start == timing_values_success[i].left);
2653
        assert(sub.i_stop == timing_values_success[i].right);
2654
    }
2655
2656
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2657
    {
2658
        fprintf(stderr, "Checking that %s fails to parse\n",
2659
                timing_values_fail[i]);
2660
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2661
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_fail[i]);
2662
        (void)sub;
2663
        assert(ret != VLC_SUCCESS);
2664
    }
2665
}
2666
2667
/*
 * Entry point of the test program: runs the SubRip timing parser tests in
 * sequence and returns 0 once both have returned.
 */
int main(int argc, char **argv)
{
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    test_subtitle_ParseSubRipTimingValue();
    test_subtitle_ParseSubRipTiming();

    return 0;
}
2675
#endif