Coverage Report

Created: 2026-05-30 08:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/modules/demux/subtitle.c
Line
Count
Source
1
/*****************************************************************************
2
 * subtitle.c: Demux for subtitle text files.
3
 *****************************************************************************
4
 * Copyright (C) 1999-2007 VLC authors and VideoLAN
5
 * Copyright (C) 2023      Videolabs
6
 *
7
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Derk-Jan Hartman <hartman at videolan dot org>
9
 *          Jean-Baptiste Kempf <jb@videolan.org>
10
 *          Alexandre Janniaux <ajanni@videolabs.io>
11
 *
12
 * This program is free software; you can redistribute it and/or modify it
13
 * under the terms of the GNU Lesser General Public License as published by
14
 * the Free Software Foundation; either version 2.1 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
 * GNU Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public License
23
 * along with this program; if not, write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25
 *****************************************************************************/
26
27
/*****************************************************************************
28
 * Preamble
29
 *****************************************************************************/
30
31
#ifdef HAVE_CONFIG_H
32
# include "config.h"
33
#endif
34
35
#include <vlc_common.h>
36
#include <vlc_arrays.h>
37
#include <vlc_plugin.h>
38
#include <vlc_url.h>
39
40
#include <ctype.h>
41
#include <math.h>
42
#include <assert.h>
43
44
#include <vlc_demux.h>
45
#include <vlc_charset.h>
46
47
/*****************************************************************************
48
 * Module descriptor
49
 *****************************************************************************/
50
static int  Open ( vlc_object_t *p_this );
51
static void Close( vlc_object_t *p_this );
52
53
/* Long help texts for the "sub-type" and "sub-description" options. */
#define SUB_TYPE_LONGTEXT \
54
    N_("Force the subtitles format. Selecting \"auto\" means autodetection and should always work.")
55
#define SUB_DESCRIPTION_LONGTEXT \
56
    N_("Override the default track description.")
57
58
/* Values offered for the "sub-type" option. The module descriptor passes this
 * same array as both the value list and the label list.
 * NOTE(review): "scc" has an entry in sub_read_subtitle_function but is not
 * listed here -- confirm whether that omission is intentional. */
static const char *const ppsz_sub_type[] =
59
{
60
    "auto", "microdvd", "subrip", "subviewer", "ssa1",
61
    "ssa2-4", "ass", "vplayer", "sami", "dvdsubtitle", "mpl2",
62
    "aqt", "pjs", "mpsub", "jacosub", "psb", "realtext", "dks",
63
    "subviewer1", "sbv"
64
};
65
66
150
/* Module descriptor: a "demux" capability registered with score 0 and the
 * "subtitle" shortcut. Open() additionally refuses to run unless the module
 * was forced (it checks p_demux->obj.force). */
vlc_module_begin ()
67
75
    set_shortname( N_("Subtitles"))
68
75
    set_description( N_("Text subtitle parser") )
69
75
    set_capability( "demux", 0 )
70
75
    set_subcategory( SUBCAT_INPUT_DEMUX )
71
75
    /* "sub-type": forced format name, "auto" by default */
    add_string( "sub-type", "auto", N_("Subtitle format"),
72
75
                SUB_TYPE_LONGTEXT )
73
75
        change_string_list( ppsz_sub_type, ppsz_sub_type )
74
75
    /* "sub-description": optional override for the track description */
    add_string( "sub-description", NULL, N_("Subtitle description"),
75
75
                SUB_DESCRIPTION_LONGTEXT )
76
75
    set_callbacks( Open, Close )
77
78
75
    add_shortcut( "subtitle" )
79
75
vlc_module_end ()
80
81
/*****************************************************************************
82
 * Prototypes:
83
 *****************************************************************************/
84
/* Identifiers of the subtitle formats handled by this demuxer. Each value
 * other than SUB_TYPE_UNKNOWN has an entry in sub_read_subtitle_function. */
enum subtitle_type_e
85
{
86
    SUB_TYPE_UNKNOWN = -1, /* not detected; also used by the table sentinel */
87
    SUB_TYPE_MICRODVD,
88
    SUB_TYPE_SUBRIP,
89
    SUB_TYPE_SSA1,
90
    SUB_TYPE_SSA2_4,
91
    SUB_TYPE_ASS,
92
    SUB_TYPE_VPLAYER,
93
    SUB_TYPE_SAMI,
94
    SUB_TYPE_SUBVIEWER, /* SUBVIEWER 2 */
95
    SUB_TYPE_DVDSUBTITLE, /* Mplayer calls it subviewer2 */
96
    SUB_TYPE_MPL2,
97
    SUB_TYPE_AQT,
98
    SUB_TYPE_PJS,
99
    SUB_TYPE_MPSUB,
100
    SUB_TYPE_JACOSUB,
101
    SUB_TYPE_PSB,
102
    SUB_TYPE_RT,       /* "realtext" in the parser table */
103
    SUB_TYPE_DKS,
104
    SUB_TYPE_SUBVIEW1, /* SUBVIEWER 1 - mplayer calls it subrip09,
105
                         and Gnome subtitles SubViewer 1.0 */
106
    SUB_TYPE_SBV,
107
    SUB_TYPE_SCC,      /* Scenarist Closed Caption */
108
};
109
110
/* In-memory copy of the input, filled by TextLoad() and released by
 * TextUnload(); it is handed to every Parse*() function. */
typedef struct
111
{
112
    size_t  i_line_count; /* presumably the number of entries in line[] -- TODO confirm in TextLoad */
113
    size_t  i_line;       /* presumably the read cursor used by the parsers -- TODO confirm */
114
    char    **line;       /* presumably one heap string per input line -- TODO confirm */
115
} text_t;
116
117
static int  TextLoad( text_t *, stream_t *s );
118
static void TextUnload( text_t * );
119
120
/* One parsed subtitle entry. */
typedef struct
121
{
122
    vlc_tick_t i_start; /* start date of the entry */
123
    vlc_tick_t i_stop;  /* stop date; the last entry's value becomes the demux length */
124
125
    char    *psz_text;  /* heap string freed by Close(); may be NULL */
126
} subtitle_t;
127
128
/* Format-level state shared between Open() and the Parse*() functions. */
typedef struct
129
{
130
    enum subtitle_type_e i_type;     /* forced or autodetected format */
131
    vlc_tick_t  i_microsecperframe;  /* frame duration; Open() defaults it to 40 ms and
                                        "sub-original-fps" can override it */
132
133
    char        *psz_header; /* SSA; Open() copies it into the ES extra data, Close() frees it */
134
    char        *psz_lang;   /* when set, Open() moves it into fmt.psz_language */
135
136
    /* Parser state for JacoSub; Open() only clears b_inited */
    struct
137
    {
138
        bool b_inited;
139
140
        int i_comment;
141
        int i_time_resolution;
142
        int i_time_shift;
143
    } jss;
144
145
    /* Parser state for MPSub; Open() only clears b_inited */
    struct
146
    {
147
        bool  b_inited;
148
149
        float f_total;
150
        int i_factor;
151
    } mpsub;
152
153
    /* Parser state for SAMI; Open() resets psz_start to NULL */
    struct
154
    {
155
        const char *psz_start;
156
    } sami;
157
158
} subs_properties_t;
159
160
/* Private state of the subtitle demuxer, allocated in Open() and released
 * in Close(). */
typedef struct
161
{
162
    es_out_id_t *es;            /* ES returned by es_out_Add() in Open() */
163
    bool        b_slave;        /* initialised to false in Open() */
164
    bool        b_first_time;   /* set in Open() and again on DEMUX_SET_TIME */
165
    bool        b_sorted;       /* initialised to false in Open() */
166
167
    double      f_rate;             /* initialised to 1.0; scales start dates in ResetCurrentIndex() */
168
    vlc_tick_t  i_next_demux_date;  /* reported by DEMUX_GET_TIME, set by DEMUX_SET_TIME */
169
170
    struct
171
    {
172
        subtitle_t *p_array;    /* parsed entries, grown by 500 at a time in Open() */
173
        size_t      i_count;    /* number of valid entries in p_array */
174
        size_t      i_current;  /* index of the current entry */
175
    } subtitles;
176
177
    vlc_tick_t  i_length;       /* i_stop of the last entry, or 0 when there is none */
178
179
    /* Format-level properties shared with the parsers */
180
    subs_properties_t props;
181
182
    /* Converts one entry to an output block: ToTextBlock or ToEIA608Block */
    block_t * (*pf_convert)( const subtitle_t * );
183
} demux_sys_t;
184
185
static int  ParseMicroDvd   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
186
static int  ParseSubRip     ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
187
static int  ParseSubViewer  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
188
static int  ParseSSA        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
189
static int  ParseVplayer    ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
190
static int  ParseSami       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
191
static int  ParseDVDSubtitle( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
192
static int  ParseMPL2       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
193
static int  ParseAQT        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
194
static int  ParsePJS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
195
static int  ParseMPSub      ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
196
static int  ParseJSS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
197
static int  ParsePSB        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
198
static int  ParseRealText   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
199
static int  ParseDKS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
200
static int  ParseSubViewer1 ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
201
static int  ParseCommonSBV  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
202
static int  ParseSCC        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
203
204
/* Parser table. Open() searches it twice: by psz_type_name to resolve a forced
 * "sub-type" value, and by i_type to find the display name and the parser of
 * the selected format. The last entry (NULL name, SUB_TYPE_UNKNOWN, NULL
 * parser) is the end-of-table sentinel. */
static const struct
205
{
206
    const char *psz_type_name; /* value accepted by the "sub-type" option */
207
    int  i_type;               /* enum subtitle_type_e value */
208
    const char *psz_name;      /* name printed in the "detected %s format" message */
209
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
210
} sub_read_subtitle_function [] =
211
{
212
    { "microdvd",   SUB_TYPE_MICRODVD,    "MicroDVD",    ParseMicroDvd },
213
    { "subrip",     SUB_TYPE_SUBRIP,      "SubRIP",      ParseSubRip },
214
    { "subviewer",  SUB_TYPE_SUBVIEWER,   "SubViewer",   ParseSubViewer },
215
    { "ssa1",       SUB_TYPE_SSA1,        "SSA-1",       ParseSSA },
216
    { "ssa2-4",     SUB_TYPE_SSA2_4,      "SSA-2/3/4",   ParseSSA },
217
    { "ass",        SUB_TYPE_ASS,         "SSA/ASS",     ParseSSA },
218
    { "vplayer",    SUB_TYPE_VPLAYER,     "VPlayer",     ParseVplayer },
219
    { "sami",       SUB_TYPE_SAMI,        "SAMI",        ParseSami },
220
    { "dvdsubtitle",SUB_TYPE_DVDSUBTITLE, "DVDSubtitle", ParseDVDSubtitle },
221
    { "mpl2",       SUB_TYPE_MPL2,        "MPL2",        ParseMPL2 },
222
    { "aqt",        SUB_TYPE_AQT,         "AQTitle",     ParseAQT },
223
    { "pjs",        SUB_TYPE_PJS,         "PhoenixSub",  ParsePJS },
224
    { "mpsub",      SUB_TYPE_MPSUB,       "MPSub",       ParseMPSub },
225
    { "jacosub",    SUB_TYPE_JACOSUB,     "JacoSub",     ParseJSS },
226
    { "psb",        SUB_TYPE_PSB,         "PowerDivx",   ParsePSB },
227
    { "realtext",   SUB_TYPE_RT,          "RealText",    ParseRealText },
228
    { "dks",        SUB_TYPE_DKS,         "DKS",         ParseDKS },
229
    { "subviewer1", SUB_TYPE_SUBVIEW1,    "Subviewer 1", ParseSubViewer1 },
230
    { "sbv",        SUB_TYPE_SBV,         "SBV",         ParseCommonSBV },
231
    { "scc",        SUB_TYPE_SCC,         "SCC",         ParseSCC },
232
    { NULL,         SUB_TYPE_UNKNOWN,     "Unknown",     NULL }
233
};
234
/* When adding support for more formats, be sure to add their file extension
235
 * to src/input/subtitles.c to enable auto-detection.
236
 */
237
238
static int Demux( demux_t * );
239
static int Control( demux_t *, int, va_list );
240
241
static void Fix( demux_t * );
242
static char *get_language_from_url(const char *);
243
244
/* Converts an hours / minutes / seconds triple into a vlc_tick_t.
 * The sum is computed in 64 bits so that large hour counts do not overflow
 * an int before the conversion. */
static vlc_tick_t vlc_tick_from_HMS( int h, int m, int s )
{
    const int64_t i_hours_sec   = h * INT64_C(3600);
    const int64_t i_minutes_sec = m * INT64_C(60);

    return vlc_tick_from_sec( i_hours_sec + i_minutes_sec + s );
}
248
249
/*****************************************************************************
250
 * Decoder format output function
251
 *****************************************************************************/
252
253
static block_t *ToTextBlock( const subtitle_t *p_subtitle )
254
528k
{
255
528k
    if ( p_subtitle->psz_text == NULL )
256
2.30k
        return NULL;
257
258
525k
    block_t *p_block;
259
525k
    size_t i_len = strlen( p_subtitle->psz_text ) + 1;
260
261
525k
    if( i_len <= 1 || !(p_block = block_Alloc( i_len )) )
262
48.8k
        return NULL;
263
264
477k
    memcpy( p_block->p_buffer, p_subtitle->psz_text, i_len );
265
266
477k
    return p_block;
267
525k
}
268
269
static block_t *ToEIA608Block( const subtitle_t *p_subtitle )
270
6.21k
{
271
6.21k
    if ( p_subtitle->psz_text == NULL )
272
0
        return NULL;
273
274
6.21k
    block_t *p_block;
275
6.21k
    const size_t i_len = strlen( p_subtitle->psz_text );
276
6.21k
    const size_t i_block = (1 + i_len / 5) * 3;
277
278
6.21k
    if( i_len < 4 || !(p_block = block_Alloc( i_block )) )
279
518
        return NULL;
280
281
5.69k
    p_block->i_buffer = 0;
282
283
5.69k
    char *saveptr = NULL;
284
5.69k
    char *psz_tok = strtok_r( p_subtitle->psz_text, " ", &saveptr );
285
5.69k
    unsigned a, b;
286
76.2k
    while( psz_tok &&
287
74.0k
           sscanf( psz_tok, "%2x%2x", &a, &b ) == 2 &&
288
70.5k
           i_block - p_block->i_buffer >= 3 )
289
70.5k
    {
290
70.5k
        uint8_t *p_data = &p_block->p_buffer[p_block->i_buffer];
291
70.5k
        p_data[0] = 0xFC;
292
70.5k
        p_data[1] = a;
293
70.5k
        p_data[2] = b;
294
70.5k
        p_block->i_buffer += 3;
295
70.5k
        psz_tok = strtok_r( NULL, " ", &saveptr );
296
70.5k
    }
297
298
5.69k
    return p_block;
299
6.21k
}
300
301
/*****************************************************************************
302
 * Module initializer
303
 *****************************************************************************/
304
/**
 * Opens the subtitle demuxer.
 *
 * Steps, in order:
 *  - refuse to run unless the module was forced;
 *  - resolve the format from the "sub-type" variable, or probe up to 256
 *    lines of the first peeked bytes (decoded from UTF-16 when a UTF-16 BOM
 *    is present, skipping a UTF-8 BOM otherwise);
 *  - load the whole stream with TextLoad() and run the format parser until it
 *    reports the end of input;
 *  - register the SPU elementary stream (SSA, CEA-608 or plain text).
 *
 * The output converter is chosen from the final format, so a forced "scc"
 * type produces CEA-608 triplets exactly like an autodetected one.
 *
 * \param p_this the demux object
 * \return VLC_SUCCESS, VLC_ENOMEM on allocation failure, or VLC_EGENERIC when
 *         the module is not forced, the stream is too short, the format is
 *         not recognised, or the ES cannot be created
 */
static int Open ( vlc_object_t *p_this )
{
    demux_t        *p_demux = (demux_t*)p_this;
    demux_sys_t    *p_sys;
    es_format_t    fmt;
    float          f_fps;
    char           *psz_type;
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t ) = NULL;

    if( !p_demux->obj.force )
    {
        msg_Dbg( p_demux, "subtitle demux discarded" );
        return VLC_EGENERIC;
    }

    p_demux->pf_demux = Demux;
    p_demux->pf_control = Control;
    p_demux->p_sys = p_sys = malloc( sizeof( demux_sys_t ) );
    if( p_sys == NULL )
        return VLC_ENOMEM;

    p_sys->b_slave = false;
    p_sys->b_first_time = true;
    p_sys->b_sorted = false;
    p_sys->i_next_demux_date = 0;
    p_sys->f_rate = 1.0;

    p_sys->pf_convert = ToTextBlock;

    p_sys->subtitles.i_current= 0;
    p_sys->subtitles.i_count  = 0;
    p_sys->subtitles.p_array  = NULL;

    p_sys->props.psz_header         = NULL;
    p_sys->props.psz_lang           = NULL;
    p_sys->props.i_microsecperframe = VLC_TICK_FROM_MS(40);
    p_sys->props.jss.b_inited       = false;
    p_sys->props.mpsub.b_inited     = false;
    p_sys->props.sami.psz_start     = NULL;

    /* Get the FPS */
    f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" );
    if( f_fps >= 1.f )
    {
        p_sys->props.i_microsecperframe = llroundf( (float)CLOCK_FREQ / f_fps );
        msg_Dbg( p_demux, "Override subtitle fps %f", (double) f_fps );
    }

    /* Get or probe the type */
    p_sys->props.i_type = SUB_TYPE_UNKNOWN;
    psz_type = var_CreateGetString( p_demux, "sub-type" );
    if( psz_type && *psz_type )
    {
        for( int i = 0; ; i++ )
        {
            if( sub_read_subtitle_function[i].psz_type_name == NULL )
                break;

            if( !strcmp( sub_read_subtitle_function[i].psz_type_name,
                         psz_type ) )
            {
                p_sys->props.i_type = sub_read_subtitle_function[i].i_type;
                break;
            }
        }
    }
    free( psz_type );

#ifndef NDEBUG
    const uint64_t i_start_pos = vlc_stream_Tell( p_demux->s );
#endif

    ssize_t i_peek;
    const uint8_t *p_peek;
    if( vlc_stream_Peek( p_demux->s, &p_peek, 16 ) < 16 )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    enum
    {
        UTF8BOM,
        UTF16LE,
        UTF16BE,
        NOBOM,
    } e_bom = NOBOM;
    const char *psz_bom = NULL;

    i_peek = 4096;
    /* Detect Unicode while skipping the UTF-8 Byte Order Mark */
    if( !memcmp( p_peek, "\xEF\xBB\xBF", 3 ) )
    {
        e_bom = UTF8BOM;
        psz_bom = "UTF-8";
    }
    else if( !memcmp( p_peek, "\xFF\xFE", 2 ) )
    {
        e_bom = UTF16LE;
        psz_bom = "UTF-16LE";
        i_peek *= 2; /* two bytes per code unit: peek twice as much */
    }
    else if( !memcmp( p_peek, "\xFE\xFF", 2 ) )
    {
        e_bom = UTF16BE;
        psz_bom = "UTF-16BE";
        i_peek *= 2;
    }

    if( e_bom != NOBOM )
        msg_Dbg( p_demux, "detected %s Byte Order Mark", psz_bom );

    i_peek = vlc_stream_Peek( p_demux->s, &p_peek, i_peek );
    if( unlikely(i_peek < 16) )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Probing reads from a memory stream over the peeked bytes so that the
     * position of the real stream is left untouched */
    stream_t *p_probestream = NULL;
    if( e_bom != UTF8BOM && e_bom != NOBOM )
    {
        if( i_peek > 16 )
        {
            char *p_outbuf = FromCharset( psz_bom, p_peek, i_peek );
            if( p_outbuf != NULL )
            {
                p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *)p_outbuf,
                                                      strlen( p_outbuf ),
                                                      false ); /* free p_outbuf on release */
                /* No stream means nobody will ever release the buffer */
                if( p_probestream == NULL )
                    free( p_outbuf );
            }
        }
    }
    else
    {
        const size_t i_skip = (e_bom == UTF8BOM) ? 3 : 0;
        p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) &p_peek[i_skip],
                                              i_peek - i_skip, true );
    }

    if( p_probestream == NULL )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Probe if unknown type */
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
    {
        int     i_try;
        char    *s = NULL;

        msg_Dbg( p_demux, "autodetecting subtitle format" );
        for( i_try = 0; i_try < 256; i_try++ )
        {
            int i_dummy;
            char p_dummy;

            if( (s = vlc_stream_ReadLine( p_probestream ) ) == NULL )
                break;

            if( strcasestr( s, "<SAMI>" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SAMI;
                break;
            }
            else if( sscanf( s, "{%d}{%d}", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "{%d}{}", &i_dummy ) == 1)
            {
                p_sys->props.i_type = SUB_TYPE_MICRODVD;
                break;
            }
            else if( sscanf( s, "%d:%d:%d,%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d,%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 6 )
            {
                p_sys->props.i_type = SUB_TYPE_SUBRIP;
                break;
            }
            else if( !strncasecmp( s, "!: This is a Sub Station Alpha v1", 33 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA1;
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00+", 18 ) )
            {
                p_sys->props.i_type = SUB_TYPE_ASS;
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00", 17 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
                break;
            }
            else if( !strncasecmp( s, "Dialogue: Marked", 16  ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
                break;
            }
            else if( !strncasecmp( s, "Dialogue:", 9  ) )
            {
                p_sys->props.i_type = SUB_TYPE_ASS;
                break;
            }
            else if( strcasestr( s, "[INFORMATION]" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SUBVIEWER; /* I hope this will work */
                break;
            }
            else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy ) == 7 ||
                     sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2)
            {
                p_sys->props.i_type = SUB_TYPE_JACOSUB;
                break;
            }
            else if( sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy ) == 8 )
            {
                p_sys->props.i_type = SUB_TYPE_SBV;
                break;
            }
            else if( sscanf( s, "%d:%d:%d:", &i_dummy, &i_dummy, &i_dummy ) == 3 ||
                     sscanf( s, "%d:%d:%d ", &i_dummy, &i_dummy, &i_dummy ) == 3 )
            {
                p_sys->props.i_type = SUB_TYPE_VPLAYER;
                break;
            }
            else if( sscanf( s, "{T %d:%d:%d:%d", &i_dummy, &i_dummy,
                             &i_dummy, &i_dummy ) == 4 )
            {
                p_sys->props.i_type = SUB_TYPE_DVDSUBTITLE;
                break;
            }
            else if( sscanf( s, "[%d:%d:%d]%c",
                     &i_dummy, &i_dummy, &i_dummy, &p_dummy ) == 4 )
            {
                p_sys->props.i_type = SUB_TYPE_DKS;
                break;
            }
            else if( strstr( s, "*** START SCRIPT" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SUBVIEW1;
                break;
            }
            else if( sscanf( s, "[%d][%d]", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "[%d][]", &i_dummy ) == 1)
            {
                p_sys->props.i_type = SUB_TYPE_MPL2;
                break;
            }
            else if( sscanf (s, "FORMAT=%d", &i_dummy) == 1 ||
                     ( sscanf (s, "FORMAT=TIM%c", &p_dummy) == 1
                       && p_dummy =='E' ) )
            {
                p_sys->props.i_type = SUB_TYPE_MPSUB;
                break;
            }
            else if( sscanf( s, "-->> %d", &i_dummy) == 1 )
            {
                p_sys->props.i_type = SUB_TYPE_AQT;
                break;
            }
            else if( sscanf( s, "%d,%d,", &i_dummy, &i_dummy ) == 2 )
            {
                p_sys->props.i_type = SUB_TYPE_PJS;
                break;
            }
            else if( sscanf( s, "{%d:%d:%d}",
                                &i_dummy, &i_dummy, &i_dummy ) == 3 )
            {
                p_sys->props.i_type = SUB_TYPE_PSB;
                break;
            }
            else if( strcasestr( s, "<time" ) )
            {
                p_sys->props.i_type = SUB_TYPE_RT;
                break;
            }
            else if( !strncasecmp( s, "WEBVTT",6 ) )
            {
                /* FAIL: stop probing and leave the type unknown */
                break;
            }
            else if( !strncasecmp( s, "Scenarist_SCC V1.0", 18 ) )
            {
                /* The matching converter is selected below, once the final
                 * type is known */
                p_sys->props.i_type = SUB_TYPE_SCC;
                break;
            }

            free( s );
            s = NULL;
        }

        /* Releases the line that triggered a break, if any */
        free( s );
    }

    vlc_stream_Delete( p_probestream );

    /* Quit on unknown subtitles */
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
    {
#ifndef NDEBUG
        /* Ensure it will work with non seekable streams */
        assert( i_start_pos == vlc_stream_Tell( p_demux->s ) );
#endif
        msg_Warn( p_demux, "failed to recognize subtitle type" );
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Find the parser; the search stops at the table sentinel */
    for( size_t i = 0; sub_read_subtitle_function[i].psz_type_name != NULL; i++ )
    {
        if( sub_read_subtitle_function[i].i_type == p_sys->props.i_type )
        {
            msg_Dbg( p_demux, "detected %s format",
                     sub_read_subtitle_function[i].psz_name );
            pf_read = sub_read_subtitle_function[i].pf_read;
            break;
        }
    }
    if( pf_read == NULL )
    {
        /* Not expected: every known type has a table entry */
        msg_Warn( p_demux, "failed to recognize subtitle type" );
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* SCC carries hex-encoded caption bytes, whether the type was forced
     * through "sub-type" or autodetected */
    if( p_sys->props.i_type == SUB_TYPE_SCC )
        p_sys->pf_convert = ToEIA608Block;

    msg_Dbg( p_demux, "loading all subtitles..." );

    if( e_bom == UTF8BOM && /* skip BOM */
        vlc_stream_Read( p_demux->s, NULL, 3 ) != 3 )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }

    /* Load the whole file */
    /* NOTE(review): the return value of TextLoad() is not checked here --
     * confirm that txtlines is always left in a state the parsers and
     * TextUnload() can handle. */
    text_t txtlines;
    TextLoad( &txtlines, p_demux->s );

    /* Parse it; the bound keeps sizeof(subtitle_t) * (i_max + 500) from
     * overflowing size_t */
    for( size_t i_max = 0; i_max <= SIZE_MAX / sizeof(subtitle_t) - 500; )
    {
        if( p_sys->subtitles.i_count >= i_max )
        {
            i_max += 500;
            subtitle_t *p_realloc = realloc( p_sys->subtitles.p_array, sizeof(subtitle_t) * i_max );
            if( p_realloc == NULL )
            {
                TextUnload( &txtlines );
                Close( p_this );
                return VLC_ENOMEM;
            }
            p_sys->subtitles.p_array = p_realloc;
        }

        /* A non-zero return from the parser ends the loop */
        if( pf_read( VLC_OBJECT(p_demux), &p_sys->props, &txtlines,
                     &p_sys->subtitles.p_array[p_sys->subtitles.i_count],
                     p_sys->subtitles.i_count ) )
            break;

        p_sys->subtitles.i_count++;
    }
    /* Unload */
    TextUnload( &txtlines );

    msg_Dbg(p_demux, "loaded %zu subtitles", p_sys->subtitles.i_count );

    /* *** add subtitle ES *** */
    if( p_sys->props.i_type == SUB_TYPE_SSA1 ||
             p_sys->props.i_type == SUB_TYPE_SSA2_4 ||
             p_sys->props.i_type == SUB_TYPE_ASS )
    {
        Fix( p_demux );
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SSA );
    }
    else if( p_sys->props.i_type == SUB_TYPE_SCC )
    {
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_CEA608 );
        fmt.subs.cc.i_reorder_depth = -1;
    }
    else
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SUBT );

    p_sys->subtitles.i_current = 0;
    p_sys->i_length = 0;
    if( p_sys->subtitles.i_count > 0 )
        p_sys->i_length = p_sys->subtitles.p_array[p_sys->subtitles.i_count-1].i_stop;

    if( p_sys->props.psz_lang )
    {
        /* Ownership moves to fmt; es_format_Clean() releases it */
        fmt.psz_language = p_sys->props.psz_lang;
        p_sys->props.psz_lang = NULL;
        msg_Dbg( p_demux, "detected language '%s' of subtitle: %s", fmt.psz_language,
                 p_demux->psz_location );
    }
    else
    {
        fmt.psz_language = get_language_from_url( p_demux->psz_url );
        if( fmt.psz_language )
            msg_Dbg( p_demux, "selected '%s' as possible filename language substring of subtitle: %s",
                     fmt.psz_language, p_demux->psz_location );
    }

    char *psz_description = var_InheritString( p_demux, "sub-description" );
    if( psz_description && *psz_description )
        fmt.psz_description = psz_description;
    else
        free( psz_description );
    if( p_sys->props.psz_header != NULL &&
       (fmt.p_extra = strdup( p_sys->props.psz_header )) )
    {
        /* The extra data includes the terminating NUL */
        fmt.i_extra = strlen( p_sys->props.psz_header ) + 1;
    }

    fmt.i_id = 0;
    p_sys->es = es_out_Add( p_demux->out, &fmt );
    es_format_Clean( &fmt );
    if( p_sys->es == NULL )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
744
745
/*****************************************************************************
746
 * Close: Close subtitle demux
747
 *****************************************************************************/
748
/* Releases everything owned by the demuxer: the text of every parsed
 * subtitle, the subtitle array, the format header, the language string and
 * the private state itself.
 *
 * Open() also calls this on its own error paths, some of which run before
 * props.psz_lang has been handed over to the ES format; the string is
 * therefore released here too. Open() resets the pointer to NULL after the
 * hand-over, so it is never freed twice. */
static void Close( vlc_object_t *p_this )
{
    demux_t *p_demux = (demux_t*)p_this;
    demux_sys_t *p_sys = p_demux->p_sys;

    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
        free( p_sys->subtitles.p_array[i].psz_text );
    free( p_sys->subtitles.p_array );
    free( p_sys->props.psz_header );
    free( p_sys->props.psz_lang );

    free( p_sys );
}
760
761
static void
762
ResetCurrentIndex( demux_t *p_demux )
763
0
{
764
0
    demux_sys_t *p_sys = p_demux->p_sys;
765
0
    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
766
0
    {
767
0
        if( p_sys->subtitles.p_array[i].i_start * p_sys->f_rate >
768
0
            p_sys->i_next_demux_date && i > 0 )
769
0
            break;
770
0
        p_sys->subtitles.i_current = i;
771
0
    }
772
0
}
773
774
/*****************************************************************************
775
 * Control:
776
 *****************************************************************************/
777
/*
 * Answers demuxer control queries.
 *
 * Returns VLC_SUCCESS for the queries handled here, the result of
 * demux_vaControlHelper() for the pause/pace queries, and VLC_EGENERIC for
 * anything that is not supported.
 */
static int Control( demux_t *p_demux, int i_query, va_list args )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    switch( i_query )
    {
        case DEMUX_CAN_SEEK:
        case DEMUX_CAN_CONTROL_RATE:
        {
            bool *pb_answer = va_arg( args, bool * );
            *pb_answer = true;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_LENGTH:
        {
            vlc_tick_t *pi_length = va_arg( args, vlc_tick_t * );
            *pi_length = p_sys->i_length;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_TIME:
        {
            vlc_tick_t *pi_time = va_arg( args, vlc_tick_t * );
            *pi_time = p_sys->i_next_demux_date;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_TIME:
            /* Seeking: the next Demux() call must emit a PCR first */
            p_sys->b_first_time = true;
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t );
            ResetCurrentIndex( p_demux );
            return VLC_SUCCESS;

        case DEMUX_GET_POSITION:
        {
            double *pf_position = va_arg( args, double * );

            if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
                *pf_position = 1.0;
            else if( p_sys->subtitles.i_count > 0 && p_sys->i_length )
                *pf_position = (double)p_sys->i_next_demux_date /
                               (double)p_sys->i_length;
            else
                *pf_position = 0.0;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_POSITION:
        {
            const double f_position = va_arg( args, double );

            /* Without subtitles or a known length there is nothing to seek in */
            if( !p_sys->subtitles.i_count || !p_sys->i_length )
                return VLC_EGENERIC;

            vlc_tick_t i_target = VLC_TICK_0 + f_position * p_sys->i_length;
            return demux_Control( p_demux, DEMUX_SET_TIME, i_target );
        }

        case DEMUX_SET_RATE:
            p_sys->f_rate = *va_arg( args, float * );
            ResetCurrentIndex( p_demux );
            return VLC_SUCCESS;

        case DEMUX_SET_NEXT_DEMUX_TIME:
            p_sys->b_slave = true;
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t ) - VLC_TICK_0;
            return VLC_SUCCESS;

        case DEMUX_CAN_PAUSE:
        case DEMUX_SET_PAUSE_STATE:
        case DEMUX_CAN_CONTROL_PACE:
            return demux_vaControlHelper( p_demux->s, 0, -1, 0, 1, i_query, args );

        /* Explicitly unsupported queries, listed for reference */
        case DEMUX_GET_PTS_DELAY:
        case DEMUX_GET_FPS:
        case DEMUX_GET_META:
        case DEMUX_GET_ATTACHMENTS:
        case DEMUX_GET_TITLE_INFO:
        case DEMUX_HAS_UNSUPPORTED_META:
        case DEMUX_CAN_RECORD:
        default:
            return VLC_EGENERIC;
    }
}
860
861
/*****************************************************************************
862
 * Demux: Send subtitle to decoder
863
 *****************************************************************************/
864
/*
 * Sends every subtitle whose rate-scaled start time is not after the current
 * demux date, then (when not driven as a slave) updates the PCR and advances
 * the demux date by 125 ms.
 *
 * Returns VLC_DEMUXER_EOF once all subtitles have been consumed,
 * VLC_DEMUXER_SUCCESS otherwise.
 */
static int Demux( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    if ( !p_sys->b_slave )
        Fix( p_demux );

    const vlc_tick_t i_barrier = p_sys->i_next_demux_date;

    for( ; p_sys->subtitles.i_current < p_sys->subtitles.i_count;
           p_sys->subtitles.i_current++ )
    {
        const subtitle_t *p_subtitle =
            &p_sys->subtitles.p_array[p_sys->subtitles.i_current];

        /* Stop at the first entry that starts after the barrier */
        if( !( ( p_subtitle->i_start * p_sys->f_rate ) <= i_barrier ) )
            break;

        if ( !p_sys->b_slave && p_sys->b_first_time )
        {
            es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
            p_sys->b_first_time = false;
        }

        /* Entries with a negative start time are skipped, not sent */
        if( p_subtitle->i_start < 0 )
            continue;

        block_t *p_block = p_sys->pf_convert( p_subtitle );
        if( p_block == NULL )
            continue;

        p_block->i_pts = VLC_TICK_0 + p_subtitle->i_start * p_sys->f_rate;
        p_block->i_dts = p_block->i_pts;

        /* A length is only set when the stop time is valid and not before start */
        if( p_subtitle->i_stop != VLC_TICK_INVALID &&
            p_subtitle->i_stop >= p_subtitle->i_start )
        {
            p_block->i_length =
                (p_subtitle->i_stop - p_subtitle->i_start) * p_sys->f_rate;
        }

        es_out_Send( p_demux->out, p_sys->es, p_block );
    }

    if ( !p_sys->b_slave )
    {
        es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
        p_sys->i_next_demux_date += VLC_TICK_FROM_MS(125);
    }

    return ( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
         ? VLC_DEMUXER_EOF : VLC_DEMUXER_SUCCESS;
}
913
914
915
static int subtitle_cmp( const void *first, const void *second )
916
4.64M
{
917
4.64M
    vlc_tick_t result = ((subtitle_t *)(first))->i_start - ((subtitle_t *)(second))->i_start;
918
    /* Return -1, 0 ,1, and not directly subtraction
919
     * as result can be > INT_MAX */
920
4.64M
    return result == 0 ? 0 : result > 0 ? 1 : -1;
921
4.64M
}
922
/*****************************************************************************
923
 * Fix: fix time stamp and order of subtitle
924
 *****************************************************************************/
925
/*
 * Sorts the subtitle array by start time the first time it is called;
 * later calls return immediately thanks to the b_sorted flag.
 */
static void Fix( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    if( !p_sys->b_sorted )
    {
        /* *** fix order (to be sure...) *** */
        qsort( p_sys->subtitles.p_array,
               p_sys->subtitles.i_count,
               sizeof( p_sys->subtitles.p_array[0] ),
               subtitle_cmp );
        p_sys->b_sorted = true;
    }
}
935
936
/*
 * Reads the whole stream line by line into txt.
 *
 * On success txt->line holds txt->i_line_count heap-allocated lines and
 * txt->i_line is 0.
 *
 * Returns VLC_SUCCESS, VLC_ENOMEM on allocation failure, or VLC_EGENERIC when
 * the stream yields no line at all. On every failure path all memory acquired
 * here is released and txt is left empty (line == NULL, i_line_count == 0),
 * so a later TextUnload() is a harmless no-op.
 */
static int TextLoad( text_t *txt, stream_t *s )
{
    size_t i_line_max = 500;

    /* init txt */
    txt->i_line_count   = 0;
    txt->i_line         = 0;
    txt->line           = calloc( i_line_max, sizeof( char * ) );
    if( !txt->line )
        return VLC_ENOMEM;

    /* load the complete file */
    for( ;; )
    {
        char *psz = vlc_stream_ReadLine( s );

        if( psz == NULL )
            break;

        txt->line[txt->i_line_count] = psz;
        if( txt->i_line_count + 1 >= i_line_max )
        {
            i_line_max += 100;
            char **p_realloc = realloc( txt->line, i_line_max * sizeof( char * ) );
            if( p_realloc == NULL )
            {
                /* The old array is still valid: release the lines read so
                 * far, including the one just stored but not yet counted. */
                for( size_t i = 0; i <= txt->i_line_count; i++ )
                    free( txt->line[i] );
                free( txt->line );
                txt->line = NULL;
                txt->i_line_count = 0;
                return VLC_ENOMEM;
            }
            txt->line = p_realloc;
        }
        txt->i_line_count++;
    }

    if( txt->i_line_count == 0 )
    {
        free( txt->line );
        txt->line = NULL; /* do not leave a dangling pointer behind */
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
976
/*
 * Frees every loaded line and the line array (only when at least one line
 * is loaded), then resets the cursor and the line count to zero.
 */
static void TextUnload( text_t *txt )
{
    if( txt->i_line_count != 0 )
    {
        size_t i_left = txt->i_line_count;
        while( i_left > 0 )
        {
            i_left--;
            free( txt->line[i_left] );
        }
        free( txt->line );
    }

    txt->i_line_count = 0;
    txt->i_line       = 0;
}
987
988
/*
 * Returns the line under the cursor and moves the cursor forward, or NULL
 * (cursor untouched) once every line has been consumed. The returned string
 * is the one stored in txt->line.
 */
static char *TextGetLine( text_t *txt )
{
    return ( txt->i_line < txt->i_line_count )
         ? txt->line[txt->i_line++]
         : NULL;
}
995
/*
 * Moves the cursor one line back; does nothing when it is already on the
 * first line.
 */
static void TextPreviousLine( text_t *txt )
{
    txt->i_line -= ( txt->i_line > 0 ) ? 1 : 0;
}
1000
1001
/*****************************************************************************
1002
 * Specific Subtitle function
1003
 *****************************************************************************/
1004
/* ParseMicroDvd:
1005
 *  Format:
1006
 *      {n1}{n2}Line1|Line2|Line3....
1007
 *  where n1 and n2 are the video frame number (n2 can be empty)
1008
 */
1009
static int ParseMicroDvd( vlc_object_t *p_obj, subs_properties_t *p_props,
1010
                          text_t *txt, subtitle_t *p_subtitle,
1011
                          size_t i_idx )
1012
1.41k
{
1013
1.41k
    VLC_UNUSED( i_idx );
1014
1.41k
    char *psz_text;
1015
1.41k
    int  i_start;
1016
1.41k
    int  i_stop;
1017
1.41k
    int  i;
1018
1019
1.41k
    for( ;; )
1020
443k
    {
1021
443k
        const char *s = TextGetLine( txt );
1022
443k
        if( !s )
1023
246
            return VLC_EGENERIC;
1024
1025
443k
        psz_text = malloc( strlen(s) + 1 );
1026
443k
        if( !psz_text )
1027
0
            return VLC_ENOMEM;
1028
1029
443k
        i_start = 0;
1030
443k
        i_stop  = -1;
1031
443k
        if( sscanf( s, "{%d}{}%[^\r\n]", &i_start, psz_text ) == 2 ||
1032
442k
            sscanf( s, "{%d}{%d}%[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1033
1.99k
        {
1034
1.99k
            if( i_start != 1 || i_stop != 1 )
1035
1.16k
                break;
1036
1037
            /* We found a possible setting of the framerate "{1}{1}23.976" */
1038
            /* Check if it's usable, and if the sub-original-fps is not set */
1039
827
            float f_fps = vlc_strtof_c( psz_text, NULL );
1040
827
            if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1041
238
                p_props->i_microsecperframe = llroundf((float)CLOCK_FREQ / f_fps);
1042
827
        }
1043
442k
        free( psz_text );
1044
442k
    }
1045
1046
    /* replace | by \n */
1047
8.42k
    for( i = 0; psz_text[i] != '\0'; i++ )
1048
7.26k
    {
1049
7.26k
        if( psz_text[i] == '|' )
1050
276
            psz_text[i] = '\n';
1051
7.26k
    }
1052
1053
    /* */
1054
1.16k
    p_subtitle->i_start  =  VLC_TICK_0 + i_start * p_props->i_microsecperframe;
1055
1.16k
    p_subtitle->i_stop   = i_stop >= 0 ? (VLC_TICK_0 + i_stop  * p_props->i_microsecperframe) : VLC_TICK_INVALID;
1056
1.16k
    p_subtitle->psz_text = psz_text;
1057
1.16k
    return VLC_SUCCESS;
1058
1.41k
}
1059
1060
/* ParseSubRipSubViewer
1061
 *  Format SubRip
1062
 *      n
1063
 *      h1:m1:s1,d1 --> h2:m2:s2,d2
1064
 *      Line1
1065
 *      Line2
1066
 *      ....
1067
 *      [Empty line]
1068
 *  Format SubViewer v1/v2
1069
 *      h1:m1:s1.d1,h2:m2:s2.d2
1070
 *      Line1[br]Line2
1071
 *      Line3
1072
 *      ...
1073
 *      [empty line]
1074
 *  We ignore line number for SubRip
1075
 */
1076
static int ParseSubRipSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1077
                                 text_t *txt, subtitle_t *p_subtitle,
1078
                                 int (* pf_parse_timing)(subtitle_t *, const char *),
1079
                                 bool b_replace_br )
1080
1.11k
{
1081
1.11k
    VLC_UNUSED(p_obj);
1082
1.11k
    VLC_UNUSED(p_props);
1083
1.11k
    char    *psz_text;
1084
1085
1.11k
    for( ;; )
1086
878k
    {
1087
878k
        const char *s = TextGetLine( txt );
1088
1089
878k
        if( !s )
1090
296
            return VLC_EGENERIC;
1091
1092
878k
        if( pf_parse_timing( p_subtitle, s) == VLC_SUCCESS &&
1093
1.63k
            p_subtitle->i_start < p_subtitle->i_stop )
1094
818
        {
1095
818
            break;
1096
818
        }
1097
878k
    }
1098
1099
    /* Now read text until an empty line */
1100
818
    size_t i_old = 0;
1101
818
    psz_text = NULL;
1102
818
    for( ;; )
1103
3.15k
    {
1104
3.15k
        const char *s = TextGetLine( txt );
1105
3.15k
        size_t i_len;
1106
1107
3.15k
        i_len = s ? strlen( s ) : 0;
1108
3.15k
        if( i_len == 0 )
1109
818
        {
1110
818
            p_subtitle->psz_text = psz_text;
1111
818
            return VLC_SUCCESS;
1112
818
        }
1113
1114
2.33k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1115
2.33k
        if( !psz_text )
1116
0
            return VLC_ENOMEM;
1117
1118
2.33k
        memcpy( &psz_text[i_old], s, i_len );
1119
2.33k
        psz_text[i_old + i_len + 0] = '\n';
1120
2.33k
        psz_text[i_old + i_len + 1] = '\0';
1121
2.33k
        i_old += i_len + 1;
1122
1123
        /* replace [br] by \n */
1124
2.33k
        if( b_replace_br )
1125
845
        {
1126
845
            char *p;
1127
1128
1.29k
            while( ( p = strstr( psz_text, "[br]" ) ) )
1129
447
            {
1130
447
                *p++ = '\n';
1131
447
                memmove( p, &p[3], strlen(&p[3])+1 );
1132
447
                i_old -= 3;
1133
447
            }
1134
845
        }
1135
2.33k
    }
1136
818
}
1137
1138
/* subtitle_ParseSubRipTimingValue
1139
 * Parses SubRip timing value.
1140
 */
1141
/*
 * Parses one SubRip time value at the start of s. Accepted forms, tried in
 * this order: "h:m:s,d", "h:m:s.d" and "h:m:s" (d defaults to 0). A form is
 * only accepted when the characters it consumed fit within length.
 *
 * On success *timing_value is set and VLC_SUCCESS is returned; otherwise
 * VLC_EGENERIC is returned and *timing_value is left untouched.
 */
static int subtitle_ParseSubRipTimingValue(vlc_tick_t *timing_value,
                                           const char *s, size_t length)
{
    int h1, m1, s1, d1 = 0;
    int count;

    bool b_parsed =
        ( sscanf(s, "%d:%d:%d,%d%n", &h1, &m1, &s1, &d1, &count) == 4
          && (size_t)count <= length ) ||
        ( sscanf(s, "%d:%d:%d.%d%n", &h1, &m1, &s1, &d1, &count) == 4
          && (size_t)count <= length );

    if (!b_parsed)
    {
        /* A failed attempt above may have stored a partial value in d1 */
        d1 = 0;
        b_parsed = sscanf(s, "%d:%d:%d%n", &h1, &m1, &s1, &count) == 3
                   && (size_t)count <= length;
    }

    if (!b_parsed)
        return VLC_EGENERIC;

    (*timing_value) = VLC_TICK_0
        + vlc_tick_from_HMS(h1, m1, s1)
        + VLC_TICK_FROM_MS(d1);

    return VLC_SUCCESS;
}
1170
1171
/* subtitle_ParseSubRipTiming
1172
 * Parses SubRip timing.
1173
 */
1174
/*
 * Parses a SubRip timing line "<start> --> <stop>" into p_subtitle->i_start
 * and p_subtitle->i_stop.
 *
 * Returns VLC_EGENERIC when the separator is missing or sits at the very
 * start of the line; otherwise the result of parsing the start value and,
 * if that succeeded, the stop value.
 */
static int subtitle_ParseSubRipTiming( subtitle_t *p_subtitle,
                                       const char *s )
{
    static const char psz_arrow[] = " --> ";

    const char *delimiter = strstr(s, psz_arrow);
    if (delimiter == NULL || delimiter == s)
        return VLC_EGENERIC;

    /* The start value must fit entirely before the separator */
    int ret = subtitle_ParseSubRipTimingValue(&p_subtitle->i_start, s,
                                              (size_t)(delimiter - s));
    if (ret == VLC_SUCCESS)
    {
        const char *right = delimiter + (sizeof(psz_arrow) - 1);
        ret = subtitle_ParseSubRipTimingValue(&p_subtitle->i_stop, right,
                                              strlen(right));
    }
    return ret;
}
1188
1189
/* ParseSubRip
1190
 */
1191
static int  ParseSubRip( vlc_object_t *p_obj, subs_properties_t *p_props,
1192
                         text_t *txt, subtitle_t *p_subtitle,
1193
                         size_t i_idx )
1194
667
{
1195
667
    VLC_UNUSED( i_idx );
1196
667
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1197
667
                                 &subtitle_ParseSubRipTiming,
1198
667
                                 false );
1199
667
}
1200
1201
/* subtitle_ParseSubViewerTiming
1202
 * Parses SubViewer timing.
1203
 */
1204
/*
 * Parses a SubViewer timing line "h1:m1:s1.d1,h2:m2:s2.d2" into
 * p_subtitle->i_start and p_subtitle->i_stop.
 *
 * Returns VLC_SUCCESS when all eight fields are read, VLC_EGENERIC otherwise
 * (p_subtitle is then left untouched).
 */
static int subtitle_ParseSubViewerTiming( subtitle_t *p_subtitle,
                                   const char *s )
{
    int h1, m1, s1, d1;
    int h2, m2, s2, d2;

    const int i_fields = sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &h1, &m1, &s1, &d1, &h2, &m2, &s2, &d2 );
    if( i_fields != 8 )
        return VLC_EGENERIC;

    p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
                          VLC_TICK_FROM_MS( d1 ) + VLC_TICK_0;
    p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
                          VLC_TICK_FROM_MS( d2 ) + VLC_TICK_0;
    return VLC_SUCCESS;
}
1220
1221
/* ParseSubViewer
1222
 */
1223
static int  ParseSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1224
                            text_t *txt, subtitle_t *p_subtitle,
1225
                            size_t i_idx )
1226
447
{
1227
447
    VLC_UNUSED( i_idx );
1228
1229
447
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1230
447
                                 &subtitle_ParseSubViewerTiming,
1231
447
                                 true );
1232
447
}
1233
1234
/* ParseSSA
1235
 */
1236
static int  ParseSSA( vlc_object_t *p_obj, subs_properties_t *p_props,
1237
                      text_t *txt, subtitle_t *p_subtitle,
1238
                      size_t i_idx )
1239
1.05k
{
1240
1.05k
    VLC_UNUSED(p_obj);
1241
1.05k
    size_t header_len = 0;
1242
1243
1.05k
    for( ;; )
1244
393k
    {
1245
393k
        const char *s = TextGetLine( txt );
1246
393k
        int h1, m1, s1, c1, h2, m2, s2, c2;
1247
393k
        char *psz_text, *psz_temp;
1248
393k
        char temp[16];
1249
1250
393k
        if( !s )
1251
187
            return VLC_EGENERIC;
1252
1253
        /* We expect (SSA2-4):
1254
         * Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1255
         * Dialogue: Marked=0,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1256
         *
1257
         * SSA-1 is similar but only has 8 commas up until the subtitle text. Probably the Effect field is no present, but not 100 % sure.
1258
         */
1259
1260
        /* For ASS:
1261
         * Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1262
         * Dialogue: Layer#,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1263
         */
1264
1265
393k
        psz_text = NULL;
1266
393k
        if( s[0] == 'D' || s[0] == 'L' )
1267
2.02k
        {
1268
            /* The output text is always shorter than the input text. */
1269
2.02k
            psz_text = malloc( strlen(s) );
1270
2.02k
            if( !psz_text )
1271
0
                return VLC_ENOMEM;
1272
2.02k
        }
1273
1274
        /* Try to capture the language property */
1275
393k
        if( s[0] == 'L' &&
1276
307
            sscanf( s, "Language: %[^\r\n]", psz_text ) == 1 )
1277
0
        {
1278
0
            free( p_props->psz_lang ); /* just in case of multiple instances */
1279
0
            p_props->psz_lang = psz_text;
1280
0
            psz_text = NULL;
1281
0
        }
1282
393k
        else if( s[0] == 'D' &&
1283
1.71k
            sscanf( s,
1284
1.71k
                    "Dialogue: %15[^,],%d:%d:%d.%d,%d:%d:%d.%d,%[^\r\n]",
1285
1.71k
                    temp,
1286
1.71k
                    &h1, &m1, &s1, &c1,
1287
1.71k
                    &h2, &m2, &s2, &c2,
1288
1.71k
                    psz_text ) == 10 )
1289
869
        {
1290
            /* The dec expects: ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text */
1291
            /* (Layer comes from ASS specs ... it's empty for SSA.) */
1292
869
            if( p_props->i_type == SUB_TYPE_SSA1 )
1293
201
            {
1294
                /* SSA1 has only 8 commas before the text starts, not 9 */
1295
201
                memmove( &psz_text[1], psz_text, strlen(psz_text)+1 );
1296
201
                psz_text[0] = ',';
1297
201
            }
1298
668
            else
1299
668
            {
1300
668
                int i_layer = ( p_props->i_type == SUB_TYPE_ASS ) ? atoi( temp ) : 0;
1301
1302
                /* ReadOrder, Layer, %s(rest of fields) */
1303
668
                if( asprintf( &psz_temp, "%zu,%d,%s", i_idx, i_layer, psz_text ) == -1 )
1304
0
                {
1305
0
                    free( psz_text );
1306
0
                    return VLC_ENOMEM;
1307
0
                }
1308
1309
668
                free( psz_text );
1310
668
                psz_text = psz_temp;
1311
668
            }
1312
1313
869
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1314
869
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1315
869
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
1316
869
                                  VLC_TICK_FROM_MS( c2 * 10 ) + VLC_TICK_0;
1317
869
            p_subtitle->psz_text = psz_text;
1318
869
            return VLC_SUCCESS;
1319
869
        }
1320
392k
        free( psz_text );
1321
1322
        /* All the other stuff we add to the header field */
1323
392k
        if( header_len == 0 && p_props->psz_header )
1324
292
            header_len = strlen( p_props->psz_header );
1325
1326
392k
        size_t s_len = strlen( s );
1327
392k
        p_props->psz_header = realloc_or_free( p_props->psz_header, header_len + s_len + 2 );
1328
392k
        if( !p_props->psz_header )
1329
0
            return VLC_ENOMEM;
1330
392k
        snprintf( p_props->psz_header + header_len, s_len + 2, "%s\n", s );
1331
392k
        header_len += s_len + 1;
1332
392k
    }
1333
1.05k
}
1334
1335
/* ParseVplayer
1336
 *  Format
1337
 *      h:m:s:Line1|Line2|Line3....
1338
 *  or
1339
 *      h:m:s Line1|Line2|Line3....
1340
 */
1341
static int ParseVplayer( vlc_object_t *p_obj, subs_properties_t *p_props,
1342
                         text_t *txt, subtitle_t *p_subtitle,
1343
                         size_t i_idx )
1344
4.39k
{
1345
4.39k
    VLC_UNUSED(p_obj);
1346
4.39k
    VLC_UNUSED(p_props);
1347
4.39k
    VLC_UNUSED( i_idx );
1348
4.39k
    char *psz_text;
1349
1350
4.39k
    for( ;; )
1351
676k
    {
1352
676k
        const char *s = TextGetLine( txt );
1353
676k
        int h1, m1, s1;
1354
1355
676k
        if( !s )
1356
466
            return VLC_EGENERIC;
1357
1358
676k
        psz_text = malloc( strlen( s ) + 1 );
1359
676k
        if( !psz_text )
1360
0
            return VLC_ENOMEM;
1361
1362
676k
        if( sscanf( s, "%d:%d:%d%*c%[^\r\n]",
1363
676k
                    &h1, &m1, &s1, psz_text ) == 4 )
1364
3.93k
        {
1365
3.93k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
1366
3.93k
            p_subtitle->i_stop  = -1;
1367
3.93k
            break;
1368
3.93k
        }
1369
672k
        free( psz_text );
1370
672k
    }
1371
1372
    /* replace | by \n */
1373
1.33M
    for( size_t i = 0; psz_text[i] != '\0'; i++ )
1374
1.33M
    {
1375
1.33M
        if( psz_text[i] == '|' )
1376
333
            psz_text[i] = '\n';
1377
1.33M
    }
1378
3.93k
    p_subtitle->psz_text = psz_text;
1379
3.93k
    return VLC_SUCCESS;
1380
4.39k
}
1381
1382
/* ParseSami
1383
 */
1384
/*
 * Case-insensitive search for psz_str: first in psz_start (when not NULL),
 * then in each following line taken from txt.
 *
 * Returns a pointer just past the first match, or NULL once the text is
 * exhausted without a match.
 */
static const char *ParseSamiSearch( text_t *txt,
                                    const char *psz_start, const char *psz_str )
{
    if( psz_start != NULL )
    {
        const char *psz_hit = strcasestr( psz_start, psz_str );
        if( psz_hit != NULL )
            return psz_hit + strlen( psz_str );
    }

    /* Not in the current position: scan the following lines */
    const char *psz_line;
    while( ( psz_line = TextGetLine( txt ) ) != NULL )
    {
        const char *psz_hit = strcasestr( psz_line, psz_str );
        if( psz_hit != NULL )
            return psz_hit + strlen( psz_str );
    }
    return NULL;
}
1404
static int ParseSami( vlc_object_t *p_obj, subs_properties_t *p_props,
1405
                      text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1406
1.57k
{
1407
1.57k
    VLC_UNUSED(p_obj);
1408
1.57k
    VLC_UNUSED(p_props);
1409
1.57k
    VLC_UNUSED( i_idx );
1410
1.57k
    const char *s;
1411
1.57k
    int64_t i_start;
1412
1413
1.57k
    unsigned int i_text;
1414
1.57k
    char text[8192]; /* Arbitrary but should be long enough */
1415
1416
    /* search "Start=" */
1417
1.57k
    s = ParseSamiSearch( txt, p_props->sami.psz_start, "Start=" );
1418
1.57k
    p_props->sami.psz_start = NULL;
1419
1.57k
    if( !s )
1420
265
        return VLC_EGENERIC;
1421
1422
    /* get start value */
1423
1.31k
    char *psz_end;
1424
1.31k
    i_start = strtol( s, &psz_end, 0 );
1425
1.31k
    s = psz_end;
1426
1427
    /* search <P */
1428
1.31k
    if( !( s = ParseSamiSearch( txt, s, "<P" ) ) )
1429
24
        return VLC_EGENERIC;
1430
1431
    /* search > */
1432
1.28k
    if( !( s = ParseSamiSearch( txt, s, ">" ) ) )
1433
19
        return VLC_EGENERIC;
1434
1435
1.27k
    i_text = 0;
1436
1.27k
    text[0] = '\0';
1437
    /* now get all txt until  a "Start=" line */
1438
1.27k
    for( ;; )
1439
204k
    {
1440
204k
        char c = '\0';
1441
        /* Search non empty line */
1442
497k
        while( s && *s == '\0' )
1443
292k
            s = TextGetLine( txt );
1444
204k
        if( !s )
1445
213
            break;
1446
1447
204k
        if( *s == '<' )
1448
7.14k
        {
1449
7.14k
            if( !strncasecmp( s, "<br", 3 ) )
1450
365
            {
1451
365
                c = '\n';
1452
365
            }
1453
6.77k
            else if( strcasestr( s, "Start=" ) )
1454
1.05k
            {
1455
1.05k
                p_props->sami.psz_start = s;
1456
1.05k
                break;
1457
1.05k
            }
1458
6.08k
            s = ParseSamiSearch( txt, s, ">" );
1459
6.08k
        }
1460
196k
        else if( !strncmp( s, "&nbsp;", 6 ) )
1461
1.03k
        {
1462
1.03k
            c = ' ';
1463
1.03k
            s += 6;
1464
1.03k
        }
1465
195k
        else if( *s == '\t' )
1466
932
        {
1467
932
            c = ' ';
1468
932
            s++;
1469
932
        }
1470
195k
        else
1471
195k
        {
1472
195k
            c = *s;
1473
195k
            s++;
1474
195k
        }
1475
203k
        if( c != '\0' && i_text+1 < sizeof(text) )
1476
195k
        {
1477
195k
            text[i_text++] = c;
1478
195k
            text[i_text] = '\0';
1479
195k
        }
1480
203k
    }
1481
1482
1.27k
    p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start);
1483
1.27k
    p_subtitle->i_stop  = -1;
1484
1.27k
    p_subtitle->psz_text = strdup( text );
1485
1486
1.27k
    return VLC_SUCCESS;
1487
1.28k
}
1488
1489
/* ParseDVDSubtitle
1490
 *  Format
1491
 *      {T h1:m1:s1:c1
1492
 *      Line1
1493
 *      Line2
1494
 *      ...
1495
 *      }
1496
 * TODO it can have a header
1497
 *      { HEAD
1498
 *          ...
1499
 *          CODEPAGE=...
1500
 *          FORMAT=...
1501
 *          LANG=English
1502
 *      }
1503
 *      LANG support would be cool
1504
 *      CODEPAGE is probably mandatory FIXME
1505
 */
1506
static int ParseDVDSubtitle(vlc_object_t *p_obj, subs_properties_t *p_props,
1507
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1508
803
{
1509
803
    VLC_UNUSED(p_obj);
1510
803
    VLC_UNUSED(p_props);
1511
803
    VLC_UNUSED( i_idx );
1512
803
    char *psz_text;
1513
1514
803
    for( ;; )
1515
243k
    {
1516
243k
        const char *s = TextGetLine( txt );
1517
243k
        int h1, m1, s1, c1;
1518
1519
243k
        if( !s )
1520
27
            return VLC_EGENERIC;
1521
1522
243k
        if( sscanf( s,
1523
243k
                    "{T %d:%d:%d:%d",
1524
243k
                    &h1, &m1, &s1, &c1 ) == 4 )
1525
776
        {
1526
776
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1527
776
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1528
776
            p_subtitle->i_stop = -1;
1529
776
            break;
1530
776
        }
1531
243k
    }
1532
1533
    /* Now read text until a line containing "}" */
1534
776
    size_t i_old = 0;
1535
776
    psz_text = NULL;
1536
776
    for( ;; )
1537
98.6k
    {
1538
98.6k
        const char *s = TextGetLine( txt );
1539
98.6k
        size_t i_len;
1540
1541
98.6k
        if( !s )
1542
42
        {
1543
42
            free( psz_text );
1544
42
            return VLC_EGENERIC;
1545
42
        }
1546
1547
98.6k
        i_len = strlen( s );
1548
98.6k
        if( i_len == 1 && s[0] == '}')
1549
734
        {
1550
734
            if (psz_text)
1551
283
                psz_text[i_old] = '\0';
1552
734
            p_subtitle->psz_text = psz_text;
1553
734
            return VLC_SUCCESS;
1554
734
        }
1555
1556
97.8k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1557
97.8k
        if( !psz_text )
1558
0
            return VLC_ENOMEM;
1559
1560
97.8k
        memcpy( &psz_text[i_old], s, i_len );
1561
97.8k
        psz_text[i_old + i_len + 0] = '\n';
1562
97.8k
        i_old += i_len + 1;
1563
97.8k
    }
1564
776
}
1565
1566
/* ParseMPL2
1567
 *  Format
1568
 *     [n1][n2]Line1|Line2|Line3...
1569
 *  where n1 and n2 are the video frame number (n2 can be empty)
1570
 */
1571
static int ParseMPL2(vlc_object_t *p_obj, subs_properties_t *p_props,
1572
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1573
424k
{
1574
424k
    VLC_UNUSED(p_obj);
1575
424k
    VLC_UNUSED(p_props);
1576
424k
    VLC_UNUSED( i_idx );
1577
424k
    char *psz_text;
1578
424k
    int i;
1579
1580
424k
    for( ;; )
1581
686k
    {
1582
686k
        const char *s = TextGetLine( txt );
1583
686k
        int i_start;
1584
686k
        int i_stop;
1585
1586
686k
        if( !s )
1587
187
            return VLC_EGENERIC;
1588
1589
686k
        psz_text = malloc( strlen(s) + 1 );
1590
686k
        if( !psz_text )
1591
0
            return VLC_ENOMEM;
1592
1593
686k
        i_start = 0;
1594
686k
        i_stop  = -1;
1595
686k
        if( sscanf( s, "[%d][] %[^\r\n]", &i_start, psz_text ) == 2 ||
1596
262k
            sscanf( s, "[%d][%d] %[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1597
424k
        {
1598
424k
            p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start * 100);
1599
424k
            p_subtitle->i_stop  = i_stop >= 0 ? VLC_TICK_0 + VLC_TICK_FROM_MS(i_stop  * 100) : VLC_TICK_INVALID;
1600
424k
            break;
1601
424k
        }
1602
262k
        free( psz_text );
1603
262k
    }
1604
1605
1.03M
    for( i = 0; psz_text[i] != '\0'; )
1606
606k
    {
1607
        /* replace | by \n */
1608
606k
        if( psz_text[i] == '|' )
1609
290
            psz_text[i] = '\n';
1610
1611
        /* Remove italic */
1612
606k
        if( psz_text[i] == '/' && ( i == 0 || psz_text[i-1] == '\n' ) )
1613
1.06k
            memmove( &psz_text[i], &psz_text[i+1], strlen(&psz_text[i+1])+1 );
1614
605k
        else
1615
605k
            i++;
1616
606k
    }
1617
424k
    p_subtitle->psz_text = psz_text;
1618
424k
    return VLC_SUCCESS;
1619
424k
}
1620
1621
/*
 * Parses one AQT cue. The first "-->> n" line gives the start frame and the
 * next one the stop frame; every other line in between is appended to the
 * text, followed by '\n'. Gathering also stops after a text line that is the
 * last line of the input.
 *
 * Returns VLC_SUCCESS (psz_text may be NULL when no text line was seen),
 * VLC_EGENERIC when the input is already exhausted, or VLC_ENOMEM.
 */
static int ParseAQT(vlc_object_t *p_obj, subs_properties_t *p_props, text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
{
    VLC_UNUSED(p_obj);
    VLC_UNUSED( i_idx );

    char *psz_text = NULL;
    size_t i_used = 0;
    bool b_started = false;

    for( ;; )
    {
        const char *psz_line = TextGetLine( txt );
        if( psz_line == NULL )
        {
            free( psz_text );
            return VLC_EGENERIC;
        }

        int i_frame; /* Time */
        if( sscanf( psz_line, "-->> %d", &i_frame ) != 1 )
        {
            /* Text Lines: room for the line, its '\n' and the final NUL */
            const size_t i_len = strlen( psz_line );
            psz_text = realloc_or_free( psz_text, i_used + i_len + 1 + 1 );
            if( psz_text == NULL )
                 return VLC_ENOMEM;

            memcpy( psz_text + i_used, psz_line, i_len );
            psz_text[i_used + i_len] = '\n';
            i_used += i_len + 1;

            if( txt->i_line == txt->i_line_count )
                break;
            continue;
        }

        /* Data Lines */
        if( b_started )
        {
            /* We have been too far: end of the subtitle, begin of next */
            p_subtitle->i_stop = VLC_TICK_0 + i_frame * p_props->i_microsecperframe;
            break;
        }

        /* Starting of a subtitle */
        p_subtitle->i_start = VLC_TICK_0 + i_frame * p_props->i_microsecperframe;
        b_started = true;
    }

    if( psz_text != NULL )
        psz_text[i_used] = '\0';
    p_subtitle->psz_text = psz_text;
    return VLC_SUCCESS;
}
1680
1681
static int ParsePJS(vlc_object_t *p_obj, subs_properties_t *p_props,
1682
                    text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1683
612
{
1684
612
    VLC_UNUSED(p_obj);
1685
612
    VLC_UNUSED(p_props);
1686
612
    VLC_UNUSED( i_idx );
1687
1688
612
    char *psz_text;
1689
612
    int i;
1690
1691
612
    for( ;; )
1692
613k
    {
1693
613k
        const char *s = TextGetLine( txt );
1694
613k
        int t1, t2;
1695
1696
613k
        if( !s )
1697
99
            return VLC_EGENERIC;
1698
1699
613k
        psz_text = malloc( strlen(s) + 1 );
1700
613k
        if( !psz_text )
1701
0
            return VLC_ENOMEM;
1702
1703
        /* Data Lines */
1704
613k
        if( sscanf (s, "%d,%d,\"%[^\n\r]", &t1, &t2, psz_text ) == 3 )
1705
513
        {
1706
            /* 1/10th of second ? Frame based ? FIXME */
1707
513
            p_subtitle->i_start = VLC_TICK_0 + INT64_C(10) * t1;
1708
513
            p_subtitle->i_stop = VLC_TICK_0 + INT64_C(10) * t2;
1709
            /* Remove latest " */
1710
513
            psz_text[ strlen(psz_text) - 1 ] = '\0';
1711
1712
513
            break;
1713
513
        }
1714
612k
        free( psz_text );
1715
612k
    }
1716
1717
    /* replace | by \n */
1718
2.84k
    for( i = 0; psz_text[i] != '\0'; i++ )
1719
2.32k
    {
1720
2.32k
        if( psz_text[i] == '|' )
1721
211
            psz_text[i] = '\n';
1722
2.32k
    }
1723
1724
513
    p_subtitle->psz_text = psz_text;
1725
513
    msg_Dbg( p_obj, "%s", psz_text );
1726
513
    return VLC_SUCCESS;
1727
612
}
1728
1729
static int ParseMPSub( vlc_object_t *p_obj, subs_properties_t *p_props,
1730
                       text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1731
1.55k
{
1732
1.55k
    VLC_UNUSED( i_idx );
1733
1734
1.55k
    if( !p_props->mpsub.b_inited )
1735
247
    {
1736
247
        p_props->mpsub.f_total = 0.0;
1737
247
        p_props->mpsub.i_factor = 0;
1738
1739
247
        p_props->mpsub.b_inited = true;
1740
247
    }
1741
1742
1.55k
    for( ;; )
1743
582k
    {
1744
582k
        const char *s = TextGetLine( txt );
1745
582k
        if( !s )
1746
221
        {
1747
221
            return VLC_EGENERIC;
1748
221
        }
1749
1750
582k
        if ( *s =='#' || *s == '\0' )
1751
542k
            continue;
1752
1753
        /* Data Lines */
1754
39.4k
        float wait, duration;
1755
39.4k
        if( sscanf( s, "%f %f", &wait, &duration ) == 2 )
1756
1.33k
        {
1757
1.33k
            float f1 = wait;
1758
1.33k
            float f2 = duration;
1759
1.33k
            p_props->mpsub.f_total += f1 * p_props->mpsub.i_factor;
1760
1.33k
            p_subtitle->i_start = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1761
1.33k
            p_props->mpsub.f_total += f2 * p_props->mpsub.i_factor;
1762
1.33k
            p_subtitle->i_stop = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1763
1.33k
            break;
1764
1.33k
        }
1765
1766
38.0k
        if( !strncmp( s, "FORMAT=", strlen("FORMAT=") ) )
1767
4.06k
        {
1768
4.06k
            const char *psz_format = s + strlen( "FORMAT=" );
1769
4.06k
            if( !strncmp( psz_format, "TIME", strlen("TIME") ) && (psz_format[4] == '\0' || psz_format[4] == ' ') )
1770
432
            {
1771
                // FORMAT=TIME may be followed by a comment
1772
432
                p_props->mpsub.i_factor = 100;
1773
432
            }
1774
3.63k
            else
1775
3.63k
            {
1776
3.63k
                float f_fps;
1777
3.63k
                if( sscanf( psz_format, "%f", &f_fps ) == 1 )
1778
2.80k
                {
1779
2.80k
                    if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1780
120
                        var_SetFloat( p_obj, "sub-original-fps", f_fps );
1781
1782
2.80k
                    p_props->mpsub.i_factor = 1;
1783
2.80k
                }
1784
3.63k
            }
1785
4.06k
        }
1786
38.0k
    }
1787
1788
1.33k
    char *psz_text = NULL;
1789
1.33k
    size_t i_old = 0;
1790
1.33k
    for( ;; )
1791
2.11k
    {
1792
2.11k
        const char *s = TextGetLine( txt );
1793
1794
2.11k
        if( !s )
1795
26
        {
1796
26
            free( psz_text );
1797
26
            return VLC_EGENERIC;
1798
26
        }
1799
1800
2.08k
        size_t i_len = strlen( s );
1801
2.08k
        if( i_len == 0 )
1802
1.31k
            break;
1803
1804
777
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1805
777
        if( !psz_text )
1806
0
             return VLC_ENOMEM;
1807
1808
777
        memcpy( &psz_text[i_old], s, i_len );
1809
777
        psz_text[i_old + i_len + 0] = '\n';
1810
777
        i_old += i_len + 1;
1811
777
    }
1812
1813
1.31k
    if (psz_text)
1814
450
        psz_text[i_old] = '\0';
1815
1.31k
    p_subtitle->psz_text = psz_text;
1816
1.31k
    return VLC_SUCCESS;
1817
1.33k
}
1818
1819
static int ParseJSS( vlc_object_t *p_obj, subs_properties_t *p_props,
1820
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1821
86.0k
{
1822
86.0k
    VLC_UNUSED( i_idx );
1823
86.0k
    char         *psz_text, *psz_orig;
1824
86.0k
    char         *psz_text2, *psz_orig2;
1825
1826
86.0k
    if( !p_props->jss.b_inited )
1827
1.61k
    {
1828
1.61k
        p_props->jss.i_comment = 0;
1829
1.61k
        p_props->jss.i_time_resolution = 30;
1830
1.61k
        p_props->jss.i_time_shift = 0;
1831
1832
1.61k
        p_props->jss.b_inited = true;
1833
1.61k
    }
1834
1835
    /* Parse the main lines */
1836
86.0k
    for( ;; )
1837
637k
    {
1838
637k
        const char *s = TextGetLine( txt );
1839
637k
        if( !s )
1840
1.61k
            return VLC_EGENERIC;
1841
1842
635k
        size_t line_length = strlen( s );
1843
635k
        psz_orig = malloc( line_length + 1 );
1844
635k
        if( !psz_orig )
1845
0
            return VLC_ENOMEM;
1846
635k
        psz_text = psz_orig;
1847
1848
        /* Complete time lines */
1849
635k
        int h1, h2, m1, m2, s1, s2, f1, f2;
1850
635k
        if( sscanf( s, "%d:%d:%d.%d %d:%d:%d.%d %[^\n\r]",
1851
635k
                    &h1, &m1, &s1, &f1, &h2, &m2, &s2, &f2, psz_text ) == 9 )
1852
216
        {
1853
216
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 ) +
1854
216
                vlc_tick_from_sec( ( f1 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1855
216
            p_subtitle->i_stop = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 ) +
1856
216
                vlc_tick_from_sec( ( f2 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1857
216
            break;
1858
216
        }
1859
        /* Short time lines */
1860
635k
        else if( sscanf( s, "@%d @%d %[^\n\r]", &f1, &f2, psz_text ) == 3 )
1861
84.1k
        {
1862
84.1k
            p_subtitle->i_start = VLC_TICK_0 +
1863
84.1k
                    vlc_tick_from_sec( (f1 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1864
84.1k
            p_subtitle->i_stop = VLC_TICK_0 +
1865
84.1k
                    vlc_tick_from_sec( (f2 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1866
84.1k
            break;
1867
84.1k
        }
1868
        /* General Directive lines */
1869
        /* Only TIME and SHIFT are supported so far */
1870
551k
        else if( s[0] == '#' )
1871
3.53k
        {
1872
3.53k
            int h = 0, m =0, sec = 1, f = 1;
1873
3.53k
            unsigned shift = 1;
1874
3.53k
            int inv = 1;
1875
1876
3.53k
            strcpy( psz_text, s );
1877
1878
3.53k
            switch( toupper( (unsigned char)psz_text[1] ) )
1879
3.53k
            {
1880
2.21k
            case 'S':
1881
2.21k
                 shift = isalpha( (unsigned char)psz_text[2] ) ? 6 : 2 ;
1882
2.21k
                 if ( shift > line_length )
1883
228
                     break;
1884
1885
1.98k
                 if( sscanf( &psz_text[shift], "%d", &h ) )
1886
1.71k
                 {
1887
                     /* Negative shifting */
1888
1.71k
                     if( h < 0 )
1889
911
                     {
1890
911
                         h *= -1;
1891
911
                         inv = -1;
1892
911
                     }
1893
1894
1.71k
                     if( sscanf( &psz_text[shift], "%*d:%d", &m ) )
1895
1.41k
                     {
1896
1.41k
                         if( sscanf( &psz_text[shift], "%*d:%*d:%d", &sec ) )
1897
1.19k
                         {
1898
1.19k
                             sscanf( &psz_text[shift], "%*d:%*d:%*d.%d", &f );
1899
1.19k
                         }
1900
213
                         else
1901
213
                         {
1902
213
                             h = 0;
1903
213
                             sscanf( &psz_text[shift], "%d:%d.%d",
1904
213
                                     &m, &sec, &f );
1905
213
                             m *= inv;
1906
213
                         }
1907
1.41k
                     }
1908
301
                     else
1909
301
                     {
1910
301
                         h = m = 0;
1911
301
                         sscanf( &psz_text[shift], "%d.%d", &sec, &f);
1912
301
                         sec *= inv;
1913
301
                     }
1914
1.71k
                     p_props->jss.i_time_shift = ( ( h * INT64_C(3600) + m * INT64_C(60) + sec )
1915
1.71k
                         * p_props->jss.i_time_resolution + f ) * inv;
1916
1.71k
                 }
1917
1.98k
                 break;
1918
1919
998
            case 'T':
1920
998
                shift = isalpha( (unsigned char)psz_text[2] ) ? 8 : 2 ;
1921
998
                if ( shift > line_length )
1922
204
                    break;
1923
1924
794
                sscanf( &psz_text[shift], "%d", &p_props->jss.i_time_resolution );
1925
794
                if( !p_props->jss.i_time_resolution || p_props->jss.i_time_resolution < 0 )
1926
559
                    p_props->jss.i_time_resolution = 30;
1927
794
                break;
1928
3.53k
            }
1929
3.53k
            free( psz_orig );
1930
3.53k
            continue;
1931
3.53k
        }
1932
547k
        else
1933
            /* Unknown type line, probably a comment */
1934
547k
        {
1935
547k
            free( psz_orig );
1936
547k
            continue;
1937
547k
        }
1938
635k
    }
1939
1940
84.8k
    while( psz_text[ strlen( psz_text ) - 1 ] == '\\' )
1941
704
    {
1942
704
        const char *s2 = TextGetLine( txt );
1943
1944
704
        if( !s2 )
1945
2
        {
1946
2
            free( psz_orig );
1947
2
            return VLC_EGENERIC;
1948
2
        }
1949
1950
702
        size_t i_len = strlen( s2 );
1951
702
        if( i_len == 0 )
1952
244
            break;
1953
1954
458
        size_t i_old = strlen( psz_text );
1955
1956
458
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 );
1957
458
        if( !psz_text )
1958
0
             return VLC_ENOMEM;
1959
1960
458
        psz_orig = psz_text;
1961
458
        strcat( psz_text, s2 );
1962
458
    }
1963
1964
    /* Skip the blanks */
1965
84.3k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1966
1967
    /* Parse the directives */
1968
84.3k
    if( isalpha( (unsigned char)*psz_text ) || *psz_text == '[' )
1969
4.04k
    {
1970
17.2k
        while( *psz_text && *psz_text != ' ' )
1971
13.1k
            ++psz_text;
1972
1973
        /* Directives are NOT parsed yet */
1974
        /* This has probably a better place in a decoder ? */
1975
        /* directive = malloc( strlen( psz_text ) + 1 );
1976
           if( sscanf( psz_text, "%s %[^\n\r]", directive, psz_text2 ) == 2 )*/
1977
4.04k
    }
1978
1979
    /* Skip the blanks after directives */
1980
85.0k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1981
1982
    /* Clean all the lines from inline comments and other stuffs */
1983
84.3k
    psz_orig2 = calloc( strlen( psz_text) + 1, 1 );
1984
84.3k
    psz_text2 = psz_orig2;
1985
1986
1.64M
    for( ; *psz_text != '\0' && *psz_text != '\n' && *psz_text != '\r'; )
1987
1.55M
    {
1988
1.55M
        switch( *psz_text )
1989
1.55M
        {
1990
310k
        case '{':
1991
310k
            p_props->jss.i_comment++;
1992
310k
            break;
1993
1.67k
        case '}':
1994
1.67k
            if( p_props->jss.i_comment )
1995
736
            {
1996
736
                p_props->jss.i_comment = 0;
1997
736
                if( (*(psz_text + 1 ) ) == ' ' ) psz_text++;
1998
736
            }
1999
1.67k
            break;
2000
3.43k
        case '~':
2001
3.43k
            if( !p_props->jss.i_comment )
2002
2.55k
            {
2003
2.55k
                *psz_text2 = ' ';
2004
2.55k
                psz_text2++;
2005
2.55k
            }
2006
3.43k
            break;
2007
13.7k
        case ' ':
2008
15.4k
        case '\t':
2009
15.4k
            if( (*(psz_text + 1 ) ) == ' ' || (*(psz_text + 1 ) ) == '\t' )
2010
1.93k
                break;
2011
13.5k
            if( !p_props->jss.i_comment )
2012
9.99k
            {
2013
9.99k
                *psz_text2 = ' ';
2014
9.99k
                psz_text2++;
2015
9.99k
            }
2016
13.5k
            break;
2017
9.88k
        case '\\':
2018
9.88k
            if( (*(psz_text + 1 ) ) == 'n' )
2019
459
            {
2020
459
                *psz_text2 = '\n';
2021
459
                psz_text++;
2022
459
                psz_text2++;
2023
459
                break;
2024
459
            }
2025
9.42k
            if( ( toupper((unsigned char)*(psz_text + 1 ) ) == 'C' ) ||
2026
9.19k
                    ( toupper((unsigned char)*(psz_text + 1 ) ) == 'F' ) )
2027
737
            {
2028
737
                psz_text++;
2029
737
                break;
2030
737
            }
2031
8.68k
            if( (*(psz_text + 1 ) ) == 'B' || (*(psz_text + 1 ) ) == 'b' ||
2032
7.25k
                (*(psz_text + 1 ) ) == 'I' || (*(psz_text + 1 ) ) == 'i' ||
2033
6.63k
                (*(psz_text + 1 ) ) == 'U' || (*(psz_text + 1 ) ) == 'u' ||
2034
6.01k
                (*(psz_text + 1 ) ) == 'D' || (*(psz_text + 1 ) ) == 'N' )
2035
3.29k
            {
2036
3.29k
                psz_text++;
2037
3.29k
                break;
2038
3.29k
            }
2039
5.39k
            if( (*(psz_text + 1 ) ) == '~' || (*(psz_text + 1 ) ) == '{' ||
2040
4.07k
                (*(psz_text + 1 ) ) == '\\' )
2041
3.16k
                psz_text++;
2042
2.23k
            else if( ( *(psz_text + 1 ) == '\r' ||  *(psz_text + 1 ) == '\n' ) &&
2043
210
                     *(psz_text + 1 ) != '\0' )
2044
210
            {
2045
210
                psz_text++;
2046
210
            }
2047
5.39k
            break;
2048
1.21M
        default:
2049
1.21M
            if( !p_props->jss.i_comment )
2050
973k
            {
2051
973k
                *psz_text2 = *psz_text;
2052
973k
                psz_text2++;
2053
973k
            }
2054
1.55M
        }
2055
1.55M
        psz_text++;
2056
1.55M
    }
2057
2058
84.3k
    p_subtitle->psz_text = psz_orig2;
2059
84.3k
    msg_Dbg( p_obj, "%s", p_subtitle->psz_text );
2060
84.3k
    free( psz_orig );
2061
84.3k
    return VLC_SUCCESS;
2062
84.3k
}
2063
2064
static int ParsePSB( vlc_object_t *p_obj, subs_properties_t *p_props,
2065
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2066
0
{
2067
0
    VLC_UNUSED(p_obj);
2068
0
    VLC_UNUSED(p_props);
2069
0
    VLC_UNUSED( i_idx );
2070
2071
0
    char *psz_text;
2072
0
    int i;
2073
2074
0
    for( ;; )
2075
0
    {
2076
0
        int h1, m1, s1;
2077
0
        int h2, m2, s2;
2078
0
        const char *s = TextGetLine( txt );
2079
2080
0
        if( !s )
2081
0
            return VLC_EGENERIC;
2082
2083
0
        psz_text = malloc( strlen( s ) + 1 );
2084
0
        if( !psz_text )
2085
0
            return VLC_ENOMEM;
2086
2087
0
        if( sscanf( s, "{%d:%d:%d}{%d:%d:%d}%[^\r\n]",
2088
0
                    &h1, &m1, &s1, &h2, &m2, &s2, psz_text ) == 7 )
2089
0
        {
2090
0
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2091
0
            p_subtitle->i_stop  = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 );
2092
0
            break;
2093
0
        }
2094
0
        free( psz_text );
2095
0
    }
2096
2097
    /* replace | by \n */
2098
0
    for( i = 0; psz_text[i] != '\0'; i++ )
2099
0
    {
2100
0
        if( psz_text[i] == '|' )
2101
0
            psz_text[i] = '\n';
2102
0
    }
2103
0
    p_subtitle->psz_text = psz_text;
2104
0
    return VLC_SUCCESS;
2105
0
}
2106
2107
static vlc_tick_t ParseRealTime( const char *psz )
2108
2.97k
{
2109
2.97k
    if( *psz == '\0' ) return VLC_TICK_0;
2110
2.97k
    int h, m, s, f;
2111
2.97k
    if( sscanf( psz, "%d:%d:%d.%d", &h, &m, &s, &f ) == 4 )
2112
208
    {
2113
208
        return vlc_tick_from_HMS( h, m, s )
2114
208
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2115
208
    }
2116
2.76k
    if( sscanf( psz, "%d:%d.%d", &m, &s, &f ) == 3 )
2117
202
    {
2118
202
        return vlc_tick_from_HMS( 0, m, s )
2119
202
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2120
202
    }
2121
2.56k
    if( sscanf( psz, "%d.%d", &s, &f ) == 2 )
2122
444
    {
2123
444
        return vlc_tick_from_sec( s )
2124
444
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2125
444
    }
2126
2.11k
    if( sscanf( psz, "%d:%d", &m, &s ) == 2 )
2127
478
    {
2128
478
        return vlc_tick_from_HMS( 0, m, s )
2129
478
               + VLC_TICK_0;
2130
478
    }
2131
1.64k
    if( sscanf( psz, "%d", &s ) == 1 )
2132
1.25k
    {
2133
1.25k
        return vlc_tick_from_sec( s )
2134
1.25k
               + VLC_TICK_0;
2135
1.25k
    }
2136
381
    return VLC_TICK_MIN;
2137
1.64k
}
2138
2139
static int ParseRealText( vlc_object_t *p_obj, subs_properties_t *p_props,
2140
                          text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2141
2.72k
{
2142
2.72k
    VLC_UNUSED(p_obj);
2143
2.72k
    VLC_UNUSED(p_props);
2144
2.72k
    VLC_UNUSED( i_idx );
2145
2.72k
    char *psz_text = NULL;
2146
2147
2.72k
    for( ;; )
2148
734k
    {
2149
734k
        const char *s = TextGetLine( txt );
2150
734k
        free( psz_text );
2151
2152
734k
        if( !s )
2153
123
            return VLC_EGENERIC;
2154
2155
734k
        psz_text = malloc( strlen( s ) + 1 );
2156
734k
        if( !psz_text )
2157
0
            return VLC_ENOMEM;
2158
2159
        /* Find the good beginning. This removes extra spaces at the beginning
2160
           of the line.*/
2161
734k
        char *psz_temp = strcasestr( s, "<time");
2162
734k
        if( psz_temp != NULL )
2163
3.43k
        {
2164
3.43k
            char psz_end[12], psz_begin[12];
2165
3.43k
            vlc_tick_t end = VLC_TICK_MIN;
2166
            /* Line has begin and end */
2167
3.43k
            if( sscanf( psz_temp,
2168
3.43k
                  "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\" %*[e|E]nd=\"%11[^\"]%*[^>]%[^\n\r]",
2169
3.43k
                            psz_begin, psz_end, psz_text) == 3 )
2170
375
            {
2171
375
                end = ParseRealTime( psz_end );
2172
375
            }
2173
3.06k
            else if ( sscanf( psz_temp,
2174
3.06k
                                "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\"%*[^>]%[^\n\r]",
2175
3.06k
                                psz_begin, psz_text ) != 2)
2176
                /* Line is not recognized */
2177
838
            {
2178
838
                continue;
2179
838
            }
2180
2181
            /* Get the times */
2182
2.59k
            vlc_tick_t i_time = ParseRealTime( psz_begin );
2183
2.59k
            if (i_time != VLC_TICK_MIN)
2184
2.23k
                p_subtitle->i_start = i_time;
2185
363
            else
2186
363
                p_subtitle->i_start = -1;
2187
2188
2.59k
            if (end != VLC_TICK_MIN)
2189
357
                p_subtitle->i_stop = end;
2190
2.24k
            else
2191
2.24k
                p_subtitle->i_stop = -1;
2192
2.59k
            break;
2193
3.43k
        }
2194
734k
    }
2195
2196
    /* Get the following Lines */
2197
2.59k
    size_t i_old = strlen( psz_text );
2198
2.59k
    for( ;; )
2199
3.61k
    {
2200
3.61k
        const char *s = TextGetLine( txt );
2201
2202
3.61k
        if( !s )
2203
117
        {
2204
117
            free( psz_text );
2205
117
            return VLC_EGENERIC;
2206
117
        }
2207
2208
3.49k
        size_t i_len = strlen( s );
2209
3.49k
        if( i_len == 0 ) break;
2210
2211
3.23k
        if( strcasestr( s, "<time" ) ||
2212
1.26k
            strcasestr( s, "<clear/") )
2213
2.21k
        {
2214
2.21k
            TextPreviousLine( txt );
2215
2.21k
            break;
2216
2.21k
        }
2217
2218
1.01k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2219
1.01k
        if( !psz_text )
2220
0
            return VLC_ENOMEM;
2221
2222
1.01k
        memcpy( &psz_text[i_old], s, i_len );
2223
1.01k
        psz_text[i_old + i_len + 0] = '\n';
2224
1.01k
        i_old += i_len + 1;
2225
1.01k
    }
2226
2227
2.48k
    psz_text[i_old] = '\0';
2228
    /* Remove the starting ">" that remained after the sscanf */
2229
2.48k
    memmove( &psz_text[0], &psz_text[1], strlen( psz_text ) );
2230
2231
2.48k
    p_subtitle->psz_text = psz_text;
2232
2233
2.48k
    return VLC_SUCCESS;
2234
2.59k
}
2235
2236
static int ParseDKS( vlc_object_t *p_obj, subs_properties_t *p_props,
2237
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2238
2.33k
{
2239
2.33k
    VLC_UNUSED(p_obj);
2240
2.33k
    VLC_UNUSED(p_props);
2241
2.33k
    VLC_UNUSED( i_idx );
2242
2243
2.33k
    char *psz_text;
2244
2245
2.33k
    for( ;; )
2246
638k
    {
2247
638k
        int h1, m1, s1;
2248
638k
        int h2, m2, s2;
2249
638k
        char *s = TextGetLine( txt );
2250
2251
638k
        if( !s )
2252
64
            return VLC_EGENERIC;
2253
2254
638k
        psz_text = malloc( strlen( s ) + 1 );
2255
638k
        if( !psz_text )
2256
0
            return VLC_ENOMEM;
2257
2258
638k
        if( sscanf( s, "[%d:%d:%d]%[^\r\n]",
2259
638k
                    &h1, &m1, &s1, psz_text ) == 4 )
2260
2.26k
        {
2261
2.26k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2262
2263
2.26k
            s = TextGetLine( txt );
2264
2.26k
            if( !s )
2265
8
            {
2266
8
                free( psz_text );
2267
8
                return VLC_EGENERIC;
2268
8
            }
2269
2270
2.26k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2271
1.39k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2272
867
            else
2273
867
                p_subtitle->i_stop  = -1;
2274
2.26k
            break;
2275
2.26k
        }
2276
636k
        free( psz_text );
2277
636k
    }
2278
2279
    /* replace [br] by \n */
2280
2.26k
    char *p;
2281
2.68k
    while( ( p = strstr( psz_text, "[br]" ) ) )
2282
427
    {
2283
427
        *p++ = '\n';
2284
427
        memmove( p, &p[3], strlen(&p[3])+1 );
2285
427
    }
2286
2287
2.26k
    p_subtitle->psz_text = psz_text;
2288
2.26k
    return VLC_SUCCESS;
2289
2.33k
}
2290
2291
static int ParseSubViewer1( vlc_object_t *p_obj, subs_properties_t *p_props,
2292
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2293
3.39k
{
2294
3.39k
    VLC_UNUSED(p_obj);
2295
3.39k
    VLC_UNUSED(p_props);
2296
3.39k
    VLC_UNUSED( i_idx );
2297
3.39k
    char *psz_text;
2298
2299
3.39k
    for( ;; )
2300
1.28M
    {
2301
1.28M
        int h1, m1, s1;
2302
1.28M
        int h2, m2, s2;
2303
1.28M
        char *s = TextGetLine( txt );
2304
2305
1.28M
        if( !s )
2306
55
            return VLC_EGENERIC;
2307
2308
1.28M
        if( sscanf( s, "[%d:%d:%d]", &h1, &m1, &s1 ) == 3 )
2309
3.34k
        {
2310
3.34k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2311
2312
3.34k
            s = TextGetLine( txt );
2313
3.34k
            if( !s )
2314
3
                return VLC_EGENERIC;
2315
2316
3.33k
            psz_text = strdup( s );
2317
3.33k
            if( !psz_text )
2318
0
                return VLC_ENOMEM;
2319
2320
3.33k
            s = TextGetLine( txt );
2321
3.33k
            if( !s )
2322
3
            {
2323
3
                free( psz_text );
2324
3
                return VLC_EGENERIC;
2325
3
            }
2326
2327
3.33k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2328
2.12k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2329
1.20k
            else
2330
1.20k
                p_subtitle->i_stop  = -1;
2331
2332
3.33k
            break;
2333
3.33k
        }
2334
1.28M
    }
2335
2336
3.33k
    p_subtitle->psz_text = psz_text;
2337
2338
3.33k
    return VLC_SUCCESS;
2339
3.39k
}
2340
2341
static int ParseCommonSBV( vlc_object_t *p_obj, subs_properties_t *p_props,
2342
                           text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2343
818
{
2344
818
    VLC_UNUSED(p_obj);
2345
818
    VLC_UNUSED( i_idx );
2346
818
    VLC_UNUSED( p_props );
2347
818
    char        *psz_text;
2348
2349
818
    for( ;; )
2350
274k
    {
2351
274k
        const char *s = TextGetLine( txt );
2352
274k
        int h1 = 0, m1 = 0, s1 = 0, d1 = 0;
2353
274k
        int h2 = 0, m2 = 0, s2 = 0, d2 = 0;
2354
2355
274k
        if( !s )
2356
146
            return VLC_EGENERIC;
2357
2358
274k
        if( sscanf( s,"%d:%d:%d.%d,%d:%d:%d.%d",
2359
274k
                    &h1, &m1, &s1, &d1,
2360
274k
                    &h2, &m2, &s2, &d2 ) == 8 )
2361
940
        {
2362
940
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
2363
940
                                  VLC_TICK_FROM_MS( d1 ) + VLC_TICK_0;
2364
2365
940
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
2366
940
                                  VLC_TICK_FROM_MS( d2 ) + VLC_TICK_0;
2367
940
            if( p_subtitle->i_start < p_subtitle->i_stop )
2368
672
                break;
2369
940
        }
2370
274k
    }
2371
2372
    /* Now read text until an empty line */
2373
672
    size_t i_old = 0;
2374
672
    psz_text = NULL;
2375
672
    for( ;; )
2376
1.90k
    {
2377
1.90k
        const char *s = TextGetLine( txt );
2378
1.90k
        size_t i_len;
2379
2380
1.90k
        i_len = s ? strlen( s ) : 0;
2381
1.90k
        if( i_len <= 0 )
2382
672
        {
2383
672
            if (psz_text)
2384
306
                psz_text[i_old] = '\0';
2385
672
            p_subtitle->psz_text = psz_text;
2386
672
            return VLC_SUCCESS;
2387
672
        }
2388
2389
1.23k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2390
1.23k
        if( !psz_text )
2391
0
            return VLC_ENOMEM;
2392
2393
1.23k
        memcpy( &psz_text[i_old], s, i_len );
2394
1.23k
        psz_text[i_old + i_len + 0] = '\n';
2395
1.23k
        i_old += i_len + 1;
2396
1.23k
    }
2397
672
}
2398
2399
static int ParseSCC( vlc_object_t *p_obj, subs_properties_t *p_props,
2400
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2401
7.18k
{
2402
7.18k
    VLC_UNUSED(p_obj);
2403
7.18k
    VLC_UNUSED( i_idx );
2404
7.18k
    VLC_UNUSED( p_props );
2405
2406
7.18k
    static const struct rates
2407
7.18k
    {
2408
7.18k
        unsigned val;
2409
7.18k
        vlc_rational_t rate;
2410
7.18k
        bool b_drop_allowed;
2411
7.18k
    } framerates[] = {
2412
7.18k
        { 2398, { 24000, 1001 }, false },
2413
7.18k
        { 2400, { 24, 1 },       false },
2414
7.18k
        { 2500, { 25, 1 },       false },
2415
7.18k
        { 2997, { 30000, 1001 }, true }, /* encoding rate */
2416
7.18k
        { 3000, { 30, 1 },       false },
2417
7.18k
        { 5000, { 50, 1 },       false },
2418
7.18k
        { 5994, { 60000, 1001 }, true },
2419
7.18k
        { 6000, { 60, 1 },       false },
2420
7.18k
    };
2421
7.18k
    const struct rates *p_rate = &framerates[3];
2422
7.18k
    float f_fps = var_GetFloat( p_obj, "sub-original-fps" );
2423
7.18k
    if( f_fps > 1.0 )
2424
0
    {
2425
0
        for( size_t i=0; i<ARRAY_SIZE(framerates); i++ )
2426
0
        {
2427
0
            if( (unsigned)(f_fps * 100) == framerates[i].val )
2428
0
            {
2429
0
                p_rate = &framerates[i];
2430
0
                break;
2431
0
            }
2432
0
        }
2433
0
    }
2434
2435
7.18k
    for( ;; )
2436
27.0k
    {
2437
27.0k
        const char *psz_line = TextGetLine( txt );
2438
27.0k
        if( !psz_line )
2439
961
            return VLC_EGENERIC;
2440
2441
26.1k
        unsigned h, m, s, f;
2442
26.1k
        char c;
2443
26.1k
        if( sscanf( psz_line, "%u:%u:%u%c%u ", &h, &m, &s, &c, &f ) != 5 ||
2444
7.33k
                ( c != ':' && c != ';' ) )
2445
19.1k
            continue;
2446
2447
        /* convert everything to seconds */
2448
6.94k
        int64_t i_frames = h * INT64_C(3600) + m * INT64_C(60) + s;
2449
2450
6.94k
        if( c == ';' && p_rate->b_drop_allowed ) /* dropframe */
2451
210
        {
2452
            /* convert to frame # to be accurate between inter drop drift
2453
             * of 18 frames see http://andrewduncan.net/timecodes/ */
2454
210
            const unsigned i_mins = h * 60 + m;
2455
210
            i_frames = i_frames * p_rate[+1].rate.num + f
2456
210
                    - (p_rate[+1].rate.den * 2 * (i_mins - i_mins % 10));
2457
210
        }
2458
6.73k
        else
2459
6.73k
        {
2460
            /* convert to frame # at 29.97 */
2461
6.73k
            i_frames = i_frames * framerates[3].rate.num / framerates[3].rate.den + f;
2462
6.73k
        }
2463
6.94k
        p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_sec(i_frames)*
2464
6.94k
                                         p_rate->rate.den / p_rate->rate.num;
2465
6.94k
        p_subtitle->i_stop = -1;
2466
2467
6.94k
        const char *psz_text = strchr( psz_line, '\t' );
2468
6.94k
        if( !psz_text && !(psz_text = strchr( psz_line, ' ' )) )
2469
219
            continue;
2470
2471
6.73k
        if ( psz_text[1] == '\0' )
2472
508
            continue;
2473
2474
6.22k
        p_subtitle->psz_text = strdup( psz_text + 1 );
2475
6.22k
        if( !p_subtitle->psz_text )
2476
0
            return VLC_ENOMEM;
2477
2478
6.22k
        break;
2479
6.22k
    }
2480
2481
6.22k
    return VLC_SUCCESS;
2482
7.18k
}
2483
2484
/* Tries to extract language from common filename patterns PATH/filename.LANG.ext
2485
   and PATH/Subs/x_LANG.ext (where 'x' is an integer). */
2486
static char *get_language_from_url(const char *urlstr)
2487
5.85k
{
2488
5.85k
    vlc_url_t url;
2489
5.85k
    const char *filename = NULL;
2490
5.85k
    char *ret = NULL;
2491
2492
5.85k
    assert(urlstr != NULL);
2493
2494
5.85k
    if (vlc_UrlParse(&url, urlstr) != 0)
2495
0
    {
2496
0
        vlc_UrlClean(&url);
2497
0
        return NULL;
2498
0
    }
2499
5.85k
    if (url.psz_path != NULL)
2500
0
        filename = strrchr(url.psz_path, '/');
2501
5.85k
    if (filename != NULL) {
2502
0
        filename++; // skip forward slash
2503
2504
0
        const char *ext = strrchr(filename, '.');
2505
2506
0
        if (ext != NULL) {
2507
            /* Get string between last two periods, hopefully the language. */
2508
0
            const char *lang = memrchr(filename, '.', ext - filename);
2509
2510
            /* Otherwise try string after last underscore. */
2511
0
            if (lang == NULL)
2512
0
                lang = memrchr(filename, '_', ext - filename);
2513
2514
0
            if (lang != NULL) {
2515
0
                lang++; // skip period or underscore
2516
0
                ret = strndup(lang, ext - lang);
2517
0
            }
2518
0
       }
2519
0
    }
2520
2521
5.85k
    vlc_UrlClean(&url);
2522
5.85k
    return ret;
2523
5.85k
}
2524
2525
#ifdef ENABLE_TEST
2526
static void test_subtitle_ParseSubRipTimingValue(void)
2527
{
2528
    fprintf(stderr, "\n# %s:\n", __func__);
2529
2530
    struct test_timing_value
2531
    {
2532
        const char *str;
2533
        vlc_tick_t value;
2534
    };
2535
2536
    static const struct test_timing_value timing_values_success[] =
2537
    {
2538
        { "0:0:0,0",        VLC_TICK_0 },
2539
        { "0:0:0.0",        VLC_TICK_0 },
2540
        { "0:0:0",          VLC_TICK_0 },
2541
    };
2542
2543
    struct test_sized_timing_value
2544
    {
2545
        const char *str;
2546
        vlc_tick_t value;
2547
        size_t length;
2548
    };
2549
2550
    static const struct test_sized_timing_value sized_timing_values_success[] =
2551
    {
2552
        { "0:0:0,1",        VLC_TICK_0, strlen("0:0:0") },
2553
        { "0:0:0.1",        VLC_TICK_0, strlen("0:0:0") },
2554
    };
2555
2556
    static const char *timing_values_fail[] =
2557
    {
2558
        "0:0",
2559
        "0",
2560
    };
2561
2562
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2563
    {
2564
        fprintf(stderr, "Checking that %s parses into %" PRId64 "\n",
2565
                timing_values_success[i].str, timing_values_success[i].value);
2566
2567
        vlc_tick_t value;
2568
        int ret = subtitle_ParseSubRipTimingValue(&value,
2569
                timing_values_success[i].str,
2570
                strlen(timing_values_success[i].str));
2571
        fprintf(stderr, " -> %" PRId64 "\n", value);
2572
        assert(ret == VLC_SUCCESS);
2573
        assert(value == timing_values_success[i].value);
2574
    }
2575
2576
    for (size_t i=0; i<ARRAY_SIZE(sized_timing_values_success); ++i)
2577
    {
2578
        fprintf(stderr, "Checking that %s (length=%zu) parses into %" PRId64 "\n",
2579
                sized_timing_values_success[i].str,
2580
                sized_timing_values_success[i].length,
2581
                sized_timing_values_success[i].value);
2582
2583
        vlc_tick_t value;
2584
        int ret = subtitle_ParseSubRipTimingValue(&value,
2585
                sized_timing_values_success[i].str,
2586
                sized_timing_values_success[i].length);
2587
        assert(ret == VLC_SUCCESS);
2588
        fprintf(stderr, " -> %" PRId64 "\n", value);
2589
        assert(value == sized_timing_values_success[i].value);
2590
    }
2591
2592
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2593
    {
2594
        fprintf(stderr, "Checking that %s fails to parse\n",
2595
                timing_values_fail[i]);
2596
        vlc_tick_t value;
2597
        int ret = subtitle_ParseSubRipTimingValue(&value,
2598
                timing_values_fail[i], strlen(timing_values_fail[i]));
2599
        (void)value;
2600
        assert(ret != VLC_SUCCESS);
2601
    }
2602
2603
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2604
    {
2605
        fprintf(stderr, "Checking that %s fails to parse\n",
2606
                timing_values_fail[i]);
2607
        vlc_tick_t value;
2608
        int ret = subtitle_ParseSubRipTimingValue(&value,
2609
                timing_values_fail[i], strlen(timing_values_fail[i]));
2610
        (void)value;
2611
        assert(ret != VLC_SUCCESS);
2612
    }
2613
}
2614
2615
static void test_subtitle_ParseSubRipTiming(void)
2616
{
2617
    fprintf(stderr, "\n# %s:\n", __func__);
2618
2619
    struct test_timing_value
2620
    {
2621
        const char *str;
2622
        vlc_tick_t left;
2623
        vlc_tick_t right;
2624
    };
2625
2626
    static const struct test_timing_value timing_values_success[] =
2627
    {
2628
        { "0:0:0,0 --> 0:0:0,0",        VLC_TICK_0,     VLC_TICK_0 },
2629
        { "0:0:0.0 --> 0:0:0.0",        VLC_TICK_0,     VLC_TICK_0 },
2630
        { "0:0:0   --> 0:0:0",          VLC_TICK_0,     VLC_TICK_0 },
2631
    };
2632
2633
    static const char *timing_values_fail[] =
2634
    {
2635
        "0:0 --> 0:0",
2636
        "0:0 --> 0:0:0,0",
2637
        "0:0:0,0 --> 0:0",
2638
        "0 -> 0",
2639
    };
2640
2641
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2642
    {
2643
        fprintf(stderr, "Checking that %s parses into %" PRId64 " --> %" PRId64 "\n",
2644
                timing_values_success[i].str,
2645
                timing_values_success[i].left,
2646
                timing_values_success[i].right);
2647
2648
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2649
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_success[i].str);
2650
        fprintf(stderr, " -> %" PRId64 " --> %" PRId64 "\n", sub.i_start, sub.i_stop);
2651
        assert(ret == VLC_SUCCESS);
2652
        assert(sub.i_start == timing_values_success[i].left);
2653
        assert(sub.i_stop == timing_values_success[i].right);
2654
    }
2655
2656
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2657
    {
2658
        fprintf(stderr, "Checking that %s fails to parse\n",
2659
                timing_values_fail[i]);
2660
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2661
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_fail[i]);
2662
        (void)sub;
2663
        assert(ret != VLC_SUCCESS);
2664
    }
2665
}
2666
2667
/* Entry point of the test program: runs the SubRip timing tests in order.
 * The tests report failures through assert(), so returning 0 means every
 * check passed. */
int main(int argc, char **argv)
{
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    test_subtitle_ParseSubRipTimingValue();
    test_subtitle_ParseSubRipTiming();

    return 0;
}
2675
#endif