Coverage Report

Created: 2026-01-17 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/modules/demux/subtitle.c
Line
Count
Source
1
/*****************************************************************************
2
 * subtitle.c: Demux for subtitle text files.
3
 *****************************************************************************
4
 * Copyright (C) 1999-2007 VLC authors and VideoLAN
5
 * Copyright (C) 2023      Videolabs
6
 *
7
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Derk-Jan Hartman <hartman at videolan dot org>
9
 *          Jean-Baptiste Kempf <jb@videolan.org>
10
 *          Alexandre Janniaux <ajanni@videolabs.io>
11
 *
12
 * This program is free software; you can redistribute it and/or modify it
13
 * under the terms of the GNU Lesser General Public License as published by
14
 * the Free Software Foundation; either version 2.1 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
 * GNU Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public License
23
 * along with this program; if not, write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25
 *****************************************************************************/
26
27
/*****************************************************************************
28
 * Preamble
29
 *****************************************************************************/
30
31
#ifdef HAVE_CONFIG_H
32
# include "config.h"
33
#endif
34
35
#include <vlc_common.h>
36
#include <vlc_arrays.h>
37
#include <vlc_plugin.h>
38
#include <vlc_url.h>
39
40
#include <ctype.h>
41
#include <math.h>
42
#include <assert.h>
43
44
#include <vlc_demux.h>
45
#include <vlc_charset.h>
46
47
/*****************************************************************************
48
 * Module descriptor
49
 *****************************************************************************/
50
static int  Open ( vlc_object_t *p_this );
51
static void Close( vlc_object_t *p_this );
52
53
#define SUB_TYPE_LONGTEXT \
54
    N_("Force the subtitles format. Selecting \"auto\" means autodetection and should always work.")
55
#define SUB_DESCRIPTION_LONGTEXT \
56
    N_("Override the default track description.")
57
58
static const char *const ppsz_sub_type[] =
59
{
60
    "auto", "microdvd", "subrip", "subviewer", "ssa1",
61
    "ssa2-4", "ass", "vplayer", "sami", "dvdsubtitle", "mpl2",
62
    "aqt", "pjs", "mpsub", "jacosub", "psb", "realtext", "dks",
63
    "subviewer1", "sbv"
64
};
65
66
108
vlc_module_begin ()
67
54
    set_shortname( N_("Subtitles"))
68
54
    set_description( N_("Text subtitle parser") )
69
54
    set_capability( "demux", 0 )
70
54
    set_subcategory( SUBCAT_INPUT_DEMUX )
71
54
    add_string( "sub-type", "auto", N_("Subtitle format"),
72
54
                SUB_TYPE_LONGTEXT )
73
54
        change_string_list( ppsz_sub_type, ppsz_sub_type )
74
54
    add_string( "sub-description", NULL, N_("Subtitle description"),
75
54
                SUB_DESCRIPTION_LONGTEXT )
76
54
    set_callbacks( Open, Close )
77
78
54
    add_shortcut( "subtitle" )
79
54
vlc_module_end ()
80
81
/*****************************************************************************
82
 * Prototypes:
83
 *****************************************************************************/
84
enum subtitle_type_e
85
{
86
    SUB_TYPE_UNKNOWN = -1,
87
    SUB_TYPE_MICRODVD,
88
    SUB_TYPE_SUBRIP,
89
    SUB_TYPE_SSA1,
90
    SUB_TYPE_SSA2_4,
91
    SUB_TYPE_ASS,
92
    SUB_TYPE_VPLAYER,
93
    SUB_TYPE_SAMI,
94
    SUB_TYPE_SUBVIEWER, /* SUBVIEWER 2 */
95
    SUB_TYPE_DVDSUBTITLE, /* Mplayer calls it subviewer2 */
96
    SUB_TYPE_MPL2,
97
    SUB_TYPE_AQT,
98
    SUB_TYPE_PJS,
99
    SUB_TYPE_MPSUB,
100
    SUB_TYPE_JACOSUB,
101
    SUB_TYPE_PSB,
102
    SUB_TYPE_RT,
103
    SUB_TYPE_DKS,
104
    SUB_TYPE_SUBVIEW1, /* SUBVIEWER 1 - mplayer calls it subrip09,
105
                         and Gnome subtitles SubViewer 1.0 */
106
    SUB_TYPE_SBV,
107
    SUB_TYPE_SCC,      /* Scenarist Closed Caption */
108
};
109
110
/* In-memory copy of the subtitle file as a list of text lines.
 * Filled by TextLoad() and released by TextUnload(). */
typedef struct
111
{
112
    /* presumably the number of valid entries in line[] -- confirm in TextLoad() */
    size_t  i_line_count;
113
    /* presumably the read cursor used by the parsers -- confirm in TextLoad() */
    size_t  i_line;
114
    /* array of line strings */
    char    **line;
115
} text_t;
116
117
static int  TextLoad( text_t *, stream_t *s );
118
static void TextUnload( text_t * );
119
120
/* One parsed subtitle cue. */
typedef struct
121
{
122
    /* Start date of the cue; compared (scaled by the playback rate) against
     * the next demux date when seeking. */
    vlc_tick_t i_start;
123
    /* Stop date of the cue; the stop date of the last cue is used as the
     * track length. */
    vlc_tick_t i_stop;
124
125
    /* Heap-allocated cue payload, released with free() in Close().
     * May be NULL, in which case the converters produce no block. */
    char    *psz_text;
126
} subtitle_t;
127
128
/* Format description and parser state shared between Open() and the
 * per-format Parse*() functions. */
typedef struct
129
{
130
    /* Forced or autodetected subtitle format. */
    enum subtitle_type_e i_type;
131
    /* Duration of one frame for frame-based formats. Open() defaults it to
     * 40 ms and overrides it from "sub-original-fps" when that is >= 1. */
    vlc_tick_t  i_microsecperframe;
132
133
    /* Heap-allocated header; Open() copies it into the ES extra data and
     * Close() frees it. */
    char        *psz_header; /* SSA */
134
    /* Language found while parsing, if any; Open() hands ownership to the
     * ES format and resets this field to NULL. */
    char        *psz_lang;
135
136
    /* presumably private state of ParseJSS() -- confirm in the parser */
    struct
137
    {
138
        /* Cleared by Open() before parsing starts. */
        bool b_inited;
139
140
        int i_comment;
141
        int i_time_resolution;
142
        int i_time_shift;
143
    } jss;
144
145
    /* presumably private state of ParseMPSub() -- confirm in the parser */
    struct
146
    {
147
        /* Cleared by Open() before parsing starts. */
        bool  b_inited;
148
149
        float f_total;
150
        int i_factor;
151
    } mpsub;
152
153
    /* presumably private state of ParseSami() -- confirm in the parser */
    struct
154
    {
155
        /* Reset to NULL by Open() before parsing starts. */
        const char *psz_start;
156
    } sami;
157
158
} subs_properties_t;
159
160
/* Private state of the subtitle demuxer, allocated in Open() and released
 * in Close(). */
typedef struct
161
{
162
    /* Elementary stream returned by es_out_Add() in Open(). */
    es_out_id_t *es;
163
    /* Initialised to false in Open(). */
    bool        b_slave;
164
    /* Set to true in Open() and again on every DEMUX_SET_TIME. */
    bool        b_first_time;
165
    /* Initialised to false in Open(). */
    bool        b_sorted;
166
167
    /* Playback rate, 1.0 by default; scales cue start dates when the
     * current index is recomputed after a seek. */
    double      f_rate;
168
    /* Date reported by DEMUX_GET_TIME and replaced by DEMUX_SET_TIME. */
    vlc_tick_t  i_next_demux_date;
169
170
    struct
171
    {
172
        /* Parsed cues; grown by Open() in steps of 500 entries. */
        subtitle_t *p_array;
173
        /* Number of valid entries in p_array. */
        size_t      i_count;
174
        /* Index of the cue to resume from, see ResetCurrentIndex(). */
        size_t      i_current;
175
    } subtitles;
176
177
    /* Stop date of the last parsed cue, or 0 when there is none;
     * reported by DEMUX_GET_LENGTH. */
    vlc_tick_t  i_length;
178
179
    /* */
180
    subs_properties_t props;
181
182
    /* Turns a cue into an output block: ToTextBlock() by default,
     * ToEIA608Block() once the SCC format has been detected. */
    block_t * (*pf_convert)( const subtitle_t * );
183
} demux_sys_t;
184
185
static int  ParseMicroDvd   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
186
static int  ParseSubRip     ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
187
static int  ParseSubViewer  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
188
static int  ParseSSA        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
189
static int  ParseVplayer    ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
190
static int  ParseSami       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
191
static int  ParseDVDSubtitle( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
192
static int  ParseMPL2       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
193
static int  ParseAQT        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
194
static int  ParsePJS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
195
static int  ParseMPSub      ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
196
static int  ParseJSS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
197
static int  ParsePSB        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
198
static int  ParseRealText   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
199
static int  ParseDKS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
200
static int  ParseSubViewer1 ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
201
static int  ParseCommonSBV  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
202
static int  ParseSCC        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
203
204
/* Table of supported formats, terminated by an entry whose psz_type_name
 * is NULL. Open() scans it twice: once by name to honour a forced
 * "sub-type", once by type to pick the parser. */
static const struct
205
{
206
    /* Value accepted by the "sub-type" option. */
    const char *psz_type_name;
207
    /* Matching enum subtitle_type_e value. */
    int  i_type;
208
    /* Human readable name, printed in the "detected %s format" debug line. */
    const char *psz_name;
209
    /* Parser called repeatedly by Open(), once per cue, until it returns
     * a non-zero value. */
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
210
} sub_read_subtitle_function [] =
211
{
212
    { "microdvd",   SUB_TYPE_MICRODVD,    "MicroDVD",    ParseMicroDvd },
213
    { "subrip",     SUB_TYPE_SUBRIP,      "SubRIP",      ParseSubRip },
214
    { "subviewer",  SUB_TYPE_SUBVIEWER,   "SubViewer",   ParseSubViewer },
215
    { "ssa1",       SUB_TYPE_SSA1,        "SSA-1",       ParseSSA },
216
    { "ssa2-4",     SUB_TYPE_SSA2_4,      "SSA-2/3/4",   ParseSSA },
217
    { "ass",        SUB_TYPE_ASS,         "SSA/ASS",     ParseSSA },
218
    { "vplayer",    SUB_TYPE_VPLAYER,     "VPlayer",     ParseVplayer },
219
    { "sami",       SUB_TYPE_SAMI,        "SAMI",        ParseSami },
220
    { "dvdsubtitle",SUB_TYPE_DVDSUBTITLE, "DVDSubtitle", ParseDVDSubtitle },
221
    { "mpl2",       SUB_TYPE_MPL2,        "MPL2",        ParseMPL2 },
222
    { "aqt",        SUB_TYPE_AQT,         "AQTitle",     ParseAQT },
223
    { "pjs",        SUB_TYPE_PJS,         "PhoenixSub",  ParsePJS },
224
    { "mpsub",      SUB_TYPE_MPSUB,       "MPSub",       ParseMPSub },
225
    { "jacosub",    SUB_TYPE_JACOSUB,     "JacoSub",     ParseJSS },
226
    { "psb",        SUB_TYPE_PSB,         "PowerDivx",   ParsePSB },
227
    { "realtext",   SUB_TYPE_RT,          "RealText",    ParseRealText },
228
    { "dks",        SUB_TYPE_DKS,         "DKS",         ParseDKS },
229
    { "subviewer1", SUB_TYPE_SUBVIEW1,    "Subviewer 1", ParseSubViewer1 },
230
    { "sbv",        SUB_TYPE_SBV,         "SBV",         ParseCommonSBV },
231
    { "scc",        SUB_TYPE_SCC,         "SCC",         ParseSCC },
232
    /* Sentinel: marks the end of the table. */
    { NULL,         SUB_TYPE_UNKNOWN,     "Unknown",     NULL }
233
};
234
/* When adding support for more formats, be sure to add their file extension
235
 * to src/input/subtitles.c to enable auto-detection.
236
 */
237
238
static int Demux( demux_t * );
239
static int Control( demux_t *, int, va_list );
240
241
static void Fix( demux_t * );
242
static char *get_language_from_url(const char *);
243
244
/* Build a tick value from an hours / minutes / seconds triple.
 * The hour and minute terms are widened to 64 bits before scaling so that
 * large values do not overflow int arithmetic. */
static vlc_tick_t vlc_tick_from_HMS( int h, int m, int s )
{
    const int64_t i_hours_in_sec   = h * INT64_C(3600);
    const int64_t i_minutes_in_sec = m * INT64_C(60);

    return vlc_tick_from_sec( i_hours_in_sec + i_minutes_in_sec + s );
}
248
249
/*****************************************************************************
250
 * Decoder format output function
251
 *****************************************************************************/
252
253
static block_t *ToTextBlock( const subtitle_t *p_subtitle )
254
271k
{
255
271k
    if ( p_subtitle->psz_text == NULL )
256
2.06k
        return NULL;
257
258
269k
    block_t *p_block;
259
269k
    size_t i_len = strlen( p_subtitle->psz_text ) + 1;
260
261
269k
    if( i_len <= 1 || !(p_block = block_Alloc( i_len )) )
262
51.2k
        return NULL;
263
264
218k
    memcpy( p_block->p_buffer, p_subtitle->psz_text, i_len );
265
266
218k
    return p_block;
267
269k
}
268
269
static block_t *ToEIA608Block( const subtitle_t *p_subtitle )
270
1.20k
{
271
1.20k
    if ( p_subtitle->psz_text == NULL )
272
0
        return NULL;
273
274
1.20k
    block_t *p_block;
275
1.20k
    const size_t i_len = strlen( p_subtitle->psz_text );
276
1.20k
    const size_t i_block = (1 + i_len / 5) * 3;
277
278
1.20k
    if( i_len < 4 || !(p_block = block_Alloc( i_block )) )
279
197
        return NULL;
280
281
1.00k
    p_block->i_buffer = 0;
282
283
1.00k
    char *saveptr = NULL;
284
1.00k
    char *psz_tok = strtok_r( p_subtitle->psz_text, " ", &saveptr );
285
1.00k
    unsigned a, b;
286
18.9k
    while( psz_tok &&
287
18.5k
           sscanf( psz_tok, "%2x%2x", &a, &b ) == 2 &&
288
17.9k
           i_block - p_block->i_buffer >= 3 )
289
17.9k
    {
290
17.9k
        uint8_t *p_data = &p_block->p_buffer[p_block->i_buffer];
291
17.9k
        p_data[0] = 0xFC;
292
17.9k
        p_data[1] = a;
293
17.9k
        p_data[2] = b;
294
17.9k
        p_block->i_buffer += 3;
295
17.9k
        psz_tok = strtok_r( NULL, " ", &saveptr );
296
17.9k
    }
297
298
1.00k
    return p_block;
299
1.20k
}
300
301
/*****************************************************************************
302
 * Module initializer
303
 *****************************************************************************/
304
/* Module entry point.
 *
 * Only runs when the demuxer is explicitly forced. It determines the
 * subtitle format (from the "sub-type" variable, otherwise by probing up to
 * 256 lines of the peeked start of the stream), loads the whole file, parses
 * every cue into p_sys->subtitles and registers one SPU elementary stream.
 *
 * Returns VLC_SUCCESS, VLC_ENOMEM on allocation failure, or VLC_EGENERIC
 * when the module is not forced, the stream is too short to probe or the
 * format is not recognized. */
static int Open ( vlc_object_t *p_this )
{
    demux_t        *p_demux = (demux_t*)p_this;
    demux_sys_t    *p_sys;
    es_format_t    fmt;
    float          f_fps;
    char           *psz_type;
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t ) = NULL;

    if( !p_demux->obj.force )
    {
        msg_Dbg( p_demux, "subtitle demux discarded" );
        return VLC_EGENERIC;
    }

    p_demux->pf_demux = Demux;
    p_demux->pf_control = Control;
    p_demux->p_sys = p_sys = malloc( sizeof( demux_sys_t ) );
    if( p_sys == NULL )
        return VLC_ENOMEM;

    p_sys->b_slave = false;
    p_sys->b_first_time = true;
    p_sys->b_sorted = false;
    p_sys->i_next_demux_date = 0;
    p_sys->f_rate = 1.0;

    p_sys->pf_convert = ToTextBlock;

    p_sys->subtitles.i_current= 0;
    p_sys->subtitles.i_count  = 0;
    p_sys->subtitles.p_array  = NULL;

    p_sys->props.psz_header         = NULL;
    p_sys->props.psz_lang           = NULL;
    p_sys->props.i_microsecperframe = VLC_TICK_FROM_MS(40);
    p_sys->props.jss.b_inited       = false;
    p_sys->props.mpsub.b_inited     = false;
    p_sys->props.sami.psz_start     = NULL;

    /* Get the FPS */
    f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" );
    if( f_fps >= 1.f )
    {
        p_sys->props.i_microsecperframe = llroundf( (float)CLOCK_FREQ / f_fps );
        msg_Dbg( p_demux, "Override subtitle fps %f", (double) f_fps );
    }

    /* Get or probe the type */
    p_sys->props.i_type = SUB_TYPE_UNKNOWN;
    psz_type = var_CreateGetString( p_demux, "sub-type" );
    if( psz_type && *psz_type )
    {
        /* A name that is not in the table ("auto" included) leaves the type
         * unknown and triggers autodetection below. */
        for( size_t i = 0; sub_read_subtitle_function[i].psz_type_name != NULL; i++ )
        {
            if( !strcmp( sub_read_subtitle_function[i].psz_type_name,
                         psz_type ) )
            {
                p_sys->props.i_type = sub_read_subtitle_function[i].i_type;
                break;
            }
        }
    }
    free( psz_type );

#ifndef NDEBUG
    const uint64_t i_start_pos = vlc_stream_Tell( p_demux->s );
#endif

    ssize_t i_peek;
    const uint8_t *p_peek;
    if( vlc_stream_Peek( p_demux->s, &p_peek, 16 ) < 16 )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    enum
    {
        UTF8BOM,
        UTF16LE,
        UTF16BE,
        NOBOM,
    } e_bom = NOBOM;
    const char *psz_bom = NULL;

    i_peek = 4096;
    /* Detect Unicode while skipping the UTF-8 Byte Order Mark */
    if( !memcmp( p_peek, "\xEF\xBB\xBF", 3 ) )
    {
        e_bom = UTF8BOM;
        psz_bom = "UTF-8";
    }
    else if( !memcmp( p_peek, "\xFF\xFE", 2 ) )
    {
        e_bom = UTF16LE;
        psz_bom = "UTF-16LE";
        i_peek *= 2; /* peek twice as many bytes for UTF-16 input */
    }
    else if( !memcmp( p_peek, "\xFE\xFF", 2 ) )
    {
        e_bom = UTF16BE;
        psz_bom = "UTF-16BE";
        i_peek *= 2;
    }

    if( e_bom != NOBOM )
        msg_Dbg( p_demux, "detected %s Byte Order Mark", psz_bom );

    i_peek = vlc_stream_Peek( p_demux->s, &p_peek, i_peek );
    if( unlikely(i_peek < 16) )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Probing reads from a memory stream built on the peeked bytes, so the
     * position of the real stream is left untouched. */
    stream_t *p_probestream = NULL;
    if( e_bom != UTF8BOM && e_bom != NOBOM )
    {
        if( i_peek > 16 )
        {
            char *p_outbuf = FromCharset( psz_bom, p_peek, i_peek );
            /* NOTE(review): if vlc_stream_MemoryNew() fails, p_outbuf looks
             * leaked -- confirm who owns the buffer on failure. */
            if( p_outbuf != NULL )
                p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *)p_outbuf,
                                                      strlen( p_outbuf ),
                                                      false ); /* free p_outbuf on release */
        }
    }
    else
    {
        const size_t i_skip = (e_bom == UTF8BOM) ? 3 : 0;
        p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) &p_peek[i_skip],
                                              i_peek - i_skip, true );
    }

    if( p_probestream == NULL )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Probe if unknown type */
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
    {
        int     i_try;
        char    *s = NULL;

        msg_Dbg( p_demux, "autodetecting subtitle format" );
        /* The order of the tests matters: the first matching pattern wins. */
        for( i_try = 0; i_try < 256; i_try++ )
        {
            int i_dummy;
            char p_dummy;

            if( (s = vlc_stream_ReadLine( p_probestream ) ) == NULL )
                break;

            if( strcasestr( s, "<SAMI>" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SAMI;
                break;
            }
            else if( sscanf( s, "{%d}{%d}", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "{%d}{}", &i_dummy ) == 1)
            {
                p_sys->props.i_type = SUB_TYPE_MICRODVD;
                break;
            }
            else if( sscanf( s, "%d:%d:%d,%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d,%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 6 )
            {
                p_sys->props.i_type = SUB_TYPE_SUBRIP;
                break;
            }
            else if( !strncasecmp( s, "!: This is a Sub Station Alpha v1", 33 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA1;
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00+", 18 ) )
            {
                p_sys->props.i_type = SUB_TYPE_ASS;
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00", 17 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
                break;
            }
            else if( !strncasecmp( s, "Dialogue: Marked", 16  ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
                break;
            }
            else if( !strncasecmp( s, "Dialogue:", 9  ) )
            {
                p_sys->props.i_type = SUB_TYPE_ASS;
                break;
            }
            else if( strcasestr( s, "[INFORMATION]" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SUBVIEWER; /* I hope this will work */
                break;
            }
            else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy ) == 7 ||
                     sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2)
            {
                p_sys->props.i_type = SUB_TYPE_JACOSUB;
                break;
            }
            else if( sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy ) == 8 )
            {
                p_sys->props.i_type = SUB_TYPE_SBV;
                break;
            }
            else if( sscanf( s, "%d:%d:%d:", &i_dummy, &i_dummy, &i_dummy ) == 3 ||
                     sscanf( s, "%d:%d:%d ", &i_dummy, &i_dummy, &i_dummy ) == 3 )
            {
                p_sys->props.i_type = SUB_TYPE_VPLAYER;
                break;
            }
            else if( sscanf( s, "{T %d:%d:%d:%d", &i_dummy, &i_dummy,
                             &i_dummy, &i_dummy ) == 4 )
            {
                p_sys->props.i_type = SUB_TYPE_DVDSUBTITLE;
                break;
            }
            else if( sscanf( s, "[%d:%d:%d]%c",
                     &i_dummy, &i_dummy, &i_dummy, &p_dummy ) == 4 )
            {
                p_sys->props.i_type = SUB_TYPE_DKS;
                break;
            }
            else if( strstr( s, "*** START SCRIPT" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SUBVIEW1;
                break;
            }
            else if( sscanf( s, "[%d][%d]", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "[%d][]", &i_dummy ) == 1)
            {
                p_sys->props.i_type = SUB_TYPE_MPL2;
                break;
            }
            else if( sscanf (s, "FORMAT=%d", &i_dummy) == 1 ||
                     ( sscanf (s, "FORMAT=TIM%c", &p_dummy) == 1
                       && p_dummy =='E' ) )
            {
                p_sys->props.i_type = SUB_TYPE_MPSUB;
                break;
            }
            else if( sscanf( s, "-->> %d", &i_dummy) == 1 )
            {
                p_sys->props.i_type = SUB_TYPE_AQT;
                break;
            }
            else if( sscanf( s, "%d,%d,", &i_dummy, &i_dummy ) == 2 )
            {
                p_sys->props.i_type = SUB_TYPE_PJS;
                break;
            }
            else if( sscanf( s, "{%d:%d:%d}",
                                &i_dummy, &i_dummy, &i_dummy ) == 3 )
            {
                p_sys->props.i_type = SUB_TYPE_PSB;
                break;
            }
            else if( strcasestr( s, "<time" ) )
            {
                p_sys->props.i_type = SUB_TYPE_RT;
                break;
            }
            else if( !strncasecmp( s, "WEBVTT",6 ) )
            {
                /* FAIL */
                break;
            }
            else if( !strncasecmp( s, "Scenarist_SCC V1.0", 18 ) )
            {
                /* The matching block converter is selected further down,
                 * together with the ES format. */
                p_sys->props.i_type = SUB_TYPE_SCC;
                break;
            }

            free( s );
            s = NULL;
        }

        /* Line left over from a break above (NULL otherwise). */
        free( s );
    }

    vlc_stream_Delete( p_probestream );

    /* Quit on unknown subtitles */
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
    {
#ifndef NDEBUG
        /* Ensure it will work with non seekable streams */
        assert( i_start_pos == vlc_stream_Tell( p_demux->s ) );
#endif
        msg_Warn( p_demux, "failed to recognize subtitle type" );
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Look the parser up; stop at the table sentinel instead of relying on
     * every type value having an entry. */
    for( size_t i = 0; sub_read_subtitle_function[i].psz_type_name != NULL; i++ )
    {
        if( sub_read_subtitle_function[i].i_type == p_sys->props.i_type )
        {
            msg_Dbg( p_demux, "detected %s format",
                     sub_read_subtitle_function[i].psz_name );
            pf_read = sub_read_subtitle_function[i].pf_read;
            break;
        }
    }
    if( pf_read == NULL )
    {
        msg_Warn( p_demux, "failed to recognize subtitle type" );
        free( p_sys );
        return VLC_EGENERIC;
    }

    msg_Dbg( p_demux, "loading all subtitles..." );

    if( e_bom == UTF8BOM && /* skip BOM */
        vlc_stream_Read( p_demux->s, NULL, 3 ) != 3 )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }

    /* Load the whole file */
    /* NOTE(review): the return value of TextLoad() is ignored -- confirm
     * that txtlines is left in a usable state when it fails. */
    text_t txtlines;
    TextLoad( &txtlines, p_demux->s );

    /* Parse it */
    for( size_t i_max = 0; i_max < SIZE_MAX - 500 * sizeof(subtitle_t); )
    {
        if( p_sys->subtitles.i_count >= i_max )
        {
            /* Grow the array by 500 entries at a time. */
            i_max += 500;
            subtitle_t *p_realloc = realloc( p_sys->subtitles.p_array, sizeof(subtitle_t) * i_max );
            if( p_realloc == NULL )
            {
                TextUnload( &txtlines );
                Close( p_this );
                return VLC_ENOMEM;
            }
            p_sys->subtitles.p_array = p_realloc;
        }

        /* A non-zero return from the parser ends the loop. */
        if( pf_read( VLC_OBJECT(p_demux), &p_sys->props, &txtlines,
                     &p_sys->subtitles.p_array[p_sys->subtitles.i_count],
                     p_sys->subtitles.i_count ) )
            break;

        p_sys->subtitles.i_count++;
    }
    /* Unload */
    TextUnload( &txtlines );

    msg_Dbg(p_demux, "loaded %zu subtitles", p_sys->subtitles.i_count );

    /* *** add subtitle ES *** */
    if( p_sys->props.i_type == SUB_TYPE_SSA1 ||
             p_sys->props.i_type == SUB_TYPE_SSA2_4 ||
             p_sys->props.i_type == SUB_TYPE_ASS )
    {
        Fix( p_demux );
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SSA );
    }
    else if( p_sys->props.i_type == SUB_TYPE_SCC )
    {
        /* Pick the EIA-608 converter from the final type so that a format
         * forced through "sub-type" gets it too, not only an autodetected
         * one. */
        p_sys->pf_convert = ToEIA608Block;
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_CEA608 );
        fmt.subs.cc.i_reorder_depth = -1;
    }
    else
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SUBT );

    p_sys->subtitles.i_current = 0;
    p_sys->i_length = 0;
    if( p_sys->subtitles.i_count > 0 )
        p_sys->i_length = p_sys->subtitles.p_array[p_sys->subtitles.i_count-1].i_stop;

    if( p_sys->props.psz_lang )
    {
        /* Ownership of the string moves to the ES format. */
        fmt.psz_language = p_sys->props.psz_lang;
        p_sys->props.psz_lang = NULL;
        msg_Dbg( p_demux, "detected language '%s' of subtitle: %s", fmt.psz_language,
                 p_demux->psz_location );
    }
    else
    {
        fmt.psz_language = get_language_from_url( p_demux->psz_url );
        if( fmt.psz_language )
            msg_Dbg( p_demux, "selected '%s' as possible filename language substring of subtitle: %s",
                     fmt.psz_language, p_demux->psz_location );
    }

    char *psz_description = var_InheritString( p_demux, "sub-description" );
    if( psz_description && *psz_description )
        fmt.psz_description = psz_description;
    else
        free( psz_description );
    if( p_sys->props.psz_header != NULL &&
       (fmt.p_extra = strdup( p_sys->props.psz_header )) )
    {
        fmt.i_extra = strlen( p_sys->props.psz_header ) + 1;
    }

    fmt.i_id = 0;
    p_sys->es = es_out_Add( p_demux->out, &fmt );
    es_format_Clean( &fmt );
    if( p_sys->es == NULL )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
744
745
/*****************************************************************************
746
 * Close: Close subtitle demux
747
 *****************************************************************************/
748
/* Release everything owned by the demuxer: the text of every parsed cue,
 * the cue array, the format header, the language string and the private
 * state itself. Also used by Open() to unwind on its late error paths. */
static void Close( vlc_object_t *p_this )
{
    demux_t *p_demux = (demux_t*)p_this;
    demux_sys_t *p_sys = p_demux->p_sys;

    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
        free( p_sys->subtitles.p_array[i].psz_text );
    free( p_sys->subtitles.p_array );
    free( p_sys->props.psz_header );
    /* Still set only when Open() failed before handing the string over to
     * the ES format; Open() resets it to NULL after the hand-over. */
    free( p_sys->props.psz_lang );

    free( p_sys );
}
760
761
static void
762
ResetCurrentIndex( demux_t *p_demux )
763
0
{
764
0
    demux_sys_t *p_sys = p_demux->p_sys;
765
0
    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
766
0
    {
767
0
        if( p_sys->subtitles.p_array[i].i_start * p_sys->f_rate >
768
0
            p_sys->i_next_demux_date && i > 0 )
769
0
            break;
770
0
        p_sys->subtitles.i_current = i;
771
0
    }
772
0
}
773
774
/*****************************************************************************
775
 * Control:
776
 *****************************************************************************/
777
/* Control: answer demux queries.
 * Returns VLC_SUCCESS for handled queries, the helper/seek result where
 * the query is forwarded, and VLC_EGENERIC for everything else. */
static int Control( demux_t *p_demux, int i_query, va_list args )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    switch( i_query )
    {
        /* Both capabilities are always reported as available */
        case DEMUX_CAN_SEEK:
        case DEMUX_CAN_CONTROL_RATE:
        {
            bool *pb_can = va_arg( args, bool * );
            *pb_can = true;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_LENGTH:
        {
            vlc_tick_t *pi_length = va_arg( args, vlc_tick_t * );
            *pi_length = p_sys->i_length;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_TIME:
        {
            vlc_tick_t *pi_time = va_arg( args, vlc_tick_t * );
            *pi_time = p_sys->i_next_demux_date;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_TIME:
        {
            /* Re-arm the "first time" flag, then reposition the index */
            p_sys->b_first_time = true;
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t );
            ResetCurrentIndex( p_demux );
            return VLC_SUCCESS;
        }

        case DEMUX_GET_POSITION:
        {
            double *pf_position = va_arg( args, double * );

            if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
            {
                /* Every subtitle has been consumed */
                *pf_position = 1.0;
            }
            else if( p_sys->subtitles.i_count > 0 && p_sys->i_length )
            {
                double f_ratio = p_sys->i_next_demux_date;
                f_ratio /= p_sys->i_length;
                *pf_position = f_ratio;
            }
            else
            {
                *pf_position = 0.0;
            }
            return VLC_SUCCESS;
        }

        case DEMUX_SET_POSITION:
        {
            const double f_position = va_arg( args, double );

            /* Without subtitles or a known length the request is refused */
            if( !p_sys->subtitles.i_count || !p_sys->i_length )
                break;

            vlc_tick_t i_target = VLC_TICK_0 + f_position * p_sys->i_length;
            return demux_Control( p_demux, DEMUX_SET_TIME, i_target );
        }

        case DEMUX_SET_RATE:
        {
            const float *pf_rate = va_arg( args, float * );
            p_sys->f_rate = *pf_rate;
            ResetCurrentIndex( p_demux );
            return VLC_SUCCESS;
        }

        case DEMUX_SET_NEXT_DEMUX_TIME:
        {
            /* From now on the date is imposed from outside */
            p_sys->b_slave = true;
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t ) - VLC_TICK_0;
            return VLC_SUCCESS;
        }

        case DEMUX_CAN_PAUSE:
        case DEMUX_SET_PAUSE_STATE:
        case DEMUX_CAN_CONTROL_PACE:
            return demux_vaControlHelper( p_demux->s, 0, -1, 0, 1, i_query, args );

        /* Explicitly unsupported queries */
        case DEMUX_GET_PTS_DELAY:
        case DEMUX_GET_FPS:
        case DEMUX_GET_META:
        case DEMUX_GET_ATTACHMENTS:
        case DEMUX_GET_TITLE_INFO:
        case DEMUX_HAS_UNSUPPORTED_META:
        case DEMUX_CAN_RECORD:
        default:
            break;
    }

    return VLC_EGENERIC;
}
860
861
/*****************************************************************************
862
 * Demux: Send subtitle to decoder
863
 *****************************************************************************/
864
/* Demux: send to the decoder every subtitle whose rate-scaled start is not
 * after the current demux date. When not slaved, the subtitles are sorted
 * first (see Fix), the PCR is updated and the demux date advances by 125 ms.
 * Returns VLC_DEMUXER_EOF once every subtitle has been consumed. */
static int Demux( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    if ( !p_sys->b_slave )
        Fix( p_demux );

    const vlc_tick_t i_barrier = p_sys->i_next_demux_date;

    for( ; p_sys->subtitles.i_current < p_sys->subtitles.i_count;
           p_sys->subtitles.i_current++ )
    {
        const subtitle_t *p_subtitle =
            &p_sys->subtitles.p_array[p_sys->subtitles.i_current];

        /* Stop at the first subtitle that is not due yet */
        if( !( ( p_subtitle->i_start * p_sys->f_rate ) <= i_barrier ) )
            break;

        if ( !p_sys->b_slave && p_sys->b_first_time )
        {
            es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
            p_sys->b_first_time = false;
        }

        /* Entries with a negative start are skipped, not sent */
        if( p_subtitle->i_start < 0 )
            continue;

        block_t *p_block = p_sys->pf_convert( p_subtitle );
        if( p_block == NULL )
            continue;

        const vlc_tick_t i_pts = VLC_TICK_0 + p_subtitle->i_start * p_sys->f_rate;
        p_block->i_pts = i_pts;
        p_block->i_dts = i_pts;

        /* A length is only set when a valid stop follows the start */
        if( p_subtitle->i_stop != VLC_TICK_INVALID &&
            p_subtitle->i_stop >= p_subtitle->i_start )
        {
            p_block->i_length =
                (p_subtitle->i_stop - p_subtitle->i_start) * p_sys->f_rate;
        }

        es_out_Send( p_demux->out, p_sys->es, p_block );
    }

    if ( !p_sys->b_slave )
    {
        es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
        p_sys->i_next_demux_date += VLC_TICK_FROM_MS(125);
    }

    return ( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
         ? VLC_DEMUXER_EOF : VLC_DEMUXER_SUCCESS;
}
913
914
915
static int subtitle_cmp( const void *first, const void *second )
916
2.70M
{
917
2.70M
    vlc_tick_t result = ((subtitle_t *)(first))->i_start - ((subtitle_t *)(second))->i_start;
918
    /* Return -1, 0 ,1, and not directly subtraction
919
     * as result can be > INT_MAX */
920
2.70M
    return result == 0 ? 0 : result > 0 ? 1 : -1;
921
2.70M
}
922
/*****************************************************************************
923
 * Fix: fix time stamp and order of subtitle
924
 *****************************************************************************/
925
/* Fix: put the subtitles in increasing start order.
 * The sort is performed once; later calls return immediately thanks to
 * the b_sorted flag. */
static void Fix( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    if( !p_sys->b_sorted )
    {
        /* *** fix order (to be sure...) *** */
        qsort( p_sys->subtitles.p_array,
               p_sys->subtitles.i_count,
               sizeof( *p_sys->subtitles.p_array ),
               subtitle_cmp );
        p_sys->b_sorted = true;
    }
}
935
936
/* TextLoad: read every line of the stream into txt.
 *
 * On success txt->line holds txt->i_line_count heap-allocated lines and
 * txt->i_line is 0.
 *
 * Returns VLC_SUCCESS, VLC_EGENERIC when the stream yields no line, or
 * VLC_ENOMEM on allocation failure. On every failure path nothing is left
 * allocated and txt->i_line_count is 0, so TextUnload() and TextGetLine()
 * remain safe to call afterwards. */
static int TextLoad( text_t *txt, stream_t *s )
{
    size_t i_line_max = 500;

    /* init txt */
    txt->i_line_count   = 0;
    txt->i_line         = 0;
    txt->line           = calloc( i_line_max, sizeof( char * ) );
    if( !txt->line )
        return VLC_ENOMEM;

    /* load the complete file */
    for( ;; )
    {
        char *psz = vlc_stream_ReadLine( s );

        if( psz == NULL )
            break;

        txt->line[txt->i_line_count] = psz;
        if( txt->i_line_count + 1 >= i_line_max )
        {
            i_line_max += 100;
            char **p_realloc = realloc( txt->line, i_line_max * sizeof( char * ) );
            if( p_realloc == NULL )
            {
                /* The old array is still valid: release every line stored
                 * so far (including the one just read, hence "<=") and the
                 * array itself instead of leaking them. */
                for( size_t i = 0; i <= txt->i_line_count; i++ )
                    free( txt->line[i] );
                free( txt->line );
                txt->line = NULL;
                txt->i_line_count = 0;
                return VLC_ENOMEM;
            }
            txt->line = p_realloc;
        }
        txt->i_line_count++;
    }

    if( txt->i_line_count == 0 )
    {
        free( txt->line );
        txt->line = NULL; /* do not leave a dangling pointer behind */
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
976
/* TextUnload: free all loaded lines and the line array (only when at least
 * one line is recorded), then reset the read position and the line count. */
static void TextUnload( text_t *txt )
{
    if( txt->i_line_count != 0 )
    {
        char **pp_end = txt->line + txt->i_line_count;

        for( char **pp_line = txt->line; pp_line != pp_end; pp_line++ )
            free( *pp_line );
        free( txt->line );
    }

    txt->i_line_count = 0;
    txt->i_line       = 0;
}
987
988
/* TextGetLine: return the line at the current read position and advance
 * the position, or NULL once all lines have been returned. */
static char *TextGetLine( text_t *txt )
{
    if( txt->i_line < txt->i_line_count )
    {
        char *psz_line = txt->line[txt->i_line];
        txt->i_line++;
        return psz_line;
    }

    return NULL;
}
995
/* TextPreviousLine: step the read position back by one line; does nothing
 * when the position is not greater than zero. */
static void TextPreviousLine( text_t *txt )
{
    const bool b_at_start = !( txt->i_line > 0 );

    if( b_at_start )
        return;

    txt->i_line -= 1;
}
1000
1001
/*****************************************************************************
1002
 * Specific Subtitle function
1003
 *****************************************************************************/
1004
/* ParseMicroDvd:
1005
 *  Format:
1006
 *      {n1}{n2}Line1|Line2|Line3....
1007
 *  where n1 and n2 are the video frame number (n2 can be empty)
1008
 */
1009
static int ParseMicroDvd( vlc_object_t *p_obj, subs_properties_t *p_props,
1010
                          text_t *txt, subtitle_t *p_subtitle,
1011
                          size_t i_idx )
1012
22
{
1013
22
    VLC_UNUSED( i_idx );
1014
22
    char *psz_text;
1015
22
    int  i_start;
1016
22
    int  i_stop;
1017
22
    int  i;
1018
1019
22
    for( ;; )
1020
524k
    {
1021
524k
        const char *s = TextGetLine( txt );
1022
524k
        if( !s )
1023
9
            return VLC_EGENERIC;
1024
1025
524k
        psz_text = malloc( strlen(s) + 1 );
1026
524k
        if( !psz_text )
1027
0
            return VLC_ENOMEM;
1028
1029
524k
        i_start = 0;
1030
524k
        i_stop  = -1;
1031
524k
        if( sscanf( s, "{%d}{}%[^\r\n]", &i_start, psz_text ) == 2 ||
1032
524k
            sscanf( s, "{%d}{%d}%[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1033
13
        {
1034
13
            if( i_start != 1 || i_stop != 1 )
1035
13
                break;
1036
1037
            /* We found a possible setting of the framerate "{1}{1}23.976" */
1038
            /* Check if it's usable, and if the sub-original-fps is not set */
1039
0
            float f_fps = vlc_strtof_c( psz_text, NULL );
1040
0
            if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1041
0
                p_props->i_microsecperframe = llroundf((float)CLOCK_FREQ / f_fps);
1042
0
        }
1043
524k
        free( psz_text );
1044
524k
    }
1045
1046
    /* replace | by \n */
1047
465
    for( i = 0; psz_text[i] != '\0'; i++ )
1048
452
    {
1049
452
        if( psz_text[i] == '|' )
1050
8
            psz_text[i] = '\n';
1051
452
    }
1052
1053
    /* */
1054
13
    p_subtitle->i_start  =  VLC_TICK_0 + i_start * p_props->i_microsecperframe;
1055
13
    p_subtitle->i_stop   = i_stop >= 0 ? (VLC_TICK_0 + i_stop  * p_props->i_microsecperframe) : VLC_TICK_INVALID;
1056
13
    p_subtitle->psz_text = psz_text;
1057
13
    return VLC_SUCCESS;
1058
22
}
1059
1060
/* ParseSubRipSubViewer
1061
 *  Format SubRip
1062
 *      n
1063
 *      h1:m1:s1,d1 --> h2:m2:s2,d2
1064
 *      Line1
1065
 *      Line2
1066
 *      ....
1067
 *      [Empty line]
1068
 *  Format SubViewer v1/v2
1069
 *      h1:m1:s1.d1,h2:m2:s2.d2
1070
 *      Line1[br]Line2
1071
 *      Line3
1072
 *      ...
1073
 *      [empty line]
1074
 *  We ignore line number for SubRip
1075
 */
1076
static int ParseSubRipSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1077
                                 text_t *txt, subtitle_t *p_subtitle,
1078
                                 int (* pf_parse_timing)(subtitle_t *, const char *),
1079
                                 bool b_replace_br )
1080
314
{
1081
314
    VLC_UNUSED(p_obj);
1082
314
    VLC_UNUSED(p_props);
1083
314
    char    *psz_text;
1084
1085
314
    for( ;; )
1086
539k
    {
1087
539k
        const char *s = TextGetLine( txt );
1088
1089
539k
        if( !s )
1090
28
            return VLC_EGENERIC;
1091
1092
539k
        if( pf_parse_timing( p_subtitle, s) == VLC_SUCCESS &&
1093
1.13k
            p_subtitle->i_start < p_subtitle->i_stop )
1094
286
        {
1095
286
            break;
1096
286
        }
1097
539k
    }
1098
1099
    /* Now read text until an empty line */
1100
286
    size_t i_old = 0;
1101
286
    psz_text = NULL;
1102
286
    for( ;; )
1103
1.54k
    {
1104
1.54k
        const char *s = TextGetLine( txt );
1105
1.54k
        size_t i_len;
1106
1107
1.54k
        i_len = s ? strlen( s ) : 0;
1108
1.54k
        if( i_len == 0 )
1109
286
        {
1110
286
            p_subtitle->psz_text = psz_text;
1111
286
            return VLC_SUCCESS;
1112
286
        }
1113
1114
1.26k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1115
1.26k
        if( !psz_text )
1116
0
            return VLC_ENOMEM;
1117
1118
1.26k
        memcpy( &psz_text[i_old], s, i_len );
1119
1.26k
        psz_text[i_old + i_len + 0] = '\n';
1120
1.26k
        psz_text[i_old + i_len + 1] = '\0';
1121
1.26k
        i_old += i_len + 1;
1122
1123
        /* replace [br] by \n */
1124
1.26k
        if( b_replace_br )
1125
0
        {
1126
0
            char *p;
1127
1128
0
            while( ( p = strstr( psz_text, "[br]" ) ) )
1129
0
            {
1130
0
                *p++ = '\n';
1131
0
                memmove( p, &p[3], strlen(&p[3])+1 );
1132
0
                i_old -= 3;
1133
0
            }
1134
0
        }
1135
1.26k
    }
1136
286
}
1137
1138
/* subtitle_ParseSubRipTimingValue
1139
 * Parses SubRip timing value.
1140
 */
1141
/* Parses one timestamp at the start of s, trying in turn "h:m:s,d",
 * "h:m:s.d" and "h:m:s". A form is accepted only when the characters it
 * consumed fit within the first `length` characters of s.
 * Stores the result in *timing_value and returns VLC_SUCCESS, or returns
 * VLC_EGENERIC when no form matches. */
static int subtitle_ParseSubRipTimingValue(vlc_tick_t *timing_value,
                                           const char *s, size_t length)
{
    int h1, m1, s1, d1 = 0;
    int count;
    bool b_parsed;

    b_parsed = sscanf(s, "%d:%d:%d,%d%n", &h1, &m1, &s1, &d1, &count) == 4
            && (size_t)count <= length;

    if (!b_parsed)
        b_parsed = sscanf(s, "%d:%d:%d.%d%n", &h1, &m1, &s1, &d1, &count) == 4
                && (size_t)count <= length;

    if (!b_parsed)
    {
        /* No fractional part: discard whatever the attempts above stored */
        d1 = 0;
        b_parsed = sscanf(s, "%d:%d:%d%n", &h1, &m1, &s1, &count) == 3
                && (size_t)count <= length;
    }

    if (!b_parsed)
        return VLC_EGENERIC;

    (*timing_value) = VLC_TICK_0
        + vlc_tick_from_HMS(h1, m1, s1)
        + VLC_TICK_FROM_MS(d1);

    return VLC_SUCCESS;
}
1170
1171
/* subtitle_ParseSubRipTiming
1172
 * Parses SubRip timing.
1173
 */
1174
/* Parses a "start --> stop" line into p_subtitle->i_start / i_stop.
 * Fails with VLC_EGENERIC when the " --> " separator is missing or is the
 * very beginning of the line; otherwise returns the result of parsing the
 * two timestamps. */
static int subtitle_ParseSubRipTiming( subtitle_t *p_subtitle,
                                       const char *s )
{
    static const char psz_arrow[] = " --> ";

    const char *delimiter = strstr(s, psz_arrow);
    if (delimiter == s || delimiter == NULL)
        return VLC_EGENERIC;

    /* Left-hand value: must fit before the separator */
    const size_t i_left_len = (size_t)(delimiter - s);
    const int ret = subtitle_ParseSubRipTimingValue(&p_subtitle->i_start, s, i_left_len);
    if (ret != VLC_SUCCESS)
        return ret;

    /* Right-hand value: everything after the separator */
    const char *right = delimiter + (sizeof(psz_arrow) - 1);
    return subtitle_ParseSubRipTimingValue(&p_subtitle->i_stop, right, strlen(right));
}
1188
1189
/* ParseSubRip
1190
 */
1191
static int  ParseSubRip( vlc_object_t *p_obj, subs_properties_t *p_props,
1192
                         text_t *txt, subtitle_t *p_subtitle,
1193
                         size_t i_idx )
1194
293
{
1195
293
    VLC_UNUSED( i_idx );
1196
293
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1197
293
                                 &subtitle_ParseSubRipTiming,
1198
293
                                 false );
1199
293
}
1200
1201
/* subtitle_ParseSubViewerTiming
1202
 * Parses SubViewer timing.
1203
 */
1204
/* Parses a "h1:m1:s1.d1,h2:m2:s2.d2" line into p_subtitle->i_start and
 * i_stop. Returns VLC_EGENERIC unless all eight fields are present. */
static int subtitle_ParseSubViewerTiming( subtitle_t *p_subtitle,
                                   const char *s )
{
    /* { hours, minutes, seconds, fraction } for each end of the interval */
    int start[4];
    int stop[4];

    const int i_fields = sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &start[0], &start[1], &start[2], &start[3],
                                 &stop[0], &stop[1], &stop[2], &stop[3] );
    if( i_fields != 8 )
        return VLC_EGENERIC;

    p_subtitle->i_start = vlc_tick_from_HMS( start[0], start[1], start[2] ) +
                          VLC_TICK_FROM_MS( start[3] ) + VLC_TICK_0;

    p_subtitle->i_stop  = vlc_tick_from_HMS( stop[0], stop[1], stop[2] ) +
                          VLC_TICK_FROM_MS( stop[3] ) + VLC_TICK_0;
    return VLC_SUCCESS;
}
1220
1221
/* ParseSubViewer
1222
 */
1223
static int  ParseSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1224
                            text_t *txt, subtitle_t *p_subtitle,
1225
                            size_t i_idx )
1226
21
{
1227
21
    VLC_UNUSED( i_idx );
1228
1229
21
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1230
21
                                 &subtitle_ParseSubViewerTiming,
1231
21
                                 true );
1232
21
}
1233
1234
/* ParseSSA
1235
 */
1236
static int  ParseSSA( vlc_object_t *p_obj, subs_properties_t *p_props,
1237
                      text_t *txt, subtitle_t *p_subtitle,
1238
                      size_t i_idx )
1239
21
{
1240
21
    VLC_UNUSED(p_obj);
1241
21
    size_t header_len = 0;
1242
1243
21
    for( ;; )
1244
485k
    {
1245
485k
        const char *s = TextGetLine( txt );
1246
485k
        int h1, m1, s1, c1, h2, m2, s2, c2;
1247
485k
        char *psz_text, *psz_temp;
1248
485k
        char temp[16];
1249
1250
485k
        if( !s )
1251
15
            return VLC_EGENERIC;
1252
1253
        /* We expect (SSA2-4):
1254
         * Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1255
         * Dialogue: Marked=0,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1256
         *
1257
         * SSA-1 is similar but only has 8 commas up until the subtitle text. Probably the Effect field is no present, but not 100 % sure.
1258
         */
1259
1260
        /* For ASS:
1261
         * Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1262
         * Dialogue: Layer#,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1263
         */
1264
1265
485k
        psz_text = NULL;
1266
485k
        if( s[0] == 'D' || s[0] == 'L' )
1267
146
        {
1268
            /* The output text is always shorter than the input text. */
1269
146
            psz_text = malloc( strlen(s) );
1270
146
            if( !psz_text )
1271
0
                return VLC_ENOMEM;
1272
146
        }
1273
1274
        /* Try to capture the language property */
1275
485k
        if( s[0] == 'L' &&
1276
1
            sscanf( s, "Language: %[^\r\n]", psz_text ) == 1 )
1277
0
        {
1278
0
            free( p_props->psz_lang ); /* just in case of multiple instances */
1279
0
            p_props->psz_lang = psz_text;
1280
0
            psz_text = NULL;
1281
0
        }
1282
485k
        else if( s[0] == 'D' &&
1283
145
            sscanf( s,
1284
145
                    "Dialogue: %15[^,],%d:%d:%d.%d,%d:%d:%d.%d,%[^\r\n]",
1285
145
                    temp,
1286
145
                    &h1, &m1, &s1, &c1,
1287
145
                    &h2, &m2, &s2, &c2,
1288
145
                    psz_text ) == 10 )
1289
6
        {
1290
            /* The dec expects: ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text */
1291
            /* (Layer comes from ASS specs ... it's empty for SSA.) */
1292
6
            if( p_props->i_type == SUB_TYPE_SSA1 )
1293
0
            {
1294
                /* SSA1 has only 8 commas before the text starts, not 9 */
1295
0
                memmove( &psz_text[1], psz_text, strlen(psz_text)+1 );
1296
0
                psz_text[0] = ',';
1297
0
            }
1298
6
            else
1299
6
            {
1300
6
                int i_layer = ( p_props->i_type == SUB_TYPE_ASS ) ? atoi( temp ) : 0;
1301
1302
                /* ReadOrder, Layer, %s(rest of fields) */
1303
6
                if( asprintf( &psz_temp, "%zu,%d,%s", i_idx, i_layer, psz_text ) == -1 )
1304
0
                {
1305
0
                    free( psz_text );
1306
0
                    return VLC_ENOMEM;
1307
0
                }
1308
1309
6
                free( psz_text );
1310
6
                psz_text = psz_temp;
1311
6
            }
1312
1313
6
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1314
6
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1315
6
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
1316
6
                                  VLC_TICK_FROM_MS( c2 * 10 ) + VLC_TICK_0;
1317
6
            p_subtitle->psz_text = psz_text;
1318
6
            return VLC_SUCCESS;
1319
6
        }
1320
485k
        free( psz_text );
1321
1322
        /* All the other stuff we add to the header field */
1323
485k
        if( header_len == 0 && p_props->psz_header )
1324
4
            header_len = strlen( p_props->psz_header );
1325
1326
485k
        size_t s_len = strlen( s );
1327
485k
        p_props->psz_header = realloc_or_free( p_props->psz_header, header_len + s_len + 2 );
1328
485k
        if( !p_props->psz_header )
1329
0
            return VLC_ENOMEM;
1330
485k
        snprintf( p_props->psz_header + header_len, s_len + 2, "%s\n", s );
1331
485k
        header_len += s_len + 1;
1332
485k
    }
1333
21
}
1334
1335
/* ParseVplayer
1336
 *  Format
1337
 *      h:m:s:Line1|Line2|Line3....
1338
 *  or
1339
 *      h:m:s Line1|Line2|Line3....
1340
 */
1341
static int ParseVplayer( vlc_object_t *p_obj, subs_properties_t *p_props,
1342
                         text_t *txt, subtitle_t *p_subtitle,
1343
                         size_t i_idx )
1344
141k
{
1345
141k
    VLC_UNUSED(p_obj);
1346
141k
    VLC_UNUSED(p_props);
1347
141k
    VLC_UNUSED( i_idx );
1348
141k
    char *psz_text;
1349
1350
141k
    for( ;; )
1351
913k
    {
1352
913k
        const char *s = TextGetLine( txt );
1353
913k
        int h1, m1, s1;
1354
1355
913k
        if( !s )
1356
23
            return VLC_EGENERIC;
1357
1358
913k
        psz_text = malloc( strlen( s ) + 1 );
1359
913k
        if( !psz_text )
1360
0
            return VLC_ENOMEM;
1361
1362
913k
        if( sscanf( s, "%d:%d:%d%*c%[^\r\n]",
1363
913k
                    &h1, &m1, &s1, psz_text ) == 4 )
1364
141k
        {
1365
141k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
1366
141k
            p_subtitle->i_stop  = -1;
1367
141k
            break;
1368
141k
        }
1369
771k
        free( psz_text );
1370
771k
    }
1371
1372
    /* replace | by \n */
1373
2.46M
    for( size_t i = 0; psz_text[i] != '\0'; i++ )
1374
2.32M
    {
1375
2.32M
        if( psz_text[i] == '|' )
1376
13
            psz_text[i] = '\n';
1377
2.32M
    }
1378
141k
    p_subtitle->psz_text = psz_text;
1379
141k
    return VLC_SUCCESS;
1380
141k
}
1381
1382
/* ParseSami
1383
 */
1384
/* Case-insensitive search for psz_str, first in psz_start (when not NULL),
 * then in each following line of txt.
 * Returns a pointer just past the match, or NULL when the input is
 * exhausted. */
static const char *ParseSamiSearch( text_t *txt,
                                    const char *psz_start, const char *psz_str )
{
    const char *psz_match = NULL;

    if( psz_start != NULL )
        psz_match = strcasestr( psz_start, psz_str );

    /* Not in the current line: move on to the next ones */
    while( psz_match == NULL )
    {
        const char *psz_line = TextGetLine( txt );
        if( psz_line == NULL )
            return NULL;

        psz_match = strcasestr( psz_line, psz_str );
    }

    return psz_match + strlen( psz_str );
}
1404
static int ParseSami( vlc_object_t *p_obj, subs_properties_t *p_props,
1405
                      text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1406
46
{
1407
46
    VLC_UNUSED(p_obj);
1408
46
    VLC_UNUSED(p_props);
1409
46
    VLC_UNUSED( i_idx );
1410
46
    const char *s;
1411
46
    int64_t i_start;
1412
1413
46
    unsigned int i_text;
1414
46
    char text[8192]; /* Arbitrary but should be long enough */
1415
1416
    /* search "Start=" */
1417
46
    s = ParseSamiSearch( txt, p_props->sami.psz_start, "Start=" );
1418
46
    p_props->sami.psz_start = NULL;
1419
46
    if( !s )
1420
32
        return VLC_EGENERIC;
1421
1422
    /* get start value */
1423
14
    char *psz_end;
1424
14
    i_start = strtol( s, &psz_end, 0 );
1425
14
    s = psz_end;
1426
1427
    /* search <P */
1428
14
    if( !( s = ParseSamiSearch( txt, s, "<P" ) ) )
1429
1
        return VLC_EGENERIC;
1430
1431
    /* search > */
1432
13
    if( !( s = ParseSamiSearch( txt, s, ">" ) ) )
1433
0
        return VLC_EGENERIC;
1434
1435
13
    i_text = 0;
1436
13
    text[0] = '\0';
1437
    /* now get all txt until  a "Start=" line */
1438
13
    for( ;; )
1439
25.7k
    {
1440
25.7k
        char c = '\0';
1441
        /* Search non empty line */
1442
136k
        while( s && *s == '\0' )
1443
110k
            s = TextGetLine( txt );
1444
25.7k
        if( !s )
1445
10
            break;
1446
1447
25.7k
        if( *s == '<' )
1448
3.53k
        {
1449
3.53k
            if( !strncasecmp( s, "<br", 3 ) )
1450
88
            {
1451
88
                c = '\n';
1452
88
            }
1453
3.44k
            else if( strcasestr( s, "Start=" ) )
1454
3
            {
1455
3
                p_props->sami.psz_start = s;
1456
3
                break;
1457
3
            }
1458
3.53k
            s = ParseSamiSearch( txt, s, ">" );
1459
3.53k
        }
1460
22.1k
        else if( !strncmp( s, "&nbsp;", 6 ) )
1461
27
        {
1462
27
            c = ' ';
1463
27
            s += 6;
1464
27
        }
1465
22.1k
        else if( *s == '\t' )
1466
271
        {
1467
271
            c = ' ';
1468
271
            s++;
1469
271
        }
1470
21.9k
        else
1471
21.9k
        {
1472
21.9k
            c = *s;
1473
21.9k
            s++;
1474
21.9k
        }
1475
25.7k
        if( c != '\0' && i_text+1 < sizeof(text) )
1476
22.2k
        {
1477
22.2k
            text[i_text++] = c;
1478
22.2k
            text[i_text] = '\0';
1479
22.2k
        }
1480
25.7k
    }
1481
1482
13
    p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start);
1483
13
    p_subtitle->i_stop  = -1;
1484
13
    p_subtitle->psz_text = strdup( text );
1485
1486
13
    return VLC_SUCCESS;
1487
13
}
1488
1489
/* ParseDVDSubtitle
1490
 *  Format
1491
 *      {T h1:m1:s1:c1
1492
 *      Line1
1493
 *      Line2
1494
 *      ...
1495
 *      }
1496
 * TODO it can have a header
1497
 *      { HEAD
1498
 *          ...
1499
 *          CODEPAGE=...
1500
 *          FORMAT=...
1501
 *          LANG=English
1502
 *      }
1503
 *      LANG support would be cool
1504
 *      CODEPAGE is probably mandatory FIXME
1505
 */
1506
static int ParseDVDSubtitle(vlc_object_t *p_obj, subs_properties_t *p_props,
1507
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1508
25
{
1509
25
    VLC_UNUSED(p_obj);
1510
25
    VLC_UNUSED(p_props);
1511
25
    VLC_UNUSED( i_idx );
1512
25
    char *psz_text;
1513
1514
25
    for( ;; )
1515
21.2k
    {
1516
21.2k
        const char *s = TextGetLine( txt );
1517
21.2k
        int h1, m1, s1, c1;
1518
1519
21.2k
        if( !s )
1520
1
            return VLC_EGENERIC;
1521
1522
21.2k
        if( sscanf( s,
1523
21.2k
                    "{T %d:%d:%d:%d",
1524
21.2k
                    &h1, &m1, &s1, &c1 ) == 4 )
1525
24
        {
1526
24
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1527
24
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1528
24
            p_subtitle->i_stop = -1;
1529
24
            break;
1530
24
        }
1531
21.2k
    }
1532
1533
    /* Now read text until a line containing "}" */
1534
24
    size_t i_old = 0;
1535
24
    psz_text = NULL;
1536
24
    for( ;; )
1537
201k
    {
1538
201k
        const char *s = TextGetLine( txt );
1539
201k
        size_t i_len;
1540
1541
201k
        if( !s )
1542
3
        {
1543
3
            free( psz_text );
1544
3
            return VLC_EGENERIC;
1545
3
        }
1546
1547
201k
        i_len = strlen( s );
1548
201k
        if( i_len == 1 && s[0] == '}')
1549
21
        {
1550
21
            if (psz_text)
1551
14
                psz_text[i_old] = '\0';
1552
21
            p_subtitle->psz_text = psz_text;
1553
21
            return VLC_SUCCESS;
1554
21
        }
1555
1556
201k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1557
201k
        if( !psz_text )
1558
0
            return VLC_ENOMEM;
1559
1560
201k
        memcpy( &psz_text[i_old], s, i_len );
1561
201k
        psz_text[i_old + i_len + 0] = '\n';
1562
201k
        i_old += i_len + 1;
1563
201k
    }
1564
24
}
1565
1566
/* ParseMPL2
1567
 *  Format
1568
 *     [n1][n2]Line1|Line2|Line3...
1569
 *  where n1 and n2 are times in tenths of a second (n2 can be empty)
1570
 */
1571
static int ParseMPL2(vlc_object_t *p_obj, subs_properties_t *p_props,
1572
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1573
33
{
1574
33
    VLC_UNUSED(p_obj);
1575
33
    VLC_UNUSED(p_props);
1576
33
    VLC_UNUSED( i_idx );
1577
33
    char *psz_text;
1578
33
    int i;
1579
1580
33
    for( ;; )
1581
461k
    {
1582
461k
        const char *s = TextGetLine( txt );
1583
461k
        int i_start;
1584
461k
        int i_stop;
1585
1586
461k
        if( !s )
1587
24
            return VLC_EGENERIC;
1588
1589
461k
        psz_text = malloc( strlen(s) + 1 );
1590
461k
        if( !psz_text )
1591
0
            return VLC_ENOMEM;
1592
1593
461k
        i_start = 0;
1594
461k
        i_stop  = -1;
1595
461k
        if( sscanf( s, "[%d][] %[^\r\n]", &i_start, psz_text ) == 2 ||
1596
461k
            sscanf( s, "[%d][%d] %[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1597
9
        {
1598
9
            p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start * 100);
1599
9
            p_subtitle->i_stop  = i_stop >= 0 ? VLC_TICK_0 + VLC_TICK_FROM_MS(i_stop  * 100) : VLC_TICK_INVALID;
1600
9
            break;
1601
9
        }
1602
461k
        free( psz_text );
1603
461k
    }
1604
1605
381k
    for( i = 0; psz_text[i] != '\0'; )
1606
381k
    {
1607
        /* replace | by \n */
1608
381k
        if( psz_text[i] == '|' )
1609
0
            psz_text[i] = '\n';
1610
1611
        /* Remove italic */
1612
381k
        if( psz_text[i] == '/' && ( i == 0 || psz_text[i-1] == '\n' ) )
1613
24
            memmove( &psz_text[i], &psz_text[i+1], strlen(&psz_text[i+1])+1 );
1614
381k
        else
1615
381k
            i++;
1616
381k
    }
1617
9
    p_subtitle->psz_text = psz_text;
1618
9
    return VLC_SUCCESS;
1619
33
}
1620
1621
/* Parses one AQT cue. The first "-->> n" line gives the start, the second
 * one gives the stop and ends the cue; both are frame numbers scaled by
 * p_props->i_microsecperframe. Every other line is appended to the text,
 * followed by '\n'; the cue also ends when a text line was the last line
 * of the input.
 * Returns VLC_SUCCESS, VLC_EGENERIC when the input is already exhausted
 * on a read, VLC_ENOMEM on allocation failure. */
static int ParseAQT(vlc_object_t *p_obj, subs_properties_t *p_props, text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
{
    VLC_UNUSED(p_obj);
    VLC_UNUSED(p_props);
    VLC_UNUSED( i_idx );

    char  *psz_text = NULL;
    size_t i_used = 0;
    bool   b_have_start = false;

    for( ;; )
    {
        const char *psz_line = TextGetLine( txt );
        if( psz_line == NULL )
        {
            free( psz_text );
            return VLC_EGENERIC;
        }

        /* Data Lines */
        int i_frame;
        if( sscanf( psz_line, "-->> %d", &i_frame ) == 1 )
        {
            const vlc_tick_t i_date = VLC_TICK_0 + i_frame * p_props->i_microsecperframe;

            /* We have been too far: end of the subtitle, begin of next */
            if( b_have_start )
            {
                p_subtitle->i_stop = i_date;
                break;
            }

            /* Starting of a subtitle */
            p_subtitle->i_start = i_date;
            b_have_start = true;
            continue;
        }

        /* Text Lines: room for the line, its '\n' and the final '\0' */
        const size_t i_len = strlen( psz_line );
        psz_text = realloc_or_free( psz_text, i_used + i_len + 1 + 1 );
        if( psz_text == NULL )
            return VLC_ENOMEM;

        memcpy( psz_text + i_used, psz_line, i_len );
        i_used += i_len;
        psz_text[i_used++] = '\n';

        /* That was the last line of the file */
        if( txt->i_line == txt->i_line_count )
            break;
    }

    /* The buffer is only terminated here, once complete */
    if( psz_text != NULL )
        psz_text[i_used] = '\0';
    p_subtitle->psz_text = psz_text;
    return VLC_SUCCESS;
}
1680
1681
static int ParsePJS(vlc_object_t *p_obj, subs_properties_t *p_props,
1682
                    text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1683
6
{
1684
6
    VLC_UNUSED(p_obj);
1685
6
    VLC_UNUSED(p_props);
1686
6
    VLC_UNUSED( i_idx );
1687
1688
6
    char *psz_text;
1689
6
    int i;
1690
1691
6
    for( ;; )
1692
519k
    {
1693
519k
        const char *s = TextGetLine( txt );
1694
519k
        int t1, t2;
1695
1696
519k
        if( !s )
1697
5
            return VLC_EGENERIC;
1698
1699
519k
        psz_text = malloc( strlen(s) + 1 );
1700
519k
        if( !psz_text )
1701
0
            return VLC_ENOMEM;
1702
1703
        /* Data Lines */
1704
519k
        if( sscanf (s, "%d,%d,\"%[^\n\r]", &t1, &t2, psz_text ) == 3 )
1705
1
        {
1706
            /* 1/10th of second ? Frame based ? FIXME */
1707
1
            p_subtitle->i_start = VLC_TICK_0 + INT64_C(10) * t1;
1708
1
            p_subtitle->i_stop = VLC_TICK_0 + INT64_C(10) * t2;
1709
            /* Remove latest " */
1710
1
            psz_text[ strlen(psz_text) - 1 ] = '\0';
1711
1712
1
            break;
1713
1
        }
1714
519k
        free( psz_text );
1715
519k
    }
1716
1717
    /* replace | by \n */
1718
83
    for( i = 0; psz_text[i] != '\0'; i++ )
1719
82
    {
1720
82
        if( psz_text[i] == '|' )
1721
0
            psz_text[i] = '\n';
1722
82
    }
1723
1724
1
    p_subtitle->psz_text = psz_text;
1725
1
    msg_Dbg( p_obj, "%s", psz_text );
1726
1
    return VLC_SUCCESS;
1727
6
}
1728
1729
static int ParseMPSub( vlc_object_t *p_obj, subs_properties_t *p_props,
1730
                       text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1731
2.60k
{
1732
2.60k
    VLC_UNUSED( i_idx );
1733
1734
2.60k
    if( !p_props->mpsub.b_inited )
1735
16
    {
1736
16
        p_props->mpsub.f_total = 0.0;
1737
16
        p_props->mpsub.i_factor = 0;
1738
1739
16
        p_props->mpsub.b_inited = true;
1740
16
    }
1741
1742
2.60k
    for( ;; )
1743
676k
    {
1744
676k
        const char *s = TextGetLine( txt );
1745
676k
        if( !s )
1746
14
        {
1747
14
            return VLC_EGENERIC;
1748
14
        }
1749
1750
676k
        if ( *s =='#' || *s == '\0' )
1751
638k
            continue;
1752
1753
        /* Data Lines */
1754
38.6k
        float wait, duration;
1755
38.6k
        if( sscanf( s, "%f %f", &wait, &duration ) == 2 )
1756
2.58k
        {
1757
2.58k
            float f1 = wait;
1758
2.58k
            float f2 = duration;
1759
2.58k
            p_props->mpsub.f_total += f1 * p_props->mpsub.i_factor;
1760
2.58k
            p_subtitle->i_start = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1761
2.58k
            p_props->mpsub.f_total += f2 * p_props->mpsub.i_factor;
1762
2.58k
            p_subtitle->i_stop = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1763
2.58k
            break;
1764
2.58k
        }
1765
1766
36.0k
        if( !strncmp( s, "FORMAT=", strlen("FORMAT=") ) )
1767
608
        {
1768
608
            const char *psz_format = s + strlen( "FORMAT=" );
1769
608
            if( !strncmp( psz_format, "TIME", strlen("TIME") ) && (psz_format[4] == '\0' || psz_format[4] == ' ') )
1770
64
            {
1771
                // FORMAT=TIME may be followed by a comment
1772
64
                p_props->mpsub.i_factor = 100;
1773
64
            }
1774
544
            else
1775
544
            {
1776
544
                float f_fps;
1777
544
                if( sscanf( psz_format, "%f", &f_fps ) == 1 )
1778
436
                {
1779
436
                    if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1780
7
                        var_SetFloat( p_obj, "sub-original-fps", f_fps );
1781
1782
436
                    p_props->mpsub.i_factor = 1;
1783
436
                }
1784
544
            }
1785
608
        }
1786
36.0k
    }
1787
1788
2.58k
    char *psz_text = NULL;
1789
2.58k
    size_t i_old = 0;
1790
2.58k
    for( ;; )
1791
3.42k
    {
1792
3.42k
        const char *s = TextGetLine( txt );
1793
1794
3.42k
        if( !s )
1795
2
        {
1796
2
            free( psz_text );
1797
2
            return VLC_EGENERIC;
1798
2
        }
1799
1800
3.42k
        size_t i_len = strlen( s );
1801
3.42k
        if( i_len == 0 )
1802
2.58k
            break;
1803
1804
838
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1805
838
        if( !psz_text )
1806
0
             return VLC_ENOMEM;
1807
1808
838
        memcpy( &psz_text[i_old], s, i_len );
1809
838
        psz_text[i_old + i_len + 0] = '\n';
1810
838
        i_old += i_len + 1;
1811
838
    }
1812
1813
2.58k
    if (psz_text)
1814
467
        psz_text[i_old] = '\0';
1815
2.58k
    p_subtitle->psz_text = psz_text;
1816
2.58k
    return VLC_SUCCESS;
1817
2.58k
}
1818
1819
static int ParseJSS( vlc_object_t *p_obj, subs_properties_t *p_props,
1820
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1821
123k
{
1822
123k
    VLC_UNUSED( i_idx );
1823
123k
    char         *psz_text, *psz_orig;
1824
123k
    char         *psz_text2, *psz_orig2;
1825
1826
123k
    if( !p_props->jss.b_inited )
1827
37
    {
1828
37
        p_props->jss.i_comment = 0;
1829
37
        p_props->jss.i_time_resolution = 30;
1830
37
        p_props->jss.i_time_shift = 0;
1831
1832
37
        p_props->jss.b_inited = true;
1833
37
    }
1834
1835
    /* Parse the main lines */
1836
123k
    for( ;; )
1837
669k
    {
1838
669k
        const char *s = TextGetLine( txt );
1839
669k
        if( !s )
1840
37
            return VLC_EGENERIC;
1841
1842
669k
        size_t line_length = strlen( s );
1843
669k
        psz_orig = malloc( line_length + 1 );
1844
669k
        if( !psz_orig )
1845
0
            return VLC_ENOMEM;
1846
669k
        psz_text = psz_orig;
1847
1848
        /* Complete time lines */
1849
669k
        int h1, h2, m1, m2, s1, s2, f1, f2;
1850
669k
        if( sscanf( s, "%d:%d:%d.%d %d:%d:%d.%d %[^\n\r]",
1851
669k
                    &h1, &m1, &s1, &f1, &h2, &m2, &s2, &f2, psz_text ) == 9 )
1852
0
        {
1853
0
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 ) +
1854
0
                vlc_tick_from_sec( ( f1 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1855
0
            p_subtitle->i_stop = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 ) +
1856
0
                vlc_tick_from_sec( ( f2 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1857
0
            break;
1858
0
        }
1859
        /* Short time lines */
1860
669k
        else if( sscanf( s, "@%d @%d %[^\n\r]", &f1, &f2, psz_text ) == 3 )
1861
123k
        {
1862
123k
            p_subtitle->i_start = VLC_TICK_0 +
1863
123k
                    vlc_tick_from_sec( (f1 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1864
123k
            p_subtitle->i_stop = VLC_TICK_0 +
1865
123k
                    vlc_tick_from_sec( (f2 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1866
123k
            break;
1867
123k
        }
1868
        /* General Directive lines */
1869
        /* Only TIME and SHIFT are supported so far */
1870
546k
        else if( s[0] == '#' )
1871
2
        {
1872
2
            int h = 0, m =0, sec = 1, f = 1;
1873
2
            unsigned shift = 1;
1874
2
            int inv = 1;
1875
1876
2
            strcpy( psz_text, s );
1877
1878
2
            switch( toupper( (unsigned char)psz_text[1] ) )
1879
2
            {
1880
1
            case 'S':
1881
1
                 shift = isalpha( (unsigned char)psz_text[2] ) ? 6 : 2 ;
1882
1
                 if ( shift > line_length )
1883
1
                     break;
1884
1885
0
                 if( sscanf( &psz_text[shift], "%d", &h ) )
1886
0
                 {
1887
                     /* Negative shifting */
1888
0
                     if( h < 0 )
1889
0
                     {
1890
0
                         h *= -1;
1891
0
                         inv = -1;
1892
0
                     }
1893
1894
0
                     if( sscanf( &psz_text[shift], "%*d:%d", &m ) )
1895
0
                     {
1896
0
                         if( sscanf( &psz_text[shift], "%*d:%*d:%d", &sec ) )
1897
0
                         {
1898
0
                             sscanf( &psz_text[shift], "%*d:%*d:%*d.%d", &f );
1899
0
                         }
1900
0
                         else
1901
0
                         {
1902
0
                             h = 0;
1903
0
                             sscanf( &psz_text[shift], "%d:%d.%d",
1904
0
                                     &m, &sec, &f );
1905
0
                             m *= inv;
1906
0
                         }
1907
0
                     }
1908
0
                     else
1909
0
                     {
1910
0
                         h = m = 0;
1911
0
                         sscanf( &psz_text[shift], "%d.%d", &sec, &f);
1912
0
                         sec *= inv;
1913
0
                     }
1914
0
                     p_props->jss.i_time_shift = ( ( h * INT64_C(3600) + m * INT64_C(60) + sec )
1915
0
                         * p_props->jss.i_time_resolution + f ) * inv;
1916
0
                 }
1917
0
                 break;
1918
1919
0
            case 'T':
1920
0
                shift = isalpha( (unsigned char)psz_text[2] ) ? 8 : 2 ;
1921
0
                if ( shift > line_length )
1922
0
                    break;
1923
1924
0
                sscanf( &psz_text[shift], "%d", &p_props->jss.i_time_resolution );
1925
0
                if( !p_props->jss.i_time_resolution || p_props->jss.i_time_resolution < 0 )
1926
0
                    p_props->jss.i_time_resolution = 30;
1927
0
                break;
1928
2
            }
1929
2
            free( psz_orig );
1930
2
            continue;
1931
2
        }
1932
546k
        else
1933
            /* Unknown type line, probably a comment */
1934
546k
        {
1935
546k
            free( psz_orig );
1936
546k
            continue;
1937
546k
        }
1938
669k
    }
1939
1940
123k
    while( psz_text[ strlen( psz_text ) - 1 ] == '\\' )
1941
255
    {
1942
255
        const char *s2 = TextGetLine( txt );
1943
1944
255
        if( !s2 )
1945
0
        {
1946
0
            free( psz_orig );
1947
0
            return VLC_EGENERIC;
1948
0
        }
1949
1950
255
        size_t i_len = strlen( s2 );
1951
255
        if( i_len == 0 )
1952
0
            break;
1953
1954
255
        size_t i_old = strlen( psz_text );
1955
1956
255
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 );
1957
255
        if( !psz_text )
1958
0
             return VLC_ENOMEM;
1959
1960
255
        psz_orig = psz_text;
1961
255
        strcat( psz_text, s2 );
1962
255
    }
1963
1964
    /* Skip the blanks */
1965
123k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1966
1967
    /* Parse the directives */
1968
123k
    if( isalpha( (unsigned char)*psz_text ) || *psz_text == '[' )
1969
1.48k
    {
1970
96.4k
        while( *psz_text && *psz_text != ' ' )
1971
95.0k
            ++psz_text;
1972
1973
        /* Directives are NOT parsed yet */
1974
        /* This has probably a better place in a decoder ? */
1975
        /* directive = malloc( strlen( psz_text ) + 1 );
1976
           if( sscanf( psz_text, "%s %[^\n\r]", directive, psz_text2 ) == 2 )*/
1977
1.48k
    }
1978
1979
    /* Skip the blanks after directives */
1980
124k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1981
1982
    /* Clean all the lines from inline comments and other stuffs */
1983
123k
    psz_orig2 = calloc( strlen( psz_text) + 1, 1 );
1984
123k
    psz_text2 = psz_orig2;
1985
1986
1.38M
    for( ; *psz_text != '\0' && *psz_text != '\n' && *psz_text != '\r'; )
1987
1.26M
    {
1988
1.26M
        switch( *psz_text )
1989
1.26M
        {
1990
199k
        case '{':
1991
199k
            p_props->jss.i_comment++;
1992
199k
            break;
1993
30
        case '}':
1994
30
            if( p_props->jss.i_comment )
1995
25
            {
1996
25
                p_props->jss.i_comment = 0;
1997
25
                if( (*(psz_text + 1 ) ) == ' ' ) psz_text++;
1998
25
            }
1999
30
            break;
2000
3.04k
        case '~':
2001
3.04k
            if( !p_props->jss.i_comment )
2002
22
            {
2003
22
                *psz_text2 = ' ';
2004
22
                psz_text2++;
2005
22
            }
2006
3.04k
            break;
2007
4.17k
        case ' ':
2008
4.17k
        case '\t':
2009
4.17k
            if( (*(psz_text + 1 ) ) == ' ' || (*(psz_text + 1 ) ) == '\t' )
2010
2
                break;
2011
4.17k
            if( !p_props->jss.i_comment )
2012
1.99k
            {
2013
1.99k
                *psz_text2 = ' ';
2014
1.99k
                psz_text2++;
2015
1.99k
            }
2016
4.17k
            break;
2017
10.4k
        case '\\':
2018
10.4k
            if( (*(psz_text + 1 ) ) == 'n' )
2019
1.43k
            {
2020
1.43k
                *psz_text2 = '\n';
2021
1.43k
                psz_text++;
2022
1.43k
                psz_text2++;
2023
1.43k
                break;
2024
1.43k
            }
2025
8.96k
            if( ( toupper((unsigned char)*(psz_text + 1 ) ) == 'C' ) ||
2026
8.91k
                    ( toupper((unsigned char)*(psz_text + 1 ) ) == 'F' ) )
2027
51
            {
2028
51
                psz_text++;
2029
51
                break;
2030
51
            }
2031
8.91k
            if( (*(psz_text + 1 ) ) == 'B' || (*(psz_text + 1 ) ) == 'b' ||
2032
8.91k
                (*(psz_text + 1 ) ) == 'I' || (*(psz_text + 1 ) ) == 'i' ||
2033
8.91k
                (*(psz_text + 1 ) ) == 'U' || (*(psz_text + 1 ) ) == 'u' ||
2034
8.91k
                (*(psz_text + 1 ) ) == 'D' || (*(psz_text + 1 ) ) == 'N' )
2035
70
            {
2036
70
                psz_text++;
2037
70
                break;
2038
70
            }
2039
8.84k
            if( (*(psz_text + 1 ) ) == '~' || (*(psz_text + 1 ) ) == '{' ||
2040
4.59k
                (*(psz_text + 1 ) ) == '\\' )
2041
4.40k
                psz_text++;
2042
4.43k
            else if( ( *(psz_text + 1 ) == '\r' ||  *(psz_text + 1 ) == '\n' ) &&
2043
0
                     *(psz_text + 1 ) != '\0' )
2044
0
            {
2045
0
                psz_text++;
2046
0
            }
2047
8.84k
            break;
2048
1.04M
        default:
2049
1.04M
            if( !p_props->jss.i_comment )
2050
654k
            {
2051
654k
                *psz_text2 = *psz_text;
2052
654k
                psz_text2++;
2053
654k
            }
2054
1.26M
        }
2055
1.26M
        psz_text++;
2056
1.26M
    }
2057
2058
123k
    p_subtitle->psz_text = psz_orig2;
2059
123k
    msg_Dbg( p_obj, "%s", p_subtitle->psz_text );
2060
123k
    free( psz_orig );
2061
123k
    return VLC_SUCCESS;
2062
123k
}
2063
2064
static int ParsePSB( vlc_object_t *p_obj, subs_properties_t *p_props,
2065
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2066
0
{
2067
0
    VLC_UNUSED(p_obj);
2068
0
    VLC_UNUSED(p_props);
2069
0
    VLC_UNUSED( i_idx );
2070
2071
0
    char *psz_text;
2072
0
    int i;
2073
2074
0
    for( ;; )
2075
0
    {
2076
0
        int h1, m1, s1;
2077
0
        int h2, m2, s2;
2078
0
        const char *s = TextGetLine( txt );
2079
2080
0
        if( !s )
2081
0
            return VLC_EGENERIC;
2082
2083
0
        psz_text = malloc( strlen( s ) + 1 );
2084
0
        if( !psz_text )
2085
0
            return VLC_ENOMEM;
2086
2087
0
        if( sscanf( s, "{%d:%d:%d}{%d:%d:%d}%[^\r\n]",
2088
0
                    &h1, &m1, &s1, &h2, &m2, &s2, psz_text ) == 7 )
2089
0
        {
2090
0
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2091
0
            p_subtitle->i_stop  = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 );
2092
0
            break;
2093
0
        }
2094
0
        free( psz_text );
2095
0
    }
2096
2097
    /* replace | by \n */
2098
0
    for( i = 0; psz_text[i] != '\0'; i++ )
2099
0
    {
2100
0
        if( psz_text[i] == '|' )
2101
0
            psz_text[i] = '\n';
2102
0
    }
2103
0
    p_subtitle->psz_text = psz_text;
2104
0
    return VLC_SUCCESS;
2105
0
}
2106
2107
static vlc_tick_t ParseRealTime( const char *psz )
2108
1.16k
{
2109
1.16k
    if( *psz == '\0' ) return VLC_TICK_0;
2110
1.16k
    int h, m, s, f;
2111
1.16k
    if( sscanf( psz, "%d:%d:%d.%d", &h, &m, &s, &f ) == 4 )
2112
0
    {
2113
0
        return vlc_tick_from_HMS( h, m, s )
2114
0
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2115
0
    }
2116
1.16k
    if( sscanf( psz, "%d:%d.%d", &m, &s, &f ) == 3 )
2117
0
    {
2118
0
        return vlc_tick_from_HMS( 0, m, s )
2119
0
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2120
0
    }
2121
1.16k
    if( sscanf( psz, "%d.%d", &s, &f ) == 2 )
2122
0
    {
2123
0
        return vlc_tick_from_sec( s )
2124
0
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2125
0
    }
2126
1.16k
    if( sscanf( psz, "%d:%d", &m, &s ) == 2 )
2127
485
    {
2128
485
        return vlc_tick_from_HMS( 0, m, s )
2129
485
               + VLC_TICK_0;
2130
485
    }
2131
675
    if( sscanf( psz, "%d", &s ) == 1 )
2132
496
    {
2133
496
        return vlc_tick_from_sec( s )
2134
496
               + VLC_TICK_0;
2135
496
    }
2136
179
    return VLC_TICK_MIN;
2137
675
}
2138
2139
static int ParseRealText( vlc_object_t *p_obj, subs_properties_t *p_props,
2140
                          text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2141
671
{
2142
671
    VLC_UNUSED(p_obj);
2143
671
    VLC_UNUSED(p_props);
2144
671
    VLC_UNUSED( i_idx );
2145
671
    char *psz_text = NULL;
2146
2147
671
    for( ;; )
2148
895k
    {
2149
895k
        const char *s = TextGetLine( txt );
2150
895k
        free( psz_text );
2151
2152
895k
        if( !s )
2153
16
            return VLC_EGENERIC;
2154
2155
895k
        psz_text = malloc( strlen( s ) + 1 );
2156
895k
        if( !psz_text )
2157
0
            return VLC_ENOMEM;
2158
2159
        /* Find the good beginning. This removes extra spaces at the beginning
2160
           of the line.*/
2161
895k
        char *psz_temp = strcasestr( s, "<time");
2162
895k
        if( psz_temp != NULL )
2163
862
        {
2164
862
            char psz_end[12], psz_begin[12];
2165
862
            vlc_tick_t end = VLC_TICK_MIN;
2166
            /* Line has begin and end */
2167
862
            if( sscanf( psz_temp,
2168
862
                  "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\" %*[e|E]nd=\"%11[^\"]%*[^>]%[^\n\r]",
2169
862
                            psz_begin, psz_end, psz_text) == 3 )
2170
505
            {
2171
505
                end = ParseRealTime( psz_end );
2172
505
            }
2173
357
            else if ( sscanf( psz_temp,
2174
357
                                "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\"%*[^>]%[^\n\r]",
2175
357
                                psz_begin, psz_text ) != 2)
2176
                /* Line is not recognized */
2177
207
            {
2178
207
                continue;
2179
207
            }
2180
2181
            /* Get the times */
2182
655
            vlc_tick_t i_time = ParseRealTime( psz_begin );
2183
655
            if (i_time != VLC_TICK_MIN)
2184
642
                p_subtitle->i_start = i_time;
2185
13
            else
2186
13
                p_subtitle->i_start = -1;
2187
2188
655
            if (end != VLC_TICK_MIN)
2189
339
                p_subtitle->i_stop = end;
2190
316
            else
2191
316
                p_subtitle->i_stop = -1;
2192
655
            break;
2193
862
        }
2194
895k
    }
2195
2196
    /* Get the following Lines */
2197
655
    size_t i_old = strlen( psz_text );
2198
655
    for( ;; )
2199
1.20k
    {
2200
1.20k
        const char *s = TextGetLine( txt );
2201
2202
1.20k
        if( !s )
2203
0
        {
2204
0
            free( psz_text );
2205
0
            return VLC_EGENERIC;
2206
0
        }
2207
2208
1.20k
        size_t i_len = strlen( s );
2209
1.20k
        if( i_len == 0 ) break;
2210
2211
1.14k
        if( strcasestr( s, "<time" ) ||
2212
626
            strcasestr( s, "<clear/") )
2213
592
        {
2214
592
            TextPreviousLine( txt );
2215
592
            break;
2216
592
        }
2217
2218
550
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2219
550
        if( !psz_text )
2220
0
            return VLC_ENOMEM;
2221
2222
550
        memcpy( &psz_text[i_old], s, i_len );
2223
550
        psz_text[i_old + i_len + 0] = '\n';
2224
550
        i_old += i_len + 1;
2225
550
    }
2226
2227
655
    psz_text[i_old] = '\0';
2228
    /* Remove the starting ">" that remained after the sscanf */
2229
655
    memmove( &psz_text[0], &psz_text[1], strlen( psz_text ) );
2230
2231
655
    p_subtitle->psz_text = psz_text;
2232
2233
655
    return VLC_SUCCESS;
2234
655
}
2235
2236
static int ParseDKS( vlc_object_t *p_obj, subs_properties_t *p_props,
2237
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2238
1.45k
{
2239
1.45k
    VLC_UNUSED(p_obj);
2240
1.45k
    VLC_UNUSED(p_props);
2241
1.45k
    VLC_UNUSED( i_idx );
2242
2243
1.45k
    char *psz_text;
2244
2245
1.45k
    for( ;; )
2246
811k
    {
2247
811k
        int h1, m1, s1;
2248
811k
        int h2, m2, s2;
2249
811k
        char *s = TextGetLine( txt );
2250
2251
811k
        if( !s )
2252
11
            return VLC_EGENERIC;
2253
2254
811k
        psz_text = malloc( strlen( s ) + 1 );
2255
811k
        if( !psz_text )
2256
0
            return VLC_ENOMEM;
2257
2258
811k
        if( sscanf( s, "[%d:%d:%d]%[^\r\n]",
2259
811k
                    &h1, &m1, &s1, psz_text ) == 4 )
2260
1.44k
        {
2261
1.44k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2262
2263
1.44k
            s = TextGetLine( txt );
2264
1.44k
            if( !s )
2265
0
            {
2266
0
                free( psz_text );
2267
0
                return VLC_EGENERIC;
2268
0
            }
2269
2270
1.44k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2271
1.20k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2272
241
            else
2273
241
                p_subtitle->i_stop  = -1;
2274
1.44k
            break;
2275
1.44k
        }
2276
810k
        free( psz_text );
2277
810k
    }
2278
2279
    /* replace [br] by \n */
2280
1.44k
    char *p;
2281
1.44k
    while( ( p = strstr( psz_text, "[br]" ) ) )
2282
0
    {
2283
0
        *p++ = '\n';
2284
0
        memmove( p, &p[3], strlen(&p[3])+1 );
2285
0
    }
2286
2287
1.44k
    p_subtitle->psz_text = psz_text;
2288
1.44k
    return VLC_SUCCESS;
2289
1.45k
}
2290
2291
static int ParseSubViewer1( vlc_object_t *p_obj, subs_properties_t *p_props,
2292
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2293
2.62k
{
2294
2.62k
    VLC_UNUSED(p_obj);
2295
2.62k
    VLC_UNUSED(p_props);
2296
2.62k
    VLC_UNUSED( i_idx );
2297
2.62k
    char *psz_text;
2298
2299
2.62k
    for( ;; )
2300
1.50M
    {
2301
1.50M
        int h1, m1, s1;
2302
1.50M
        int h2, m2, s2;
2303
1.50M
        char *s = TextGetLine( txt );
2304
2305
1.50M
        if( !s )
2306
12
            return VLC_EGENERIC;
2307
2308
1.50M
        if( sscanf( s, "[%d:%d:%d]", &h1, &m1, &s1 ) == 3 )
2309
2.61k
        {
2310
2.61k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2311
2312
2.61k
            s = TextGetLine( txt );
2313
2.61k
            if( !s )
2314
0
                return VLC_EGENERIC;
2315
2316
2.61k
            psz_text = strdup( s );
2317
2.61k
            if( !psz_text )
2318
0
                return VLC_ENOMEM;
2319
2320
2.61k
            s = TextGetLine( txt );
2321
2.61k
            if( !s )
2322
0
            {
2323
0
                free( psz_text );
2324
0
                return VLC_EGENERIC;
2325
0
            }
2326
2327
2.61k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2328
2.28k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2329
331
            else
2330
331
                p_subtitle->i_stop  = -1;
2331
2332
2.61k
            break;
2333
2.61k
        }
2334
1.50M
    }
2335
2336
2.61k
    p_subtitle->psz_text = psz_text;
2337
2338
2.61k
    return VLC_SUCCESS;
2339
2.62k
}
2340
2341
static int ParseCommonSBV( vlc_object_t *p_obj, subs_properties_t *p_props,
2342
                           text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2343
140
{
2344
140
    VLC_UNUSED(p_obj);
2345
140
    VLC_UNUSED( i_idx );
2346
140
    VLC_UNUSED( p_props );
2347
140
    char        *psz_text;
2348
2349
140
    for( ;; )
2350
161k
    {
2351
161k
        const char *s = TextGetLine( txt );
2352
161k
        int h1 = 0, m1 = 0, s1 = 0, d1 = 0;
2353
161k
        int h2 = 0, m2 = 0, s2 = 0, d2 = 0;
2354
2355
161k
        if( !s )
2356
4
            return VLC_EGENERIC;
2357
2358
161k
        if( sscanf( s,"%d:%d:%d.%d,%d:%d:%d.%d",
2359
161k
                    &h1, &m1, &s1, &d1,
2360
161k
                    &h2, &m2, &s2, &d2 ) == 8 )
2361
136
        {
2362
136
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
2363
136
                                  VLC_TICK_FROM_MS( d1 ) + VLC_TICK_0;
2364
2365
136
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
2366
136
                                  VLC_TICK_FROM_MS( d2 ) + VLC_TICK_0;
2367
136
            if( p_subtitle->i_start < p_subtitle->i_stop )
2368
136
                break;
2369
136
        }
2370
161k
    }
2371
2372
    /* Now read text until an empty line */
2373
136
    size_t i_old = 0;
2374
136
    psz_text = NULL;
2375
136
    for( ;; )
2376
421
    {
2377
421
        const char *s = TextGetLine( txt );
2378
421
        size_t i_len;
2379
2380
421
        i_len = s ? strlen( s ) : 0;
2381
421
        if( i_len <= 0 )
2382
136
        {
2383
136
            if (psz_text)
2384
75
                psz_text[i_old] = '\0';
2385
136
            p_subtitle->psz_text = psz_text;
2386
136
            return VLC_SUCCESS;
2387
136
        }
2388
2389
285
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2390
285
        if( !psz_text )
2391
0
            return VLC_ENOMEM;
2392
2393
285
        memcpy( &psz_text[i_old], s, i_len );
2394
285
        psz_text[i_old + i_len + 0] = '\n';
2395
285
        i_old += i_len + 1;
2396
285
    }
2397
136
}
2398
2399
static int ParseSCC( vlc_object_t *p_obj, subs_properties_t *p_props,
2400
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2401
1.22k
{
2402
1.22k
    VLC_UNUSED(p_obj);
2403
1.22k
    VLC_UNUSED( i_idx );
2404
1.22k
    VLC_UNUSED( p_props );
2405
2406
1.22k
    static const struct rates
2407
1.22k
    {
2408
1.22k
        unsigned val;
2409
1.22k
        vlc_rational_t rate;
2410
1.22k
        bool b_drop_allowed;
2411
1.22k
    } framerates[] = {
2412
1.22k
        { 2398, { 24000, 1001 }, false },
2413
1.22k
        { 2400, { 24, 1 },       false },
2414
1.22k
        { 2500, { 25, 1 },       false },
2415
1.22k
        { 2997, { 30000, 1001 }, true }, /* encoding rate */
2416
1.22k
        { 3000, { 30, 1 },       false },
2417
1.22k
        { 5000, { 50, 1 },       false },
2418
1.22k
        { 5994, { 60000, 1001 }, true },
2419
1.22k
        { 6000, { 60, 1 },       false },
2420
1.22k
    };
2421
1.22k
    const struct rates *p_rate = &framerates[3];
2422
1.22k
    float f_fps = var_GetFloat( p_obj, "sub-original-fps" );
2423
1.22k
    if( f_fps > 1.0 )
2424
0
    {
2425
0
        for( size_t i=0; i<ARRAY_SIZE(framerates); i++ )
2426
0
        {
2427
0
            if( (unsigned)(f_fps * 100) == framerates[i].val )
2428
0
            {
2429
0
                p_rate = &framerates[i];
2430
0
                break;
2431
0
            }
2432
0
        }
2433
0
    }
2434
2435
1.22k
    for( ;; )
2436
558k
    {
2437
558k
        const char *psz_line = TextGetLine( txt );
2438
558k
        if( !psz_line )
2439
20
            return VLC_EGENERIC;
2440
2441
558k
        unsigned h, m, s, f;
2442
558k
        char c;
2443
558k
        if( sscanf( psz_line, "%u:%u:%u%c%u ", &h, &m, &s, &c, &f ) != 5 ||
2444
1.37k
                ( c != ':' && c != ';' ) )
2445
557k
            continue;
2446
2447
        /* convert everything to seconds */
2448
1.20k
        int64_t i_frames = h * INT64_C(3600) + m * INT64_C(60) + s;
2449
2450
1.20k
        if( c == ';' && p_rate->b_drop_allowed ) /* dropframe */
2451
0
        {
2452
            /* convert to frame # to be accurate between inter drop drift
2453
             * of 18 frames see http://andrewduncan.net/timecodes/ */
2454
0
            const unsigned i_mins = h * 60 + m;
2455
0
            i_frames = i_frames * p_rate[+1].rate.num + f
2456
0
                    - (p_rate[+1].rate.den * 2 * (i_mins - i_mins % 10));
2457
0
        }
2458
1.20k
        else
2459
1.20k
        {
2460
            /* convert to frame # at 29.97 */
2461
1.20k
            i_frames = i_frames * framerates[3].rate.num / framerates[3].rate.den + f;
2462
1.20k
        }
2463
1.20k
        p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_sec(i_frames)*
2464
1.20k
                                         p_rate->rate.den / p_rate->rate.num;
2465
1.20k
        p_subtitle->i_stop = -1;
2466
2467
1.20k
        const char *psz_text = strchr( psz_line, '\t' );
2468
1.20k
        if( !psz_text && !(psz_text = strchr( psz_line, ' ' )) )
2469
2
            continue;
2470
2471
1.20k
        if ( psz_text[1] == '\0' )
2472
2
            continue;
2473
2474
1.20k
        p_subtitle->psz_text = strdup( psz_text + 1 );
2475
1.20k
        if( !p_subtitle->psz_text )
2476
0
            return VLC_ENOMEM;
2477
2478
1.20k
        break;
2479
1.20k
    }
2480
2481
1.20k
    return VLC_SUCCESS;
2482
1.22k
}
2483
2484
/* Tries to extract language from common filename patterns PATH/filename.LANG.ext
2485
   and PATH/Subs/x_LANG.ext (where 'x' is an integer). */
2486
static char *get_language_from_url(const char *urlstr)
2487
285
{
2488
285
    vlc_url_t url;
2489
285
    const char *filename = NULL;
2490
285
    char *ret = NULL;
2491
2492
285
    assert(urlstr != NULL);
2493
2494
285
    if (vlc_UrlParse(&url, urlstr) != 0)
2495
0
    {
2496
0
        vlc_UrlClean(&url);
2497
0
        return NULL;
2498
0
    }
2499
285
    if (url.psz_path != NULL)
2500
0
        filename = strrchr(url.psz_path, '/');
2501
285
    if (filename != NULL) {
2502
0
        filename++; // skip forward slash
2503
2504
0
        const char *ext = strrchr(filename, '.');
2505
2506
0
        if (ext != NULL) {
2507
            /* Get string between last two periods, hopefully the language. */
2508
0
            const char *lang = memrchr(filename, '.', ext - filename);
2509
2510
            /* Otherwise try string after last underscore. */
2511
0
            if (lang == NULL)
2512
0
                lang = memrchr(filename, '_', ext - filename);
2513
2514
0
            if (lang != NULL) {
2515
0
                lang++; // skip period or underscore
2516
0
                ret = strndup(lang, ext - lang);
2517
0
            }
2518
0
       }
2519
0
    }
2520
2521
285
    vlc_UrlClean(&url);
2522
285
    return ret;
2523
285
}
2524
2525
#ifdef ENABLE_TEST
2526
static void test_subtitle_ParseSubRipTimingValue(void)
2527
{
2528
    fprintf(stderr, "\n# %s:\n", __func__);
2529
2530
    struct test_timing_value
2531
    {
2532
        const char *str;
2533
        vlc_tick_t value;
2534
    };
2535
2536
    static const struct test_timing_value timing_values_success[] =
2537
    {
2538
        { "0:0:0,0",        VLC_TICK_0 },
2539
        { "0:0:0.0",        VLC_TICK_0 },
2540
        { "0:0:0",          VLC_TICK_0 },
2541
    };
2542
2543
    struct test_sized_timing_value
2544
    {
2545
        const char *str;
2546
        vlc_tick_t value;
2547
        size_t length;
2548
    };
2549
2550
    static const struct test_sized_timing_value sized_timing_values_success[] =
2551
    {
2552
        { "0:0:0,1",        VLC_TICK_0, strlen("0:0:0") },
2553
        { "0:0:0.1",        VLC_TICK_0, strlen("0:0:0") },
2554
    };
2555
2556
    static const char *timing_values_fail[] =
2557
    {
2558
        "0:0",
2559
        "0",
2560
    };
2561
2562
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2563
    {
2564
        fprintf(stderr, "Checking that %s parses into %" PRId64 "\n",
2565
                timing_values_success[i].str, timing_values_success[i].value);
2566
2567
        vlc_tick_t value;
2568
        int ret = subtitle_ParseSubRipTimingValue(&value,
2569
                timing_values_success[i].str,
2570
                strlen(timing_values_success[i].str));
2571
        fprintf(stderr, " -> %" PRId64 "\n", value);
2572
        assert(ret == VLC_SUCCESS);
2573
        assert(value == timing_values_success[i].value);
2574
    }
2575
2576
    for (size_t i=0; i<ARRAY_SIZE(sized_timing_values_success); ++i)
2577
    {
2578
        fprintf(stderr, "Checking that %s (length=%zu) parses into %" PRId64 "\n",
2579
                sized_timing_values_success[i].str,
2580
                sized_timing_values_success[i].length,
2581
                sized_timing_values_success[i].value);
2582
2583
        vlc_tick_t value;
2584
        int ret = subtitle_ParseSubRipTimingValue(&value,
2585
                sized_timing_values_success[i].str,
2586
                sized_timing_values_success[i].length);
2587
        assert(ret == VLC_SUCCESS);
2588
        fprintf(stderr, " -> %" PRId64 "\n", value);
2589
        assert(value == sized_timing_values_success[i].value);
2590
    }
2591
2592
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2593
    {
2594
        fprintf(stderr, "Checking that %s fails to parse\n",
2595
                timing_values_fail[i]);
2596
        vlc_tick_t value;
2597
        int ret = subtitle_ParseSubRipTimingValue(&value,
2598
                timing_values_fail[i], strlen(timing_values_fail[i]));
2599
        (void)value;
2600
        assert(ret != VLC_SUCCESS);
2601
    }
2602
2603
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2604
    {
2605
        fprintf(stderr, "Checking that %s fails to parse\n",
2606
                timing_values_fail[i]);
2607
        vlc_tick_t value;
2608
        int ret = subtitle_ParseSubRipTimingValue(&value,
2609
                timing_values_fail[i], strlen(timing_values_fail[i]));
2610
        (void)value;
2611
        assert(ret != VLC_SUCCESS);
2612
    }
2613
}
2614
2615
static void test_subtitle_ParseSubRipTiming(void)
2616
{
2617
    fprintf(stderr, "\n# %s:\n", __func__);
2618
2619
    struct test_timing_value
2620
    {
2621
        const char *str;
2622
        vlc_tick_t left;
2623
        vlc_tick_t right;
2624
    };
2625
2626
    static const struct test_timing_value timing_values_success[] =
2627
    {
2628
        { "0:0:0,0 --> 0:0:0,0",        VLC_TICK_0,     VLC_TICK_0 },
2629
        { "0:0:0.0 --> 0:0:0.0",        VLC_TICK_0,     VLC_TICK_0 },
2630
        { "0:0:0   --> 0:0:0",          VLC_TICK_0,     VLC_TICK_0 },
2631
    };
2632
2633
    static const char *timing_values_fail[] =
2634
    {
2635
        "0:0 --> 0:0",
2636
        "0:0 --> 0:0:0,0",
2637
        "0:0:0,0 --> 0:0",
2638
        "0 -> 0",
2639
    };
2640
2641
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2642
    {
2643
        fprintf(stderr, "Checking that %s parses into %" PRId64 " --> %" PRId64 "\n",
2644
                timing_values_success[i].str,
2645
                timing_values_success[i].left,
2646
                timing_values_success[i].right);
2647
2648
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2649
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_success[i].str);
2650
        fprintf(stderr, " -> %" PRId64 " --> %" PRId64 "\n", sub.i_start, sub.i_stop);
2651
        assert(ret == VLC_SUCCESS);
2652
        assert(sub.i_start == timing_values_success[i].left);
2653
        assert(sub.i_stop == timing_values_success[i].right);
2654
    }
2655
2656
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2657
    {
2658
        fprintf(stderr, "Checking that %s fails to parse\n",
2659
                timing_values_fail[i]);
2660
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2661
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_fail[i]);
2662
        (void)sub;
2663
        assert(ret != VLC_SUCCESS);
2664
    }
2665
}
2666
2667
/* Entry point of the ENABLE_TEST build: runs the SubRip timing self-tests in
 * sequence. The tests report failures through assert(), so returning 0 here
 * means every test function ran to completion. */
int main(int argc, char **argv)
{
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    test_subtitle_ParseSubRipTimingValue();
    test_subtitle_ParseSubRipTiming();

    return 0;
}
2675
#endif