Coverage Report

Created: 2026-02-05 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vlc/modules/demux/subtitle.c
Line
Count
Source
1
/*****************************************************************************
2
 * subtitle.c: Demux for subtitle text files.
3
 *****************************************************************************
4
 * Copyright (C) 1999-2007 VLC authors and VideoLAN
5
 * Copyright (C) 2023      Videolabs
6
 *
7
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Derk-Jan Hartman <hartman at videolan dot org>
9
 *          Jean-Baptiste Kempf <jb@videolan.org>
10
 *          Alexandre Janniaux <ajanni@videolabs.io>
11
 *
12
 * This program is free software; you can redistribute it and/or modify it
13
 * under the terms of the GNU Lesser General Public License as published by
14
 * the Free Software Foundation; either version 2.1 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
 * GNU Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public License
23
 * along with this program; if not, write to the Free Software Foundation,
24
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25
 *****************************************************************************/
26
27
/*****************************************************************************
28
 * Preamble
29
 *****************************************************************************/
30
31
#ifdef HAVE_CONFIG_H
32
# include "config.h"
33
#endif
34
35
#include <vlc_common.h>
36
#include <vlc_arrays.h>
37
#include <vlc_plugin.h>
38
#include <vlc_url.h>
39
40
#include <ctype.h>
41
#include <math.h>
42
#include <assert.h>
43
44
#include <vlc_demux.h>
45
#include <vlc_charset.h>
46
47
/*****************************************************************************
48
 * Module descriptor
49
 *****************************************************************************/
50
/* Module entry points, defined further down and registered through
 * set_callbacks() in the descriptor below. */
static int  Open ( vlc_object_t *p_this );
static void Close( vlc_object_t *p_this );

/* Long help texts of the "sub-type" and "sub-description" options. */
#define SUB_TYPE_LONGTEXT \
    N_("Force the subtitles format. Selecting \"auto\" means autodetection and should always work.")
#define SUB_DESCRIPTION_LONGTEXT \
    N_("Override the default track description.")

/* Choices offered for the "sub-type" option. The same array is handed to
 * change_string_list() twice, i.e. as both of its list arguments.
 * Open() compares the configured string with the psz_type_name column of
 * sub_read_subtitle_function[]; "auto" has no entry there, so it leaves the
 * type unknown and triggers probing.
 * NOTE(review): "scc" has a parser table entry but is not listed here --
 * confirm whether that omission is intentional. */
static const char *const ppsz_sub_type[] =
{
    "auto", "microdvd", "subrip", "subviewer", "ssa1",
    "ssa2-4", "ass", "vplayer", "sami", "dvdsubtitle", "mpl2",
    "aqt", "pjs", "mpsub", "jacosub", "psb", "realtext", "dks",
    "subviewer1", "sbv"
};

/* Module descriptor: declares the "demux" capability (second argument 0),
 * the "sub-type" and "sub-description" string options, the Open/Close
 * callbacks and the "subtitle" shortcut. Open() itself refuses to run unless
 * p_demux->obj.force is set. */
vlc_module_begin ()
    set_shortname( N_("Subtitles"))
    set_description( N_("Text subtitle parser") )
    set_capability( "demux", 0 )
    set_subcategory( SUBCAT_INPUT_DEMUX )
    add_string( "sub-type", "auto", N_("Subtitle format"),
                SUB_TYPE_LONGTEXT )
        change_string_list( ppsz_sub_type, ppsz_sub_type )
    add_string( "sub-description", NULL, N_("Subtitle description"),
                SUB_DESCRIPTION_LONGTEXT )
    set_callbacks( Open, Close )

    add_shortcut( "subtitle" )
vlc_module_end ()
80
81
/*****************************************************************************
82
 * Prototypes:
83
 *****************************************************************************/
84
/* Identifier of each subtitle text format known to this demuxer.
 * SUB_TYPE_UNKNOWN (-1) means "not determined yet"; Open() starts from it and
 * gives up if it is still set after probing. Every other value is matched
 * against the i_type column of sub_read_subtitle_function[]. */
enum subtitle_type_e
{
    SUB_TYPE_UNKNOWN = -1,
    SUB_TYPE_MICRODVD,
    SUB_TYPE_SUBRIP,
    SUB_TYPE_SSA1,
    SUB_TYPE_SSA2_4,
    SUB_TYPE_ASS,
    SUB_TYPE_VPLAYER,
    SUB_TYPE_SAMI,
    SUB_TYPE_SUBVIEWER, /* SUBVIEWER 2 */
    SUB_TYPE_DVDSUBTITLE, /* Mplayer calls it subviewer2 */
    SUB_TYPE_MPL2,
    SUB_TYPE_AQT,
    SUB_TYPE_PJS,
    SUB_TYPE_MPSUB,
    SUB_TYPE_JACOSUB,
    SUB_TYPE_PSB,
    SUB_TYPE_RT,
    SUB_TYPE_DKS,
    SUB_TYPE_SUBVIEW1, /* SUBVIEWER 1 - mplayer calls it subrip09,
                         and Gnome subtitles SubViewer 1.0 */
    SUB_TYPE_SBV,
    SUB_TYPE_SCC,      /* Scenarist Closed Caption */
};
109
110
/* Line buffer handed to the format parsers. Open() fills it with TextLoad()
 * from the demux stream and releases it with TextUnload() once parsing is
 * done. */
typedef struct
{
    size_t  i_line_count;   /* presumably the number of entries in line[] -- TODO confirm in TextLoad() */
    size_t  i_line;         /* presumably the read cursor into line[] -- TODO confirm */
    char    **line;         /* array of line strings */
} text_t;

static int  TextLoad( text_t *, stream_t *s );
static void TextUnload( text_t * );
119
120
/* One parsed subtitle cue. */
typedef struct
{
    vlc_tick_t i_start;     /* cue start time */
    vlc_tick_t i_stop;      /* cue end time; the last cue's value becomes the demux length in Open() */

    char    *psz_text;      /* cue payload, released with free() in Close(); may be NULL,
                             * in which case the block converters return NULL */
} subtitle_t;
127
128
/* Parsing state shared between Open() and the per-format parsers (every
 * parser receives a pointer to it). */
typedef struct
{
    enum subtitle_type_e i_type;        /* detected or forced format */
    vlc_tick_t  i_microsecperframe;     /* frame duration; defaults to 40 ms in Open() and is
                                         * replaced when "sub-original-fps" is >= 1 */

    char        *psz_header; /* SSA */  /* copied into the ES extra data by Open(), freed in Close() */
    char        *psz_lang;              /* if set, Open() moves it into fmt.psz_language */

    /* State of the JacoSub parser (presumably; named after ParseJSS -- TODO confirm) */
    struct
    {
        bool b_inited;

        int i_comment;
        int i_time_resolution;
        int i_time_shift;
    } jss;

    /* State of the MPSub parser (presumably -- TODO confirm) */
    struct
    {
        bool  b_inited;

        float f_total;
        int i_factor;
    } mpsub;

    /* State of the SAMI parser (presumably -- TODO confirm) */
    struct
    {
        const char *psz_start;
    } sami;

} subs_properties_t;
159
160
/* Private state of one subtitle demuxer instance (p_demux->p_sys). Allocated
 * in Open(), released in Close(). */
typedef struct
{
    es_out_id_t *es;                /* the single subtitle ES created by Open() */
    bool        b_slave;            /* initialised to false in Open() */
    bool        b_first_time;       /* set to true by Open() and again on DEMUX_SET_TIME */
    bool        b_sorted;           /* initialised to false in Open() */

    double      f_rate;             /* multiplier applied to cue start times in ResetCurrentIndex(); 1.0 by default */
    vlc_tick_t  i_next_demux_date;  /* value reported for DEMUX_GET_TIME and stored by DEMUX_SET_TIME */

    /* Every cue of the file, loaded up front by Open() */
    struct
    {
        subtitle_t *p_array;        /* heap array; capacity grows by steps of 500 entries */
        size_t      i_count;        /* number of valid entries in p_array */
        size_t      i_current;      /* index of the current cue, see ResetCurrentIndex() */
    } subtitles;

    vlc_tick_t  i_length;           /* i_stop of the last cue, or 0 when there are no cues */

    /* */
    subs_properties_t props;

    /* Converts a cue into a block: ToTextBlock by default, ToEIA608Block for SCC */
    block_t * (*pf_convert)( const subtitle_t * );
} demux_sys_t;
184
185
/* Per-format cue readers. Open() calls the selected one repeatedly, passing
 * the slot to fill and its index, and stops loading at the first non-zero
 * return value. */
static int  ParseMicroDvd   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSubRip     ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSubViewer  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSSA        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseVplayer    ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSami       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseDVDSubtitle( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseMPL2       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseAQT        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParsePJS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseMPSub      ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseJSS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParsePSB        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseRealText   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseDKS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSubViewer1 ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseCommonSBV  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSCC        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );

/* Format table, terminated by an entry whose psz_type_name is NULL.
 *  - psz_type_name: value accepted by the "sub-type" option
 *  - i_type:        matching enum subtitle_type_e value
 *  - psz_name:      name printed in the "detected %s format" debug message
 *  - pf_read:       cue reader for the format */
static const struct
{
    const char *psz_type_name;
    int  i_type;
    const char *psz_name;
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
} sub_read_subtitle_function [] =
{
    { "microdvd",   SUB_TYPE_MICRODVD,    "MicroDVD",    ParseMicroDvd },
    { "subrip",     SUB_TYPE_SUBRIP,      "SubRIP",      ParseSubRip },
    { "subviewer",  SUB_TYPE_SUBVIEWER,   "SubViewer",   ParseSubViewer },
    { "ssa1",       SUB_TYPE_SSA1,        "SSA-1",       ParseSSA },
    { "ssa2-4",     SUB_TYPE_SSA2_4,      "SSA-2/3/4",   ParseSSA },
    { "ass",        SUB_TYPE_ASS,         "SSA/ASS",     ParseSSA },
    { "vplayer",    SUB_TYPE_VPLAYER,     "VPlayer",     ParseVplayer },
    { "sami",       SUB_TYPE_SAMI,        "SAMI",        ParseSami },
    { "dvdsubtitle",SUB_TYPE_DVDSUBTITLE, "DVDSubtitle", ParseDVDSubtitle },
    { "mpl2",       SUB_TYPE_MPL2,        "MPL2",        ParseMPL2 },
    { "aqt",        SUB_TYPE_AQT,         "AQTitle",     ParseAQT },
    { "pjs",        SUB_TYPE_PJS,         "PhoenixSub",  ParsePJS },
    { "mpsub",      SUB_TYPE_MPSUB,       "MPSub",       ParseMPSub },
    { "jacosub",    SUB_TYPE_JACOSUB,     "JacoSub",     ParseJSS },
    { "psb",        SUB_TYPE_PSB,         "PowerDivx",   ParsePSB },
    { "realtext",   SUB_TYPE_RT,          "RealText",    ParseRealText },
    { "dks",        SUB_TYPE_DKS,         "DKS",         ParseDKS },
    { "subviewer1", SUB_TYPE_SUBVIEW1,    "Subviewer 1", ParseSubViewer1 },
    { "sbv",        SUB_TYPE_SBV,         "SBV",         ParseCommonSBV },
    { "scc",        SUB_TYPE_SCC,         "SCC",         ParseSCC },
    { NULL,         SUB_TYPE_UNKNOWN,     "Unknown",     NULL }
};
234
/* When adding support for more formats, be sure to add their file extension
235
 * to src/input/subtitles.c to enable auto-detection.
236
 */
237
238
static int Demux( demux_t * );
239
static int Control( demux_t *, int, va_list );
240
241
static void Fix( demux_t * );
242
static char *get_language_from_url(const char *);
243
244
/* Build a vlc_tick_t from an hours / minutes / seconds triple. The sum is
 * formed in 64-bit arithmetic so that large hour counts cannot overflow int. */
static vlc_tick_t vlc_tick_from_HMS( int h, int m, int s )
{
    const int64_t i_total_sec = INT64_C(3600) * h + INT64_C(60) * m + s;

    return vlc_tick_from_sec( i_total_sec );
}
248
249
/*****************************************************************************
250
 * Decoder format output function
251
 *****************************************************************************/
252
253
static block_t *ToTextBlock( const subtitle_t *p_subtitle )
254
230k
{
255
230k
    if ( p_subtitle->psz_text == NULL )
256
4.84k
        return NULL;
257
258
225k
    block_t *p_block;
259
225k
    size_t i_len = strlen( p_subtitle->psz_text ) + 1;
260
261
225k
    if( i_len <= 1 || !(p_block = block_Alloc( i_len )) )
262
27.5k
        return NULL;
263
264
197k
    memcpy( p_block->p_buffer, p_subtitle->psz_text, i_len );
265
266
197k
    return p_block;
267
225k
}
268
269
static block_t *ToEIA608Block( const subtitle_t *p_subtitle )
270
5.36k
{
271
5.36k
    if ( p_subtitle->psz_text == NULL )
272
0
        return NULL;
273
274
5.36k
    block_t *p_block;
275
5.36k
    const size_t i_len = strlen( p_subtitle->psz_text );
276
5.36k
    const size_t i_block = (1 + i_len / 5) * 3;
277
278
5.36k
    if( i_len < 4 || !(p_block = block_Alloc( i_block )) )
279
300
        return NULL;
280
281
5.06k
    p_block->i_buffer = 0;
282
283
5.06k
    char *saveptr = NULL;
284
5.06k
    char *psz_tok = strtok_r( p_subtitle->psz_text, " ", &saveptr );
285
5.06k
    unsigned a, b;
286
72.7k
    while( psz_tok &&
287
70.4k
           sscanf( psz_tok, "%2x%2x", &a, &b ) == 2 &&
288
67.6k
           i_block - p_block->i_buffer >= 3 )
289
67.6k
    {
290
67.6k
        uint8_t *p_data = &p_block->p_buffer[p_block->i_buffer];
291
67.6k
        p_data[0] = 0xFC;
292
67.6k
        p_data[1] = a;
293
67.6k
        p_data[2] = b;
294
67.6k
        p_block->i_buffer += 3;
295
67.6k
        psz_tok = strtok_r( NULL, " ", &saveptr );
296
67.6k
    }
297
298
5.06k
    return p_block;
299
5.36k
}
300
301
/*****************************************************************************
302
 * Module initializer
303
 *****************************************************************************/
304
/*
 * Open: probe and load a text subtitle file.
 *
 * Steps:
 *  1. refuse to run unless the module was explicitly requested (obj.force);
 *  2. allocate and initialise the private state;
 *  3. take the format from the "sub-type" option, or probe up to 256 lines of
 *     a peeked copy of the stream start (converted from UTF-16 when such a
 *     BOM is found);
 *  4. load the whole stream and parse every cue with the format's reader;
 *  5. create the subtitle ES (SSA, CEA-608 or plain text codec).
 *
 * Returns VLC_SUCCESS, VLC_ENOMEM on allocation failure, or VLC_EGENERIC when
 * the stream is too short, the format is not recognised, or the ES cannot be
 * created. On failure the private state has been released.
 */
static int Open ( vlc_object_t *p_this )
{
    demux_t        *p_demux = (demux_t*)p_this;
    demux_sys_t    *p_sys;
    es_format_t    fmt;
    float          f_fps;
    char           *psz_type;
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t ) = NULL;

    if( !p_demux->obj.force )
    {
        msg_Dbg( p_demux, "subtitle demux discarded" );
        return VLC_EGENERIC;
    }

    p_demux->pf_demux = Demux;
    p_demux->pf_control = Control;
    p_demux->p_sys = p_sys = malloc( sizeof( demux_sys_t ) );
    if( p_sys == NULL )
        return VLC_ENOMEM;

    p_sys->b_slave = false;
    p_sys->b_first_time = true;
    p_sys->b_sorted = false;
    p_sys->i_next_demux_date = 0;
    p_sys->f_rate = 1.0;

    p_sys->pf_convert = ToTextBlock;

    p_sys->subtitles.i_current= 0;
    p_sys->subtitles.i_count  = 0;
    p_sys->subtitles.p_array  = NULL;

    p_sys->props.psz_header         = NULL;
    p_sys->props.psz_lang           = NULL;
    p_sys->props.i_microsecperframe = VLC_TICK_FROM_MS(40);
    p_sys->props.jss.b_inited       = false;
    p_sys->props.mpsub.b_inited     = false;
    p_sys->props.sami.psz_start     = NULL;

    /* Get the FPS */
    f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" );
    if( f_fps >= 1.f )
    {
        p_sys->props.i_microsecperframe = llroundf( (float)CLOCK_FREQ / f_fps );
        msg_Dbg( p_demux, "Override subtitle fps %f", (double) f_fps );
    }

    /* Get or probe the type */
    p_sys->props.i_type = SUB_TYPE_UNKNOWN;
    psz_type = var_CreateGetString( p_demux, "sub-type" );
    if( psz_type && *psz_type )
    {
        /* "auto" (or any unknown name) matches nothing and leaves the type
         * unknown, which triggers the probing below */
        for( int i = 0; ; i++ )
        {
            if( sub_read_subtitle_function[i].psz_type_name == NULL )
                break;

            if( !strcmp( sub_read_subtitle_function[i].psz_type_name,
                         psz_type ) )
            {
                p_sys->props.i_type = sub_read_subtitle_function[i].i_type;
                break;
            }
        }
    }
    free( psz_type );

#ifndef NDEBUG
    const uint64_t i_start_pos = vlc_stream_Tell( p_demux->s );
#endif

    ssize_t i_peek;
    const uint8_t *p_peek;
    if( vlc_stream_Peek( p_demux->s, &p_peek, 16 ) < 16 )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    enum
    {
        UTF8BOM,
        UTF16LE,
        UTF16BE,
        NOBOM,
    } e_bom = NOBOM;
    const char *psz_bom = NULL;

    i_peek = 4096;
    /* Detect Unicode while skipping the UTF-8 Byte Order Mark */
    if( !memcmp( p_peek, "\xEF\xBB\xBF", 3 ) )
    {
        e_bom = UTF8BOM;
        psz_bom = "UTF-8";
    }
    else if( !memcmp( p_peek, "\xFF\xFE", 2 ) )
    {
        e_bom = UTF16LE;
        psz_bom = "UTF-16LE";
        i_peek *= 2; /* peek twice as many bytes for UTF-16 input */
    }
    else if( !memcmp( p_peek, "\xFE\xFF", 2 ) )
    {
        e_bom = UTF16BE;
        psz_bom = "UTF-16BE";
        i_peek *= 2;
    }

    if( e_bom != NOBOM )
        msg_Dbg( p_demux, "detected %s Byte Order Mark", psz_bom );

    i_peek = vlc_stream_Peek( p_demux->s, &p_peek, i_peek );
    if( unlikely(i_peek < 16) )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Probing reads from a memory stream built on the peeked bytes, so the
     * position of the real stream is left unchanged */
    stream_t *p_probestream = NULL;
    if( e_bom != UTF8BOM && e_bom != NOBOM )
    {
        if( i_peek > 16 )
        {
            char *p_outbuf = FromCharset( psz_bom, p_peek, i_peek );
            if( p_outbuf != NULL )
                p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *)p_outbuf,
                                                      strlen( p_outbuf ),
                                                      false ); /* free p_outbuf on release */
        }
    }
    else
    {
        const size_t i_skip = (e_bom == UTF8BOM) ? 3 : 0;
        p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) &p_peek[i_skip],
                                              i_peek - i_skip, true );
    }

    if( p_probestream == NULL )
    {
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Probe if unknown type */
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
    {
        int     i_try;
        char    *s = NULL;

        msg_Dbg( p_demux, "autodetecting subtitle format" );
        for( i_try = 0; i_try < 256; i_try++ )
        {
            int i_dummy;
            char p_dummy;

            if( (s = vlc_stream_ReadLine( p_probestream ) ) == NULL )
                break;

            if( strcasestr( s, "<SAMI>" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SAMI;
                break;
            }
            else if( sscanf( s, "{%d}{%d}", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "{%d}{}", &i_dummy ) == 1)
            {
                p_sys->props.i_type = SUB_TYPE_MICRODVD;
                break;
            }
            else if( sscanf( s, "%d:%d:%d,%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d,%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 6 )
            {
                p_sys->props.i_type = SUB_TYPE_SUBRIP;
                break;
            }
            else if( !strncasecmp( s, "!: This is a Sub Station Alpha v1", 33 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA1;
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00+", 18 ) )
            {
                p_sys->props.i_type = SUB_TYPE_ASS;
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00", 17 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
                break;
            }
            else if( !strncasecmp( s, "Dialogue: Marked", 16  ) )
            {
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
                break;
            }
            else if( !strncasecmp( s, "Dialogue:", 9  ) )
            {
                p_sys->props.i_type = SUB_TYPE_ASS;
                break;
            }
            else if( strcasestr( s, "[INFORMATION]" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SUBVIEWER; /* I hope this will work */
                break;
            }
            else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy ) == 7 ||
                     sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2)
            {
                p_sys->props.i_type = SUB_TYPE_JACOSUB;
                break;
            }
            else if( sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy ) == 8 )
            {
                p_sys->props.i_type = SUB_TYPE_SBV;
                break;
            }
            else if( sscanf( s, "%d:%d:%d:", &i_dummy, &i_dummy, &i_dummy ) == 3 ||
                     sscanf( s, "%d:%d:%d ", &i_dummy, &i_dummy, &i_dummy ) == 3 )
            {
                p_sys->props.i_type = SUB_TYPE_VPLAYER;
                break;
            }
            else if( sscanf( s, "{T %d:%d:%d:%d", &i_dummy, &i_dummy,
                             &i_dummy, &i_dummy ) == 4 )
            {
                p_sys->props.i_type = SUB_TYPE_DVDSUBTITLE;
                break;
            }
            else if( sscanf( s, "[%d:%d:%d]%c",
                     &i_dummy, &i_dummy, &i_dummy, &p_dummy ) == 4 )
            {
                p_sys->props.i_type = SUB_TYPE_DKS;
                break;
            }
            else if( strstr( s, "*** START SCRIPT" ) )
            {
                p_sys->props.i_type = SUB_TYPE_SUBVIEW1;
                break;
            }
            else if( sscanf( s, "[%d][%d]", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "[%d][]", &i_dummy ) == 1)
            {
                p_sys->props.i_type = SUB_TYPE_MPL2;
                break;
            }
            else if( sscanf (s, "FORMAT=%d", &i_dummy) == 1 ||
                     ( sscanf (s, "FORMAT=TIM%c", &p_dummy) == 1
                       && p_dummy =='E' ) )
            {
                p_sys->props.i_type = SUB_TYPE_MPSUB;
                break;
            }
            else if( sscanf( s, "-->> %d", &i_dummy) == 1 )
            {
                p_sys->props.i_type = SUB_TYPE_AQT;
                break;
            }
            else if( sscanf( s, "%d,%d,", &i_dummy, &i_dummy ) == 2 )
            {
                p_sys->props.i_type = SUB_TYPE_PJS;
                break;
            }
            else if( sscanf( s, "{%d:%d:%d}",
                                &i_dummy, &i_dummy, &i_dummy ) == 3 )
            {
                p_sys->props.i_type = SUB_TYPE_PSB;
                break;
            }
            else if( strcasestr( s, "<time" ) )
            {
                p_sys->props.i_type = SUB_TYPE_RT;
                break;
            }
            else if( !strncasecmp( s, "WEBVTT",6 ) )
            {
                /* FAIL */
                break;
            }
            else if( !strncasecmp( s, "Scenarist_SCC V1.0", 18 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SCC;
                break;
            }

            free( s );
            s = NULL;
        }

        /* Releases the line that ended the loop through a break; NULL otherwise */
        free( s );
    }

    vlc_stream_Delete( p_probestream );

    /* Quit on unknown subtitles */
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
    {
#ifndef NDEBUG
        /* Ensure it will work with non seekable streams */
        assert( i_start_pos == vlc_stream_Tell( p_demux->s ) );
#endif
        msg_Warn( p_demux, "failed to recognize subtitle type" );
        free( p_sys );
        return VLC_EGENERIC;
    }

    /* Pick the block converter from the final type, so that a type forced
     * through "sub-type" gets the same converter as a probed one */
    if( p_sys->props.i_type == SUB_TYPE_SCC )
        p_sys->pf_convert = ToEIA608Block;

    /* Look up the reader; the scan stops at the table's NULL terminator */
    for( int i = 0; sub_read_subtitle_function[i].psz_type_name != NULL; i++ )
    {
        if( sub_read_subtitle_function[i].i_type == p_sys->props.i_type )
        {
            msg_Dbg( p_demux, "detected %s format",
                     sub_read_subtitle_function[i].psz_name );
            pf_read = sub_read_subtitle_function[i].pf_read;
            break;
        }
    }

    if( pf_read == NULL )
    {
        /* Only possible if the enum and the table get out of sync */
        free( p_sys );
        return VLC_EGENERIC;
    }

    msg_Dbg( p_demux, "loading all subtitles..." );

    if( e_bom == UTF8BOM && /* skip BOM */
        vlc_stream_Read( p_demux->s, NULL, 3 ) != 3 )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }

    /* Load the whole file */
    /* NOTE(review): the result of TextLoad() is ignored here, as it always
     * was; confirm that a failed load leaves txtlines in a state that the
     * readers and TextUnload() can handle. */
    text_t txtlines;
    TextLoad( &txtlines, p_demux->s );

    /* Parse it */
    /* The array grows by 500 entries at a time; the bound keeps
     * sizeof(subtitle_t) * i_max from overflowing size_t */
    for( size_t i_max = 0; i_max <= SIZE_MAX / sizeof(subtitle_t) - 500; )
    {
        if( p_sys->subtitles.i_count >= i_max )
        {
            i_max += 500;
            subtitle_t *p_realloc = realloc( p_sys->subtitles.p_array, sizeof(subtitle_t) * i_max );
            if( p_realloc == NULL )
            {
                TextUnload( &txtlines );
                Close( p_this );
                return VLC_ENOMEM;
            }
            p_sys->subtitles.p_array = p_realloc;
        }

        /* A non-zero result means no further cue could be read */
        if( pf_read( VLC_OBJECT(p_demux), &p_sys->props, &txtlines,
                     &p_sys->subtitles.p_array[p_sys->subtitles.i_count],
                     p_sys->subtitles.i_count ) )
            break;

        p_sys->subtitles.i_count++;
    }
    /* Unload */
    TextUnload( &txtlines );

    msg_Dbg(p_demux, "loaded %zu subtitles", p_sys->subtitles.i_count );

    /* *** add subtitle ES *** */
    if( p_sys->props.i_type == SUB_TYPE_SSA1 ||
             p_sys->props.i_type == SUB_TYPE_SSA2_4 ||
             p_sys->props.i_type == SUB_TYPE_ASS )
    {
        Fix( p_demux );
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SSA );
    }
    else if( p_sys->props.i_type == SUB_TYPE_SCC )
    {
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_CEA608 );
        fmt.subs.cc.i_reorder_depth = -1;
    }
    else
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SUBT );

    p_sys->subtitles.i_current = 0;
    p_sys->i_length = 0;
    if( p_sys->subtitles.i_count > 0 )
        p_sys->i_length = p_sys->subtitles.p_array[p_sys->subtitles.i_count-1].i_stop;

    if( p_sys->props.psz_lang )
    {
        /* Ownership of the string moves to fmt, which es_format_Clean() releases */
        fmt.psz_language = p_sys->props.psz_lang;
        p_sys->props.psz_lang = NULL;
        msg_Dbg( p_demux, "detected language '%s' of subtitle: %s", fmt.psz_language,
                 p_demux->psz_location );
    }
    else
    {
        fmt.psz_language = get_language_from_url( p_demux->psz_url );
        if( fmt.psz_language )
            msg_Dbg( p_demux, "selected '%s' as possible filename language substring of subtitle: %s",
                     fmt.psz_language, p_demux->psz_location );
    }

    char *psz_description = var_InheritString( p_demux, "sub-description" );
    if( psz_description && *psz_description )
        fmt.psz_description = psz_description;
    else
        free( psz_description );
    if( p_sys->props.psz_header != NULL &&
       (fmt.p_extra = strdup( p_sys->props.psz_header )) )
    {
        /* The extra data carries the header with its terminating NUL */
        fmt.i_extra = strlen( p_sys->props.psz_header ) + 1;
    }

    fmt.i_id = 0;
    p_sys->es = es_out_Add( p_demux->out, &fmt );
    es_format_Clean( &fmt );
    if( p_sys->es == NULL )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
744
745
/*****************************************************************************
746
 * Close: Close subtitle demux
747
 *****************************************************************************/
748
/* Release everything owned by the demuxer's private state: the text of every
 * loaded cue, the cue array, the format header, the language string and the
 * state structure itself.
 *
 * Open() also calls this from its own error paths, so it must cope with a
 * partially filled state. props.psz_lang is NULL once Open() has handed it
 * over to the ES format, but may still be set if Open() fails earlier. */
static void Close( vlc_object_t *p_this )
{
    demux_t *p_demux = (demux_t*)p_this;
    demux_sys_t *p_sys = p_demux->p_sys;

    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
        free( p_sys->subtitles.p_array[i].psz_text );
    free( p_sys->subtitles.p_array );
    free( p_sys->props.psz_header );
    free( p_sys->props.psz_lang );

    free( p_sys );
}
760
761
static void
762
ResetCurrentIndex( demux_t *p_demux )
763
0
{
764
0
    demux_sys_t *p_sys = p_demux->p_sys;
765
0
    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
766
0
    {
767
0
        if( p_sys->subtitles.p_array[i].i_start * p_sys->f_rate >
768
0
            p_sys->i_next_demux_date && i > 0 )
769
0
            break;
770
0
        p_sys->subtitles.i_current = i;
771
0
    }
772
0
}
773
774
/*****************************************************************************
775
 * Control:
776
 *****************************************************************************/
777
static int Control( demux_t *p_demux, int i_query, va_list args )
778
0
{
779
0
    demux_sys_t *p_sys = p_demux->p_sys;
780
0
    double *pf, f;
781
782
0
    switch( i_query )
783
0
    {
784
0
        case DEMUX_CAN_SEEK:
785
0
            *va_arg( args, bool * ) = true;
786
0
            return VLC_SUCCESS;
787
788
0
        case DEMUX_GET_LENGTH:
789
0
            *va_arg( args, vlc_tick_t * ) = p_sys->i_length;
790
0
            return VLC_SUCCESS;
791
792
0
        case DEMUX_GET_TIME:
793
0
            *va_arg( args, vlc_tick_t * ) = p_sys->i_next_demux_date;
794
0
            return VLC_SUCCESS;
795
796
0
        case DEMUX_SET_TIME:
797
0
        {
798
0
            p_sys->b_first_time = true;
799
0
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t );
800
0
            ResetCurrentIndex( p_demux );
801
0
            return VLC_SUCCESS;
802
0
        }
803
804
0
        case DEMUX_GET_POSITION:
805
0
            pf = va_arg( args, double * );
806
0
            if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
807
0
            {
808
0
                *pf = 1.0;
809
0
            }
810
0
            else if( p_sys->subtitles.i_count > 0 && p_sys->i_length )
811
0
            {
812
0
                *pf = p_sys->i_next_demux_date;
813
0
                *pf /= p_sys->i_length;
814
0
            }
815
0
            else
816
0
            {
817
0
                *pf = 0.0;
818
0
            }
819
0
            return VLC_SUCCESS;
820
821
0
        case DEMUX_SET_POSITION:
822
0
            f = va_arg( args, double );
823
0
            if( p_sys->subtitles.i_count && p_sys->i_length )
824
0
            {
825
0
                vlc_tick_t i64 = VLC_TICK_0 + f * p_sys->i_length;
826
0
                return demux_Control( p_demux, DEMUX_SET_TIME, i64 );
827
0
            }
828
0
            break;
829
830
0
        case DEMUX_CAN_CONTROL_RATE:
831
0
            *va_arg( args, bool * ) = true;
832
0
            return VLC_SUCCESS;
833
0
        case DEMUX_SET_RATE:
834
0
            p_sys->f_rate = *va_arg( args, float * );
835
0
            ResetCurrentIndex( p_demux );
836
0
            return VLC_SUCCESS;
837
0
        case DEMUX_SET_NEXT_DEMUX_TIME:
838
0
            p_sys->b_slave = true;
839
0
            p_sys->i_next_demux_date = va_arg( args, vlc_tick_t ) - VLC_TICK_0;
840
0
            return VLC_SUCCESS;
841
842
0
        case DEMUX_CAN_PAUSE:
843
0
        case DEMUX_SET_PAUSE_STATE:
844
0
        case DEMUX_CAN_CONTROL_PACE:
845
0
            return demux_vaControlHelper( p_demux->s, 0, -1, 0, 1, i_query, args );
846
847
0
        case DEMUX_GET_PTS_DELAY:
848
0
        case DEMUX_GET_FPS:
849
0
        case DEMUX_GET_META:
850
0
        case DEMUX_GET_ATTACHMENTS:
851
0
        case DEMUX_GET_TITLE_INFO:
852
0
        case DEMUX_HAS_UNSUPPORTED_META:
853
0
        case DEMUX_CAN_RECORD:
854
0
        default:
855
0
            break;
856
857
0
    }
858
0
    return VLC_EGENERIC;
859
0
}
860
861
/*****************************************************************************
862
 * Demux: Send subtitle to decoder
863
 *****************************************************************************/
864
static int Demux( demux_t *p_demux )
865
998M
{
866
998M
    demux_sys_t *p_sys = p_demux->p_sys;
867
868
998M
    if ( !p_sys->b_slave )
869
998M
        Fix( p_demux );
870
871
998M
    vlc_tick_t i_barrier = p_sys->i_next_demux_date;
872
873
998M
    while( p_sys->subtitles.i_current < p_sys->subtitles.i_count &&
874
998M
           ( p_sys->subtitles.p_array[p_sys->subtitles.i_current].i_start *
875
998M
             p_sys->f_rate ) <= i_barrier )
876
236k
    {
877
236k
        const subtitle_t *p_subtitle = &p_sys->subtitles.p_array[p_sys->subtitles.i_current];
878
879
236k
        if ( !p_sys->b_slave && p_sys->b_first_time )
880
739
        {
881
739
            es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
882
739
            p_sys->b_first_time = false;
883
739
        }
884
885
236k
        if( p_subtitle->i_start >= 0 )
886
235k
        {
887
235k
            block_t *p_block = p_sys->pf_convert( p_subtitle );
888
235k
            if( p_block )
889
202k
            {
890
202k
                p_block->i_dts =
891
202k
                p_block->i_pts = VLC_TICK_0 + p_subtitle->i_start * p_sys->f_rate;
892
202k
                if( p_subtitle->i_stop != VLC_TICK_INVALID && p_subtitle->i_stop >= p_subtitle->i_start )
893
56.4k
                    p_block->i_length = (p_subtitle->i_stop - p_subtitle->i_start) * p_sys->f_rate;
894
895
202k
                es_out_Send( p_demux->out, p_sys->es, p_block );
896
202k
            }
897
235k
        }
898
899
236k
        p_sys->subtitles.i_current++;
900
236k
    }
901
902
998M
    if ( !p_sys->b_slave )
903
998M
    {
904
998M
        es_out_SetPCR( p_demux->out, VLC_TICK_0 + i_barrier );
905
998M
        p_sys->i_next_demux_date += VLC_TICK_FROM_MS(125);
906
998M
    }
907
908
998M
    if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
909
929
        return VLC_DEMUXER_EOF;
910
911
998M
    return VLC_DEMUXER_SUCCESS;
912
998M
}
913
914
915
static int subtitle_cmp( const void *first, const void *second )
916
1.90M
{
917
1.90M
    vlc_tick_t result = ((subtitle_t *)(first))->i_start - ((subtitle_t *)(second))->i_start;
918
    /* Return -1, 0 ,1, and not directly subtraction
919
     * as result can be > INT_MAX */
920
1.90M
    return result == 0 ? 0 : result > 0 ? 1 : -1;
921
1.90M
}
922
/*****************************************************************************
923
 * Fix: fix time stamp and order of subtitle
924
 *****************************************************************************/
925
static void Fix( demux_t *p_demux )
926
998M
{
927
998M
    demux_sys_t *p_sys = p_demux->p_sys;
928
998M
    if (p_sys->b_sorted)
929
998M
        return;
930
931
    /* *** fix order (to be sure...) *** */
932
929
    qsort( p_sys->subtitles.p_array, p_sys->subtitles.i_count, sizeof( p_sys->subtitles.p_array[0] ), subtitle_cmp);
933
929
    p_sys->b_sorted = true;
934
929
}
935
936
/* TextLoad
 *  Reads the whole stream line by line into txt.
 *
 *  Returns VLC_SUCCESS when at least one line was read, VLC_EGENERIC when
 *  the stream gave no line at all (the line table is then released), and
 *  VLC_ENOMEM on allocation failure.
 *
 *  When growing the table fails, every line read so far - including the one
 *  just read - stays stored and counted in txt, so TextUnload() releases all
 *  of them. Previously the last line was stored but not counted and
 *  therefore leaked.
 */
static int TextLoad( text_t *txt, stream_t *s )
{
    size_t i_line_max;

    /* init txt */
    i_line_max          = 500;
    txt->i_line_count   = 0;
    txt->i_line         = 0;
    txt->line           = calloc( i_line_max, sizeof( char * ) );
    if( !txt->line )
        return VLC_ENOMEM;

    /* load the complete file */
    for( ;; )
    {
        char *psz = vlc_stream_ReadLine( s );

        if( psz == NULL )
            break;

        /* There is always room for one more entry at this point; count the
         * line right away so that it is owned by txt on every exit path. */
        txt->line[txt->i_line_count++] = psz;

        if( txt->i_line_count >= i_line_max )
        {
            i_line_max += 100;
            char **p_realloc = realloc( txt->line, i_line_max * sizeof( char * ) );
            if( p_realloc == NULL )
                return VLC_ENOMEM;
            txt->line = p_realloc;
        }
    }

    if( txt->i_line_count == 0 )
    {
        free( txt->line );
        txt->line = NULL; /* do not leave a dangling pointer behind */
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
976
/* TextUnload
 *  Releases the lines held by txt and resets its counters.
 *  The line table is only touched when at least one line is stored.
 */
static void TextUnload( text_t *txt )
{
    const size_t i_count = txt->i_line_count;

    if( i_count != 0 )
    {
        size_t i_idx = 0;
        while( i_idx < i_count )
        {
            free( txt->line[i_idx] );
            i_idx++;
        }
        free( txt->line );
    }

    txt->i_line_count = 0;
    txt->i_line       = 0;
}
987
988
/* TextGetLine
 *  Returns the line at the read cursor and advances the cursor, or NULL
 *  once every stored line has been returned. The returned string is still
 *  owned by txt.
 */
static char *TextGetLine( text_t *txt )
{
    if( txt->i_line < txt->i_line_count )
    {
        char *psz_line = txt->line[txt->i_line];
        txt->i_line++;
        return psz_line;
    }

    return NULL;
}
995
/* TextPreviousLine
 *  Moves the read cursor back by one line; does nothing when the cursor is
 *  already on the first line.
 */
static void TextPreviousLine( text_t *txt )
{
    if( txt->i_line > 0 )
        txt->i_line -= 1;
}
1000
1001
/*****************************************************************************
1002
 * Format-specific subtitle parsing functions
1003
 *****************************************************************************/
1004
/* ParseMicroDvd:
1005
 *  Format:
1006
 *      {n1}{n2}Line1|Line2|Line3....
1007
 *  where n1 and n2 are the video frame number (n2 can be empty)
1008
 */
1009
static int ParseMicroDvd( vlc_object_t *p_obj, subs_properties_t *p_props,
1010
                          text_t *txt, subtitle_t *p_subtitle,
1011
                          size_t i_idx )
1012
1.00k
{
1013
1.00k
    VLC_UNUSED( i_idx );
1014
1.00k
    char *psz_text;
1015
1.00k
    int  i_start;
1016
1.00k
    int  i_stop;
1017
1.00k
    int  i;
1018
1019
1.00k
    for( ;; )
1020
541k
    {
1021
541k
        const char *s = TextGetLine( txt );
1022
541k
        if( !s )
1023
28
            return VLC_EGENERIC;
1024
1025
541k
        psz_text = malloc( strlen(s) + 1 );
1026
541k
        if( !psz_text )
1027
0
            return VLC_ENOMEM;
1028
1029
541k
        i_start = 0;
1030
541k
        i_stop  = -1;
1031
541k
        if( sscanf( s, "{%d}{}%[^\r\n]", &i_start, psz_text ) == 2 ||
1032
541k
            sscanf( s, "{%d}{%d}%[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1033
1.42k
        {
1034
1.42k
            if( i_start != 1 || i_stop != 1 )
1035
972
                break;
1036
1037
            /* We found a possible setting of the framerate "{1}{1}23.976" */
1038
            /* Check if it's usable, and if the sub-original-fps is not set */
1039
449
            float f_fps = vlc_strtof_c( psz_text, NULL );
1040
449
            if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1041
133
                p_props->i_microsecperframe = llroundf((float)CLOCK_FREQ / f_fps);
1042
449
        }
1043
540k
        free( psz_text );
1044
540k
    }
1045
1046
    /* replace | by \n */
1047
6.77k
    for( i = 0; psz_text[i] != '\0'; i++ )
1048
5.80k
    {
1049
5.80k
        if( psz_text[i] == '|' )
1050
10
            psz_text[i] = '\n';
1051
5.80k
    }
1052
1053
    /* */
1054
972
    p_subtitle->i_start  =  VLC_TICK_0 + i_start * p_props->i_microsecperframe;
1055
972
    p_subtitle->i_stop   = i_stop >= 0 ? (VLC_TICK_0 + i_stop  * p_props->i_microsecperframe) : VLC_TICK_INVALID;
1056
972
    p_subtitle->psz_text = psz_text;
1057
972
    return VLC_SUCCESS;
1058
1.00k
}
1059
1060
/* ParseSubRipSubViewer
1061
 *  Format SubRip
1062
 *      n
1063
 *      h1:m1:s1,d1 --> h2:m2:s2,d2
1064
 *      Line1
1065
 *      Line2
1066
 *      ....
1067
 *      [Empty line]
1068
 *  Format SubViewer v1/v2
1069
 *      h1:m1:s1.d1,h2:m2:s2.d2
1070
 *      Line1[br]Line2
1071
 *      Line3
1072
 *      ...
1073
 *      [empty line]
1074
 *  We ignore line number for SubRip
1075
 */
1076
static int ParseSubRipSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1077
                                 text_t *txt, subtitle_t *p_subtitle,
1078
                                 int (* pf_parse_timing)(subtitle_t *, const char *),
1079
                                 bool b_replace_br )
1080
679
{
1081
679
    VLC_UNUSED(p_obj);
1082
679
    VLC_UNUSED(p_props);
1083
679
    char    *psz_text;
1084
1085
679
    for( ;; )
1086
1.15M
    {
1087
1.15M
        const char *s = TextGetLine( txt );
1088
1089
1.15M
        if( !s )
1090
86
            return VLC_EGENERIC;
1091
1092
1.15M
        if( pf_parse_timing( p_subtitle, s) == VLC_SUCCESS &&
1093
1.64k
            p_subtitle->i_start < p_subtitle->i_stop )
1094
593
        {
1095
593
            break;
1096
593
        }
1097
1.15M
    }
1098
1099
    /* Now read text until an empty line */
1100
593
    size_t i_old = 0;
1101
593
    psz_text = NULL;
1102
593
    for( ;; )
1103
2.48k
    {
1104
2.48k
        const char *s = TextGetLine( txt );
1105
2.48k
        size_t i_len;
1106
1107
2.48k
        i_len = s ? strlen( s ) : 0;
1108
2.48k
        if( i_len == 0 )
1109
593
        {
1110
593
            p_subtitle->psz_text = psz_text;
1111
593
            return VLC_SUCCESS;
1112
593
        }
1113
1114
1.89k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1115
1.89k
        if( !psz_text )
1116
0
            return VLC_ENOMEM;
1117
1118
1.89k
        memcpy( &psz_text[i_old], s, i_len );
1119
1.89k
        psz_text[i_old + i_len + 0] = '\n';
1120
1.89k
        psz_text[i_old + i_len + 1] = '\0';
1121
1.89k
        i_old += i_len + 1;
1122
1123
        /* replace [br] by \n */
1124
1.89k
        if( b_replace_br )
1125
567
        {
1126
567
            char *p;
1127
1128
897
            while( ( p = strstr( psz_text, "[br]" ) ) )
1129
330
            {
1130
330
                *p++ = '\n';
1131
330
                memmove( p, &p[3], strlen(&p[3])+1 );
1132
330
                i_old -= 3;
1133
330
            }
1134
567
        }
1135
1.89k
    }
1136
593
}
1137
1138
/* subtitle_ParseSubRipTimingValue
1139
 * Parses SubRip timing value.
1140
 */
1141
static int subtitle_ParseSubRipTimingValue(vlc_tick_t *timing_value,
1142
                                           const char *s, size_t length)
1143
3.67k
{
1144
3.67k
    int h1, m1, s1, d1 = 0;
1145
1146
3.67k
    int count;
1147
3.67k
    if (sscanf(s, "%d:%d:%d,%d%n", &h1, &m1, &s1, &d1, &count) == 4
1148
525
        && (size_t)count <= length)
1149
525
        goto success;
1150
1151
3.15k
    if (sscanf(s, "%d:%d:%d.%d%n", &h1, &m1, &s1, &d1, &count) == 4
1152
273
        && (size_t)count <= length)
1153
273
        goto success;
1154
1155
2.88k
    d1 = 0;
1156
2.88k
    if (sscanf(s, "%d:%d:%d%n", &h1, &m1, &s1, &count) == 3
1157
2.28k
        && (size_t)count <= length)
1158
2.28k
        goto success;
1159
1160
597
    return VLC_EGENERIC;
1161
1162
3.08k
success:
1163
3.08k
    (*timing_value) = VLC_TICK_0
1164
3.08k
        + vlc_tick_from_HMS(h1, m1, s1)
1165
3.08k
        + VLC_TICK_FROM_MS(d1);
1166
1167
3.08k
    return VLC_SUCCESS;
1168
1169
2.88k
}
1170
1171
/* subtitle_ParseSubRipTiming
1172
 * Parses SubRip timing.
1173
 */
1174
static int subtitle_ParseSubRipTiming( subtitle_t *p_subtitle,
1175
                                       const char *s )
1176
989k
{
1177
989k
    const char *delimiter = strstr(s, " --> ");
1178
989k
    if (delimiter == NULL || delimiter == s)
1179
987k
        return VLC_EGENERIC;
1180
1181
2.00k
    int ret = subtitle_ParseSubRipTimingValue(&p_subtitle->i_start, s, (size_t)(delimiter - s));
1182
2.00k
    if (ret != VLC_SUCCESS)
1183
324
        return ret;
1184
1185
1.67k
    const char *right = delimiter + strlen(" --> ");
1186
1.67k
    return subtitle_ParseSubRipTimingValue(&p_subtitle->i_stop, right, strlen(right));
1187
2.00k
}
1188
1189
/* ParseSubRip
1190
 */
1191
static int  ParseSubRip( vlc_object_t *p_obj, subs_properties_t *p_props,
1192
                         text_t *txt, subtitle_t *p_subtitle,
1193
                         size_t i_idx )
1194
434
{
1195
434
    VLC_UNUSED( i_idx );
1196
434
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1197
434
                                 &subtitle_ParseSubRipTiming,
1198
434
                                 false );
1199
434
}
1200
1201
/* subtitle_ParseSubViewerTiming
1202
 * Parses SubViewer timing.
1203
 */
1204
static int subtitle_ParseSubViewerTiming( subtitle_t *p_subtitle,
1205
                                   const char *s )
1206
161k
{
1207
161k
    int h1, m1, s1, d1, h2, m2, s2, d2;
1208
1209
161k
    if( sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
1210
161k
                &h1, &m1, &s1, &d1, &h2, &m2, &s2, &d2) != 8 )
1211
161k
        return VLC_EGENERIC;
1212
1213
237
    p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1214
237
                          VLC_TICK_FROM_MS( d1 ) + VLC_TICK_0;
1215
1216
237
    p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
1217
237
                          VLC_TICK_FROM_MS( d2 ) + VLC_TICK_0;
1218
237
    return VLC_SUCCESS;
1219
161k
}
1220
1221
/* ParseSubViewer
1222
 */
1223
static int  ParseSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
1224
                            text_t *txt, subtitle_t *p_subtitle,
1225
                            size_t i_idx )
1226
245
{
1227
245
    VLC_UNUSED( i_idx );
1228
1229
245
    return ParseSubRipSubViewer( p_obj, p_props, txt, p_subtitle,
1230
245
                                 &subtitle_ParseSubViewerTiming,
1231
245
                                 true );
1232
245
}
1233
1234
/* ParseSSA
1235
 */
1236
static int  ParseSSA( vlc_object_t *p_obj, subs_properties_t *p_props,
1237
                      text_t *txt, subtitle_t *p_subtitle,
1238
                      size_t i_idx )
1239
711
{
1240
711
    VLC_UNUSED(p_obj);
1241
711
    size_t header_len = 0;
1242
1243
711
    for( ;; )
1244
589k
    {
1245
589k
        const char *s = TextGetLine( txt );
1246
589k
        int h1, m1, s1, c1, h2, m2, s2, c2;
1247
589k
        char *psz_text, *psz_temp;
1248
589k
        char temp[16];
1249
1250
589k
        if( !s )
1251
47
            return VLC_EGENERIC;
1252
1253
        /* We expect (SSA2-4):
1254
         * Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1255
         * Dialogue: Marked=0,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1256
         *
1257
         * SSA-1 is similar but only has 8 commas up until the subtitle text. Probably the Effect field is no present, but not 100 % sure.
1258
         */
1259
1260
        /* For ASS:
1261
         * Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
1262
         * Dialogue: Layer#,0:02:40.65,0:02:41.79,Wolf main,Cher,0000,0000,0000,,Et les enregistrements de ses ondes delta ?
1263
         */
1264
1265
589k
        psz_text = NULL;
1266
589k
        if( s[0] == 'D' || s[0] == 'L' )
1267
4.82k
        {
1268
            /* The output text is always shorter than the input text. */
1269
4.82k
            psz_text = malloc( strlen(s) );
1270
4.82k
            if( !psz_text )
1271
0
                return VLC_ENOMEM;
1272
4.82k
        }
1273
1274
        /* Try to capture the language property */
1275
589k
        if( s[0] == 'L' &&
1276
91
            sscanf( s, "Language: %[^\r\n]", psz_text ) == 1 )
1277
0
        {
1278
0
            free( p_props->psz_lang ); /* just in case of multiple instances */
1279
0
            p_props->psz_lang = psz_text;
1280
0
            psz_text = NULL;
1281
0
        }
1282
589k
        else if( s[0] == 'D' &&
1283
4.73k
            sscanf( s,
1284
4.73k
                    "Dialogue: %15[^,],%d:%d:%d.%d,%d:%d:%d.%d,%[^\r\n]",
1285
4.73k
                    temp,
1286
4.73k
                    &h1, &m1, &s1, &c1,
1287
4.73k
                    &h2, &m2, &s2, &c2,
1288
4.73k
                    psz_text ) == 10 )
1289
664
        {
1290
            /* The dec expects: ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text */
1291
            /* (Layer comes from ASS specs ... it's empty for SSA.) */
1292
664
            if( p_props->i_type == SUB_TYPE_SSA1 )
1293
160
            {
1294
                /* SSA1 has only 8 commas before the text starts, not 9 */
1295
160
                memmove( &psz_text[1], psz_text, strlen(psz_text)+1 );
1296
160
                psz_text[0] = ',';
1297
160
            }
1298
504
            else
1299
504
            {
1300
504
                int i_layer = ( p_props->i_type == SUB_TYPE_ASS ) ? atoi( temp ) : 0;
1301
1302
                /* ReadOrder, Layer, %s(rest of fields) */
1303
504
                if( asprintf( &psz_temp, "%zu,%d,%s", i_idx, i_layer, psz_text ) == -1 )
1304
0
                {
1305
0
                    free( psz_text );
1306
0
                    return VLC_ENOMEM;
1307
0
                }
1308
1309
504
                free( psz_text );
1310
504
                psz_text = psz_temp;
1311
504
            }
1312
1313
664
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1314
664
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1315
664
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
1316
664
                                  VLC_TICK_FROM_MS( c2 * 10 ) + VLC_TICK_0;
1317
664
            p_subtitle->psz_text = psz_text;
1318
664
            return VLC_SUCCESS;
1319
664
        }
1320
588k
        free( psz_text );
1321
1322
        /* All the other stuff we add to the header field */
1323
588k
        if( header_len == 0 && p_props->psz_header )
1324
243
            header_len = strlen( p_props->psz_header );
1325
1326
588k
        size_t s_len = strlen( s );
1327
588k
        p_props->psz_header = realloc_or_free( p_props->psz_header, header_len + s_len + 2 );
1328
588k
        if( !p_props->psz_header )
1329
0
            return VLC_ENOMEM;
1330
588k
        snprintf( p_props->psz_header + header_len, s_len + 2, "%s\n", s );
1331
588k
        header_len += s_len + 1;
1332
588k
    }
1333
711
}
1334
1335
/* ParseVplayer
1336
 *  Format
1337
 *      h:m:s:Line1|Line2|Line3....
1338
 *  or
1339
 *      h:m:s Line1|Line2|Line3....
1340
 */
1341
static int ParseVplayer( vlc_object_t *p_obj, subs_properties_t *p_props,
1342
                         text_t *txt, subtitle_t *p_subtitle,
1343
                         size_t i_idx )
1344
34.0k
{
1345
34.0k
    VLC_UNUSED(p_obj);
1346
34.0k
    VLC_UNUSED(p_props);
1347
34.0k
    VLC_UNUSED( i_idx );
1348
34.0k
    char *psz_text;
1349
1350
34.0k
    for( ;; )
1351
332k
    {
1352
332k
        const char *s = TextGetLine( txt );
1353
332k
        int h1, m1, s1;
1354
1355
332k
        if( !s )
1356
54
            return VLC_EGENERIC;
1357
1358
332k
        psz_text = malloc( strlen( s ) + 1 );
1359
332k
        if( !psz_text )
1360
0
            return VLC_ENOMEM;
1361
1362
332k
        if( sscanf( s, "%d:%d:%d%*c%[^\r\n]",
1363
332k
                    &h1, &m1, &s1, psz_text ) == 4 )
1364
33.9k
        {
1365
33.9k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
1366
33.9k
            p_subtitle->i_stop  = -1;
1367
33.9k
            break;
1368
33.9k
        }
1369
298k
        free( psz_text );
1370
298k
    }
1371
1372
    /* replace | by \n */
1373
1.16M
    for( size_t i = 0; psz_text[i] != '\0'; i++ )
1374
1.13M
    {
1375
1.13M
        if( psz_text[i] == '|' )
1376
36
            psz_text[i] = '\n';
1377
1.13M
    }
1378
33.9k
    p_subtitle->psz_text = psz_text;
1379
33.9k
    return VLC_SUCCESS;
1380
34.0k
}
1381
1382
/* ParseSami
1383
 */
1384
static const char *ParseSamiSearch( text_t *txt,
1385
                                    const char *psz_start, const char *psz_str )
1386
6.07k
{
1387
6.07k
    if( psz_start && strcasestr( psz_start, psz_str ) )
1388
4.71k
    {
1389
4.71k
        const char *s = strcasestr( psz_start, psz_str );
1390
4.71k
        return &s[strlen( psz_str )];
1391
4.71k
    }
1392
1393
1.36k
    for( ;; )
1394
1.52M
    {
1395
1.52M
        const char *p = TextGetLine( txt );
1396
1.52M
        if( !p )
1397
94
            return NULL;
1398
1399
1.52M
        const char *s = strcasestr( p, psz_str );
1400
1.52M
        if( s != NULL )
1401
1.26k
            return &s[strlen( psz_str )];
1402
1.52M
    }
1403
1.36k
}
1404
static int ParseSami( vlc_object_t *p_obj, subs_properties_t *p_props,
1405
                      text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1406
594
{
1407
594
    VLC_UNUSED(p_obj);
1408
594
    VLC_UNUSED(p_props);
1409
594
    VLC_UNUSED( i_idx );
1410
594
    const char *s;
1411
594
    int64_t i_start;
1412
1413
594
    unsigned int i_text;
1414
594
    char text[8192]; /* Arbitrary but should be long enough */
1415
1416
    /* search "Start=" */
1417
594
    s = ParseSamiSearch( txt, p_props->sami.psz_start, "Start=" );
1418
594
    p_props->sami.psz_start = NULL;
1419
594
    if( !s )
1420
74
        return VLC_EGENERIC;
1421
1422
    /* get start value */
1423
520
    char *psz_end;
1424
520
    i_start = strtol( s, &psz_end, 0 );
1425
520
    s = psz_end;
1426
1427
    /* search <P */
1428
520
    if( !( s = ParseSamiSearch( txt, s, "<P" ) ) )
1429
4
        return VLC_EGENERIC;
1430
1431
    /* search > */
1432
516
    if( !( s = ParseSamiSearch( txt, s, ">" ) ) )
1433
3
        return VLC_EGENERIC;
1434
1435
513
    i_text = 0;
1436
513
    text[0] = '\0';
1437
    /* now get all txt until  a "Start=" line */
1438
513
    for( ;; )
1439
142k
    {
1440
142k
        char c = '\0';
1441
        /* Search non empty line */
1442
436k
        while( s && *s == '\0' )
1443
293k
            s = TextGetLine( txt );
1444
142k
        if( !s )
1445
42
            break;
1446
1447
142k
        if( *s == '<' )
1448
4.91k
        {
1449
4.91k
            if( !strncasecmp( s, "<br", 3 ) )
1450
111
            {
1451
111
                c = '\n';
1452
111
            }
1453
4.80k
            else if( strcasestr( s, "Start=" ) )
1454
471
            {
1455
471
                p_props->sami.psz_start = s;
1456
471
                break;
1457
471
            }
1458
4.44k
            s = ParseSamiSearch( txt, s, ">" );
1459
4.44k
        }
1460
137k
        else if( !strncmp( s, "&nbsp;", 6 ) )
1461
470
        {
1462
470
            c = ' ';
1463
470
            s += 6;
1464
470
        }
1465
137k
        else if( *s == '\t' )
1466
769
        {
1467
769
            c = ' ';
1468
769
            s++;
1469
769
        }
1470
136k
        else
1471
136k
        {
1472
136k
            c = *s;
1473
136k
            s++;
1474
136k
        }
1475
141k
        if( c != '\0' && i_text+1 < sizeof(text) )
1476
137k
        {
1477
137k
            text[i_text++] = c;
1478
137k
            text[i_text] = '\0';
1479
137k
        }
1480
141k
    }
1481
1482
513
    p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start);
1483
513
    p_subtitle->i_stop  = -1;
1484
513
    p_subtitle->psz_text = strdup( text );
1485
1486
513
    return VLC_SUCCESS;
1487
516
}
1488
1489
/* ParseDVDSubtitle
1490
 *  Format
1491
 *      {T h1:m1:s1:c1
1492
 *      Line1
1493
 *      Line2
1494
 *      ...
1495
 *      }
1496
 * TODO it can have a header
1497
 *      { HEAD
1498
 *          ...
1499
 *          CODEPAGE=...
1500
 *          FORMAT=...
1501
 *          LANG=English
1502
 *      }
1503
 *      LANG support would be cool
1504
 *      CODEPAGE is probably mandatory FIXME
1505
 */
1506
static int ParseDVDSubtitle(vlc_object_t *p_obj, subs_properties_t *p_props,
1507
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1508
481
{
1509
481
    VLC_UNUSED(p_obj);
1510
481
    VLC_UNUSED(p_props);
1511
481
    VLC_UNUSED( i_idx );
1512
481
    char *psz_text;
1513
1514
481
    for( ;; )
1515
4.20k
    {
1516
4.20k
        const char *s = TextGetLine( txt );
1517
4.20k
        int h1, m1, s1, c1;
1518
1519
4.20k
        if( !s )
1520
4
            return VLC_EGENERIC;
1521
1522
4.19k
        if( sscanf( s,
1523
4.19k
                    "{T %d:%d:%d:%d",
1524
4.19k
                    &h1, &m1, &s1, &c1 ) == 4 )
1525
477
        {
1526
477
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
1527
477
                                  VLC_TICK_FROM_MS( c1 * 10 ) + VLC_TICK_0;
1528
477
            p_subtitle->i_stop = -1;
1529
477
            break;
1530
477
        }
1531
4.19k
    }
1532
1533
    /* Now read text until a line containing "}" */
1534
477
    size_t i_old = 0;
1535
477
    psz_text = NULL;
1536
477
    for( ;; )
1537
7.17k
    {
1538
7.17k
        const char *s = TextGetLine( txt );
1539
7.17k
        size_t i_len;
1540
1541
7.17k
        if( !s )
1542
4
        {
1543
4
            free( psz_text );
1544
4
            return VLC_EGENERIC;
1545
4
        }
1546
1547
7.17k
        i_len = strlen( s );
1548
7.17k
        if( i_len == 1 && s[0] == '}')
1549
473
        {
1550
473
            if (psz_text)
1551
198
                psz_text[i_old] = '\0';
1552
473
            p_subtitle->psz_text = psz_text;
1553
473
            return VLC_SUCCESS;
1554
473
        }
1555
1556
6.69k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1557
6.69k
        if( !psz_text )
1558
0
            return VLC_ENOMEM;
1559
1560
6.69k
        memcpy( &psz_text[i_old], s, i_len );
1561
6.69k
        psz_text[i_old + i_len + 0] = '\n';
1562
6.69k
        i_old += i_len + 1;
1563
6.69k
    }
1564
477
}
1565
1566
/* ParseMPL2
1567
 *  Format
1568
 *     [n1][n2]Line1|Line2|Line3...
1569
 *  where n1 and n2 are the video frame number (n2 can be empty)
1570
 */
1571
static int ParseMPL2(vlc_object_t *p_obj, subs_properties_t *p_props,
1572
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1573
99.9k
{
1574
99.9k
    VLC_UNUSED(p_obj);
1575
99.9k
    VLC_UNUSED(p_props);
1576
99.9k
    VLC_UNUSED( i_idx );
1577
99.9k
    char *psz_text;
1578
99.9k
    int i;
1579
1580
99.9k
    for( ;; )
1581
125k
    {
1582
125k
        const char *s = TextGetLine( txt );
1583
125k
        int i_start;
1584
125k
        int i_stop;
1585
1586
125k
        if( !s )
1587
39
            return VLC_EGENERIC;
1588
1589
125k
        psz_text = malloc( strlen(s) + 1 );
1590
125k
        if( !psz_text )
1591
0
            return VLC_ENOMEM;
1592
1593
125k
        i_start = 0;
1594
125k
        i_stop  = -1;
1595
125k
        if( sscanf( s, "[%d][] %[^\r\n]", &i_start, psz_text ) == 2 ||
1596
25.7k
            sscanf( s, "[%d][%d] %[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1597
99.8k
        {
1598
99.8k
            p_subtitle->i_start = VLC_TICK_0 + VLC_TICK_FROM_MS(i_start * 100);
1599
99.8k
            p_subtitle->i_stop  = i_stop >= 0 ? VLC_TICK_0 + VLC_TICK_FROM_MS(i_stop  * 100) : VLC_TICK_INVALID;
1600
99.8k
            break;
1601
99.8k
        }
1602
25.5k
        free( psz_text );
1603
25.5k
    }
1604
1605
564k
    for( i = 0; psz_text[i] != '\0'; )
1606
464k
    {
1607
        /* replace | by \n */
1608
464k
        if( psz_text[i] == '|' )
1609
8
            psz_text[i] = '\n';
1610
1611
        /* Remove italic */
1612
464k
        if( psz_text[i] == '/' && ( i == 0 || psz_text[i-1] == '\n' ) )
1613
292
            memmove( &psz_text[i], &psz_text[i+1], strlen(&psz_text[i+1])+1 );
1614
464k
        else
1615
464k
            i++;
1616
464k
    }
1617
99.8k
    p_subtitle->psz_text = psz_text;
1618
99.8k
    return VLC_SUCCESS;
1619
99.9k
}
1620
1621
/* ParseAQT
 *  Entries are delimited by "-->> <frame>" lines: the first one met gives
 *  the start time, the second one gives the stop time and ends the entry.
 *  Every other line is appended to the text, followed by '\n'. Reading also
 *  stops right after a text line that was the last line of the file.
 *  Frame numbers are converted with p_props->i_microsecperframe.
 *
 *  Returns VLC_SUCCESS (psz_text may be NULL when no text line was read),
 *  VLC_EGENERIC when TextGetLine() runs out of lines, or VLC_ENOMEM.
 */
static int ParseAQT(vlc_object_t *p_obj, subs_properties_t *p_props, text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
{
    VLC_UNUSED(p_obj);
    VLC_UNUSED(p_props);
    VLC_UNUSED( i_idx );

    char   *psz_text = NULL;
    size_t  i_used = 0;
    bool    b_started = false;

    for( ;; )
    {
        const char *psz_line = TextGetLine( txt );
        if( psz_line == NULL )
        {
            free( psz_text );
            return VLC_EGENERIC;
        }

        int i_frame; /* Time */
        if( sscanf( psz_line, "-->> %d", &i_frame ) != 1 )
        {
            /* Text Lines */
            const size_t i_len = strlen( psz_line );

            /* Room for the line, its '\n' and the final '\0' */
            psz_text = realloc_or_free( psz_text, i_used + i_len + 1 + 1 );
            if( psz_text == NULL )
                 return VLC_ENOMEM;

            memcpy( psz_text + i_used, psz_line, i_len );
            psz_text[i_used + i_len] = '\n';
            i_used += i_len + 1;

            if( txt->i_line == txt->i_line_count )
                break;
            continue;
        }

        /* Data Lines */
        if( b_started )
        {
            /* We have been too far: end of the subtitle, begin of next */
            p_subtitle->i_stop  = VLC_TICK_0 + i_frame * p_props->i_microsecperframe;
            break;
        }

        /* Starting of a subtitle */
        p_subtitle->i_start = VLC_TICK_0 + i_frame * p_props->i_microsecperframe;
        b_started = true;
    }

    if( psz_text != NULL )
        psz_text[i_used] = '\0';
    p_subtitle->psz_text = psz_text;
    return VLC_SUCCESS;
}
1680
1681
static int ParsePJS(vlc_object_t *p_obj, subs_properties_t *p_props,
1682
                    text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1683
270
{
1684
270
    VLC_UNUSED(p_obj);
1685
270
    VLC_UNUSED(p_props);
1686
270
    VLC_UNUSED( i_idx );
1687
1688
270
    char *psz_text;
1689
270
    int i;
1690
1691
270
    for( ;; )
1692
39.0k
    {
1693
39.0k
        const char *s = TextGetLine( txt );
1694
39.0k
        int t1, t2;
1695
1696
39.0k
        if( !s )
1697
6
            return VLC_EGENERIC;
1698
1699
39.0k
        psz_text = malloc( strlen(s) + 1 );
1700
39.0k
        if( !psz_text )
1701
0
            return VLC_ENOMEM;
1702
1703
        /* Data Lines */
1704
39.0k
        if( sscanf (s, "%d,%d,\"%[^\n\r]", &t1, &t2, psz_text ) == 3 )
1705
264
        {
1706
            /* 1/10th of second ? Frame based ? FIXME */
1707
264
            p_subtitle->i_start = VLC_TICK_0 + INT64_C(10) * t1;
1708
264
            p_subtitle->i_stop = VLC_TICK_0 + INT64_C(10) * t2;
1709
            /* Remove latest " */
1710
264
            psz_text[ strlen(psz_text) - 1 ] = '\0';
1711
1712
264
            break;
1713
264
        }
1714
38.8k
        free( psz_text );
1715
38.8k
    }
1716
1717
    /* replace | by \n */
1718
1.06k
    for( i = 0; psz_text[i] != '\0'; i++ )
1719
797
    {
1720
797
        if( psz_text[i] == '|' )
1721
3
            psz_text[i] = '\n';
1722
797
    }
1723
1724
264
    p_subtitle->psz_text = psz_text;
1725
264
    msg_Dbg( p_obj, "%s", psz_text );
1726
264
    return VLC_SUCCESS;
1727
270
}
1728
1729
static int ParseMPSub( vlc_object_t *p_obj, subs_properties_t *p_props,
1730
                       text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1731
5.26k
{
1732
5.26k
    VLC_UNUSED( i_idx );
1733
1734
5.26k
    if( !p_props->mpsub.b_inited )
1735
55
    {
1736
55
        p_props->mpsub.f_total = 0.0;
1737
55
        p_props->mpsub.i_factor = 0;
1738
1739
55
        p_props->mpsub.b_inited = true;
1740
55
    }
1741
1742
5.26k
    for( ;; )
1743
705k
    {
1744
705k
        const char *s = TextGetLine( txt );
1745
705k
        if( !s )
1746
49
        {
1747
49
            return VLC_EGENERIC;
1748
49
        }
1749
1750
705k
        if ( *s =='#' || *s == '\0' )
1751
657k
            continue;
1752
1753
        /* Data Lines */
1754
48.3k
        float wait, duration;
1755
48.3k
        if( sscanf( s, "%f %f", &wait, &duration ) == 2 )
1756
5.22k
        {
1757
5.22k
            float f1 = wait;
1758
5.22k
            float f2 = duration;
1759
5.22k
            p_props->mpsub.f_total += f1 * p_props->mpsub.i_factor;
1760
5.22k
            p_subtitle->i_start = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1761
5.22k
            p_props->mpsub.f_total += f2 * p_props->mpsub.i_factor;
1762
5.22k
            p_subtitle->i_stop = VLC_TICK_0 + llroundf(10000.f * p_props->mpsub.f_total);
1763
5.22k
            break;
1764
5.22k
        }
1765
1766
43.1k
        if( !strncmp( s, "FORMAT=", strlen("FORMAT=") ) )
1767
5.43k
        {
1768
5.43k
            const char *psz_format = s + strlen( "FORMAT=" );
1769
5.43k
            if( !strncmp( psz_format, "TIME", strlen("TIME") ) && (psz_format[4] == '\0' || psz_format[4] == ' ') )
1770
343
            {
1771
                // FORMAT=TIME may be followed by a comment
1772
343
                p_props->mpsub.i_factor = 100;
1773
343
            }
1774
5.08k
            else
1775
5.08k
            {
1776
5.08k
                float f_fps;
1777
5.08k
                if( sscanf( psz_format, "%f", &f_fps ) == 1 )
1778
4.55k
                {
1779
4.55k
                    if( f_fps > 0.f && var_GetFloat( p_obj, "sub-original-fps" ) <= 0.f )
1780
38
                        var_SetFloat( p_obj, "sub-original-fps", f_fps );
1781
1782
4.55k
                    p_props->mpsub.i_factor = 1;
1783
4.55k
                }
1784
5.08k
            }
1785
5.43k
        }
1786
43.1k
    }
1787
1788
5.22k
    char *psz_text = NULL;
1789
5.22k
    size_t i_old = 0;
1790
5.22k
    for( ;; )
1791
6.92k
    {
1792
6.92k
        const char *s = TextGetLine( txt );
1793
1794
6.92k
        if( !s )
1795
6
        {
1796
6
            free( psz_text );
1797
6
            return VLC_EGENERIC;
1798
6
        }
1799
1800
6.91k
        size_t i_len = strlen( s );
1801
6.91k
        if( i_len == 0 )
1802
5.21k
            break;
1803
1804
1.70k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
1805
1.70k
        if( !psz_text )
1806
0
             return VLC_ENOMEM;
1807
1808
1.70k
        memcpy( &psz_text[i_old], s, i_len );
1809
1.70k
        psz_text[i_old + i_len + 0] = '\n';
1810
1.70k
        i_old += i_len + 1;
1811
1.70k
    }
1812
1813
5.21k
    if (psz_text)
1814
977
        psz_text[i_old] = '\0';
1815
5.21k
    p_subtitle->psz_text = psz_text;
1816
5.21k
    return VLC_SUCCESS;
1817
5.22k
}
1818
1819
static int ParseJSS( vlc_object_t *p_obj, subs_properties_t *p_props,
1820
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
1821
80.3k
{
1822
80.3k
    VLC_UNUSED( i_idx );
1823
80.3k
    char         *psz_text, *psz_orig;
1824
80.3k
    char         *psz_text2, *psz_orig2;
1825
1826
80.3k
    if( !p_props->jss.b_inited )
1827
127
    {
1828
127
        p_props->jss.i_comment = 0;
1829
127
        p_props->jss.i_time_resolution = 30;
1830
127
        p_props->jss.i_time_shift = 0;
1831
1832
127
        p_props->jss.b_inited = true;
1833
127
    }
1834
1835
    /* Parse the main lines */
1836
80.3k
    for( ;; )
1837
572k
    {
1838
572k
        const char *s = TextGetLine( txt );
1839
572k
        if( !s )
1840
127
            return VLC_EGENERIC;
1841
1842
572k
        size_t line_length = strlen( s );
1843
572k
        psz_orig = malloc( line_length + 1 );
1844
572k
        if( !psz_orig )
1845
0
            return VLC_ENOMEM;
1846
572k
        psz_text = psz_orig;
1847
1848
        /* Complete time lines */
1849
572k
        int h1, h2, m1, m2, s1, s2, f1, f2;
1850
572k
        if( sscanf( s, "%d:%d:%d.%d %d:%d:%d.%d %[^\n\r]",
1851
572k
                    &h1, &m1, &s1, &f1, &h2, &m2, &s2, &f2, psz_text ) == 9 )
1852
129
        {
1853
129
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 ) +
1854
129
                vlc_tick_from_sec( ( f1 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1855
129
            p_subtitle->i_stop = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 ) +
1856
129
                vlc_tick_from_sec( ( f2 +  p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1857
129
            break;
1858
129
        }
1859
        /* Short time lines */
1860
572k
        else if( sscanf( s, "@%d @%d %[^\n\r]", &f1, &f2, psz_text ) == 3 )
1861
80.1k
        {
1862
80.1k
            p_subtitle->i_start = VLC_TICK_0 +
1863
80.1k
                    vlc_tick_from_sec( (f1 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1864
80.1k
            p_subtitle->i_stop = VLC_TICK_0 +
1865
80.1k
                    vlc_tick_from_sec( (f2 + p_props->jss.i_time_shift ) / p_props->jss.i_time_resolution );
1866
80.1k
            break;
1867
80.1k
        }
1868
        /* General Directive lines */
1869
        /* Only TIME and SHIFT are supported so far */
1870
492k
        else if( s[0] == '#' )
1871
852
        {
1872
852
            int h = 0, m =0, sec = 1, f = 1;
1873
852
            unsigned shift = 1;
1874
852
            int inv = 1;
1875
1876
852
            strcpy( psz_text, s );
1877
1878
852
            switch( toupper( (unsigned char)psz_text[1] ) )
1879
852
            {
1880
501
            case 'S':
1881
501
                 shift = isalpha( (unsigned char)psz_text[2] ) ? 6 : 2 ;
1882
501
                 if ( shift > line_length )
1883
21
                     break;
1884
1885
480
                 if( sscanf( &psz_text[shift], "%d", &h ) )
1886
323
                 {
1887
                     /* Negative shifting */
1888
323
                     if( h < 0 )
1889
130
                     {
1890
130
                         h *= -1;
1891
130
                         inv = -1;
1892
130
                     }
1893
1894
323
                     if( sscanf( &psz_text[shift], "%*d:%d", &m ) )
1895
317
                     {
1896
317
                         if( sscanf( &psz_text[shift], "%*d:%*d:%d", &sec ) )
1897
131
                         {
1898
131
                             sscanf( &psz_text[shift], "%*d:%*d:%*d.%d", &f );
1899
131
                         }
1900
186
                         else
1901
186
                         {
1902
186
                             h = 0;
1903
186
                             sscanf( &psz_text[shift], "%d:%d.%d",
1904
186
                                     &m, &sec, &f );
1905
186
                             m *= inv;
1906
186
                         }
1907
317
                     }
1908
6
                     else
1909
6
                     {
1910
6
                         h = m = 0;
1911
6
                         sscanf( &psz_text[shift], "%d.%d", &sec, &f);
1912
6
                         sec *= inv;
1913
6
                     }
1914
323
                     p_props->jss.i_time_shift = ( ( h * INT64_C(3600) + m * INT64_C(60) + sec )
1915
323
                         * p_props->jss.i_time_resolution + f ) * inv;
1916
323
                 }
1917
480
                 break;
1918
1919
320
            case 'T':
1920
320
                shift = isalpha( (unsigned char)psz_text[2] ) ? 8 : 2 ;
1921
320
                if ( shift > line_length )
1922
144
                    break;
1923
1924
176
                sscanf( &psz_text[shift], "%d", &p_props->jss.i_time_resolution );
1925
176
                if( !p_props->jss.i_time_resolution || p_props->jss.i_time_resolution < 0 )
1926
159
                    p_props->jss.i_time_resolution = 30;
1927
176
                break;
1928
852
            }
1929
852
            free( psz_orig );
1930
852
            continue;
1931
852
        }
1932
491k
        else
1933
            /* Unknown type line, probably a comment */
1934
491k
        {
1935
491k
            free( psz_orig );
1936
491k
            continue;
1937
491k
        }
1938
572k
    }
1939
1940
80.4k
    while( psz_text[ strlen( psz_text ) - 1 ] == '\\' )
1941
263
    {
1942
263
        const char *s2 = TextGetLine( txt );
1943
1944
263
        if( !s2 )
1945
0
        {
1946
0
            free( psz_orig );
1947
0
            return VLC_EGENERIC;
1948
0
        }
1949
1950
263
        size_t i_len = strlen( s2 );
1951
263
        if( i_len == 0 )
1952
129
            break;
1953
1954
134
        size_t i_old = strlen( psz_text );
1955
1956
134
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 );
1957
134
        if( !psz_text )
1958
0
             return VLC_ENOMEM;
1959
1960
134
        psz_orig = psz_text;
1961
134
        strcat( psz_text, s2 );
1962
134
    }
1963
1964
    /* Skip the blanks */
1965
80.2k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1966
1967
    /* Parse the directives */
1968
80.2k
    if( isalpha( (unsigned char)*psz_text ) || *psz_text == '[' )
1969
1.58k
    {
1970
203k
        while( *psz_text && *psz_text != ' ' )
1971
202k
            ++psz_text;
1972
1973
        /* Directives are NOT parsed yet */
1974
        /* This has probably a better place in a decoder ? */
1975
        /* directive = malloc( strlen( psz_text ) + 1 );
1976
           if( sscanf( psz_text, "%s %[^\n\r]", directive, psz_text2 ) == 2 )*/
1977
1.58k
    }
1978
1979
    /* Skip the blanks after directives */
1980
81.8k
    while( *psz_text == ' ' || *psz_text == '\t' ) psz_text++;
1981
1982
    /* Clean all the lines from inline comments and other stuffs */
1983
80.2k
    psz_orig2 = calloc( strlen( psz_text) + 1, 1 );
1984
80.2k
    psz_text2 = psz_orig2;
1985
1986
2.13M
    for( ; *psz_text != '\0' && *psz_text != '\n' && *psz_text != '\r'; )
1987
2.05M
    {
1988
2.05M
        switch( *psz_text )
1989
2.05M
        {
1990
627k
        case '{':
1991
627k
            p_props->jss.i_comment++;
1992
627k
            break;
1993
519
        case '}':
1994
519
            if( p_props->jss.i_comment )
1995
268
            {
1996
268
                p_props->jss.i_comment = 0;
1997
268
                if( (*(psz_text + 1 ) ) == ' ' ) psz_text++;
1998
268
            }
1999
519
            break;
2000
3.82k
        case '~':
2001
3.82k
            if( !p_props->jss.i_comment )
2002
333
            {
2003
333
                *psz_text2 = ' ';
2004
333
                psz_text2++;
2005
333
            }
2006
3.82k
            break;
2007
13.0k
        case ' ':
2008
13.4k
        case '\t':
2009
13.4k
            if( (*(psz_text + 1 ) ) == ' ' || (*(psz_text + 1 ) ) == '\t' )
2010
341
                break;
2011
13.0k
            if( !p_props->jss.i_comment )
2012
6.32k
            {
2013
6.32k
                *psz_text2 = ' ';
2014
6.32k
                psz_text2++;
2015
6.32k
            }
2016
13.0k
            break;
2017
12.4k
        case '\\':
2018
12.4k
            if( (*(psz_text + 1 ) ) == 'n' )
2019
1.77k
            {
2020
1.77k
                *psz_text2 = '\n';
2021
1.77k
                psz_text++;
2022
1.77k
                psz_text2++;
2023
1.77k
                break;
2024
1.77k
            }
2025
10.6k
            if( ( toupper((unsigned char)*(psz_text + 1 ) ) == 'C' ) ||
2026
10.5k
                    ( toupper((unsigned char)*(psz_text + 1 ) ) == 'F' ) )
2027
330
            {
2028
330
                psz_text++;
2029
330
                break;
2030
330
            }
2031
10.3k
            if( (*(psz_text + 1 ) ) == 'B' || (*(psz_text + 1 ) ) == 'b' ||
2032
10.3k
                (*(psz_text + 1 ) ) == 'I' || (*(psz_text + 1 ) ) == 'i' ||
2033
10.3k
                (*(psz_text + 1 ) ) == 'U' || (*(psz_text + 1 ) ) == 'u' ||
2034
10.2k
                (*(psz_text + 1 ) ) == 'D' || (*(psz_text + 1 ) ) == 'N' )
2035
8
            {
2036
8
                psz_text++;
2037
8
                break;
2038
8
            }
2039
10.2k
            if( (*(psz_text + 1 ) ) == '~' || (*(psz_text + 1 ) ) == '{' ||
2040
5.72k
                (*(psz_text + 1 ) ) == '\\' )
2041
5.36k
                psz_text++;
2042
4.93k
            else if( ( *(psz_text + 1 ) == '\r' ||  *(psz_text + 1 ) == '\n' ) &&
2043
135
                     *(psz_text + 1 ) != '\0' )
2044
135
            {
2045
135
                psz_text++;
2046
135
            }
2047
10.2k
            break;
2048
1.39M
        default:
2049
1.39M
            if( !p_props->jss.i_comment )
2050
821k
            {
2051
821k
                *psz_text2 = *psz_text;
2052
821k
                psz_text2++;
2053
821k
            }
2054
2.05M
        }
2055
2.05M
        psz_text++;
2056
2.05M
    }
2057
2058
80.2k
    p_subtitle->psz_text = psz_orig2;
2059
80.2k
    msg_Dbg( p_obj, "%s", p_subtitle->psz_text );
2060
80.2k
    free( psz_orig );
2061
80.2k
    return VLC_SUCCESS;
2062
80.2k
}
2063
2064
static int ParsePSB( vlc_object_t *p_obj, subs_properties_t *p_props,
2065
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2066
0
{
2067
0
    VLC_UNUSED(p_obj);
2068
0
    VLC_UNUSED(p_props);
2069
0
    VLC_UNUSED( i_idx );
2070
2071
0
    char *psz_text;
2072
0
    int i;
2073
2074
0
    for( ;; )
2075
0
    {
2076
0
        int h1, m1, s1;
2077
0
        int h2, m2, s2;
2078
0
        const char *s = TextGetLine( txt );
2079
2080
0
        if( !s )
2081
0
            return VLC_EGENERIC;
2082
2083
0
        psz_text = malloc( strlen( s ) + 1 );
2084
0
        if( !psz_text )
2085
0
            return VLC_ENOMEM;
2086
2087
0
        if( sscanf( s, "{%d:%d:%d}{%d:%d:%d}%[^\r\n]",
2088
0
                    &h1, &m1, &s1, &h2, &m2, &s2, psz_text ) == 7 )
2089
0
        {
2090
0
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2091
0
            p_subtitle->i_stop  = VLC_TICK_0 + vlc_tick_from_HMS( h2, m2, s2 );
2092
0
            break;
2093
0
        }
2094
0
        free( psz_text );
2095
0
    }
2096
2097
    /* replace | by \n */
2098
0
    for( i = 0; psz_text[i] != '\0'; i++ )
2099
0
    {
2100
0
        if( psz_text[i] == '|' )
2101
0
            psz_text[i] = '\n';
2102
0
    }
2103
0
    p_subtitle->psz_text = psz_text;
2104
0
    return VLC_SUCCESS;
2105
0
}
2106
2107
static vlc_tick_t ParseRealTime( const char *psz )
2108
2.29k
{
2109
2.29k
    if( *psz == '\0' ) return VLC_TICK_0;
2110
2.29k
    int h, m, s, f;
2111
2.29k
    if( sscanf( psz, "%d:%d:%d.%d", &h, &m, &s, &f ) == 4 )
2112
160
    {
2113
160
        return vlc_tick_from_HMS( h, m, s )
2114
160
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2115
160
    }
2116
2.13k
    if( sscanf( psz, "%d:%d.%d", &m, &s, &f ) == 3 )
2117
168
    {
2118
168
        return vlc_tick_from_HMS( 0, m, s )
2119
168
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2120
168
    }
2121
1.97k
    if( sscanf( psz, "%d.%d", &s, &f ) == 2 )
2122
369
    {
2123
369
        return vlc_tick_from_sec( s )
2124
369
               + VLC_TICK_FROM_MS(f * 10) + VLC_TICK_0;
2125
369
    }
2126
1.60k
    if( sscanf( psz, "%d:%d", &m, &s ) == 2 )
2127
608
    {
2128
608
        return vlc_tick_from_HMS( 0, m, s )
2129
608
               + VLC_TICK_0;
2130
608
    }
2131
994
    if( sscanf( psz, "%d", &s ) == 1 )
2132
651
    {
2133
651
        return vlc_tick_from_sec( s )
2134
651
               + VLC_TICK_0;
2135
651
    }
2136
343
    return VLC_TICK_MIN;
2137
994
}
2138
2139
static int ParseRealText( vlc_object_t *p_obj, subs_properties_t *p_props,
2140
                          text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2141
2.00k
{
2142
2.00k
    VLC_UNUSED(p_obj);
2143
2.00k
    VLC_UNUSED(p_props);
2144
2.00k
    VLC_UNUSED( i_idx );
2145
2.00k
    char *psz_text = NULL;
2146
2147
2.00k
    for( ;; )
2148
977k
    {
2149
977k
        const char *s = TextGetLine( txt );
2150
977k
        free( psz_text );
2151
2152
977k
        if( !s )
2153
39
            return VLC_EGENERIC;
2154
2155
977k
        psz_text = malloc( strlen( s ) + 1 );
2156
977k
        if( !psz_text )
2157
0
            return VLC_ENOMEM;
2158
2159
        /* Find the good beginning. This removes extra spaces at the beginning
2160
           of the line.*/
2161
977k
        char *psz_temp = strcasestr( s, "<time");
2162
977k
        if( psz_temp != NULL )
2163
2.45k
        {
2164
2.45k
            char psz_end[12], psz_begin[12];
2165
2.45k
            vlc_tick_t end = VLC_TICK_MIN;
2166
            /* Line has begin and end */
2167
2.45k
            if( sscanf( psz_temp,
2168
2.45k
                  "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\" %*[e|E]nd=\"%11[^\"]%*[^>]%[^\n\r]",
2169
2.45k
                            psz_begin, psz_end, psz_text) == 3 )
2170
333
            {
2171
333
                end = ParseRealTime( psz_end );
2172
333
            }
2173
2.12k
            else if ( sscanf( psz_temp,
2174
2.12k
                                "<%*[t|T]ime %*[b|B]egin=\"%11[^\"]\"%*[^>]%[^\n\r]",
2175
2.12k
                                psz_begin, psz_text ) != 2)
2176
                /* Line is not recognized */
2177
488
            {
2178
488
                continue;
2179
488
            }
2180
2181
            /* Get the times */
2182
1.96k
            vlc_tick_t i_time = ParseRealTime( psz_begin );
2183
1.96k
            if (i_time != VLC_TICK_MIN)
2184
1.76k
                p_subtitle->i_start = i_time;
2185
200
            else
2186
200
                p_subtitle->i_start = -1;
2187
2188
1.96k
            if (end != VLC_TICK_MIN)
2189
190
                p_subtitle->i_stop = end;
2190
1.77k
            else
2191
1.77k
                p_subtitle->i_stop = -1;
2192
1.96k
            break;
2193
2.45k
        }
2194
977k
    }
2195
2196
    /* Get the following Lines */
2197
1.96k
    size_t i_old = strlen( psz_text );
2198
1.96k
    for( ;; )
2199
3.38k
    {
2200
3.38k
        const char *s = TextGetLine( txt );
2201
2202
3.38k
        if( !s )
2203
16
        {
2204
16
            free( psz_text );
2205
16
            return VLC_EGENERIC;
2206
16
        }
2207
2208
3.37k
        size_t i_len = strlen( s );
2209
3.37k
        if( i_len == 0 ) break;
2210
2211
3.14k
        if( strcasestr( s, "<time" ) ||
2212
1.65k
            strcasestr( s, "<clear/") )
2213
1.72k
        {
2214
1.72k
            TextPreviousLine( txt );
2215
1.72k
            break;
2216
1.72k
        }
2217
2218
1.42k
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2219
1.42k
        if( !psz_text )
2220
0
            return VLC_ENOMEM;
2221
2222
1.42k
        memcpy( &psz_text[i_old], s, i_len );
2223
1.42k
        psz_text[i_old + i_len + 0] = '\n';
2224
1.42k
        i_old += i_len + 1;
2225
1.42k
    }
2226
2227
1.95k
    psz_text[i_old] = '\0';
2228
    /* Remove the starting ">" that remained after the sscanf */
2229
1.95k
    memmove( &psz_text[0], &psz_text[1], strlen( psz_text ) );
2230
2231
1.95k
    p_subtitle->psz_text = psz_text;
2232
2233
1.95k
    return VLC_SUCCESS;
2234
1.96k
}
2235
2236
static int ParseDKS( vlc_object_t *p_obj, subs_properties_t *p_props,
2237
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2238
2.03k
{
2239
2.03k
    VLC_UNUSED(p_obj);
2240
2.03k
    VLC_UNUSED(p_props);
2241
2.03k
    VLC_UNUSED( i_idx );
2242
2243
2.03k
    char *psz_text;
2244
2245
2.03k
    for( ;; )
2246
823k
    {
2247
823k
        int h1, m1, s1;
2248
823k
        int h2, m2, s2;
2249
823k
        char *s = TextGetLine( txt );
2250
2251
823k
        if( !s )
2252
17
            return VLC_EGENERIC;
2253
2254
823k
        psz_text = malloc( strlen( s ) + 1 );
2255
823k
        if( !psz_text )
2256
0
            return VLC_ENOMEM;
2257
2258
823k
        if( sscanf( s, "[%d:%d:%d]%[^\r\n]",
2259
823k
                    &h1, &m1, &s1, psz_text ) == 4 )
2260
2.01k
        {
2261
2.01k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2262
2263
2.01k
            s = TextGetLine( txt );
2264
2.01k
            if( !s )
2265
0
            {
2266
0
                free( psz_text );
2267
0
                return VLC_EGENERIC;
2268
0
            }
2269
2270
2.01k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2271
1.32k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2272
690
            else
2273
690
                p_subtitle->i_stop  = -1;
2274
2.01k
            break;
2275
2.01k
        }
2276
821k
        free( psz_text );
2277
821k
    }
2278
2279
    /* replace [br] by \n */
2280
2.01k
    char *p;
2281
2.32k
    while( ( p = strstr( psz_text, "[br]" ) ) )
2282
306
    {
2283
306
        *p++ = '\n';
2284
306
        memmove( p, &p[3], strlen(&p[3])+1 );
2285
306
    }
2286
2287
2.01k
    p_subtitle->psz_text = psz_text;
2288
2.01k
    return VLC_SUCCESS;
2289
2.03k
}
2290
2291
static int ParseSubViewer1( vlc_object_t *p_obj, subs_properties_t *p_props,
2292
                            text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2293
3.09k
{
2294
3.09k
    VLC_UNUSED(p_obj);
2295
3.09k
    VLC_UNUSED(p_props);
2296
3.09k
    VLC_UNUSED( i_idx );
2297
3.09k
    char *psz_text;
2298
2299
3.09k
    for( ;; )
2300
1.56M
    {
2301
1.56M
        int h1, m1, s1;
2302
1.56M
        int h2, m2, s2;
2303
1.56M
        char *s = TextGetLine( txt );
2304
2305
1.56M
        if( !s )
2306
21
            return VLC_EGENERIC;
2307
2308
1.56M
        if( sscanf( s, "[%d:%d:%d]", &h1, &m1, &s1 ) == 3 )
2309
3.07k
        {
2310
3.07k
            p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_HMS( h1, m1, s1 );
2311
2312
3.07k
            s = TextGetLine( txt );
2313
3.07k
            if( !s )
2314
1
                return VLC_EGENERIC;
2315
2316
3.07k
            psz_text = strdup( s );
2317
3.07k
            if( !psz_text )
2318
0
                return VLC_ENOMEM;
2319
2320
3.07k
            s = TextGetLine( txt );
2321
3.07k
            if( !s )
2322
5
            {
2323
5
                free( psz_text );
2324
5
                return VLC_EGENERIC;
2325
5
            }
2326
2327
3.07k
            if( sscanf( s, "[%d:%d:%d]", &h2, &m2, &s2 ) == 3 )
2328
2.55k
                p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 );
2329
513
            else
2330
513
                p_subtitle->i_stop  = -1;
2331
2332
3.07k
            break;
2333
3.07k
        }
2334
1.56M
    }
2335
2336
3.07k
    p_subtitle->psz_text = psz_text;
2337
2338
3.07k
    return VLC_SUCCESS;
2339
3.09k
}
2340
2341
static int ParseCommonSBV( vlc_object_t *p_obj, subs_properties_t *p_props,
2342
                           text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2343
578
{
2344
578
    VLC_UNUSED(p_obj);
2345
578
    VLC_UNUSED( i_idx );
2346
578
    VLC_UNUSED( p_props );
2347
578
    char        *psz_text;
2348
2349
578
    for( ;; )
2350
18.6k
    {
2351
18.6k
        const char *s = TextGetLine( txt );
2352
18.6k
        int h1 = 0, m1 = 0, s1 = 0, d1 = 0;
2353
18.6k
        int h2 = 0, m2 = 0, s2 = 0, d2 = 0;
2354
2355
18.6k
        if( !s )
2356
15
            return VLC_EGENERIC;
2357
2358
18.5k
        if( sscanf( s,"%d:%d:%d.%d,%d:%d:%d.%d",
2359
18.5k
                    &h1, &m1, &s1, &d1,
2360
18.5k
                    &h2, &m2, &s2, &d2 ) == 8 )
2361
702
        {
2362
702
            p_subtitle->i_start = vlc_tick_from_HMS( h1, m1, s1 ) +
2363
702
                                  VLC_TICK_FROM_MS( d1 ) + VLC_TICK_0;
2364
2365
702
            p_subtitle->i_stop  = vlc_tick_from_HMS( h2, m2, s2 ) +
2366
702
                                  VLC_TICK_FROM_MS( d2 ) + VLC_TICK_0;
2367
702
            if( p_subtitle->i_start < p_subtitle->i_stop )
2368
563
                break;
2369
702
        }
2370
18.5k
    }
2371
2372
    /* Now read text until an empty line */
2373
563
    size_t i_old = 0;
2374
563
    psz_text = NULL;
2375
563
    for( ;; )
2376
1.34k
    {
2377
1.34k
        const char *s = TextGetLine( txt );
2378
1.34k
        size_t i_len;
2379
2380
1.34k
        i_len = s ? strlen( s ) : 0;
2381
1.34k
        if( i_len <= 0 )
2382
563
        {
2383
563
            if (psz_text)
2384
250
                psz_text[i_old] = '\0';
2385
563
            p_subtitle->psz_text = psz_text;
2386
563
            return VLC_SUCCESS;
2387
563
        }
2388
2389
780
        psz_text = realloc_or_free( psz_text, i_old + i_len + 1 + 1 );
2390
780
        if( !psz_text )
2391
0
            return VLC_ENOMEM;
2392
2393
780
        memcpy( &psz_text[i_old], s, i_len );
2394
780
        psz_text[i_old + i_len + 0] = '\n';
2395
780
        i_old += i_len + 1;
2396
780
    }
2397
563
}
2398
2399
static int ParseSCC( vlc_object_t *p_obj, subs_properties_t *p_props,
2400
                     text_t *txt, subtitle_t *p_subtitle, size_t i_idx )
2401
5.53k
{
2402
5.53k
    VLC_UNUSED(p_obj);
2403
5.53k
    VLC_UNUSED( i_idx );
2404
5.53k
    VLC_UNUSED( p_props );
2405
2406
5.53k
    static const struct rates
2407
5.53k
    {
2408
5.53k
        unsigned val;
2409
5.53k
        vlc_rational_t rate;
2410
5.53k
        bool b_drop_allowed;
2411
5.53k
    } framerates[] = {
2412
5.53k
        { 2398, { 24000, 1001 }, false },
2413
5.53k
        { 2400, { 24, 1 },       false },
2414
5.53k
        { 2500, { 25, 1 },       false },
2415
5.53k
        { 2997, { 30000, 1001 }, true }, /* encoding rate */
2416
5.53k
        { 3000, { 30, 1 },       false },
2417
5.53k
        { 5000, { 50, 1 },       false },
2418
5.53k
        { 5994, { 60000, 1001 }, true },
2419
5.53k
        { 6000, { 60, 1 },       false },
2420
5.53k
    };
2421
5.53k
    const struct rates *p_rate = &framerates[3];
2422
5.53k
    float f_fps = var_GetFloat( p_obj, "sub-original-fps" );
2423
5.53k
    if( f_fps > 1.0 )
2424
0
    {
2425
0
        for( size_t i=0; i<ARRAY_SIZE(framerates); i++ )
2426
0
        {
2427
0
            if( (unsigned)(f_fps * 100) == framerates[i].val )
2428
0
            {
2429
0
                p_rate = &framerates[i];
2430
0
                break;
2431
0
            }
2432
0
        }
2433
0
    }
2434
2435
5.53k
    for( ;; )
2436
590k
    {
2437
590k
        const char *psz_line = TextGetLine( txt );
2438
590k
        if( !psz_line )
2439
165
            return VLC_EGENERIC;
2440
2441
590k
        unsigned h, m, s, f;
2442
590k
        char c;
2443
590k
        if( sscanf( psz_line, "%u:%u:%u%c%u ", &h, &m, &s, &c, &f ) != 5 ||
2444
6.16k
                ( c != ':' && c != ';' ) )
2445
584k
            continue;
2446
2447
        /* convert everything to seconds */
2448
5.94k
        int64_t i_frames = h * INT64_C(3600) + m * INT64_C(60) + s;
2449
2450
5.94k
        if( c == ';' && p_rate->b_drop_allowed ) /* dropframe */
2451
134
        {
2452
            /* convert to frame # to be accurate between inter drop drift
2453
             * of 18 frames see http://andrewduncan.net/timecodes/ */
2454
134
            const unsigned i_mins = h * 60 + m;
2455
134
            i_frames = i_frames * p_rate[+1].rate.num + f
2456
134
                    - (p_rate[+1].rate.den * 2 * (i_mins - i_mins % 10));
2457
134
        }
2458
5.80k
        else
2459
5.80k
        {
2460
            /* convert to frame # at 29.97 */
2461
5.80k
            i_frames = i_frames * framerates[3].rate.num / framerates[3].rate.den + f;
2462
5.80k
        }
2463
5.94k
        p_subtitle->i_start = VLC_TICK_0 + vlc_tick_from_sec(i_frames)*
2464
5.94k
                                         p_rate->rate.den / p_rate->rate.num;
2465
5.94k
        p_subtitle->i_stop = -1;
2466
2467
5.94k
        const char *psz_text = strchr( psz_line, '\t' );
2468
5.94k
        if( !psz_text && !(psz_text = strchr( psz_line, ' ' )) )
2469
192
            continue;
2470
2471
5.74k
        if ( psz_text[1] == '\0' )
2472
377
            continue;
2473
2474
5.37k
        p_subtitle->psz_text = strdup( psz_text + 1 );
2475
5.37k
        if( !p_subtitle->psz_text )
2476
0
            return VLC_ENOMEM;
2477
2478
5.37k
        break;
2479
5.37k
    }
2480
2481
5.37k
    return VLC_SUCCESS;
2482
5.53k
}
2483
2484
/* Tries to extract language from common filename patterns PATH/filename.LANG.ext
2485
   and PATH/Subs/x_LANG.ext (where 'x' is an integer). */
2486
static char *get_language_from_url(const char *urlstr)
2487
929
{
2488
929
    vlc_url_t url;
2489
929
    const char *filename = NULL;
2490
929
    char *ret = NULL;
2491
2492
929
    assert(urlstr != NULL);
2493
2494
929
    if (vlc_UrlParse(&url, urlstr) != 0)
2495
0
    {
2496
0
        vlc_UrlClean(&url);
2497
0
        return NULL;
2498
0
    }
2499
929
    if (url.psz_path != NULL)
2500
0
        filename = strrchr(url.psz_path, '/');
2501
929
    if (filename != NULL) {
2502
0
        filename++; // skip forward slash
2503
2504
0
        const char *ext = strrchr(filename, '.');
2505
2506
0
        if (ext != NULL) {
2507
            /* Get string between last two periods, hopefully the language. */
2508
0
            const char *lang = memrchr(filename, '.', ext - filename);
2509
2510
            /* Otherwise try string after last underscore. */
2511
0
            if (lang == NULL)
2512
0
                lang = memrchr(filename, '_', ext - filename);
2513
2514
0
            if (lang != NULL) {
2515
0
                lang++; // skip period or underscore
2516
0
                ret = strndup(lang, ext - lang);
2517
0
            }
2518
0
       }
2519
0
    }
2520
2521
929
    vlc_UrlClean(&url);
2522
929
    return ret;
2523
929
}
2524
2525
#ifdef ENABLE_TEST
2526
static void test_subtitle_ParseSubRipTimingValue(void)
2527
{
2528
    fprintf(stderr, "\n# %s:\n", __func__);
2529
2530
    struct test_timing_value
2531
    {
2532
        const char *str;
2533
        vlc_tick_t value;
2534
    };
2535
2536
    static const struct test_timing_value timing_values_success[] =
2537
    {
2538
        { "0:0:0,0",        VLC_TICK_0 },
2539
        { "0:0:0.0",        VLC_TICK_0 },
2540
        { "0:0:0",          VLC_TICK_0 },
2541
    };
2542
2543
    struct test_sized_timing_value
2544
    {
2545
        const char *str;
2546
        vlc_tick_t value;
2547
        size_t length;
2548
    };
2549
2550
    static const struct test_sized_timing_value sized_timing_values_success[] =
2551
    {
2552
        { "0:0:0,1",        VLC_TICK_0, strlen("0:0:0") },
2553
        { "0:0:0.1",        VLC_TICK_0, strlen("0:0:0") },
2554
    };
2555
2556
    static const char *timing_values_fail[] =
2557
    {
2558
        "0:0",
2559
        "0",
2560
    };
2561
2562
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2563
    {
2564
        fprintf(stderr, "Checking that %s parses into %" PRId64 "\n",
2565
                timing_values_success[i].str, timing_values_success[i].value);
2566
2567
        vlc_tick_t value;
2568
        int ret = subtitle_ParseSubRipTimingValue(&value,
2569
                timing_values_success[i].str,
2570
                strlen(timing_values_success[i].str));
2571
        fprintf(stderr, " -> %" PRId64 "\n", value);
2572
        assert(ret == VLC_SUCCESS);
2573
        assert(value == timing_values_success[i].value);
2574
    }
2575
2576
    for (size_t i=0; i<ARRAY_SIZE(sized_timing_values_success); ++i)
2577
    {
2578
        fprintf(stderr, "Checking that %s (length=%zu) parses into %" PRId64 "\n",
2579
                sized_timing_values_success[i].str,
2580
                sized_timing_values_success[i].length,
2581
                sized_timing_values_success[i].value);
2582
2583
        vlc_tick_t value;
2584
        int ret = subtitle_ParseSubRipTimingValue(&value,
2585
                sized_timing_values_success[i].str,
2586
                sized_timing_values_success[i].length);
2587
        assert(ret == VLC_SUCCESS);
2588
        fprintf(stderr, " -> %" PRId64 "\n", value);
2589
        assert(value == sized_timing_values_success[i].value);
2590
    }
2591
2592
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2593
    {
2594
        fprintf(stderr, "Checking that %s fails to parse\n",
2595
                timing_values_fail[i]);
2596
        vlc_tick_t value;
2597
        int ret = subtitle_ParseSubRipTimingValue(&value,
2598
                timing_values_fail[i], strlen(timing_values_fail[i]));
2599
        (void)value;
2600
        assert(ret != VLC_SUCCESS);
2601
    }
2602
2603
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2604
    {
2605
        fprintf(stderr, "Checking that %s fails to parse\n",
2606
                timing_values_fail[i]);
2607
        vlc_tick_t value;
2608
        int ret = subtitle_ParseSubRipTimingValue(&value,
2609
                timing_values_fail[i], strlen(timing_values_fail[i]));
2610
        (void)value;
2611
        assert(ret != VLC_SUCCESS);
2612
    }
2613
}
2614
2615
static void test_subtitle_ParseSubRipTiming(void)
2616
{
2617
    fprintf(stderr, "\n# %s:\n", __func__);
2618
2619
    struct test_timing_value
2620
    {
2621
        const char *str;
2622
        vlc_tick_t left;
2623
        vlc_tick_t right;
2624
    };
2625
2626
    static const struct test_timing_value timing_values_success[] =
2627
    {
2628
        { "0:0:0,0 --> 0:0:0,0",        VLC_TICK_0,     VLC_TICK_0 },
2629
        { "0:0:0.0 --> 0:0:0.0",        VLC_TICK_0,     VLC_TICK_0 },
2630
        { "0:0:0   --> 0:0:0",          VLC_TICK_0,     VLC_TICK_0 },
2631
    };
2632
2633
    static const char *timing_values_fail[] =
2634
    {
2635
        "0:0 --> 0:0",
2636
        "0:0 --> 0:0:0,0",
2637
        "0:0:0,0 --> 0:0",
2638
        "0 -> 0",
2639
    };
2640
2641
    for (size_t i=0; i<ARRAY_SIZE(timing_values_success); ++i)
2642
    {
2643
        fprintf(stderr, "Checking that %s parses into %" PRId64 " --> %" PRId64 "\n",
2644
                timing_values_success[i].str,
2645
                timing_values_success[i].left,
2646
                timing_values_success[i].right);
2647
2648
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2649
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_success[i].str);
2650
        fprintf(stderr, " -> %" PRId64 " --> %" PRId64 "\n", sub.i_start, sub.i_stop);
2651
        assert(ret == VLC_SUCCESS);
2652
        assert(sub.i_start == timing_values_success[i].left);
2653
        assert(sub.i_stop == timing_values_success[i].right);
2654
    }
2655
2656
    for (size_t i=0; i<ARRAY_SIZE(timing_values_fail); ++i)
2657
    {
2658
        fprintf(stderr, "Checking that %s fails to parse\n",
2659
                timing_values_fail[i]);
2660
        subtitle_t sub = { .i_start = VLC_TICK_INVALID, .i_stop = VLC_TICK_INVALID };
2661
        int ret = subtitle_ParseSubRipTiming(&sub, timing_values_fail[i]);
2662
        (void)sub;
2663
        assert(ret != VLC_SUCCESS);
2664
    }
2665
}
2666
2667
/* Entry point of the ENABLE_TEST build: runs the SubRip timing self-tests
 * in sequence. A failing check aborts through assert(), so reaching the
 * end means every check passed. */
int main(int argc, char **argv)
{
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    test_subtitle_ParseSubRipTimingValue();
    test_subtitle_ParseSubRipTiming();

    return 0;
}
2675
#endif