Coverage Report

Created: 2025-07-23 07:11

/src/vlc/modules/codec/webvtt/webvtt.c
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * webvtt.c: WEBVTT shared code
3
 *****************************************************************************
4
 * Copyright (C) 2017 VideoLabs, VLC authors and VideoLAN
5
 *
6
 * This program is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as published by
8
 * the Free Software Foundation; either version 2.1 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public License
17
 * along with this program; if not, write to the Free Software Foundation,
18
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19
 *****************************************************************************/
20
21
#ifdef HAVE_CONFIG_H
22
# include "config.h"
23
#endif
24
25
#include <vlc_common.h>
26
#include <vlc_charset.h>
27
#include <vlc_plugin.h>
28
29
#include "webvtt.h"
30
31
#include <ctype.h>
32
#include <assert.h>
33
34
/*****************************************************************************
35
 * Modules descriptor.
36
 *****************************************************************************/
37
38
104
vlc_module_begin ()
39
52
    set_capability( "spu decoder", 10 )
40
52
    set_shortname( N_("WEBVTT decoder"))
41
52
    set_description( N_("WEBVTT subtitles decoder") )
42
104
    set_callbacks( webvtt_OpenDecoder, webvtt_CloseDecoder )
43
52
    set_subcategory( SUBCAT_INPUT_SCODEC )
44
52
    add_submodule()
45
52
        set_shortname( "WEBVTT" )
46
52
        set_description( N_("WEBVTT subtitles parser") )
47
52
        set_capability( "demux", 11 )
48
52
        set_subcategory( SUBCAT_INPUT_DEMUX )
49
52
        set_callbacks( webvtt_OpenDemux, webvtt_CloseDemux )
50
52
        add_shortcut( "webvtt" )
51
52
    add_submodule()
52
52
        set_shortname( "WEBVTT" )
53
52
        set_description( N_("WEBVTT subtitles parser") )
54
52
        set_capability( "demux", 0 )
55
52
        set_subcategory( SUBCAT_INPUT_DEMUX )
56
52
        set_callbacks( webvtt_OpenDemuxStream, webvtt_CloseDemux )
57
52
        add_shortcut( "webvttstream" )
58
52
#ifdef ENABLE_SOUT
59
52
    add_submodule()
60
52
        set_description( "WEBVTT text encoder" )
61
52
        set_capability( "spu encoder", 101 )
62
52
        set_subcategory( SUBCAT_INPUT_SCODEC )
63
52
        set_callback( webvtt_OpenEncoder )
64
52
    add_submodule()
65
52
        set_description( N_("Raw WebVTT muxer") )
66
52
        set_capability( "sout mux", 0 )
67
52
        set_subcategory( SUBCAT_SOUT_MUX )
68
52
        add_shortcut( "webvtt", "rawvtt" )
69
104
        set_callbacks( webvtt_OpenMuxer, webvtt_CloseMuxer )
70
52
#endif
71
52
vlc_module_end ()
72
73
struct webvtt_text_parser_t
74
{
75
    enum
76
    {
77
        WEBVTT_SECTION_UNDEFINED = WEBVTT_HEADER_STYLE - 1,
78
        WEBVTT_SECTION_STYLE = WEBVTT_HEADER_STYLE,
79
        WEBVTT_SECTION_REGION = WEBVTT_HEADER_REGION,
80
        WEBVTT_SECTION_NOTE,
81
        WEBVTT_SECTION_CUES,
82
    } section;
83
    char * reads[3];
84
85
    void * priv;
86
    webvtt_cue_t *(*pf_get_cue)( void * );
87
    void (*pf_cue_done)( void *, webvtt_cue_t * );
88
    void (*pf_header)( void *, enum webvtt_header_line_e, bool, const char * );
89
90
    webvtt_cue_t *p_cue;
91
};
92
93
static vlc_tick_t MakeTime( unsigned t[4] )
94
206k
{
95
206k
    return vlc_tick_from_sec( t[0] * 3600 + t[1] * 60 + t[2] ) +
96
206k
           VLC_TICK_FROM_MS(t[3]);
97
206k
}
98
99
bool webvtt_scan_time( const char *psz, vlc_tick_t *p_time )
100
217k
{
101
217k
    unsigned t[4];
102
217k
    if( sscanf( psz, "%2u:%2u.%3u",
103
217k
                      &t[1], &t[2], &t[3] ) == 3 )
104
36.7k
    {
105
36.7k
        t[0] = 0;
106
36.7k
        *p_time = MakeTime( t );
107
36.7k
        return true;
108
36.7k
    }
109
180k
    else if( sscanf( psz, "%u:%2u:%2u.%3u",
110
180k
                          &t[0], &t[1], &t[2], &t[3] ) == 4 )
111
169k
    {
112
169k
        *p_time = MakeTime( t );
113
169k
        return true;
114
169k
    }
115
11.3k
    else return false;
116
217k
}
117
118
static bool KeywordMatch( const char *psz, const char *keyword )
119
197k
{
120
197k
    const size_t i_len = strlen(keyword);
121
197k
    return( !strncmp( keyword, psz, i_len ) && (!psz[i_len] || isspace(psz[i_len])) );
122
197k
}
123
124
/*
125
126
*/
127
128
webvtt_text_parser_t * webvtt_text_parser_New( void *priv,
129
                    webvtt_cue_t *(*pf_get_cue)( void * ),
130
                    void (*pf_cue_done)( void *, webvtt_cue_t * ),
131
                    void (*pf_header)( void *, enum webvtt_header_line_e, bool, const char * ) )
132
12.5k
{
133
12.5k
    webvtt_text_parser_t *p = malloc(sizeof(*p));
134
12.5k
    if( p )
135
12.5k
    {
136
12.5k
        p->section = WEBVTT_SECTION_UNDEFINED;
137
50.3k
        for( int i=0; i<3; i++ )
138
37.7k
            p->reads[i] = NULL;
139
12.5k
        p->p_cue = NULL;
140
12.5k
        p->priv = priv;
141
12.5k
        p->pf_cue_done = pf_cue_done;
142
12.5k
        p->pf_get_cue = pf_get_cue;
143
12.5k
        p->pf_header = pf_header;
144
12.5k
    }
145
12.5k
    return p;
146
12.5k
}
147
148
void webvtt_text_parser_Delete( webvtt_text_parser_t *p )
149
12.5k
{
150
50.3k
    for( int i=0; i<3; i++ )
151
37.7k
        free( p->reads[i] );
152
12.5k
    free( p );
153
12.5k
}
154
155
static void forward_line( webvtt_text_parser_t *p, const char *psz_line, bool b_new )
156
197k
{
157
197k
    if( p->pf_header )
158
197k
        p->pf_header( p->priv, (enum webvtt_header_line_e)p->section,
159
197k
                      b_new, psz_line );
160
197k
}
161
162
void webvtt_text_parser_Feed( webvtt_text_parser_t *p, char *psz_line )
163
978k
{
164
978k
    if( psz_line == NULL )
165
6.28k
    {
166
6.28k
        if( p->p_cue )
167
2.97k
        {
168
2.97k
            if( p->pf_cue_done )
169
2.97k
                p->pf_cue_done( p->priv, p->p_cue );
170
2.97k
            p->p_cue = NULL;
171
2.97k
        }
172
6.28k
        return;
173
6.28k
    }
174
175
972k
    free(p->reads[0]);
176
972k
    p->reads[0] = p->reads[1];
177
972k
    p->reads[1] = p->reads[2];
178
972k
    p->reads[2] = psz_line;
179
180
    /* Lookup keywords */
181
972k
    if( unlikely(p->section == WEBVTT_SECTION_UNDEFINED) )
182
53.1k
    {
183
53.1k
        if( KeywordMatch( psz_line, "\xEF\xBB\xBFWEBVTT" ) ||
184
53.1k
            KeywordMatch( psz_line, "WEBVTT" )  )
185
9.60k
        {
186
9.60k
            p->section = WEBVTT_SECTION_UNDEFINED;
187
9.60k
            if( p->p_cue )
188
0
            {
189
0
                if( p->pf_cue_done )
190
0
                    p->pf_cue_done( p->priv, p->p_cue );
191
0
                p->p_cue = NULL;
192
0
            }
193
9.60k
            return;
194
9.60k
        }
195
43.5k
        else if( KeywordMatch( psz_line, "STYLE" ) )
196
16.0k
        {
197
16.0k
            p->section = WEBVTT_SECTION_STYLE;
198
16.0k
            forward_line( p, psz_line, true );
199
16.0k
            return;
200
16.0k
        }
201
27.4k
        else if( KeywordMatch( psz_line, "REGION" ) )
202
5.47k
        {
203
5.47k
            p->section = WEBVTT_SECTION_REGION;
204
5.47k
            forward_line( p, psz_line, true );
205
5.47k
            return;
206
5.47k
        }
207
21.9k
        else if( KeywordMatch( psz_line, "NOTE" ) )
208
383
        {
209
383
            p->section = WEBVTT_SECTION_NOTE;
210
383
            return;
211
383
        }
212
21.6k
        else if( psz_line[0] != 0 )
213
4.11k
        {
214
4.11k
            p->section = WEBVTT_SECTION_CUES;
215
4.11k
        }
216
53.1k
    }
217
218
940k
    if( likely(p->section == WEBVTT_SECTION_CUES) )
219
746k
    {
220
746k
        if( p->p_cue )
221
188k
        {
222
188k
            if( psz_line[0] == 0 )
223
85.7k
            {
224
85.7k
                if( p->p_cue )
225
85.7k
                {
226
85.7k
                    if( p->pf_cue_done )
227
85.7k
                        p->pf_cue_done( p->priv, p->p_cue );
228
85.7k
                    p->p_cue = NULL;
229
85.7k
                }
230
85.7k
            }
231
102k
            else
232
102k
            {
233
102k
                char *psz_merged;
234
102k
                if( -1 < asprintf( &psz_merged, "%s\n%s", p->p_cue->psz_text, psz_line ) )
235
102k
                {
236
102k
                    free( p->p_cue->psz_text );
237
102k
                    p->p_cue->psz_text = psz_merged;
238
102k
                }
239
102k
                return;
240
102k
            }
241
188k
        }
242
243
644k
        if( p->reads[1] == NULL )
244
6.57k
            return;
245
246
637k
        const char *psz_split = strstr( p->reads[1], " --> " );
247
637k
        if( psz_split )
248
92.4k
        {
249
92.4k
            vlc_tick_t i_start, i_stop;
250
251
92.4k
            if( webvtt_scan_time( p->reads[1], &i_start ) &&
252
92.4k
                webvtt_scan_time( psz_split + 5,  &i_stop ) && i_start <= i_stop )
253
89.0k
            {
254
89.0k
                const char *psz_attrs = strchr( psz_split + 5 + 5, ' ' );
255
89.0k
                p->p_cue = ( p->pf_get_cue ) ? p->pf_get_cue( p->priv ) : NULL;
256
89.0k
                if( p->p_cue )
257
88.7k
                {
258
88.7k
                    p->p_cue->psz_attrs = ( psz_attrs ) ? strdup( psz_attrs ) : NULL;
259
88.7k
                    p->p_cue->psz_id = p->reads[0];
260
88.7k
                    p->reads[0] = NULL;
261
88.7k
                    p->p_cue->psz_text = p->reads[2];
262
88.7k
                    p->reads[2] = NULL;
263
88.7k
                    p->p_cue->i_start = i_start;
264
88.7k
                    p->p_cue->i_stop = i_stop;
265
88.7k
                }
266
89.0k
            }
267
92.4k
        }
268
637k
    }
269
194k
    else if( p->section == WEBVTT_SECTION_STYLE )
270
136k
    {
271
136k
        forward_line( p, psz_line, false );
272
136k
        if( psz_line[0] == 0 )
273
13.1k
            p->section = WEBVTT_SECTION_UNDEFINED;
274
136k
    }
275
57.1k
    else if( p->section == WEBVTT_SECTION_REGION )
276
38.6k
    {
277
38.6k
        forward_line( p, psz_line, false );
278
38.6k
        if( psz_line[0] == 0 ) /* End of region declaration */
279
4.27k
            p->section = WEBVTT_SECTION_UNDEFINED;
280
38.6k
    }
281
18.5k
    else if( p->section == WEBVTT_SECTION_NOTE )
282
1.03k
    {
283
1.03k
        if( psz_line[0] == 0 )
284
372
            p->section = WEBVTT_SECTION_UNDEFINED;
285
1.03k
    }
286
940k
}