/src/vlc/modules/codec/webvtt/webvtt.c
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * webvtt.c: WEBVTT shared code |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2017 VideoLabs, VLC authors and VideoLAN |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or modify it |
7 | | * under the terms of the GNU Lesser General Public License as published by |
8 | | * the Free Software Foundation; either version 2.1 of the License, or |
9 | | * (at your option) any later version. |
10 | | * |
11 | | * This program is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public License |
17 | | * along with this program; if not, write to the Free Software Foundation, |
18 | | * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. |
19 | | *****************************************************************************/ |
20 | | |
21 | | #ifdef HAVE_CONFIG_H |
22 | | # include "config.h" |
23 | | #endif |
24 | | |
25 | | #include <vlc_common.h> |
26 | | #include <vlc_charset.h> |
27 | | #include <vlc_plugin.h> |
28 | | |
29 | | #include "webvtt.h" |
30 | | |
31 | | #include <ctype.h> |
32 | | #include <assert.h> |
33 | | |
34 | | /***************************************************************************** |
35 | | * Modules descriptor. |
36 | | *****************************************************************************/ |
37 | | |
38 | 104 | vlc_module_begin () |
39 | 52 | set_capability( "spu decoder", 10 ) |
40 | 52 | set_shortname( N_("WEBVTT decoder")) |
41 | 52 | set_description( N_("WEBVTT subtitles decoder") ) |
42 | 104 | set_callbacks( webvtt_OpenDecoder, webvtt_CloseDecoder ) |
43 | 52 | set_subcategory( SUBCAT_INPUT_SCODEC ) |
44 | 52 | add_submodule() |
45 | 52 | set_shortname( "WEBVTT" ) |
46 | 52 | set_description( N_("WEBVTT subtitles parser") ) |
47 | 52 | set_capability( "demux", 11 ) |
48 | 52 | set_subcategory( SUBCAT_INPUT_DEMUX ) |
49 | 52 | set_callbacks( webvtt_OpenDemux, webvtt_CloseDemux ) |
50 | 52 | add_shortcut( "webvtt" ) |
51 | 52 | add_submodule() |
52 | 52 | set_shortname( "WEBVTT" ) |
53 | 52 | set_description( N_("WEBVTT subtitles parser") ) |
54 | 52 | set_capability( "demux", 0 ) |
55 | 52 | set_subcategory( SUBCAT_INPUT_DEMUX ) |
56 | 52 | set_callbacks( webvtt_OpenDemuxStream, webvtt_CloseDemux ) |
57 | 52 | add_shortcut( "webvttstream" ) |
58 | 52 | #ifdef ENABLE_SOUT |
59 | 52 | add_submodule() |
60 | 52 | set_description( "WEBVTT text encoder" ) |
61 | 52 | set_capability( "spu encoder", 101 ) |
62 | 52 | set_subcategory( SUBCAT_INPUT_SCODEC ) |
63 | 52 | set_callback( webvtt_OpenEncoder ) |
64 | 52 | add_submodule() |
65 | 52 | set_description( N_("Raw WebVTT muxer") ) |
66 | 52 | set_capability( "sout mux", 0 ) |
67 | 52 | set_subcategory( SUBCAT_SOUT_MUX ) |
68 | 52 | add_shortcut( "webvtt", "rawvtt" ) |
69 | 104 | set_callbacks( webvtt_OpenMuxer, webvtt_CloseMuxer ) |
70 | 52 | #endif |
71 | 52 | vlc_module_end () |
72 | | |
73 | | struct webvtt_text_parser_t |
74 | | { |
75 | | enum |
76 | | { |
77 | | WEBVTT_SECTION_UNDEFINED = WEBVTT_HEADER_STYLE - 1, |
78 | | WEBVTT_SECTION_STYLE = WEBVTT_HEADER_STYLE, |
79 | | WEBVTT_SECTION_REGION = WEBVTT_HEADER_REGION, |
80 | | WEBVTT_SECTION_NOTE, |
81 | | WEBVTT_SECTION_CUES, |
82 | | } section; |
83 | | char * reads[3]; |
84 | | |
85 | | void * priv; |
86 | | webvtt_cue_t *(*pf_get_cue)( void * ); |
87 | | void (*pf_cue_done)( void *, webvtt_cue_t * ); |
88 | | void (*pf_header)( void *, enum webvtt_header_line_e, bool, const char * ); |
89 | | |
90 | | webvtt_cue_t *p_cue; |
91 | | }; |
92 | | |
93 | | static vlc_tick_t MakeTime( unsigned t[4] ) |
94 | 206k | { |
95 | 206k | return vlc_tick_from_sec( t[0] * 3600 + t[1] * 60 + t[2] ) + |
96 | 206k | VLC_TICK_FROM_MS(t[3]); |
97 | 206k | } |
98 | | |
99 | | bool webvtt_scan_time( const char *psz, vlc_tick_t *p_time ) |
100 | 217k | { |
101 | 217k | unsigned t[4]; |
102 | 217k | if( sscanf( psz, "%2u:%2u.%3u", |
103 | 217k | &t[1], &t[2], &t[3] ) == 3 ) |
104 | 36.7k | { |
105 | 36.7k | t[0] = 0; |
106 | 36.7k | *p_time = MakeTime( t ); |
107 | 36.7k | return true; |
108 | 36.7k | } |
109 | 180k | else if( sscanf( psz, "%u:%2u:%2u.%3u", |
110 | 180k | &t[0], &t[1], &t[2], &t[3] ) == 4 ) |
111 | 169k | { |
112 | 169k | *p_time = MakeTime( t ); |
113 | 169k | return true; |
114 | 169k | } |
115 | 11.3k | else return false; |
116 | 217k | } |
117 | | |
/* Tells whether psz starts with keyword as a whole word.
 *
 * The match succeeds only if the keyword is immediately followed by the end
 * of the string or by a whitespace character. */
static bool KeywordMatch( const char *psz, const char *keyword )
{
    const size_t i_len = strlen( keyword );

    if( strncmp( keyword, psz, i_len ) != 0 )
        return false;

    /* strncmp() matched i_len non-NUL bytes, so psz[i_len] is readable.
     * <ctype.h> functions require a value representable as unsigned char:
     * passing a plain (possibly negative) char is undefined behaviour. */
    const unsigned char c_next = (unsigned char) psz[i_len];
    return c_next == '\0' || isspace( c_next );
}
123 | | |
124 | | /* |
125 | | |
126 | | */ |
127 | | |
128 | | webvtt_text_parser_t * webvtt_text_parser_New( void *priv, |
129 | | webvtt_cue_t *(*pf_get_cue)( void * ), |
130 | | void (*pf_cue_done)( void *, webvtt_cue_t * ), |
131 | | void (*pf_header)( void *, enum webvtt_header_line_e, bool, const char * ) ) |
132 | 12.5k | { |
133 | 12.5k | webvtt_text_parser_t *p = malloc(sizeof(*p)); |
134 | 12.5k | if( p ) |
135 | 12.5k | { |
136 | 12.5k | p->section = WEBVTT_SECTION_UNDEFINED; |
137 | 50.3k | for( int i=0; i<3; i++ ) |
138 | 37.7k | p->reads[i] = NULL; |
139 | 12.5k | p->p_cue = NULL; |
140 | 12.5k | p->priv = priv; |
141 | 12.5k | p->pf_cue_done = pf_cue_done; |
142 | 12.5k | p->pf_get_cue = pf_get_cue; |
143 | 12.5k | p->pf_header = pf_header; |
144 | 12.5k | } |
145 | 12.5k | return p; |
146 | 12.5k | } |
147 | | |
148 | | void webvtt_text_parser_Delete( webvtt_text_parser_t *p ) |
149 | 12.5k | { |
150 | 50.3k | for( int i=0; i<3; i++ ) |
151 | 37.7k | free( p->reads[i] ); |
152 | 12.5k | free( p ); |
153 | 12.5k | } |
154 | | |
155 | | static void forward_line( webvtt_text_parser_t *p, const char *psz_line, bool b_new ) |
156 | 197k | { |
157 | 197k | if( p->pf_header ) |
158 | 197k | p->pf_header( p->priv, (enum webvtt_header_line_e)p->section, |
159 | 197k | b_new, psz_line ); |
160 | 197k | } |
161 | | |
162 | | void webvtt_text_parser_Feed( webvtt_text_parser_t *p, char *psz_line ) |
163 | 978k | { |
164 | 978k | if( psz_line == NULL ) |
165 | 6.28k | { |
166 | 6.28k | if( p->p_cue ) |
167 | 2.97k | { |
168 | 2.97k | if( p->pf_cue_done ) |
169 | 2.97k | p->pf_cue_done( p->priv, p->p_cue ); |
170 | 2.97k | p->p_cue = NULL; |
171 | 2.97k | } |
172 | 6.28k | return; |
173 | 6.28k | } |
174 | | |
175 | 972k | free(p->reads[0]); |
176 | 972k | p->reads[0] = p->reads[1]; |
177 | 972k | p->reads[1] = p->reads[2]; |
178 | 972k | p->reads[2] = psz_line; |
179 | | |
180 | | /* Lookup keywords */ |
181 | 972k | if( unlikely(p->section == WEBVTT_SECTION_UNDEFINED) ) |
182 | 53.1k | { |
183 | 53.1k | if( KeywordMatch( psz_line, "\xEF\xBB\xBFWEBVTT" ) || |
184 | 53.1k | KeywordMatch( psz_line, "WEBVTT" ) ) |
185 | 9.60k | { |
186 | 9.60k | p->section = WEBVTT_SECTION_UNDEFINED; |
187 | 9.60k | if( p->p_cue ) |
188 | 0 | { |
189 | 0 | if( p->pf_cue_done ) |
190 | 0 | p->pf_cue_done( p->priv, p->p_cue ); |
191 | 0 | p->p_cue = NULL; |
192 | 0 | } |
193 | 9.60k | return; |
194 | 9.60k | } |
195 | 43.5k | else if( KeywordMatch( psz_line, "STYLE" ) ) |
196 | 16.0k | { |
197 | 16.0k | p->section = WEBVTT_SECTION_STYLE; |
198 | 16.0k | forward_line( p, psz_line, true ); |
199 | 16.0k | return; |
200 | 16.0k | } |
201 | 27.4k | else if( KeywordMatch( psz_line, "REGION" ) ) |
202 | 5.47k | { |
203 | 5.47k | p->section = WEBVTT_SECTION_REGION; |
204 | 5.47k | forward_line( p, psz_line, true ); |
205 | 5.47k | return; |
206 | 5.47k | } |
207 | 21.9k | else if( KeywordMatch( psz_line, "NOTE" ) ) |
208 | 383 | { |
209 | 383 | p->section = WEBVTT_SECTION_NOTE; |
210 | 383 | return; |
211 | 383 | } |
212 | 21.6k | else if( psz_line[0] != 0 ) |
213 | 4.11k | { |
214 | 4.11k | p->section = WEBVTT_SECTION_CUES; |
215 | 4.11k | } |
216 | 53.1k | } |
217 | | |
218 | 940k | if( likely(p->section == WEBVTT_SECTION_CUES) ) |
219 | 746k | { |
220 | 746k | if( p->p_cue ) |
221 | 188k | { |
222 | 188k | if( psz_line[0] == 0 ) |
223 | 85.7k | { |
224 | 85.7k | if( p->p_cue ) |
225 | 85.7k | { |
226 | 85.7k | if( p->pf_cue_done ) |
227 | 85.7k | p->pf_cue_done( p->priv, p->p_cue ); |
228 | 85.7k | p->p_cue = NULL; |
229 | 85.7k | } |
230 | 85.7k | } |
231 | 102k | else |
232 | 102k | { |
233 | 102k | char *psz_merged; |
234 | 102k | if( -1 < asprintf( &psz_merged, "%s\n%s", p->p_cue->psz_text, psz_line ) ) |
235 | 102k | { |
236 | 102k | free( p->p_cue->psz_text ); |
237 | 102k | p->p_cue->psz_text = psz_merged; |
238 | 102k | } |
239 | 102k | return; |
240 | 102k | } |
241 | 188k | } |
242 | | |
243 | 644k | if( p->reads[1] == NULL ) |
244 | 6.57k | return; |
245 | | |
246 | 637k | const char *psz_split = strstr( p->reads[1], " --> " ); |
247 | 637k | if( psz_split ) |
248 | 92.4k | { |
249 | 92.4k | vlc_tick_t i_start, i_stop; |
250 | | |
251 | 92.4k | if( webvtt_scan_time( p->reads[1], &i_start ) && |
252 | 92.4k | webvtt_scan_time( psz_split + 5, &i_stop ) && i_start <= i_stop ) |
253 | 89.0k | { |
254 | 89.0k | const char *psz_attrs = strchr( psz_split + 5 + 5, ' ' ); |
255 | 89.0k | p->p_cue = ( p->pf_get_cue ) ? p->pf_get_cue( p->priv ) : NULL; |
256 | 89.0k | if( p->p_cue ) |
257 | 88.7k | { |
258 | 88.7k | p->p_cue->psz_attrs = ( psz_attrs ) ? strdup( psz_attrs ) : NULL; |
259 | 88.7k | p->p_cue->psz_id = p->reads[0]; |
260 | 88.7k | p->reads[0] = NULL; |
261 | 88.7k | p->p_cue->psz_text = p->reads[2]; |
262 | 88.7k | p->reads[2] = NULL; |
263 | 88.7k | p->p_cue->i_start = i_start; |
264 | 88.7k | p->p_cue->i_stop = i_stop; |
265 | 88.7k | } |
266 | 89.0k | } |
267 | 92.4k | } |
268 | 637k | } |
269 | 194k | else if( p->section == WEBVTT_SECTION_STYLE ) |
270 | 136k | { |
271 | 136k | forward_line( p, psz_line, false ); |
272 | 136k | if( psz_line[0] == 0 ) |
273 | 13.1k | p->section = WEBVTT_SECTION_UNDEFINED; |
274 | 136k | } |
275 | 57.1k | else if( p->section == WEBVTT_SECTION_REGION ) |
276 | 38.6k | { |
277 | 38.6k | forward_line( p, psz_line, false ); |
278 | 38.6k | if( psz_line[0] == 0 ) /* End of region declaration */ |
279 | 4.27k | p->section = WEBVTT_SECTION_UNDEFINED; |
280 | 38.6k | } |
281 | 18.5k | else if( p->section == WEBVTT_SECTION_NOTE ) |
282 | 1.03k | { |
283 | 1.03k | if( psz_line[0] == 0 ) |
284 | 372 | p->section = WEBVTT_SECTION_UNDEFINED; |
285 | 1.03k | } |
286 | 940k | } |