/src/vlc/modules/codec/webvtt/webvtt.c
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * webvtt.c: WEBVTT shared code |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2017 VideoLabs, VLC authors and VideoLAN |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or modify it |
7 | | * under the terms of the GNU Lesser General Public License as published by |
8 | | * the Free Software Foundation; either version 2.1 of the License, or |
9 | | * (at your option) any later version. |
10 | | * |
11 | | * This program is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public License |
17 | | * along with this program; if not, write to the Free Software Foundation, |
18 | | * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. |
19 | | *****************************************************************************/ |
20 | | |
21 | | #ifdef HAVE_CONFIG_H |
22 | | # include "config.h" |
23 | | #endif |
24 | | |
25 | | #include <vlc_common.h> |
26 | | #include <vlc_charset.h> |
27 | | #include <vlc_plugin.h> |
28 | | |
29 | | #include "webvtt.h" |
30 | | |
31 | | #include <ctype.h> |
32 | | #include <assert.h> |
33 | | |
34 | | /***************************************************************************** |
35 | | * Modules descriptor. |
36 | | *****************************************************************************/ |
37 | | |
38 | 134 | vlc_module_begin () |
39 | 67 | set_capability( "spu decoder", 10 ) |
40 | 67 | set_shortname( N_("WEBVTT decoder")) |
41 | 67 | set_description( N_("WEBVTT subtitles decoder") ) |
42 | 134 | set_callbacks( webvtt_OpenDecoder, webvtt_CloseDecoder ) |
43 | 67 | set_subcategory( SUBCAT_INPUT_SCODEC ) |
44 | 67 | add_submodule() |
45 | 67 | set_shortname( "WEBVTT" ) |
46 | 67 | set_description( N_("WEBVTT subtitles parser") ) |
47 | 67 | set_capability( "demux", 11 ) |
48 | 67 | set_subcategory( SUBCAT_INPUT_DEMUX ) |
49 | 67 | set_callbacks( webvtt_OpenDemux, webvtt_CloseDemux ) |
50 | 67 | add_shortcut( "webvtt" ) |
51 | 67 | add_submodule() |
52 | 67 | set_shortname( "WEBVTT" ) |
53 | 67 | set_description( N_("WEBVTT subtitles parser") ) |
54 | 67 | set_capability( "demux", 0 ) |
55 | 67 | set_subcategory( SUBCAT_INPUT_DEMUX ) |
56 | 67 | set_callbacks( webvtt_OpenDemuxStream, webvtt_CloseDemux ) |
57 | 67 | add_shortcut( "webvttstream" ) |
58 | 67 | #ifdef ENABLE_SOUT |
59 | 67 | add_submodule() |
60 | 67 | set_description( "WEBVTT text encoder" ) |
61 | 67 | set_capability( "spu encoder", 101 ) |
62 | 67 | set_subcategory( SUBCAT_INPUT_SCODEC ) |
63 | 67 | set_callback( webvtt_OpenEncoder ) |
64 | 67 | add_submodule() |
65 | 67 | set_description( N_("Raw WebVTT muxer") ) |
66 | 67 | set_capability( "sout mux", 0 ) |
67 | 67 | set_subcategory( SUBCAT_SOUT_MUX ) |
68 | 67 | add_shortcut( "webvtt", "rawvtt" ) |
69 | 134 | set_callbacks( webvtt_OpenMuxer, webvtt_CloseMuxer ) |
70 | 67 | #endif |
71 | 67 | vlc_module_end () |
72 | | |
73 | | struct webvtt_text_parser_t |
74 | | { |
75 | | enum |
76 | | { |
77 | | WEBVTT_SECTION_UNDEFINED = WEBVTT_HEADER_STYLE - 1, |
78 | | WEBVTT_SECTION_STYLE = WEBVTT_HEADER_STYLE, |
79 | | WEBVTT_SECTION_REGION = WEBVTT_HEADER_REGION, |
80 | | WEBVTT_SECTION_NOTE, |
81 | | WEBVTT_SECTION_CUES, |
82 | | } section; |
83 | | char * reads[3]; |
84 | | |
85 | | void * priv; |
86 | | webvtt_cue_t *(*pf_get_cue)( void * ); |
87 | | void (*pf_cue_done)( void *, webvtt_cue_t * ); |
88 | | void (*pf_header)( void *, enum webvtt_header_line_e, bool, const char * ); |
89 | | |
90 | | webvtt_cue_t *p_cue; |
91 | | }; |
92 | | |
93 | | static vlc_tick_t MakeTime( int32_t t[4] ) |
94 | 862k | { |
95 | 862k | return vlc_tick_from_sec( (int64_t)t[0] * 3600 + t[1] * 60 + t[2] ) + |
96 | 862k | VLC_TICK_FROM_MS(t[3]); |
97 | 862k | } |
98 | | |
99 | | bool webvtt_scan_time( const char *psz, vlc_tick_t *p_time ) |
100 | 920k | { |
101 | 920k | int32_t t[4]; |
102 | 920k | if( sscanf( psz, "%2" SCNd32 ":%2" SCNd32 ".%3" SCNd32, |
103 | 920k | &t[1], &t[2], &t[3] ) == 3 ) |
104 | 82.9k | { |
105 | 82.9k | t[0] = 0; |
106 | 82.9k | if( t[1] < 0 || t[2] < 0 || t[3] < 0 ) |
107 | 606 | return false; |
108 | | |
109 | 82.3k | *p_time = MakeTime( t ); |
110 | 82.3k | return true; |
111 | 82.9k | } |
112 | 837k | else if( sscanf( psz, "%" SCNd32 ":%2" SCNd32 ":%2" SCNd32 ".%3" SCNd32, |
113 | 837k | &t[0], &t[1], &t[2], &t[3] ) == 4 ) |
114 | 787k | { |
115 | 787k | if( t[0] < 0 || t[1] < 0 || t[2] < 0 || t[3] < 0 ) |
116 | 6.92k | return false; |
117 | | |
118 | 780k | *p_time = MakeTime( t ); |
119 | 780k | return true; |
120 | 787k | } |
121 | 50.1k | else return false; |
122 | 920k | } |
123 | | |
/*
 * Tells whether psz starts with keyword as a whole word, i.e. the keyword
 * is followed either by the end of the string or by a whitespace character.
 */
static bool KeywordMatch( const char *psz, const char *keyword )
{
    const size_t i_len = strlen( keyword );

    if( strncmp( keyword, psz, i_len ) != 0 )
        return false;

    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF): a plain char holding a non-ASCII byte may be negative. */
    const unsigned char c_next = (unsigned char) psz[i_len];
    return c_next == '\0' || isspace( c_next );
}
129 | | |
/*
 * Text parser: creation, destruction and line-by-line feeding.
 */
133 | | |
134 | | webvtt_text_parser_t * webvtt_text_parser_New( void *priv, |
135 | | webvtt_cue_t *(*pf_get_cue)( void * ), |
136 | | void (*pf_cue_done)( void *, webvtt_cue_t * ), |
137 | | void (*pf_header)( void *, enum webvtt_header_line_e, bool, const char * ) ) |
138 | 9.08k | { |
139 | 9.08k | webvtt_text_parser_t *p = malloc(sizeof(*p)); |
140 | 9.08k | if( p ) |
141 | 9.08k | { |
142 | 9.08k | p->section = WEBVTT_SECTION_UNDEFINED; |
143 | 36.3k | for( int i=0; i<3; i++ ) |
144 | 27.2k | p->reads[i] = NULL; |
145 | 9.08k | p->p_cue = NULL; |
146 | 9.08k | p->priv = priv; |
147 | 9.08k | p->pf_cue_done = pf_cue_done; |
148 | 9.08k | p->pf_get_cue = pf_get_cue; |
149 | 9.08k | p->pf_header = pf_header; |
150 | 9.08k | } |
151 | 9.08k | return p; |
152 | 9.08k | } |
153 | | |
154 | | void webvtt_text_parser_Delete( webvtt_text_parser_t *p ) |
155 | 9.08k | { |
156 | 36.3k | for( int i=0; i<3; i++ ) |
157 | 27.2k | free( p->reads[i] ); |
158 | 9.08k | free( p ); |
159 | 9.08k | } |
160 | | |
161 | | static void forward_line( webvtt_text_parser_t *p, const char *psz_line, bool b_new ) |
162 | 270k | { |
163 | 270k | if( p->pf_header ) |
164 | 270k | p->pf_header( p->priv, (enum webvtt_header_line_e)p->section, |
165 | 270k | b_new, psz_line ); |
166 | 270k | } |
167 | | |
168 | | void webvtt_text_parser_Feed( webvtt_text_parser_t *p, char *psz_line ) |
169 | 2.58M | { |
170 | 2.58M | if( psz_line == NULL ) |
171 | 4.54k | { |
172 | 4.54k | if( p->p_cue ) |
173 | 2.12k | { |
174 | 2.12k | if( p->pf_cue_done ) |
175 | 2.12k | p->pf_cue_done( p->priv, p->p_cue ); |
176 | 2.12k | p->p_cue = NULL; |
177 | 2.12k | } |
178 | 4.54k | return; |
179 | 4.54k | } |
180 | | |
181 | 2.57M | free(p->reads[0]); |
182 | 2.57M | p->reads[0] = p->reads[1]; |
183 | 2.57M | p->reads[1] = p->reads[2]; |
184 | 2.57M | p->reads[2] = psz_line; |
185 | | |
186 | | /* Lookup keywords */ |
187 | 2.57M | if( unlikely(p->section == WEBVTT_SECTION_UNDEFINED) ) |
188 | 52.6k | { |
189 | 52.6k | if( KeywordMatch( psz_line, "\xEF\xBB\xBFWEBVTT" ) || |
190 | 50.2k | KeywordMatch( psz_line, "WEBVTT" ) ) |
191 | 8.05k | { |
192 | 8.05k | p->section = WEBVTT_SECTION_UNDEFINED; |
193 | 8.05k | if( p->p_cue ) |
194 | 0 | { |
195 | 0 | if( p->pf_cue_done ) |
196 | 0 | p->pf_cue_done( p->priv, p->p_cue ); |
197 | 0 | p->p_cue = NULL; |
198 | 0 | } |
199 | 8.05k | return; |
200 | 8.05k | } |
201 | 44.5k | else if( KeywordMatch( psz_line, "STYLE" ) ) |
202 | 19.0k | { |
203 | 19.0k | p->section = WEBVTT_SECTION_STYLE; |
204 | 19.0k | forward_line( p, psz_line, true ); |
205 | 19.0k | return; |
206 | 19.0k | } |
207 | 25.4k | else if( KeywordMatch( psz_line, "REGION" ) ) |
208 | 6.30k | { |
209 | 6.30k | p->section = WEBVTT_SECTION_REGION; |
210 | 6.30k | forward_line( p, psz_line, true ); |
211 | 6.30k | return; |
212 | 6.30k | } |
213 | 19.1k | else if( KeywordMatch( psz_line, "NOTE" ) ) |
214 | 191 | { |
215 | 191 | p->section = WEBVTT_SECTION_NOTE; |
216 | 191 | return; |
217 | 191 | } |
218 | 18.9k | else if( psz_line[0] != 0 ) |
219 | 3.85k | { |
220 | 3.85k | p->section = WEBVTT_SECTION_CUES; |
221 | 3.85k | } |
222 | 52.6k | } |
223 | | |
224 | 2.54M | if( likely(p->section == WEBVTT_SECTION_CUES) ) |
225 | 2.28M | { |
226 | 2.28M | if( p->p_cue ) |
227 | 482k | { |
228 | 482k | if( psz_line[0] == 0 ) |
229 | 382k | { |
230 | 382k | if( p->p_cue ) |
231 | 382k | { |
232 | 382k | if( p->pf_cue_done ) |
233 | 382k | p->pf_cue_done( p->priv, p->p_cue ); |
234 | 382k | p->p_cue = NULL; |
235 | 382k | } |
236 | 382k | } |
237 | 99.9k | else |
238 | 99.9k | { |
239 | 99.9k | char *psz_merged; |
240 | 99.9k | if( -1 < asprintf( &psz_merged, "%s\n%s", p->p_cue->psz_text, psz_line ) ) |
241 | 99.9k | { |
242 | 99.9k | free( p->p_cue->psz_text ); |
243 | 99.9k | p->p_cue->psz_text = psz_merged; |
244 | 99.9k | } |
245 | 99.9k | return; |
246 | 99.9k | } |
247 | 482k | } |
248 | | |
249 | 2.18M | if( p->reads[1] == NULL ) |
250 | 330k | return; |
251 | | |
252 | 1.85M | const char *psz_split = strstr( p->reads[1], " --> " ); |
253 | 1.85M | if( psz_split ) |
254 | 437k | { |
255 | 437k | vlc_tick_t i_start, i_stop; |
256 | | |
257 | 437k | if( webvtt_scan_time( p->reads[1], &i_start ) && |
258 | 423k | webvtt_scan_time( psz_split + 5, &i_stop ) && i_start <= i_stop ) |
259 | 384k | { |
260 | 384k | const char *psz_attrs = strchr( psz_split + 5 + 5, ' ' ); |
261 | 384k | p->p_cue = ( p->pf_get_cue ) ? p->pf_get_cue( p->priv ) : NULL; |
262 | 384k | if( p->p_cue ) |
263 | 384k | { |
264 | 384k | p->p_cue->psz_attrs = ( psz_attrs ) ? strdup( psz_attrs ) : NULL; |
265 | 384k | p->p_cue->psz_id = p->reads[0]; |
266 | 384k | p->reads[0] = NULL; |
267 | 384k | p->p_cue->psz_text = p->reads[2]; |
268 | 384k | p->reads[2] = NULL; |
269 | 384k | p->p_cue->i_start = i_start; |
270 | 384k | p->p_cue->i_stop = i_stop; |
271 | 384k | } |
272 | 384k | } |
273 | 437k | } |
274 | 1.85M | } |
275 | 260k | else if( p->section == WEBVTT_SECTION_STYLE ) |
276 | 200k | { |
277 | 200k | forward_line( p, psz_line, false ); |
278 | 200k | if( psz_line[0] == 0 ) |
279 | 17.9k | p->section = WEBVTT_SECTION_UNDEFINED; |
280 | 200k | } |
281 | 59.3k | else if( p->section == WEBVTT_SECTION_REGION ) |
282 | 43.7k | { |
283 | 43.7k | forward_line( p, psz_line, false ); |
284 | 43.7k | if( psz_line[0] == 0 ) /* End of region declaration */ |
285 | 6.09k | p->section = WEBVTT_SECTION_UNDEFINED; |
286 | 43.7k | } |
287 | 15.6k | else if( p->section == WEBVTT_SECTION_NOTE ) |
288 | 475 | { |
289 | 475 | if( psz_line[0] == 0 ) |
290 | 189 | p->section = WEBVTT_SECTION_UNDEFINED; |
291 | 475 | } |
292 | 2.54M | } |