/src/vlc/modules/codec/webvtt/css_parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * css_parser.c : CSS parser |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2017 VideoLabs, VLC authors and VideoLAN |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or modify it |
7 | | * under the terms of the GNU Lesser General Public License as published by |
8 | | * the Free Software Foundation; either version 2.1 of the License, or |
9 | | * (at your option) any later version. |
10 | | * |
11 | | * This program is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public License |
17 | | * along with this program; if not, write to the Free Software Foundation, |
18 | | * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. |
19 | | *****************************************************************************/ |
20 | | #ifdef HAVE_CONFIG_H |
21 | | # include "config.h" |
22 | | #endif |
23 | | |
24 | | #include <vlc_common.h> |
25 | | |
26 | | #include "css_bridge.h" |
27 | | #include "css_parser.h" |
28 | | #include "CSSGrammar.h" |
29 | | |
30 | | #include <ctype.h> |
31 | | |
32 | | static void vlc_css_term_Debug( const vlc_css_term_t a, int depth ); |
33 | | static void vlc_css_expression_Debug( const vlc_css_expr_t *p_expr, int depth ); |
34 | | static void vlc_css_declarations_Debug( const vlc_css_declaration_t *p_decl, int depth ); |
35 | | static void vlc_css_selectors_Debug( const vlc_css_selector_t *p_sel, int depth ); |
36 | | static void vlc_css_rules_Debug( const vlc_css_rule_t *p_rule, int depth ); |
37 | | |
/*
 * Emits the body of a list-append function named n for element type t.
 * The signature comes from CHAIN_APPEND_DECL (defined outside this file;
 * presumably it declares the parameters p_a and p_b used below).
 * The generated code follows the p_next links starting at p_a until it finds
 * the NULL link and stores p_b there. p_a is dereferenced unconditionally.
 */
#define CHAIN_APPEND_IMPL(n, t) CHAIN_APPEND_DECL(n ,t)\
{\
    t **pp_tail = &p_a->p_next;\
    while( *pp_tail != NULL )\
        pp_tail = &(*pp_tail)->p_next;\
    *pp_tail = p_b;\
}
44 | | |
45 | | void vlc_css_term_Clean( vlc_css_term_t a ) |
46 | 0 | { |
47 | 0 | if( a.type >= TYPE_STRING ) |
48 | 0 | free( a.psz ); |
49 | |
|
50 | 0 | if( a.type == TYPE_FUNCTION ) |
51 | 0 | { |
52 | 0 | if( a.function ) |
53 | 0 | vlc_css_expression_Delete( a.function ); |
54 | 0 | } |
55 | 0 | } |
56 | | |
57 | | static void vlc_css_term_Debug( const vlc_css_term_t a, int depth ) |
58 | 0 | { |
59 | 0 | for(int i=0;i<depth;i++) printf(" "); |
60 | 0 | printf("term: "); |
61 | 0 | if( a.type >= TYPE_STRING ) |
62 | 0 | { |
63 | 0 | printf("%x %s\n", a.type, a.psz); |
64 | 0 | if( a.type == TYPE_FUNCTION && a.function ) |
65 | 0 | vlc_css_expression_Debug( a.function, depth + 1 ); |
66 | 0 | } |
67 | 0 | else printf("%x %f\n", a.type, a.val); |
68 | 0 | } |
69 | | |
70 | | bool vlc_css_expression_AddTerm( vlc_css_expr_t *p_expr, |
71 | | char op, vlc_css_term_t a ) |
72 | 0 | { |
73 | 0 | if( p_expr->i_count >= p_expr->i_alloc ) |
74 | 0 | { |
75 | 0 | size_t i_realloc = (p_expr->i_alloc == 0) ? 1 : p_expr->i_alloc + 4; |
76 | 0 | void *reac = realloc( p_expr->seq, i_realloc * sizeof(p_expr->seq[0]) ); |
77 | 0 | if( reac ) |
78 | 0 | { |
79 | 0 | p_expr->seq = reac; |
80 | 0 | p_expr->i_alloc = i_realloc; |
81 | 0 | } |
82 | 0 | } |
83 | |
|
84 | 0 | if( p_expr->i_count >= p_expr->i_alloc ) |
85 | 0 | return false; |
86 | | |
87 | 0 | p_expr->seq[p_expr->i_count].op = op; |
88 | 0 | p_expr->seq[p_expr->i_count++].term = a; |
89 | 0 | return true; |
90 | 0 | } |
91 | | |
92 | | void vlc_css_expression_Delete( vlc_css_expr_t *p_expr ) |
93 | 0 | { |
94 | 0 | if( p_expr ) |
95 | 0 | { |
96 | 0 | for(size_t i=0; i<p_expr->i_count; i++) |
97 | 0 | vlc_css_term_Clean( p_expr->seq[i].term ); |
98 | 0 | free( p_expr->seq ); |
99 | 0 | } |
100 | 0 | free( p_expr ); |
101 | 0 | } |
102 | | |
103 | | static void vlc_css_expression_Debug( const vlc_css_expr_t *p_expr, int depth ) |
104 | 0 | { |
105 | 0 | if( p_expr ) |
106 | 0 | { |
107 | 0 | for(int i=0;i<depth;i++) printf(" "); |
108 | 0 | printf("expression: \n"); |
109 | 0 | for(size_t i=0; i<p_expr->i_count; i++) |
110 | 0 | vlc_css_term_Debug( p_expr->seq[i].term, depth + 1 ); |
111 | 0 | } |
112 | 0 | } |
113 | | |
114 | | vlc_css_expr_t * vlc_css_expression_New( vlc_css_term_t term ) |
115 | 0 | { |
116 | 0 | vlc_css_expr_t *p_expr = calloc(1, sizeof(*p_expr)); |
117 | 0 | if(!vlc_css_expression_AddTerm( p_expr, 0, term )) |
118 | 0 | { |
119 | 0 | free(p_expr); |
120 | 0 | p_expr = NULL; |
121 | 0 | } |
122 | 0 | return p_expr; |
123 | 0 | } |
124 | | |
125 | | CHAIN_APPEND_IMPL(vlc_css_declarations_Append, vlc_css_declaration_t) |
126 | | |
127 | | void vlc_css_declarations_Delete( vlc_css_declaration_t *p_decl ) |
128 | 0 | { |
129 | 0 | while( p_decl ) |
130 | 0 | { |
131 | 0 | vlc_css_declaration_t *p_next = p_decl->p_next; |
132 | 0 | vlc_css_expression_Delete( p_decl->expr ); |
133 | 0 | free( p_decl->psz_property ); |
134 | 0 | free( p_decl ); |
135 | 0 | p_decl = p_next; |
136 | 0 | } |
137 | 0 | } |
138 | | |
139 | | static void vlc_css_declarations_Debug( const vlc_css_declaration_t *p_decl, int depth ) |
140 | 0 | { |
141 | 0 | while( p_decl ) |
142 | 0 | { |
143 | 0 | for(int i=0;i<depth;i++) printf(" "); |
144 | 0 | printf("declaration: %s\n", p_decl->psz_property ); |
145 | 0 | vlc_css_expression_Debug( p_decl->expr, depth + 1 ); |
146 | 0 | p_decl = p_decl->p_next; |
147 | 0 | } |
148 | 0 | } |
149 | | |
150 | | vlc_css_declaration_t * vlc_css_declaration_New( const char *psz ) |
151 | 0 | { |
152 | 0 | vlc_css_declaration_t *p_decl = calloc(1, sizeof(*p_decl)); |
153 | 0 | p_decl->psz_property = strdup(psz); |
154 | 0 | return p_decl; |
155 | 0 | } |
156 | | |
157 | | CHAIN_APPEND_IMPL(vlc_css_selector_Append, vlc_css_selector_t) |
158 | | |
159 | | void |
160 | | vlc_css_selector_AddSpecifier( vlc_css_selector_t *p_sel, vlc_css_selector_t *p_spec ) |
161 | 0 | { |
162 | 0 | *p_sel->specifiers.pp_append = p_spec; |
163 | 0 | while(p_spec) |
164 | 0 | { |
165 | 0 | p_sel->specifiers.pp_append = &p_spec->p_next; |
166 | 0 | p_spec = p_spec->p_next; |
167 | 0 | } |
168 | 0 | } |
169 | | |
170 | | void vlc_css_selectors_Delete( vlc_css_selector_t *p_sel ) |
171 | 0 | { |
172 | 0 | while( p_sel ) |
173 | 0 | { |
174 | 0 | vlc_css_selector_t *p_next = p_sel->p_next; |
175 | 0 | free( p_sel->psz_name ); |
176 | 0 | vlc_css_selectors_Delete( p_sel->specifiers.p_first ); |
177 | 0 | vlc_css_selectors_Delete( p_sel->p_matchsel ); |
178 | 0 | free( p_sel ); |
179 | 0 | p_sel = p_next; |
180 | 0 | } |
181 | 0 | } |
182 | | |
183 | | static void vlc_css_selectors_Debug( const vlc_css_selector_t *p_sel, int depth ) |
184 | 0 | { |
185 | 0 | while( p_sel ) |
186 | 0 | { |
187 | 0 | for(int i=0;i<depth;i++) printf(" "); printf("selector %c%s:\n", p_sel->combinator, p_sel->psz_name ); |
188 | 0 | vlc_css_selectors_Debug( p_sel->p_matchsel, depth + 1 ); |
189 | 0 | vlc_css_selectors_Debug( p_sel->specifiers.p_first, depth + 1 ); |
190 | 0 | p_sel = p_sel->p_next; |
191 | 0 | } |
192 | 0 | } |
193 | | |
194 | | vlc_css_selector_t * vlc_css_selector_New( int type, const char *psz ) |
195 | 0 | { |
196 | 0 | vlc_css_selector_t *p_sel = calloc(1, sizeof(*p_sel)); |
197 | 0 | p_sel->psz_name = strdup(psz); |
198 | 0 | p_sel->type = type; |
199 | 0 | p_sel->combinator = RELATION_SELF; |
200 | 0 | p_sel->specifiers.pp_append = &p_sel->specifiers.p_first; |
201 | 0 | return p_sel; |
202 | 0 | } |
203 | | |
204 | | void vlc_css_rules_Delete( vlc_css_rule_t *p_rule ) |
205 | 0 | { |
206 | 0 | while(p_rule) |
207 | 0 | { |
208 | 0 | vlc_css_rule_t *p_next = p_rule->p_next; |
209 | 0 | vlc_css_selectors_Delete( p_rule->p_selectors ); |
210 | 0 | vlc_css_declarations_Delete( p_rule->p_declarations ); |
211 | 0 | free(p_rule); |
212 | 0 | p_rule = p_next; |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | | static void vlc_css_rules_Debug( const vlc_css_rule_t *p_rule, int depth ) |
217 | 0 | { |
218 | 0 | int j = 0; |
219 | 0 | while(p_rule) |
220 | 0 | { |
221 | 0 | for(int i=0;i<depth;i++) printf(" "); printf("rule %d:\n", j++); |
222 | 0 | vlc_css_selectors_Debug( p_rule->p_selectors, depth + 1 ); |
223 | 0 | vlc_css_declarations_Debug( p_rule->p_declarations, depth + 1 ); |
224 | 0 | p_rule = p_rule->p_next; |
225 | 0 | } |
226 | 0 | } |
227 | | |
228 | | vlc_css_rule_t * vlc_css_rule_New( void ) |
229 | 0 | { |
230 | 0 | vlc_css_rule_t *p_rule = calloc(1, sizeof(*p_rule)); |
231 | 0 | return p_rule; |
232 | 0 | } |
233 | | |
234 | | void vlc_css_parser_AddRule( vlc_css_parser_t *p_parser, |
235 | | vlc_css_rule_t *p_rule ) |
236 | 0 | { |
237 | 0 | (*p_parser->rules.pp_append) = p_rule; |
238 | 0 | p_parser->rules.pp_append = &p_rule->p_next; |
239 | 0 | } |
240 | | |
241 | | void vlc_css_parser_Debug( const vlc_css_parser_t *p_parser ) |
242 | 0 | { |
243 | 0 | vlc_css_rules_Debug( p_parser->rules.p_first, 0 ); |
244 | 0 | } |
245 | | |
246 | | void vlc_css_parser_Clean( vlc_css_parser_t *p_parser ) |
247 | 0 | { |
248 | 0 | vlc_css_rules_Delete( p_parser->rules.p_first ); |
249 | 0 | } |
250 | | |
251 | | void vlc_css_parser_Init( vlc_css_parser_t *p_parser ) |
252 | 0 | { |
253 | 0 | memset(p_parser, 0, sizeof(vlc_css_parser_t)); |
254 | 0 | p_parser->rules.pp_append = &p_parser->rules.p_first; |
255 | 0 | } |
256 | | |
257 | | bool vlc_css_parser_ParseBytes( vlc_css_parser_t *p_parser, const uint8_t *p_data, size_t i_data ) |
258 | 0 | { |
259 | 0 | yyscan_t yy; |
260 | 0 | csslex_init(&yy); |
261 | |
|
262 | 0 | YY_BUFFER_STATE buf = css_scan_bytes( (const char*) p_data, i_data, yy ); |
263 | |
|
264 | 0 | bool b_ret = !cssparse( yy, p_parser ); |
265 | |
|
266 | 0 | css_delete_buffer( buf, yy ); |
267 | 0 | csslex_destroy( yy ); |
268 | |
|
269 | 0 | return b_ret; |
270 | 0 | } |
271 | | |
272 | | bool vlc_css_parser_ParseString( vlc_css_parser_t *p_parser, const char *psz_css ) |
273 | 0 | { |
274 | 0 | yyscan_t yy; |
275 | 0 | csslex_init(&yy); |
276 | |
|
277 | 0 | YY_BUFFER_STATE buf = css_scan_string( psz_css, yy ); |
278 | |
|
279 | 0 | bool b_ret = !cssparse( yy, p_parser ); |
280 | |
|
281 | 0 | css_delete_buffer( buf, yy ); |
282 | 0 | csslex_destroy( yy ); |
283 | |
|
284 | 0 | return b_ret; |
285 | 0 | } |
286 | | |
/*
 * Encodes the code point ucs4 as UTF-8 into p, without a terminator, and
 * returns the number of bytes written (1 to 4).
 *
 * Surrogates (U+D800..U+DFFF) and values above U+10FFFF have no valid
 * UTF-8 form; U+FFFD REPLACEMENT CHARACTER is written in their place.
 * The value 0 is written as a single zero byte.
 */
static int CodePointToUTF8( uint32_t ucs4, char *p )
{
    if( ucs4 > 0x10FFFF || ( ucs4 >= 0xD800 && ucs4 <= 0xDFFF ) )
        ucs4 = 0xFFFD;

    if( ucs4 <= 0x7F )
    {
        p[0] = (char) ucs4;
        return 1;
    }
    if( ucs4 <= 0x7FF )
    {
        p[0] = (char)( 0xC0 | (ucs4 >> 6) );
        p[1] = (char)( 0x80 | (ucs4 & 0x3F) );
        return 2;
    }
    if( ucs4 <= 0xFFFF )
    {
        p[0] = (char)( 0xE0 | (ucs4 >> 12) );
        p[1] = (char)( 0x80 | ((ucs4 >> 6) & 0x3F) );
        p[2] = (char)( 0x80 | (ucs4 & 0x3F) );
        return 3;
    }
    p[0] = (char)( 0xF0 | (ucs4 >> 18) );
    p[1] = (char)( 0x80 | ((ucs4 >> 12) & 0x3F) );
    p[2] = (char)( 0x80 | ((ucs4 >> 6) & 0x3F) );
    p[3] = (char)( 0x80 | (ucs4 & 0x3F) );
    return 4;
}

/* Returns the value of the hexadecimal digit c, or -1 if c is not one. */
static int css_HexDigitValue( char c )
{
    if( c >= '0' && c <= '9' )
        return c - '0';
    if( c >= 'a' && c <= 'f' )
        return c - 'a' + 10;
    if( c >= 'A' && c <= 'F' )
        return c - 'A' + 10;
    return -1;
}

/*
 * Resolves backslash escapes of psz in place; the result is never longer
 * than the input. A NULL psz is ignored.
 *
 *  - "\n", "\r" and "\f" (the letters) become the newline, carriage
 *    return and form feed characters; "\rn" becomes CR LF.
 *    NOTE(review): 'f' is also a hex digit, so "\f..." can never start a
 *    hexadecimal escape here; kept as-is because it is existing behaviour.
 *  - A backslash followed by 1 to 6 hex digits becomes the UTF-8 encoding
 *    of that code point; one space following fewer than 6 digits is
 *    swallowed. Zero, surrogates and values above U+10FFFF are replaced
 *    by U+FFFD, except that the escape is dropped entirely when the
 *    replacement would not fit in the space the escape occupied (only
 *    possible for the two-character escape "\0").
 *  - A backslash followed by any other character yields that character,
 *    so "\\" is a literal backslash.
 *  - A lone backslash at the end of the string is dropped.
 */
void vlc_css_unescape( char *psz )
{
    if( !psz )
        return;

    const char *r = psz; /* read cursor */
    char *w = psz;       /* write cursor, never ahead of the read cursor */

    while( *r )
    {
        if( *r != '\\' )
        {
            *w++ = *r++;
            continue;
        }

        r++; /* skip the backslash */
        if( *r == '\0' )
            break;

        if( *r == 'n' )
        {
            *w++ = '\n';
            r++;
        }
        else if( *r == 'r' )
        {
            *w++ = '\r';
            if( r[1] == 'n' )
            {
                *w++ = '\n';
                r++;
            }
            r++;
        }
        else if( *r == 'f' )
        {
            *w++ = '\f';
            r++;
        }
        else if( css_HexDigitValue( *r ) >= 0 )
        {
            uint32_t i_value = 0;
            int i_digits = 0;
            while( i_digits < 6 )
            {
                const int i_digit = css_HexDigitValue( *r );
                if( i_digit < 0 )
                    break;
                i_value = (i_value << 4) | (uint32_t) i_digit;
                i_digits++;
                r++;
            }
            if( i_digits < 6 && *r == ' ' )
                r++;

            /* A zero byte would silently truncate the string. */
            if( i_value == 0 )
                i_value = 0xFFFD;

            char enc[4];
            const int i_enc = CodePointToUTF8( i_value, enc );
            /* Only write what fits in the bytes already consumed. */
            if( (size_t)( r - w ) >= (size_t) i_enc )
            {
                memcpy( w, enc, (size_t) i_enc );
                w += i_enc;
            }
        }
        else
        {
            /* Escaped ordinary character, including the backslash itself:
             * copy it now so it cannot be taken for a new escape. */
            *w++ = *r++;
        }
    }

    *w = 0;
}
400 | | |
/*
 * Returns a newly allocated copy of psz with its escapes resolved by
 * vlc_css_unescape(), or NULL if the copy could not be allocated.
 * The caller frees the result.
 */
char * vlc_css_unescaped( const char *psz )
{
    char *psz_copy = strdup( psz );
    if( psz_copy != NULL )
        vlc_css_unescape( psz_copy );
    return psz_copy;
}
407 | | |
/*
 * Returns a newly allocated copy of psz with one pair of enclosing quotes
 * removed. Quotes are stripped only when the string is at least two
 * characters long, starts with ' or " and ends with that same character;
 * any other input, including a lone quote character, is copied unchanged.
 *
 * psz must be a valid string. Returns NULL if the allocation fails; the
 * caller frees the result.
 */
char * vlc_css_unquoted( const char *psz )
{
    const char quote = psz[0];

    if( quote == '\'' || quote == '\"' )
    {
        const size_t i_len = strlen( psz );
        /* Below two characters the opening quote would be matched against
         * itself and i_len - 2 would wrap around. */
        if( i_len >= 2 && psz[i_len - 1] == quote )
            return strndup( psz + 1, i_len - 2 );
    }

    return strdup( psz );
}
425 | | |
426 | | |
/*
 * Returns a newly allocated copy of psz with enclosing quotes removed by
 * vlc_css_unquoted() and escapes then resolved by vlc_css_unescape().
 * Returns NULL if the copy could not be allocated; the caller frees the
 * result.
 */
char * vlc_css_unquotedunescaped( const char *psz )
{
    char *psz_copy = vlc_css_unquoted( psz );
    if( psz_copy == NULL )
        return NULL;

    vlc_css_unescape( psz_copy );
    return psz_copy;
}
434 | | |
435 | | #ifdef CSS_PARSER_DEBUG |
436 | | |
437 | | |
438 | | static void css_properties_Debug( const vlc_css_declaration_t *p_decl ) |
439 | | { |
440 | | printf("set %s to ", p_decl->psz_property); |
441 | | for( size_t i=0; i<p_decl->expr->i_count; i++ ) |
442 | | { |
443 | | printf("term %s ", p_decl->expr->seq[i].term.psz); |
444 | | } |
445 | | printf("\n"); |
446 | | } |
447 | | |
448 | | void css_selector_Debug( const vlc_css_selector_t *p_sel ) |
449 | | { |
450 | | printf("select its "); |
451 | | switch( p_sel->combinator ) |
452 | | { |
453 | | case RELATION_DESCENDENT: |
454 | | printf("descendent"); |
455 | | break; |
456 | | case RELATION_DIRECTADJACENT: |
457 | | printf("adjacent"); |
458 | | break; |
459 | | case RELATION_INDIRECTADJACENT: |
460 | | printf("indirect adjacent"); |
461 | | break; |
462 | | case RELATION_CHILD: |
463 | | printf("child"); |
464 | | break; |
465 | | case RELATION_SELF: |
466 | | break; |
467 | | } |
468 | | |
469 | | printf(" nodes matching filter: "); |
470 | | switch( p_sel->type ) |
471 | | { |
472 | | case SELECTOR_SIMPLE: |
473 | | printf("<%s>\n", p_sel->psz_name); |
474 | | break; |
475 | | case SELECTOR_PSEUDOCLASS: |
476 | | printf(":%s\n", p_sel->psz_name); |
477 | | break; |
478 | | case SELECTOR_PSEUDOELEMENT: |
479 | | printf("::%s\n", p_sel->psz_name); |
480 | | break; |
481 | | case SPECIFIER_ID: |
482 | | printf("%s\n", p_sel->psz_name); |
483 | | break; |
484 | | case SPECIFIER_CLASS: |
485 | | printf(".%s\n", p_sel->psz_name); |
486 | | break; |
487 | | case SPECIFIER_ATTRIB: |
488 | | printf("[%s]\n", p_sel->psz_name); |
489 | | break; |
490 | | } |
491 | | } |
492 | | |
493 | | void css_rule_Debug( const vlc_css_rule_t *p_rule ) |
494 | | { |
495 | | if( p_rule == NULL ) |
496 | | return; |
497 | | printf("add for rule nodes:\n"); |
498 | | for( const vlc_css_selector_t *p_sel = p_rule->p_selectors; |
499 | | p_sel; p_sel = p_sel->p_next ) |
500 | | { |
501 | | css_selector_Debug( p_sel ); |
502 | | for( const vlc_css_selector_t *p_spec = p_sel->specifiers.p_first; |
503 | | p_spec; p_spec = p_spec->p_next ) |
504 | | css_selector_Debug( p_spec ); |
505 | | |
506 | | if( p_sel->p_next ) |
507 | | printf("add nodes\n"); |
508 | | } |
509 | | |
510 | | for( const vlc_css_declaration_t *p_decl = p_rule->p_declarations; |
511 | | p_decl; p_decl = p_decl->p_next ) |
512 | | { |
513 | | css_properties_Debug( p_decl ); |
514 | | } |
515 | | } |
516 | | |
517 | | #endif |