/src/mupdf/source/html/css-parse.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2022 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "html-imp.h" |
25 | | |
26 | | #include <string.h> |
27 | | |
28 | | #include "css-properties.h" |
29 | | |
30 | | struct lexbuf |
31 | | { |
32 | | fz_context *ctx; |
33 | | fz_pool *pool; |
34 | | const unsigned char *start; |
35 | | const unsigned char *s; |
36 | | const char *file; |
37 | | int line; |
38 | | int lookahead; |
39 | | int c; |
40 | | int string_len; |
41 | | char string[1024]; |
42 | | }; |
43 | | |
44 | | static fz_css_value *parse_expr(struct lexbuf *buf); |
45 | | static fz_css_selector *parse_selector(struct lexbuf *buf); |
46 | | |
47 | | FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg) |
48 | 9 | { |
49 | 31 | #define PRE_POST_SIZE 30 |
50 | 9 | unsigned char text[PRE_POST_SIZE * 2 + 4]; |
51 | 9 | unsigned char *d = text; |
52 | 9 | const unsigned char *s = buf->start; |
53 | 9 | int n; |
54 | | |
55 | | /* We want to make a helpful fragment for the error message. |
56 | | * We want err_pos to be the point at which we just tripped |
57 | | * the error. err_pos needs to be at least 1 byte behind |
58 | | * our read pointer, as we've read that char. */ |
59 | 9 | const unsigned char *err_pos = buf->s; |
60 | 9 | n = 1; |
61 | | |
62 | | /* And if we're using lookahead, it's further behind. */ |
63 | 9 | if (buf->lookahead >= CSS_KEYWORD) |
64 | 2 | n += buf->string_len; |
65 | 7 | else if (buf->lookahead != EOF) |
66 | 6 | n += 1; |
67 | | |
68 | | /* But it can't be before the start of the buffer */ |
69 | 9 | n = fz_mini(n, err_pos - buf->start); |
70 | 9 | err_pos -= n; |
71 | | |
72 | | /* We're going to try to output: |
73 | | * <section prior to the error> ">" <the char that tripped> "<" <section after the error> |
74 | | */ |
75 | | /* Is the section prior to the error too long? If so, truncate it with an elipsis. */ |
76 | 9 | n = sizeof(text)-1; |
77 | 9 | if (err_pos - s > n-PRE_POST_SIZE - 3) |
78 | 7 | { |
79 | 7 | *d++ = '.'; |
80 | 7 | *d++ = '.'; |
81 | 7 | *d++ = '.'; |
82 | 7 | n -= 3; |
83 | 7 | s = err_pos - (n-PRE_POST_SIZE - 3); |
84 | 7 | } |
85 | | |
86 | | /* Copy the prefix (if there is one) */ |
87 | 9 | if (err_pos > s) |
88 | 9 | { |
89 | 9 | n = err_pos - s; |
90 | 204 | while (n) |
91 | 195 | { |
92 | 195 | unsigned char c = *s++; |
93 | 195 | *d++ = (c < 32 || c > 127) ? ' ' : c; |
94 | 195 | n--; |
95 | 195 | } |
96 | 9 | } |
97 | | |
98 | | /* Marker, char, end marker */ |
99 | 9 | *d++ = '>', n--; |
100 | 9 | if (*err_pos) |
101 | 8 | *d++ = *err_pos++, n--; |
102 | 9 | *d++ = '<', n--; |
103 | | |
104 | | /* Postfix */ |
105 | 9 | n = (int)strlen((const char *)err_pos); |
106 | 9 | if (n <= PRE_POST_SIZE) |
107 | 3 | { |
108 | 47 | while (n > 0) |
109 | 44 | { |
110 | 44 | unsigned char c = *err_pos++; |
111 | 44 | *d++ = (c < 32 || c > 127) ? ' ' : c; |
112 | 44 | n--; |
113 | 44 | } |
114 | 3 | } |
115 | 6 | else |
116 | 6 | { |
117 | 168 | for (n = PRE_POST_SIZE-3; n > 0; n--) |
118 | 162 | { |
119 | 162 | unsigned char c = *err_pos++; |
120 | 162 | *d++ = (c < 32 || c > 127) ? ' ' : c; |
121 | 162 | } |
122 | | |
123 | 6 | *d++ = '.'; |
124 | 6 | *d++ = '.'; |
125 | 6 | *d++ = '.'; |
126 | 6 | } |
127 | 9 | *d = 0; |
128 | | |
129 | 9 | fz_throw(buf->ctx, FZ_ERROR_SYNTAX, "css syntax error: %s (%s:%d) (%s)", msg, buf->file, buf->line, text); |
130 | 9 | } |
131 | | |
132 | | fz_css *fz_new_css(fz_context *ctx) |
133 | 54 | { |
134 | 54 | fz_pool *pool = fz_new_pool(ctx); |
135 | 54 | fz_css *css = NULL; |
136 | | |
137 | 108 | fz_try(ctx) |
138 | 108 | { |
139 | 54 | css = fz_pool_alloc(ctx, pool, sizeof *css); |
140 | 54 | css->pool = pool; |
141 | 54 | css->rule = NULL; |
142 | 54 | } |
143 | 108 | fz_catch(ctx) |
144 | 0 | { |
145 | 0 | fz_drop_pool(ctx, pool); |
146 | 0 | fz_rethrow(ctx); |
147 | 0 | } |
148 | | |
149 | 54 | return css; |
150 | 54 | } |
151 | | |
152 | | void fz_drop_css(fz_context *ctx, fz_css *css) |
153 | 54 | { |
154 | 54 | if (css) |
155 | 54 | fz_drop_pool(ctx, css->pool); |
156 | 54 | } |
157 | | |
158 | | static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_pool *pool, fz_css_selector *selector, fz_css_property *declaration) |
159 | 2.96k | { |
160 | 2.96k | fz_css_rule *rule = fz_pool_alloc(ctx, pool, sizeof *rule); |
161 | 2.96k | rule->selector = selector; |
162 | 2.96k | rule->declaration = declaration; |
163 | 2.96k | rule->next = NULL; |
164 | 2.96k | return rule; |
165 | 2.96k | } |
166 | | |
167 | | static fz_css_selector *fz_new_css_selector(fz_context *ctx, fz_pool *pool, const char *name) |
168 | 3.28k | { |
169 | 3.28k | fz_css_selector *sel = fz_pool_alloc(ctx, pool, sizeof *sel); |
170 | 3.28k | sel->name = name ? fz_pool_strdup(ctx, pool, name) : NULL; |
171 | 3.28k | sel->combine = 0; |
172 | 3.28k | sel->cond = NULL; |
173 | 3.28k | sel->left = NULL; |
174 | 3.28k | sel->right = NULL; |
175 | 3.28k | sel->next = NULL; |
176 | 3.28k | return sel; |
177 | 3.28k | } |
178 | | |
179 | | static fz_css_condition *fz_new_css_condition(fz_context *ctx, fz_pool *pool, int type, const char *key, const char *val) |
180 | 0 | { |
181 | 0 | fz_css_condition *cond = fz_pool_alloc(ctx, pool, sizeof *cond); |
182 | 0 | cond->type = type; |
183 | 0 | cond->key = key ? fz_pool_strdup(ctx, pool, key) : NULL; |
184 | 0 | cond->val = val ? fz_pool_strdup(ctx, pool, val) : NULL; |
185 | 0 | cond->next = NULL; |
186 | 0 | return cond; |
187 | 0 | } |
188 | | |
189 | | static fz_css_property *fz_new_css_property(fz_context *ctx, fz_pool *pool, const char *name, fz_css_value *value, int spec) |
190 | 6.49k | { |
191 | 6.49k | struct css_property_info *info = css_property_lookup(name, strlen(name)); |
192 | 6.49k | if (info) |
193 | 6.37k | { |
194 | 6.37k | fz_css_property *prop = fz_pool_alloc(ctx, pool, sizeof *prop); |
195 | 6.37k | prop->name = info->key; |
196 | 6.37k | prop->value = value; |
197 | 6.37k | prop->spec = spec; |
198 | 6.37k | prop->important = 0; |
199 | 6.37k | prop->next = NULL; |
200 | 6.37k | return prop; |
201 | 6.37k | } |
202 | 123 | return NULL; |
203 | 6.49k | } |
204 | | |
205 | | static fz_css_value *fz_new_css_value_x(fz_context *ctx, fz_pool *pool, int type) |
206 | 0 | { |
207 | 0 | fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val); |
208 | 0 | val->type = type; |
209 | 0 | val->data = NULL; |
210 | 0 | val->args = NULL; |
211 | 0 | val->next = NULL; |
212 | 0 | return val; |
213 | 0 | } |
214 | | |
215 | | static fz_css_value *fz_new_css_value(fz_context *ctx, fz_pool *pool, int type, const char *data) |
216 | 7.90k | { |
217 | 7.90k | fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val); |
218 | 7.90k | val->type = type; |
219 | 7.90k | val->data = fz_pool_strdup(ctx, pool, data); |
220 | 7.90k | val->args = NULL; |
221 | 7.90k | val->next = NULL; |
222 | 7.90k | return val; |
223 | 7.90k | } |
224 | | |
225 | | static void css_lex_next(struct lexbuf *buf) |
226 | 127k | { |
227 | 127k | if (buf->c == 0) |
228 | 0 | return; |
229 | 127k | buf->s += fz_chartorune(&buf->c, (const char *)buf->s); |
230 | 127k | if (buf->c == '\n') |
231 | 0 | ++buf->line; |
232 | 127k | buf->lookahead = EOF; |
233 | 127k | } |
234 | | |
235 | | static void css_lex_init(fz_context *ctx, struct lexbuf *buf, fz_pool *pool, const char *s, const char *file) |
236 | 175 | { |
237 | 175 | buf->ctx = ctx; |
238 | 175 | buf->pool = pool; |
239 | 175 | buf->s = (const unsigned char *)s; |
240 | 175 | buf->lookahead = EOF; |
241 | 175 | buf->start = buf->s; |
242 | 175 | buf->c = -1; |
243 | 175 | buf->file = file; |
244 | 175 | buf->line = 1; |
245 | 175 | css_lex_next(buf); |
246 | | |
247 | 175 | buf->string_len = 0; |
248 | 175 | } |
249 | | |
/* Return 1 for space, tab, carriage return, newline and form feed; 0 otherwise. */
static inline int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
	case '\f':
		return 1;
	default:
		return 0;
	}
}
254 | | |
255 | | static int isnmstart(int c) |
256 | 32.9k | { |
257 | 32.9k | return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || |
258 | 32.9k | (c >= 128 && c <= UCS_MAX); |
259 | 32.9k | } |
260 | | |
261 | | static int isnmchar(int c) |
262 | 103k | { |
263 | 103k | return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || |
264 | 103k | (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= UCS_MAX); |
265 | 103k | } |
266 | | |
267 | | static void css_push_char(struct lexbuf *buf, int c) |
268 | 126k | { |
269 | 126k | char out[4]; |
270 | 126k | int n = fz_runetochar(out, c); |
271 | 126k | if (buf->string_len + n >= (int)nelem(buf->string)) |
272 | 0 | fz_css_error(buf, "token too long"); |
273 | 126k | memcpy(buf->string + buf->string_len, out, n); |
274 | 126k | buf->string_len += n; |
275 | 126k | } |
276 | | |
277 | | static int css_lex_accept(struct lexbuf *buf, int t) |
278 | 271k | { |
279 | 271k | if (buf->c == t) |
280 | 1.45k | { |
281 | 1.45k | css_lex_next(buf); |
282 | 1.45k | return 1; |
283 | 1.45k | } |
284 | 270k | return 0; |
285 | 271k | } |
286 | | |
/* Consume the character t, or raise an "unexpected character" syntax
 * error if the current character is something else. */
static void css_lex_expect(struct lexbuf *buf, int t)
{
	if (css_lex_accept(buf, t))
		return;
	fz_css_error(buf, "unexpected character");
}
292 | | |
293 | | static int css_lex_number(struct lexbuf *buf) |
294 | 3.42k | { |
295 | 7.22k | while (buf->c >= '0' && buf->c <= '9') |
296 | 3.80k | { |
297 | 3.80k | css_push_char(buf, buf->c); |
298 | 3.80k | css_lex_next(buf); |
299 | 3.80k | } |
300 | | |
301 | 3.42k | if (css_lex_accept(buf, '.')) |
302 | 858 | { |
303 | 858 | css_push_char(buf, '.'); |
304 | 2.29k | while (buf->c >= '0' && buf->c <= '9') |
305 | 1.44k | { |
306 | 1.44k | css_push_char(buf, buf->c); |
307 | 1.44k | css_lex_next(buf); |
308 | 1.44k | } |
309 | 858 | } |
310 | | |
311 | 3.42k | if (css_lex_accept(buf, '%')) |
312 | 0 | { |
313 | 0 | css_push_char(buf, '%'); |
314 | 0 | css_push_char(buf, 0); |
315 | 0 | return CSS_PERCENT; |
316 | 0 | } |
317 | | |
318 | 3.42k | if (isnmstart(buf->c)) |
319 | 2.03k | { |
320 | 2.03k | css_push_char(buf, buf->c); |
321 | 2.03k | css_lex_next(buf); |
322 | 4.06k | while (isnmchar(buf->c)) |
323 | 2.03k | { |
324 | 2.03k | css_push_char(buf, buf->c); |
325 | 2.03k | css_lex_next(buf); |
326 | 2.03k | } |
327 | 2.03k | css_push_char(buf, 0); |
328 | 2.03k | return CSS_LENGTH; |
329 | 2.03k | } |
330 | | |
331 | 1.38k | css_push_char(buf, 0); |
332 | 1.38k | return CSS_NUMBER; |
333 | 3.42k | } |
334 | | |
335 | | static int css_lex_keyword(struct lexbuf *buf) |
336 | 13.9k | { |
337 | 98.4k | while (isnmchar(buf->c)) |
338 | 84.4k | { |
339 | 84.4k | css_push_char(buf, buf->c); |
340 | 84.4k | css_lex_next(buf); |
341 | 84.4k | } |
342 | 13.9k | css_push_char(buf, 0); |
343 | 13.9k | return CSS_KEYWORD; |
344 | 13.9k | } |
345 | | |
346 | | static int css_lex_hash(struct lexbuf *buf) |
347 | 118 | { |
348 | 669 | while (isnmchar(buf->c)) |
349 | 551 | { |
350 | 551 | css_push_char(buf, buf->c); |
351 | 551 | css_lex_next(buf); |
352 | 551 | } |
353 | 118 | css_push_char(buf, 0); |
354 | 118 | return CSS_HASH; |
355 | 118 | } |
356 | | |
357 | | static int css_lex_string(struct lexbuf *buf, int q) |
358 | 1 | { |
359 | 26 | while (buf->c && buf->c != q) |
360 | 25 | { |
361 | 25 | if (css_lex_accept(buf, '\\')) |
362 | 0 | { |
363 | 0 | if (css_lex_accept(buf, 'n')) |
364 | 0 | css_push_char(buf, '\n'); |
365 | 0 | else if (css_lex_accept(buf, 'r')) |
366 | 0 | css_push_char(buf, '\r'); |
367 | 0 | else if (css_lex_accept(buf, 'f')) |
368 | 0 | css_push_char(buf, '\f'); |
369 | 0 | else if (css_lex_accept(buf, '\f')) |
370 | 0 | /* line continuation */ ; |
371 | 0 | else if (css_lex_accept(buf, '\n')) |
372 | 0 | /* line continuation */ ; |
373 | 0 | else if (css_lex_accept(buf, '\r')) |
374 | 0 | css_lex_accept(buf, '\n'); |
375 | 0 | else |
376 | 0 | { |
377 | 0 | css_push_char(buf, buf->c); |
378 | 0 | css_lex_next(buf); |
379 | 0 | } |
380 | 0 | } |
381 | 25 | else |
382 | 25 | { |
383 | 25 | css_push_char(buf, buf->c); |
384 | 25 | css_lex_next(buf); |
385 | 25 | } |
386 | 25 | } |
387 | 1 | css_lex_expect(buf, q); |
388 | 1 | css_push_char(buf, 0); |
389 | 1 | return CSS_STRING; |
390 | 1 | } |
391 | | |
392 | | static void css_lex_uri(struct lexbuf *buf) |
393 | 0 | { |
394 | 0 | while (buf->c && buf->c != ')' && !iswhite(buf->c)) |
395 | 0 | { |
396 | 0 | if (css_lex_accept(buf, '\\')) |
397 | 0 | { |
398 | 0 | if (css_lex_accept(buf, 'n')) |
399 | 0 | css_push_char(buf, '\n'); |
400 | 0 | else if (css_lex_accept(buf, 'r')) |
401 | 0 | css_push_char(buf, '\r'); |
402 | 0 | else if (css_lex_accept(buf, 'f')) |
403 | 0 | css_push_char(buf, '\f'); |
404 | 0 | else |
405 | 0 | { |
406 | 0 | css_push_char(buf, buf->c); |
407 | 0 | css_lex_next(buf); |
408 | 0 | } |
409 | 0 | } |
410 | 0 | else if (buf->c == '!' || buf->c == '#' || buf->c == '$' || buf->c == '%' || buf->c == '&' || |
411 | 0 | (buf->c >= '*' && buf->c <= '[') || |
412 | 0 | (buf->c >= ']' && buf->c <= '~') || |
413 | 0 | buf->c > 159) |
414 | 0 | { |
415 | 0 | css_push_char(buf, buf->c); |
416 | 0 | css_lex_next(buf); |
417 | 0 | } |
418 | 0 | else |
419 | 0 | fz_css_error(buf, "unexpected character in url"); |
420 | 0 | } |
421 | 0 | css_push_char(buf, 0); |
422 | 0 | } |
423 | | |
424 | | static int css_lex(struct lexbuf *buf) |
425 | 35.5k | { |
426 | 35.5k | int t; |
427 | | |
428 | | // TODO: keyword escape sequences |
429 | | |
430 | 35.5k | buf->string_len = 0; |
431 | | |
432 | 35.5k | restart: |
433 | 35.5k | if (buf->c == 0) |
434 | 170 | return EOF; |
435 | | |
436 | 35.3k | if (iswhite(buf->c)) |
437 | 1.84k | { |
438 | 3.68k | while (iswhite(buf->c)) |
439 | 1.84k | css_lex_next(buf); |
440 | 1.84k | return ' '; |
441 | 1.84k | } |
442 | | |
443 | 33.5k | if (css_lex_accept(buf, '/')) |
444 | 0 | { |
445 | 0 | if (css_lex_accept(buf, '*')) |
446 | 0 | { |
447 | 0 | while (buf->c) |
448 | 0 | { |
449 | 0 | if (css_lex_accept(buf, '*')) |
450 | 0 | { |
451 | 0 | while (buf->c == '*') |
452 | 0 | css_lex_next(buf); |
453 | 0 | if (css_lex_accept(buf, '/')) |
454 | 0 | goto restart; |
455 | 0 | } |
456 | 0 | css_lex_next(buf); |
457 | 0 | } |
458 | 0 | fz_css_error(buf, "unterminated comment"); |
459 | 0 | } |
460 | 0 | return '/'; |
461 | 0 | } |
462 | | |
463 | 33.5k | if (css_lex_accept(buf, '<')) |
464 | 0 | { |
465 | 0 | if (css_lex_accept(buf, '!')) |
466 | 0 | { |
467 | 0 | css_lex_expect(buf, '-'); |
468 | 0 | css_lex_expect(buf, '-'); |
469 | 0 | goto restart; /* ignore CDO */ |
470 | 0 | } |
471 | 0 | return '<'; |
472 | 0 | } |
473 | | |
474 | 33.5k | if (css_lex_accept(buf, '-')) |
475 | 2 | { |
476 | 2 | if (css_lex_accept(buf, '-')) |
477 | 0 | { |
478 | 0 | if (css_lex_accept(buf, '>')) |
479 | 0 | goto restart; /* ignore CDC */ |
480 | 0 | } |
481 | 2 | if (isnmstart(buf->c)) |
482 | 2 | { |
483 | 2 | css_push_char(buf, '-'); |
484 | 2 | return css_lex_keyword(buf); |
485 | 2 | } |
486 | 0 | return '-'; |
487 | 2 | } |
488 | | |
489 | 33.4k | if (css_lex_accept(buf, '.')) |
490 | 0 | { |
491 | 0 | if (buf->c >= '0' && buf->c <= '9') |
492 | 0 | { |
493 | 0 | css_push_char(buf, '.'); |
494 | 0 | return css_lex_number(buf); |
495 | 0 | } |
496 | 0 | return '.'; |
497 | 0 | } |
498 | | |
499 | 33.4k | if (css_lex_accept(buf, '#')) |
500 | 118 | { |
501 | 118 | if (isnmchar(buf->c)) |
502 | 118 | return css_lex_hash(buf); |
503 | 0 | return '#'; |
504 | 118 | } |
505 | | |
506 | 33.3k | if (css_lex_accept(buf, '"')) |
507 | 0 | return css_lex_string(buf, '"'); |
508 | 33.3k | if (css_lex_accept(buf, '\'')) |
509 | 1 | return css_lex_string(buf, '\''); |
510 | | |
511 | 33.3k | if (buf->c >= '0' && buf->c <= '9') |
512 | 3.42k | return css_lex_number(buf); |
513 | | |
514 | 29.9k | if (css_lex_accept(buf, 'u')) |
515 | 477 | { |
516 | 477 | if (css_lex_accept(buf, 'r')) |
517 | 0 | { |
518 | 0 | if (css_lex_accept(buf, 'l')) |
519 | 0 | { |
520 | 0 | if (css_lex_accept(buf, '(')) |
521 | 0 | { |
522 | 0 | while (iswhite(buf->c)) |
523 | 0 | css_lex_next(buf); |
524 | 0 | if (css_lex_accept(buf, '"')) |
525 | 0 | css_lex_string(buf, '"'); |
526 | 0 | else if (css_lex_accept(buf, '\'')) |
527 | 0 | css_lex_string(buf, '\''); |
528 | 0 | else |
529 | 0 | css_lex_uri(buf); |
530 | 0 | while (iswhite(buf->c)) |
531 | 0 | css_lex_next(buf); |
532 | 0 | css_lex_expect(buf, ')'); |
533 | 0 | return CSS_URI; |
534 | 0 | } |
535 | 0 | css_push_char(buf, 'u'); |
536 | 0 | css_push_char(buf, 'r'); |
537 | 0 | css_push_char(buf, 'l'); |
538 | 0 | return css_lex_keyword(buf); |
539 | 0 | } |
540 | 0 | css_push_char(buf, 'u'); |
541 | 0 | css_push_char(buf, 'r'); |
542 | 0 | return css_lex_keyword(buf); |
543 | 0 | } |
544 | 477 | css_push_char(buf, 'u'); |
545 | 477 | return css_lex_keyword(buf); |
546 | 477 | } |
547 | | |
548 | 29.4k | if (isnmstart(buf->c)) |
549 | 13.5k | { |
550 | 13.5k | css_push_char(buf, buf->c); |
551 | 13.5k | css_lex_next(buf); |
552 | 13.5k | return css_lex_keyword(buf); |
553 | 13.5k | } |
554 | | |
555 | 15.9k | t = buf->c; |
556 | 15.9k | css_lex_next(buf); |
557 | 15.9k | return t; |
558 | 29.4k | } |
559 | | |
560 | | static void next(struct lexbuf *buf) |
561 | 35.5k | { |
562 | 35.5k | buf->lookahead = css_lex(buf); |
563 | 35.5k | } |
564 | | |
565 | | static int accept(struct lexbuf *buf, int t) |
566 | 50.6k | { |
567 | 50.6k | if (buf->lookahead == t) |
568 | 16.1k | { |
569 | 16.1k | next(buf); |
570 | 16.1k | return 1; |
571 | 16.1k | } |
572 | 34.5k | return 0; |
573 | 50.6k | } |
574 | | |
/* Consume the token t, or raise an "unexpected token" syntax error if
 * the lookahead is a different token. */
static void expect(struct lexbuf *buf, int t)
{
	if (!accept(buf, t))
		fz_css_error(buf, "unexpected token");
}
581 | | |
582 | | static void white(struct lexbuf *buf) |
583 | 33.8k | { |
584 | 35.4k | while (buf->lookahead == ' ') |
585 | 1.68k | next(buf); |
586 | 33.8k | } |
587 | | |
588 | | static int iscond(int t) |
589 | 3.03k | { |
590 | 3.03k | return t == ':' || t == '.' || t == '[' || t == CSS_HASH; |
591 | 3.03k | } |
592 | | |
593 | | static fz_css_value *parse_term(struct lexbuf *buf) |
594 | 7.90k | { |
595 | 7.90k | fz_css_value *v; |
596 | | |
597 | 7.90k | if (buf->lookahead == '+' || buf->lookahead == '-') |
598 | 0 | { |
599 | 0 | float sign = buf->lookahead == '-' ? -1 : 1; |
600 | 0 | next(buf); |
601 | 0 | if (buf->lookahead != CSS_NUMBER && buf->lookahead != CSS_LENGTH && buf->lookahead != CSS_PERCENT) |
602 | 0 | fz_css_error(buf, "expected number"); |
603 | 0 | if (sign < 0) |
604 | 0 | { |
605 | 0 | v = fz_new_css_value_x(buf->ctx, buf->pool, buf->lookahead); |
606 | 0 | v->data = fz_pool_alloc(buf->ctx, buf->pool, strlen(buf->string) + 2); |
607 | 0 | v->data[0] = '-'; |
608 | 0 | strcpy(v->data + 1, buf->string); |
609 | 0 | } |
610 | 0 | else |
611 | 0 | { |
612 | 0 | v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string); |
613 | 0 | } |
614 | 0 | next(buf); |
615 | 0 | white(buf); |
616 | 0 | return v; |
617 | 0 | } |
618 | | |
619 | 7.90k | if (buf->lookahead == CSS_KEYWORD) |
620 | 4.36k | { |
621 | 4.36k | v = fz_new_css_value(buf->ctx, buf->pool, CSS_KEYWORD, buf->string); |
622 | 4.36k | next(buf); |
623 | 4.36k | if (accept(buf, '(')) |
624 | 0 | { |
625 | 0 | white(buf); |
626 | 0 | v->type = '('; |
627 | 0 | v->args = parse_expr(buf); |
628 | 0 | expect(buf, ')'); |
629 | 0 | } |
630 | 4.36k | white(buf); |
631 | 4.36k | return v; |
632 | 4.36k | } |
633 | | |
634 | 3.53k | switch (buf->lookahead) |
635 | 3.53k | { |
636 | 116 | case CSS_HASH: |
637 | 116 | case CSS_STRING: |
638 | 116 | case CSS_URI: |
639 | 1.50k | case CSS_NUMBER: |
640 | 3.53k | case CSS_LENGTH: |
641 | 3.53k | case CSS_PERCENT: |
642 | 3.53k | v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string); |
643 | 3.53k | next(buf); |
644 | 3.53k | white(buf); |
645 | 3.53k | return v; |
646 | 3.53k | } |
647 | | |
648 | 0 | fz_css_error(buf, "expected value"); |
649 | 3.53k | } |
650 | | |
651 | | static fz_css_value *parse_expr(struct lexbuf *buf) |
652 | 6.48k | { |
653 | 6.48k | fz_css_value *head, *tail; |
654 | | |
655 | 6.48k | head = tail = parse_term(buf); |
656 | | |
657 | 7.90k | while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' && |
658 | 7.90k | buf->lookahead != ')' && buf->lookahead != EOF) |
659 | 1.41k | { |
660 | 1.41k | if (accept(buf, ',')) |
661 | 0 | { |
662 | 0 | white(buf); |
663 | 0 | if (buf->lookahead != ';') |
664 | 0 | { |
665 | 0 | tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, ',', ","); |
666 | 0 | tail = tail->next = parse_term(buf); |
667 | 0 | } |
668 | 0 | } |
669 | 1.41k | else if (accept(buf, '/')) |
670 | 0 | { |
671 | 0 | white(buf); |
672 | 0 | tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, '/', "/"); |
673 | 0 | tail = tail->next = parse_term(buf); |
674 | 0 | } |
675 | 1.41k | else |
676 | 1.41k | { |
677 | 1.41k | tail = tail->next = parse_term(buf); |
678 | 1.41k | } |
679 | 1.41k | } |
680 | | |
681 | 6.48k | return head; |
682 | 6.48k | } |
683 | | |
684 | | static fz_css_property *parse_declaration(struct lexbuf *buf) |
685 | 6.49k | { |
686 | 6.49k | fz_css_property *p; |
687 | | |
688 | 6.49k | if (buf->lookahead != CSS_KEYWORD) |
689 | 0 | fz_css_error(buf, "expected keyword in property"); |
690 | 6.49k | p = fz_new_css_property(buf->ctx, buf->pool, buf->string, NULL, 0); |
691 | 6.49k | next(buf); |
692 | | |
693 | 6.49k | white(buf); |
694 | 6.49k | expect(buf, ':'); |
695 | 6.49k | white(buf); |
696 | | |
697 | 6.49k | if (p) |
698 | 6.37k | p->value = parse_expr(buf); |
699 | 123 | else |
700 | 123 | (void) parse_expr(buf); |
701 | | |
702 | | /* !important */ |
703 | 6.49k | if (accept(buf, '!')) |
704 | 0 | { |
705 | 0 | white(buf); |
706 | 0 | if (buf->lookahead != CSS_KEYWORD || strcmp(buf->string, "important")) |
707 | 0 | fz_css_error(buf, "expected keyword 'important' after '!'"); |
708 | 0 | if (p) |
709 | 0 | p->important = 1; |
710 | 0 | next(buf); |
711 | 0 | white(buf); |
712 | 0 | } |
713 | | |
714 | 6.49k | return p; |
715 | 6.49k | } |
716 | | |
717 | | static fz_css_property *parse_declaration_list(struct lexbuf *buf) |
718 | 3.04k | { |
719 | 3.04k | fz_css_property *head, *tail = NULL, *p; |
720 | | |
721 | 3.04k | white(buf); |
722 | | |
723 | 3.04k | if (buf->lookahead == '}' || buf->lookahead == EOF) |
724 | 0 | return NULL; |
725 | | |
726 | 3.04k | p = parse_declaration(buf); |
727 | 3.04k | if (p) |
728 | 3.04k | tail = p; |
729 | 3.04k | head = tail; |
730 | | |
731 | 6.49k | while (accept(buf, ';')) |
732 | 3.44k | { |
733 | 3.44k | white(buf); |
734 | | |
735 | 3.44k | if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF) |
736 | 3.44k | { |
737 | 3.44k | p = parse_declaration(buf); |
738 | 3.44k | if (p) |
739 | 3.33k | { |
740 | 3.33k | if (!head) |
741 | 2 | head = tail = p; |
742 | 3.33k | else |
743 | 3.33k | tail = tail->next = p; |
744 | 3.33k | } |
745 | 3.44k | } |
746 | 3.44k | } |
747 | | |
748 | 3.04k | return head; |
749 | 3.04k | } |
750 | | |
751 | | static char *parse_attrib_value(struct lexbuf *buf) |
752 | 0 | { |
753 | 0 | char *s; |
754 | |
|
755 | 0 | if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING) |
756 | 0 | { |
757 | 0 | s = fz_pool_strdup(buf->ctx, buf->pool, buf->string); |
758 | 0 | next(buf); |
759 | 0 | white(buf); |
760 | 0 | return s; |
761 | 0 | } |
762 | | |
763 | 0 | fz_css_error(buf, "expected attribute value"); |
764 | 0 | } |
765 | | |
766 | | static fz_css_condition *parse_condition(struct lexbuf *buf) |
767 | 0 | { |
768 | 0 | fz_css_condition *c; |
769 | |
|
770 | 0 | if (accept(buf, ':')) |
771 | 0 | { |
772 | 0 | (void)accept(buf, ':'); /* swallow css3 :: syntax and pretend it's a normal pseudo-class */ |
773 | 0 | if (buf->lookahead != CSS_KEYWORD) |
774 | 0 | fz_css_error(buf, "expected keyword after ':'"); |
775 | 0 | c = fz_new_css_condition(buf->ctx, buf->pool, ':', "pseudo", buf->string); |
776 | 0 | next(buf); |
777 | 0 | if (accept(buf, '(')) |
778 | 0 | { |
779 | 0 | white(buf); |
780 | 0 | if (accept(buf, CSS_KEYWORD)) |
781 | 0 | white(buf); |
782 | 0 | expect(buf, ')'); |
783 | 0 | } |
784 | 0 | return c; |
785 | 0 | } |
786 | | |
787 | 0 | if (accept(buf, '.')) |
788 | 0 | { |
789 | 0 | if (buf->lookahead != CSS_KEYWORD) |
790 | 0 | fz_css_error(buf, "expected keyword after '.'"); |
791 | 0 | c = fz_new_css_condition(buf->ctx, buf->pool, '.', "class", buf->string); |
792 | 0 | next(buf); |
793 | 0 | return c; |
794 | 0 | } |
795 | | |
796 | 0 | if (accept(buf, '[')) |
797 | 0 | { |
798 | 0 | white(buf); |
799 | |
|
800 | 0 | if (buf->lookahead != CSS_KEYWORD) |
801 | 0 | fz_css_error(buf, "expected keyword after '['"); |
802 | 0 | c = fz_new_css_condition(buf->ctx, buf->pool, '[', buf->string, NULL); |
803 | 0 | next(buf); |
804 | |
|
805 | 0 | white(buf); |
806 | |
|
807 | 0 | if (accept(buf, '=')) |
808 | 0 | { |
809 | 0 | c->type = '='; |
810 | 0 | c->val = parse_attrib_value(buf); |
811 | 0 | } |
812 | 0 | else if (accept(buf, '|')) |
813 | 0 | { |
814 | 0 | expect(buf, '='); |
815 | 0 | c->type = '|'; |
816 | 0 | c->val = parse_attrib_value(buf); |
817 | 0 | } |
818 | 0 | else if (accept(buf, '~')) |
819 | 0 | { |
820 | 0 | expect(buf, '='); |
821 | 0 | c->type = '~'; |
822 | 0 | c->val = parse_attrib_value(buf); |
823 | 0 | } |
824 | |
|
825 | 0 | expect(buf, ']'); |
826 | |
|
827 | 0 | return c; |
828 | 0 | } |
829 | | |
830 | 0 | if (buf->lookahead == CSS_HASH) |
831 | 0 | { |
832 | 0 | c = fz_new_css_condition(buf->ctx, buf->pool, '#', "id", buf->string); |
833 | 0 | next(buf); |
834 | 0 | return c; |
835 | 0 | } |
836 | | |
837 | 0 | fz_css_error(buf, "expected condition"); |
838 | 0 | } |
839 | | |
840 | | static fz_css_condition *parse_condition_list(struct lexbuf *buf) |
841 | 0 | { |
842 | 0 | fz_css_condition *head, *tail; |
843 | |
|
844 | 0 | head = tail = parse_condition(buf); |
845 | 0 | while (iscond(buf->lookahead)) |
846 | 0 | { |
847 | 0 | tail = tail->next = parse_condition(buf); |
848 | 0 | } |
849 | 0 | return head; |
850 | 0 | } |
851 | | |
852 | | static fz_css_selector *parse_simple_selector(struct lexbuf *buf) |
853 | 3.03k | { |
854 | 3.03k | fz_css_selector *s; |
855 | | |
856 | 3.03k | if (accept(buf, '*')) |
857 | 0 | { |
858 | 0 | s = fz_new_css_selector(buf->ctx, buf->pool, NULL); |
859 | 0 | if (iscond(buf->lookahead)) |
860 | 0 | s->cond = parse_condition_list(buf); |
861 | 0 | return s; |
862 | 0 | } |
863 | 3.03k | else if (buf->lookahead == CSS_KEYWORD) |
864 | 3.03k | { |
865 | 3.03k | s = fz_new_css_selector(buf->ctx, buf->pool, buf->string); |
866 | 3.03k | next(buf); |
867 | 3.03k | if (iscond(buf->lookahead)) |
868 | 0 | s->cond = parse_condition_list(buf); |
869 | 3.03k | return s; |
870 | 3.03k | } |
871 | 1 | else if (iscond(buf->lookahead)) |
872 | 0 | { |
873 | 0 | s = fz_new_css_selector(buf->ctx, buf->pool, NULL); |
874 | 0 | s->cond = parse_condition_list(buf); |
875 | 0 | return s; |
876 | 0 | } |
877 | | |
878 | 1 | fz_css_error(buf, "expected selector"); |
879 | 3.03k | } |
880 | | |
881 | | static fz_css_selector *parse_combinator(struct lexbuf *buf, int c, fz_css_selector *a) |
882 | 159 | { |
883 | 159 | fz_css_selector *sel, *b; |
884 | 159 | white(buf); |
885 | 159 | b = parse_simple_selector(buf); |
886 | 159 | sel = fz_new_css_selector(buf->ctx, buf->pool, NULL); |
887 | 159 | sel->combine = c; |
888 | 159 | sel->left = a; |
889 | 159 | sel->right = b; |
890 | 159 | return sel; |
891 | 159 | } |
892 | | |
893 | | static fz_css_selector *parse_selector(struct lexbuf *buf) |
894 | 2.87k | { |
895 | 2.87k | fz_css_selector *sel = parse_simple_selector(buf); |
896 | 2.87k | for (;;) |
897 | 3.03k | { |
898 | 3.03k | if (accept(buf, ' ')) |
899 | 159 | { |
900 | 159 | white(buf); |
901 | 159 | if (accept(buf, '+')) |
902 | 0 | sel = parse_combinator(buf, '+', sel); |
903 | 159 | else if (accept(buf, '>')) |
904 | 0 | sel = parse_combinator(buf, '>', sel); |
905 | 159 | else if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF) |
906 | 159 | sel = parse_combinator(buf, ' ', sel); |
907 | 0 | else |
908 | 0 | break; |
909 | 159 | } |
910 | 2.87k | else if (accept(buf, '+')) |
911 | 0 | sel = parse_combinator(buf, '+', sel); |
912 | 2.87k | else if (accept(buf, '>')) |
913 | 0 | sel = parse_combinator(buf, '>', sel); |
914 | 2.87k | else |
915 | 2.87k | break; |
916 | 3.03k | } |
917 | 2.87k | return sel; |
918 | 2.87k | } |
919 | | |
920 | | static fz_css_selector *parse_selector_list(struct lexbuf *buf) |
921 | 2.87k | { |
922 | 2.87k | fz_css_selector *head, *tail; |
923 | | |
924 | 2.87k | head = tail = parse_selector(buf); |
925 | 2.87k | while (accept(buf, ',')) |
926 | 0 | { |
927 | 0 | white(buf); |
928 | 0 | tail = tail->next = parse_selector(buf); |
929 | 0 | } |
930 | 2.87k | return head; |
931 | 2.87k | } |
932 | | |
933 | | static fz_css_rule *parse_ruleset(struct lexbuf *buf) |
934 | 2.87k | { |
935 | 2.87k | fz_css_selector *s = NULL; |
936 | 2.87k | fz_css_property *p = NULL; |
937 | | |
938 | 5.74k | fz_try(buf->ctx) |
939 | 5.74k | { |
940 | 2.87k | s = parse_selector_list(buf); |
941 | 2.87k | expect(buf, '{'); |
942 | 2.87k | p = parse_declaration_list(buf); |
943 | 2.87k | expect(buf, '}'); |
944 | 2.87k | white(buf); |
945 | 2.87k | } |
946 | 5.74k | fz_catch(buf->ctx) |
947 | 4 | { |
948 | 4 | fz_rethrow_unless(buf->ctx, FZ_ERROR_SYNTAX); |
949 | 4 | fz_report_error(buf->ctx); |
950 | | |
951 | 27 | while (buf->lookahead != EOF) |
952 | 26 | { |
953 | 26 | if (accept(buf, '}')) |
954 | 3 | { |
955 | 3 | white(buf); |
956 | 3 | break; |
957 | 3 | } |
958 | 23 | next(buf); |
959 | 23 | } |
960 | 4 | return NULL; |
961 | 4 | } |
962 | | |
963 | 2.86k | return fz_new_css_rule(buf->ctx, buf->pool, s, p); |
964 | 2.87k | } |
965 | | |
966 | | static fz_css_rule *parse_at_page(struct lexbuf *buf) |
967 | 92 | { |
968 | 92 | fz_css_selector *s = NULL; |
969 | 92 | fz_css_property *p = NULL; |
970 | | |
971 | 92 | white(buf); |
972 | 92 | if (accept(buf, ':')) |
973 | 0 | { |
974 | 0 | expect(buf, CSS_KEYWORD); |
975 | 0 | white(buf); |
976 | 0 | } |
977 | 92 | expect(buf, '{'); |
978 | 92 | p = parse_declaration_list(buf); |
979 | 92 | expect(buf, '}'); |
980 | 92 | white(buf); |
981 | | |
982 | 92 | s = fz_new_css_selector(buf->ctx, buf->pool, "@page"); |
983 | 92 | return fz_new_css_rule(buf->ctx, buf->pool, s, p); |
984 | 92 | } |
985 | | |
986 | | static fz_css_rule *parse_at_font_face(struct lexbuf *buf) |
987 | 0 | { |
988 | 0 | fz_css_selector *s = NULL; |
989 | 0 | fz_css_property *p = NULL; |
990 | |
|
991 | 0 | white(buf); |
992 | 0 | expect(buf, '{'); |
993 | 0 | p = parse_declaration_list(buf); |
994 | 0 | expect(buf, '}'); |
995 | 0 | white(buf); |
996 | |
|
997 | 0 | s = fz_new_css_selector(buf->ctx, buf->pool, "@font-face"); |
998 | 0 | return fz_new_css_rule(buf->ctx, buf->pool, s, p); |
999 | 0 | } |
1000 | | |
1001 | | static void parse_at_rule(struct lexbuf *buf) |
1002 | 0 | { |
1003 | 0 | expect(buf, CSS_KEYWORD); |
1004 | | |
1005 | | /* skip until '{' or ';' */ |
1006 | 0 | while (buf->lookahead != EOF) |
1007 | 0 | { |
1008 | 0 | if (accept(buf, ';')) |
1009 | 0 | { |
1010 | 0 | white(buf); |
1011 | 0 | return; |
1012 | 0 | } |
1013 | 0 | if (accept(buf, '{')) |
1014 | 0 | { |
1015 | 0 | int depth = 1; |
1016 | 0 | while (buf->lookahead != EOF && depth > 0) |
1017 | 0 | { |
1018 | 0 | if (accept(buf, '{')) |
1019 | 0 | ++depth; |
1020 | 0 | else if (accept(buf, '}')) |
1021 | 0 | --depth; |
1022 | 0 | else |
1023 | 0 | next(buf); |
1024 | 0 | } |
1025 | 0 | white(buf); |
1026 | 0 | return; |
1027 | 0 | } |
1028 | 0 | next(buf); |
1029 | 0 | } |
1030 | 0 | } |
1031 | | |
1032 | | static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain) |
1033 | 92 | { |
1034 | 92 | fz_css_rule *rule, **nextp, *tail; |
1035 | | |
1036 | 92 | tail = chain; |
1037 | 92 | if (tail) |
1038 | 39 | { |
1039 | 2.06k | while (tail->next) |
1040 | 2.02k | tail = tail->next; |
1041 | 39 | nextp = &tail->next; |
1042 | 39 | } |
1043 | 53 | else |
1044 | 53 | { |
1045 | 53 | nextp = &tail; |
1046 | 53 | } |
1047 | | |
1048 | 92 | white(buf); |
1049 | | |
1050 | 3.05k | while (buf->lookahead != EOF) |
1051 | 2.96k | { |
1052 | 2.96k | if (accept(buf, '@')) |
1053 | 92 | { |
1054 | 92 | if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "page")) |
1055 | 92 | { |
1056 | 92 | next(buf); |
1057 | 92 | rule = *nextp = parse_at_page(buf); |
1058 | 92 | nextp = &rule->next; |
1059 | 92 | } |
1060 | 0 | else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "font-face")) |
1061 | 0 | { |
1062 | 0 | next(buf); |
1063 | 0 | rule = *nextp = parse_at_font_face(buf); |
1064 | 0 | nextp = &rule->next; |
1065 | 0 | } |
1066 | 0 | else |
1067 | 0 | { |
1068 | 0 | parse_at_rule(buf); |
1069 | 0 | } |
1070 | 92 | } |
1071 | 2.87k | else |
1072 | 2.87k | { |
1073 | 2.87k | fz_css_rule *x = parse_ruleset(buf); |
1074 | 2.87k | if (x) |
1075 | 2.86k | { |
1076 | 2.86k | rule = *nextp = x; |
1077 | 2.86k | nextp = &rule->next; |
1078 | 2.86k | } |
1079 | 2.87k | } |
1080 | 2.96k | white(buf); |
1081 | 2.96k | } |
1082 | | |
1083 | 92 | return chain ? chain : tail; |
1084 | 92 | } |
1085 | | |
1086 | | const char *fz_css_property_name(int key) |
1087 | 0 | { |
1088 | 0 | const char *name = "unknown"; |
1089 | 0 | size_t i; |
1090 | 0 | for (i = 0; i < nelem(css_property_list); ++i) |
1091 | 0 | if (*css_property_list[i].name && css_property_list[i].key == key) |
1092 | 0 | name = css_property_list[i].name; |
1093 | 0 | return name; |
1094 | 0 | } |
1095 | | |
1096 | | fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source) |
1097 | 83 | { |
1098 | 83 | struct lexbuf buf; |
1099 | 83 | css_lex_init(ctx, &buf, pool, source, "<inline>"); |
1100 | 83 | next(&buf); |
1101 | 83 | return parse_declaration_list(&buf); |
1102 | 83 | } |
1103 | | |
1104 | | void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file) |
1105 | 92 | { |
1106 | 92 | struct lexbuf buf; |
1107 | 92 | css_lex_init(ctx, &buf, css->pool, source, file); |
1108 | 92 | next(&buf); |
1109 | 92 | css->rule = parse_stylesheet(&buf, css->rule); |
1110 | 92 | } |