Coverage Report

Created: 2025-07-23 06:37

/src/mupdf/source/html/css-parse.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "html-imp.h"
25
26
#include <string.h>
27
28
#include "css-properties.h"
29
30
struct lexbuf
31
{
32
  fz_context *ctx;
33
  fz_pool *pool;
34
  const unsigned char *start;
35
  const unsigned char *s;
36
  const char *file;
37
  int line;
38
  int nest;
39
  int lookahead;
40
  int c;
41
  int string_len;
42
  char string[1024];
43
};
44
45
static fz_css_value *parse_expr(struct lexbuf *buf);
46
static fz_css_selector *parse_selector(struct lexbuf *buf);
47
48
FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg)
49
192
{
50
647
#define PRE_POST_SIZE 30
51
192
  unsigned char text[PRE_POST_SIZE * 2 + 4];
52
192
  unsigned char *d = text;
53
192
  const unsigned char *s = buf->start;
54
192
  int n;
55
56
  /* We want to make a helpful fragment for the error message.
57
   * We want err_pos to be the point at which we just tripped
58
   * the error. err_pos needs to be at least 1 byte behind
59
   * our read pointer, as we've read that char. */
60
192
  const unsigned char *err_pos = buf->s;
61
192
  n = 1;
62
63
  /* And if we're using lookahead, it's further behind. */
64
192
  if (buf->lookahead >= CSS_KEYWORD)
65
19
    n += buf->string_len;
66
173
  else if (buf->lookahead != EOF)
67
158
    n += 1;
68
69
  /* But it can't be before the start of the buffer */
70
192
  n = fz_mini(n, err_pos - buf->start);
71
192
  err_pos -= n;
72
73
  /* We're going to try to output:
74
   * <section prior to the error> ">" <the char that tripped> "<" <section after the error>
75
   */
76
  /* Is the section prior to the error too long? If so, truncate it with an ellipsis. */
77
192
  n = sizeof(text)-1;
78
192
  if (err_pos - s > n-PRE_POST_SIZE - 3)
79
168
  {
80
168
    *d++ = '.';
81
168
    *d++ = '.';
82
168
    *d++ = '.';
83
168
    n -= 3;
84
168
    s = err_pos - (n-PRE_POST_SIZE - 3);
85
168
  }
86
87
  /* Copy the prefix (if there is one) */
88
192
  if (err_pos > s)
89
189
  {
90
189
    n = err_pos - s;
91
5.11k
    while (n)
92
4.92k
    {
93
4.92k
      unsigned char c = *s++;
94
4.92k
      *d++ = (c < 32 || c > 127) ? ' ' : c;
95
4.92k
      n--;
96
4.92k
    }
97
189
  }
98
99
  /* Marker, char, end marker */
100
192
  *d++ = '>', n--;
101
192
  if (*err_pos)
102
188
    *d++ = *err_pos++, n--;
103
192
  *d++ = '<', n--;
104
105
  /* Postfix */
106
192
  n = (int)strlen((const char *)err_pos);
107
192
  if (n <= PRE_POST_SIZE)
108
97
  {
109
1.49k
    while (n > 0)
110
1.39k
    {
111
1.39k
      unsigned char c = *err_pos++;
112
1.39k
      *d++ =  (c < 32 || c > 127) ? ' ' : c;
113
1.39k
      n--;
114
1.39k
    }
115
97
  }
116
95
  else
117
95
  {
118
2.66k
    for (n = PRE_POST_SIZE-3; n > 0; n--)
119
2.56k
    {
120
2.56k
      unsigned char c = *err_pos++;
121
2.56k
      *d++ =  (c < 32 || c > 127) ? ' ' : c;
122
2.56k
    }
123
124
95
    *d++ = '.';
125
95
    *d++ = '.';
126
95
    *d++ = '.';
127
95
  }
128
192
  *d = 0;
129
130
192
  fz_throw(buf->ctx, FZ_ERROR_SYNTAX, "css syntax error: %s (%s:%d) (%s)", msg, buf->file, buf->line, text);
131
192
}
132
133
/*
 * Create an empty stylesheet.
 *
 * A fresh pool is created and the fz_css object itself is allocated from
 * it. If that allocation throws, the pool is dropped and the exception is
 * rethrown.
 */
fz_css *fz_new_css(fz_context *ctx)
{
  fz_css *sheet = NULL;
  fz_pool *pool = fz_new_pool(ctx);

  fz_try(ctx)
  {
    sheet = fz_pool_alloc(ctx, pool, sizeof *sheet);
    sheet->rule = NULL;
    sheet->pool = pool;
  }
  fz_catch(ctx)
  {
    fz_drop_pool(ctx, pool);
    fz_rethrow(ctx);
  }

  return sheet;
}
152
153
/* Drop the pool owned by a stylesheet. A NULL stylesheet is ignored. */
void fz_drop_css(fz_context *ctx, fz_css *css)
{
  if (css == NULL)
    return;
  fz_drop_pool(ctx, css->pool);
}
158
159
/* Allocate an unlinked rule from the pool, pairing a selector list with a declaration list. */
static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_pool *pool, fz_css_selector *selector, fz_css_property *declaration)
{
  fz_css_rule *r = fz_pool_alloc(ctx, pool, sizeof *r);

  r->next = NULL;
  r->declaration = declaration;
  r->selector = selector;
  return r;
}
167
168
/*
 * Allocate a selector node from the pool with no combinator, no
 * conditions and no links. The name is copied into the pool; a NULL name
 * is stored as NULL.
 */
static fz_css_selector *fz_new_css_selector(fz_context *ctx, fz_pool *pool, const char *name)
{
  fz_css_selector *node = fz_pool_alloc(ctx, pool, sizeof *node);

  if (name)
    node->name = fz_pool_strdup(ctx, pool, name);
  else
    node->name = NULL;
  node->combine = 0;
  node->cond = NULL;
  node->left = NULL;
  node->right = NULL;
  node->next = NULL;
  return node;
}
179
180
/*
 * Allocate an unlinked selector condition of the given type from the
 * pool. key and val are copied into the pool when non-NULL and stored as
 * NULL otherwise.
 */
static fz_css_condition *fz_new_css_condition(fz_context *ctx, fz_pool *pool, int type, const char *key, const char *val)
{
  fz_css_condition *node = fz_pool_alloc(ctx, pool, sizeof *node);

  node->type = type;
  if (key)
    node->key = fz_pool_strdup(ctx, pool, key);
  else
    node->key = NULL;
  if (val)
    node->val = fz_pool_strdup(ctx, pool, val);
  else
    node->val = NULL;
  node->next = NULL;
  return node;
}
189
190
/*
 * Allocate a property node for a known property name.
 *
 * The name is looked up with css_property_lookup(); when it is not found
 * NULL is returned and nothing is allocated. Otherwise the node records
 * the key from the lookup entry, the given value and spec, and starts out
 * not important and unlinked.
 */
static fz_css_property *fz_new_css_property(fz_context *ctx, fz_pool *pool, const char *name, fz_css_value *value, int spec)
{
  fz_css_property *node;
  struct css_property_info *entry = css_property_lookup(name, strlen(name));

  if (!entry)
    return NULL;

  node = fz_pool_alloc(ctx, pool, sizeof *node);
  node->name = entry->key;
  node->value = value;
  node->spec = spec;
  node->important = 0;
  node->next = NULL;
  return node;
}
205
206
/* Allocate a value node of the given type with no data, no arguments and no successor. */
static fz_css_value *fz_new_css_value_x(fz_context *ctx, fz_pool *pool, int type)
{
  fz_css_value *node = fz_pool_alloc(ctx, pool, sizeof *node);

  node->next = NULL;
  node->args = NULL;
  node->data = NULL;
  node->type = type;
  return node;
}
215
216
/* Allocate a value node of the given type whose data is a pool copy of the given string. */
static fz_css_value *fz_new_css_value(fz_context *ctx, fz_pool *pool, int type, const char *data)
{
  fz_css_value *node = fz_pool_alloc(ctx, pool, sizeof *node);

  node->type = type;
  node->data = fz_pool_strdup(ctx, pool, data);
  node->next = NULL;
  node->args = NULL;
  return node;
}
225
226
static void css_lex_next(struct lexbuf *buf)
227
756k
{
228
756k
  if (buf->c == 0)
229
0
    return;
230
756k
  buf->s += fz_chartorune(&buf->c, (const char *)buf->s);
231
756k
  if (buf->c == '{')
232
16.9k
    ++buf->nest;
233
739k
  else if (buf->c == '}')
234
16.9k
    --buf->nest;
235
722k
  else if (buf->c == '\n')
236
105
    ++buf->line;
237
756k
  buf->lookahead = EOF;
238
756k
}
239
240
/*
 * Prepare a lexer for the NUL-terminated source string 's' and load its
 * first character. 'file' is only kept for error messages.
 */
static void css_lex_init(fz_context *ctx, struct lexbuf *buf, fz_pool *pool, const char *s, const char *file)
{
  buf->ctx = ctx;
  buf->pool = pool;
  buf->file = file;

  buf->start = (const unsigned char *)s;
  buf->s = buf->start;

  buf->line = 1;
  buf->nest = 0;
  buf->string_len = 0;
  buf->lookahead = EOF;

  /* Any non-zero value works here: it stops css_lex_next() from
   * taking its end-of-input early return on the first call. */
  buf->c = -1;
  css_lex_next(buf);
}
255
256
/* Return 1 for space, tab, carriage return, newline and form feed; 0 otherwise. */
static inline int iswhite(int c)
{
  switch (c)
  {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
  case '\f':
    return 1;
  default:
    return 0;
  }
}
260
261
static int isnmstart(int c)
262
195k
{
263
195k
  return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
264
195k
    (c >= 128 && c <= UCS_MAX);
265
195k
}
266
267
static int isnmchar(int c)
268
613k
{
269
613k
  return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
270
613k
    (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= UCS_MAX);
271
613k
}
272
273
static void css_push_char(struct lexbuf *buf, int c)
274
648k
{
275
648k
  char out[4];
276
648k
  int n = fz_runetochar(out, c);
277
648k
  if (buf->string_len + n >= (int)nelem(buf->string))
278
0
    fz_css_error(buf, "token too long");
279
648k
  memcpy(buf->string + buf->string_len, out, n);
280
648k
  buf->string_len += n;
281
648k
}
282
283
static void css_push_zero(struct lexbuf *buf)
284
103k
{
285
103k
  if (buf->string_len + 1 >= (int)nelem(buf->string))
286
0
    fz_css_error(buf, "token too long");
287
103k
  buf->string[buf->string_len] = 0;
288
103k
  buf->string_len += 1;
289
103k
}
290
291
static int css_lex_accept(struct lexbuf *buf, int t)
292
1.61M
{
293
1.61M
  if (buf->c == t)
294
8.60k
  {
295
8.60k
    css_lex_next(buf);
296
8.60k
    return 1;
297
8.60k
  }
298
1.60M
  return 0;
299
1.61M
}
300
301
static void css_lex_expect(struct lexbuf *buf, int t)
302
48
{
303
48
  if (!css_lex_accept(buf, t))
304
6
    fz_css_error(buf, "unexpected character");
305
48
}
306
307
static int css_lex_number(struct lexbuf *buf)
308
19.4k
{
309
41.2k
  while (buf->c >= '0' && buf->c <= '9')
310
21.7k
  {
311
21.7k
    css_push_char(buf, buf->c);
312
21.7k
    css_lex_next(buf);
313
21.7k
  }
314
315
19.4k
  if (css_lex_accept(buf, '.'))
316
4.96k
  {
317
4.96k
    css_push_char(buf, '.');
318
13.1k
    while (buf->c >= '0' && buf->c <= '9')
319
8.16k
    {
320
8.16k
      css_push_char(buf, buf->c);
321
8.16k
      css_lex_next(buf);
322
8.16k
    }
323
4.96k
  }
324
325
19.4k
  if (css_lex_accept(buf, '%'))
326
0
  {
327
0
    css_push_char(buf, '%');
328
0
    css_push_zero(buf);
329
0
    return CSS_PERCENT;
330
0
  }
331
332
19.4k
  if (isnmstart(buf->c))
333
11.1k
  {
334
11.1k
    css_push_char(buf, buf->c);
335
11.1k
    css_lex_next(buf);
336
22.3k
    while (isnmchar(buf->c))
337
11.2k
    {
338
11.2k
      css_push_char(buf, buf->c);
339
11.2k
      css_lex_next(buf);
340
11.2k
    }
341
11.1k
    css_push_zero(buf);
342
11.1k
    return CSS_LENGTH;
343
11.1k
  }
344
345
8.38k
  css_push_zero(buf);
346
8.38k
  return CSS_NUMBER;
347
19.4k
}
348
349
static int css_lex_keyword(struct lexbuf *buf)
350
83.6k
{
351
585k
  while (isnmchar(buf->c))
352
501k
  {
353
501k
    css_push_char(buf, buf->c);
354
501k
    css_lex_next(buf);
355
501k
  }
356
83.6k
  css_push_zero(buf);
357
83.6k
  return CSS_KEYWORD;
358
83.6k
}
359
360
static int css_lex_hash(struct lexbuf *buf)
361
762
{
362
4.49k
  while (isnmchar(buf->c))
363
3.73k
  {
364
3.73k
    css_push_char(buf, buf->c);
365
3.73k
    css_lex_next(buf);
366
3.73k
  }
367
762
  css_push_zero(buf);
368
762
  return CSS_HASH;
369
762
}
370
371
static int css_lex_string(struct lexbuf *buf, int q)
372
48
{
373
1.70k
  while (buf->c && buf->c != q)
374
1.66k
  {
375
1.66k
    if (css_lex_accept(buf, '\\'))
376
0
    {
377
0
      if (css_lex_accept(buf, 'n'))
378
0
        css_push_char(buf, '\n');
379
0
      else if (css_lex_accept(buf, 'r'))
380
0
        css_push_char(buf, '\r');
381
0
      else if (css_lex_accept(buf, 'f'))
382
0
        css_push_char(buf, '\f');
383
0
      else if (css_lex_accept(buf, '\f'))
384
0
        /* line continuation */ ;
385
0
      else if (css_lex_accept(buf, '\n'))
386
0
        /* line continuation */ ;
387
0
      else if (css_lex_accept(buf, '\r'))
388
0
        css_lex_accept(buf, '\n');
389
0
      else
390
0
      {
391
0
        css_push_char(buf, buf->c);
392
0
        css_lex_next(buf);
393
0
      }
394
0
    }
395
1.66k
    else
396
1.66k
    {
397
1.66k
      css_push_char(buf, buf->c);
398
1.66k
      css_lex_next(buf);
399
1.66k
    }
400
1.66k
  }
401
48
  css_lex_expect(buf, q);
402
48
  css_push_zero(buf);
403
48
  return CSS_STRING;
404
48
}
405
406
static void css_lex_uri(struct lexbuf *buf)
407
0
{
408
0
  while (buf->c && buf->c != ')' && !iswhite(buf->c))
409
0
  {
410
0
    if (css_lex_accept(buf, '\\'))
411
0
    {
412
0
      if (css_lex_accept(buf, 'n'))
413
0
        css_push_char(buf, '\n');
414
0
      else if (css_lex_accept(buf, 'r'))
415
0
        css_push_char(buf, '\r');
416
0
      else if (css_lex_accept(buf, 'f'))
417
0
        css_push_char(buf, '\f');
418
0
      else
419
0
      {
420
0
        css_push_char(buf, buf->c);
421
0
        css_lex_next(buf);
422
0
      }
423
0
    }
424
0
    else if (buf->c == '!' || buf->c == '#' || buf->c == '$' || buf->c == '%' || buf->c == '&' ||
425
0
        (buf->c >= '*' && buf->c <= '[') ||
426
0
        (buf->c >= ']' && buf->c <= '~') ||
427
0
        buf->c > 159)
428
0
    {
429
0
      css_push_char(buf, buf->c);
430
0
      css_lex_next(buf);
431
0
    }
432
0
    else
433
0
      fz_css_error(buf, "unexpected character in url");
434
0
  }
435
0
  css_push_zero(buf);
436
0
}
437
438
static int css_lex(struct lexbuf *buf)
439
211k
{
440
211k
  int t;
441
442
  // TODO: keyword escape sequences
443
444
211k
  buf->string_len = 0;
445
446
211k
restart:
447
211k
  if (buf->c == 0)
448
941
    return EOF;
449
450
210k
  if (iswhite(buf->c))
451
10.8k
  {
452
21.8k
    while (iswhite(buf->c))
453
10.9k
      css_lex_next(buf);
454
10.8k
    return ' ';
455
10.8k
  }
456
457
199k
  if (css_lex_accept(buf, '/'))
458
99
  {
459
99
    if (css_lex_accept(buf, '*'))
460
0
    {
461
0
      while (buf->c)
462
0
      {
463
0
        if (css_lex_accept(buf, '*'))
464
0
        {
465
0
          while (buf->c == '*')
466
0
            css_lex_next(buf);
467
0
          if (css_lex_accept(buf, '/'))
468
0
            goto restart;
469
0
        }
470
0
        css_lex_next(buf);
471
0
      }
472
0
      fz_css_error(buf, "unterminated comment");
473
0
    }
474
99
    return '/';
475
99
  }
476
477
199k
  if (css_lex_accept(buf, '<'))
478
33
  {
479
33
    if (css_lex_accept(buf, '!'))
480
0
    {
481
0
      css_lex_expect(buf, '-');
482
0
      css_lex_expect(buf, '-');
483
0
      goto restart; /* ignore CDO */
484
0
    }
485
33
    return '<';
486
33
  }
487
488
199k
  if (css_lex_accept(buf, '-'))
489
1
  {
490
1
    if (css_lex_accept(buf, '-'))
491
0
    {
492
0
      if (css_lex_accept(buf, '>'))
493
0
        goto restart; /* ignore CDC */
494
0
    }
495
1
    if (isnmstart(buf->c))
496
1
    {
497
1
      css_push_char(buf, '-');
498
1
      return css_lex_keyword(buf);
499
1
    }
500
0
    return '-';
501
1
  }
502
503
199k
  if (css_lex_accept(buf, '.'))
504
17
  {
505
17
    if (buf->c >= '0' && buf->c <= '9')
506
4
    {
507
4
      css_push_char(buf, '.');
508
4
      return css_lex_number(buf);
509
4
    }
510
13
    return '.';
511
17
  }
512
513
199k
  if (css_lex_accept(buf, '#'))
514
763
  {
515
763
    if (isnmchar(buf->c))
516
762
      return css_lex_hash(buf);
517
1
    return '#';
518
763
  }
519
520
198k
  if (css_lex_accept(buf, '"'))
521
10
    return css_lex_string(buf, '"');
522
198k
  if (css_lex_accept(buf, '\''))
523
38
    return css_lex_string(buf, '\'');
524
525
198k
  if (buf->c >= '0' && buf->c <= '9')
526
19.4k
    return css_lex_number(buf);
527
528
178k
  if (css_lex_accept(buf, 'u'))
529
2.63k
  {
530
2.63k
    if (css_lex_accept(buf, 'r'))
531
0
    {
532
0
      if (css_lex_accept(buf, 'l'))
533
0
      {
534
0
        if (css_lex_accept(buf, '('))
535
0
        {
536
0
          while (iswhite(buf->c))
537
0
            css_lex_next(buf);
538
0
          if (css_lex_accept(buf, '"'))
539
0
            css_lex_string(buf, '"');
540
0
          else if (css_lex_accept(buf, '\''))
541
0
            css_lex_string(buf, '\'');
542
0
          else
543
0
            css_lex_uri(buf);
544
0
          while (iswhite(buf->c))
545
0
            css_lex_next(buf);
546
0
          css_lex_expect(buf, ')');
547
0
          return CSS_URI;
548
0
        }
549
0
        css_push_char(buf, 'u');
550
0
        css_push_char(buf, 'r');
551
0
        css_push_char(buf, 'l');
552
0
        return css_lex_keyword(buf);
553
0
      }
554
0
      css_push_char(buf, 'u');
555
0
      css_push_char(buf, 'r');
556
0
      return css_lex_keyword(buf);
557
0
    }
558
2.63k
    css_push_char(buf, 'u');
559
2.63k
    return css_lex_keyword(buf);
560
2.63k
  }
561
562
176k
  if (isnmstart(buf->c))
563
80.9k
  {
564
80.9k
    css_push_char(buf, buf->c);
565
80.9k
    css_lex_next(buf);
566
80.9k
    return css_lex_keyword(buf);
567
80.9k
  }
568
569
95.3k
  t = buf->c;
570
95.3k
  css_lex_next(buf);
571
95.3k
  return t;
572
176k
}
573
574
static void next(struct lexbuf *buf)
575
211k
{
576
211k
  buf->lookahead = css_lex(buf);
577
211k
}
578
579
static int accept(struct lexbuf *buf, int t)
580
307k
{
581
307k
  if (buf->lookahead == t)
582
95.8k
  {
583
95.8k
    next(buf);
584
95.8k
    return 1;
585
95.8k
  }
586
211k
  return 0;
587
307k
}
588
589
/* Consume the lookahead token, which must be t; otherwise raise a syntax error. */
static void expect(struct lexbuf *buf, int t)
{
  if (!accept(buf, t))
    fz_css_error(buf, "unexpected token");
}
595
596
/* Skip any white space tokens at the lookahead position. */
static void white(struct lexbuf *buf)
{
  while (accept(buf, ' '))
    ;
}
601
602
static int iscond(int t)
603
20.5k
{
604
20.5k
  return t == ':' || t == '.' || t == '[' || t == CSS_HASH;
605
20.5k
}
606
607
static fz_css_value *parse_term(struct lexbuf *buf)
608
45.3k
{
609
45.3k
  fz_css_value *v;
610
611
45.3k
  if (buf->lookahead == '+' || buf->lookahead == '-')
612
8
  {
613
8
    float sign = buf->lookahead == '-' ? -1 : 1;
614
8
    next(buf);
615
8
    if (buf->lookahead != CSS_NUMBER && buf->lookahead != CSS_LENGTH && buf->lookahead != CSS_PERCENT)
616
7
      fz_css_error(buf, "expected number");
617
1
    if (sign < 0)
618
0
    {
619
0
      v = fz_new_css_value_x(buf->ctx, buf->pool, buf->lookahead);
620
0
      v->data = fz_pool_alloc(buf->ctx, buf->pool, strlen(buf->string) + 2);
621
0
      v->data[0] = '-';
622
0
      strcpy(v->data + 1, buf->string);
623
0
    }
624
1
    else
625
1
    {
626
1
      v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
627
1
    }
628
1
    next(buf);
629
1
    white(buf);
630
1
    return v;
631
8
  }
632
633
45.3k
  if (buf->lookahead == CSS_KEYWORD)
634
25.1k
  {
635
25.1k
    v = fz_new_css_value(buf->ctx, buf->pool, CSS_KEYWORD, buf->string);
636
25.1k
    next(buf);
637
25.1k
    if (accept(buf, '('))
638
0
    {
639
0
      white(buf);
640
0
      v->type = '(';
641
0
      v->args = parse_expr(buf);
642
0
      expect(buf, ')');
643
0
    }
644
25.1k
    white(buf);
645
25.1k
    return v;
646
25.1k
  }
647
648
20.2k
  switch (buf->lookahead)
649
20.2k
  {
650
680
  case CSS_HASH:
651
712
  case CSS_STRING:
652
712
  case CSS_URI:
653
9.05k
  case CSS_NUMBER:
654
20.1k
  case CSS_LENGTH:
655
20.1k
  case CSS_PERCENT:
656
20.1k
    v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
657
20.1k
    next(buf);
658
20.1k
    white(buf);
659
20.1k
    return v;
660
20.2k
  }
661
662
97
  fz_css_error(buf, "expected value");
663
20.2k
}
664
665
static fz_css_value *parse_expr(struct lexbuf *buf)
666
36.9k
{
667
36.9k
  fz_css_value *head, *tail;
668
669
36.9k
  head = tail = parse_term(buf);
670
671
45.3k
  while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' &&
672
45.3k
      buf->lookahead != ')' && buf->lookahead != EOF)
673
8.44k
  {
674
8.44k
    if (accept(buf, ','))
675
257
    {
676
257
      white(buf);
677
257
      if (buf->lookahead != ';')
678
257
      {
679
257
        tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, ',', ",");
680
257
        tail = tail->next = parse_term(buf);
681
257
      }
682
257
    }
683
8.18k
    else if (accept(buf, '/'))
684
51
    {
685
51
      white(buf);
686
51
      tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, '/', "/");
687
51
      tail = tail->next = parse_term(buf);
688
51
    }
689
8.13k
    else
690
8.13k
    {
691
8.13k
      tail = tail->next = parse_term(buf);
692
8.13k
    }
693
8.44k
  }
694
695
36.9k
  return head;
696
36.9k
}
697
698
static fz_css_property *parse_declaration(struct lexbuf *buf)
699
37.0k
{
700
37.0k
  fz_css_property *p;
701
702
37.0k
  if (buf->lookahead != CSS_KEYWORD)
703
6
    fz_css_error(buf, "expected keyword in property");
704
37.0k
  p = fz_new_css_property(buf->ctx, buf->pool, buf->string, NULL, 0);
705
37.0k
  next(buf);
706
707
37.0k
  white(buf);
708
37.0k
  expect(buf, ':');
709
37.0k
  white(buf);
710
711
37.0k
  if (p)
712
36.4k
    p->value = parse_expr(buf);
713
607
  else
714
607
    (void) parse_expr(buf);
715
716
  /* !important */
717
37.0k
  if (accept(buf, '!'))
718
0
  {
719
0
    white(buf);
720
0
    if (buf->lookahead != CSS_KEYWORD || strcmp(buf->string, "important"))
721
0
      fz_css_error(buf, "expected keyword 'important' after '!'");
722
0
    if (p)
723
0
      p->important = 1;
724
0
    next(buf);
725
0
    white(buf);
726
0
  }
727
728
37.0k
  return p;
729
37.0k
}
730
731
static fz_css_property *parse_declaration_list(struct lexbuf *buf)
732
17.4k
{
733
17.4k
  fz_css_property *head, *tail = NULL, *p;
734
735
17.4k
  white(buf);
736
737
17.4k
  if (buf->lookahead == '}' || buf->lookahead == EOF)
738
2
    return NULL;
739
740
17.3k
  p = parse_declaration(buf);
741
17.3k
  if (p)
742
17.3k
    tail = p;
743
17.3k
  head = tail;
744
745
37.3k
  while (accept(buf, ';'))
746
19.9k
  {
747
19.9k
    white(buf);
748
749
19.9k
    if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF)
750
19.6k
    {
751
19.6k
      p = parse_declaration(buf);
752
19.6k
      if (p)
753
18.9k
      {
754
18.9k
        if (!head)
755
4
          head = tail = p;
756
18.9k
        else
757
18.9k
          tail = tail->next = p;
758
18.9k
      }
759
19.6k
    }
760
19.9k
  }
761
762
17.3k
  return head;
763
17.4k
}
764
765
static char *parse_attrib_value(struct lexbuf *buf)
766
0
{
767
0
  char *s;
768
769
0
  if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING)
770
0
  {
771
0
    s = fz_pool_strdup(buf->ctx, buf->pool, buf->string);
772
0
    next(buf);
773
0
    white(buf);
774
0
    return s;
775
0
  }
776
777
0
  fz_css_error(buf, "expected attribute value");
778
0
}
779
780
static char *css_lex_pseudo_expr(struct lexbuf *buf)
781
0
{
782
0
  buf->string_len = 0;
783
0
  for (;;)
784
0
  {
785
0
    if (buf->c == 0)
786
0
      return NULL;
787
0
    if (css_lex_accept(buf, ')'))
788
0
    {
789
0
      css_push_zero(buf);
790
0
      return fz_pool_strdup(buf->ctx, buf->pool, buf->string);
791
0
    }
792
0
    css_push_char(buf, buf->c);
793
0
    css_lex_next(buf);
794
0
  }
795
0
}
796
797
static fz_css_condition *parse_condition(struct lexbuf *buf)
798
0
{
799
0
  fz_css_condition *c;
800
801
0
  if (accept(buf, ':'))
802
0
  {
803
0
    (void)accept(buf, ':'); /* swallow css3 :: syntax and pretend it's a normal pseudo-class */
804
0
    if (buf->lookahead != CSS_KEYWORD)
805
0
      fz_css_error(buf, "expected keyword after ':'");
806
0
    c = fz_new_css_condition(buf->ctx, buf->pool, ':', buf->string, NULL);
807
0
    next(buf);
808
809
    // TODO -- parse :is, :not, :where, :has logical combinations with selector list as argument
810
811
0
    if (buf->lookahead == '(')
812
0
    {
813
0
      c->val = css_lex_pseudo_expr(buf);
814
0
      next(buf);
815
0
    }
816
0
    return c;
817
0
  }
818
819
0
  if (accept(buf, '.'))
820
0
  {
821
0
    if (buf->lookahead != CSS_KEYWORD)
822
0
      fz_css_error(buf, "expected keyword after '.'");
823
0
    c = fz_new_css_condition(buf->ctx, buf->pool, '.', "class", buf->string);
824
0
    next(buf);
825
0
    return c;
826
0
  }
827
828
0
  if (accept(buf, '['))
829
0
  {
830
0
    white(buf);
831
832
0
    if (buf->lookahead != CSS_KEYWORD)
833
0
      fz_css_error(buf, "expected keyword after '['");
834
0
    c = fz_new_css_condition(buf->ctx, buf->pool, '[', buf->string, NULL);
835
0
    next(buf);
836
837
0
    white(buf);
838
839
0
    if (accept(buf, '='))
840
0
    {
841
0
      c->type = '=';
842
0
      c->val = parse_attrib_value(buf);
843
0
    }
844
0
    else if (accept(buf, '|'))
845
0
    {
846
0
      expect(buf, '=');
847
0
      c->type = '|';
848
0
      c->val = parse_attrib_value(buf);
849
0
    }
850
0
    else if (accept(buf, '~'))
851
0
    {
852
0
      expect(buf, '=');
853
0
      c->type = '~';
854
0
      c->val = parse_attrib_value(buf);
855
0
    }
856
857
0
    expect(buf, ']');
858
859
0
    return c;
860
0
  }
861
862
0
  if (buf->lookahead == CSS_HASH)
863
0
  {
864
0
    c = fz_new_css_condition(buf->ctx, buf->pool, '#', "id", buf->string);
865
0
    next(buf);
866
0
    return c;
867
0
  }
868
869
0
  fz_css_error(buf, "expected condition");
870
0
}
871
872
static fz_css_condition *parse_condition_list(struct lexbuf *buf)
873
0
{
874
0
  fz_css_condition *head, *tail;
875
876
0
  head = tail = parse_condition(buf);
877
0
  while (iscond(buf->lookahead))
878
0
  {
879
0
    tail = tail->next = parse_condition(buf);
880
0
  }
881
0
  return head;
882
0
}
883
884
static fz_css_selector *parse_simple_selector(struct lexbuf *buf)
885
20.5k
{
886
20.5k
  fz_css_selector *s;
887
888
20.5k
  if (accept(buf, '*'))
889
0
  {
890
0
    s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
891
0
    if (iscond(buf->lookahead))
892
0
      s->cond = parse_condition_list(buf);
893
0
    return s;
894
0
  }
895
20.5k
  else if (buf->lookahead == CSS_KEYWORD)
896
20.5k
  {
897
20.5k
    s = fz_new_css_selector(buf->ctx, buf->pool, buf->string);
898
20.5k
    next(buf);
899
20.5k
    if (iscond(buf->lookahead))
900
0
      s->cond = parse_condition_list(buf);
901
20.5k
    return s;
902
20.5k
  }
903
1
  else if (iscond(buf->lookahead))
904
0
  {
905
0
    s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
906
0
    s->cond = parse_condition_list(buf);
907
0
    return s;
908
0
  }
909
910
1
  fz_css_error(buf, "expected selector");
911
20.5k
}
912
913
/*
 * Parse the right-hand simple selector of a combinator and return a new
 * node joining it to the already parsed left-hand side 'a' with
 * combinator 'c'.
 */
static fz_css_selector *parse_combinator(struct lexbuf *buf, int c, fz_css_selector *a)
{
  fz_css_selector *rhs;
  fz_css_selector *node;

  white(buf);
  rhs = parse_simple_selector(buf);

  node = fz_new_css_selector(buf->ctx, buf->pool, NULL);
  node->left = a;
  node->right = rhs;
  node->combine = c;
  return node;
}
924
925
static fz_css_selector *parse_selector(struct lexbuf *buf)
926
19.6k
{
927
19.6k
  fz_css_selector *sel = parse_simple_selector(buf);
928
19.6k
  for (;;)
929
20.5k
  {
930
20.5k
    if (accept(buf, ' '))
931
880
    {
932
880
      white(buf);
933
880
      if (accept(buf, '+'))
934
0
        sel = parse_combinator(buf, '+', sel);
935
880
      else if (accept(buf, '>'))
936
0
        sel = parse_combinator(buf, '>', sel);
937
880
      else if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF)
938
880
        sel = parse_combinator(buf, ' ', sel);
939
0
      else
940
0
        break;
941
880
    }
942
19.6k
    else if (accept(buf, '+'))
943
0
      sel = parse_combinator(buf, '+', sel);
944
19.6k
    else if (accept(buf, '>'))
945
0
      sel = parse_combinator(buf, '>', sel);
946
19.6k
    else
947
19.6k
      break;
948
20.5k
  }
949
19.6k
  return sel;
950
19.6k
}
951
952
static fz_css_selector *parse_selector_list(struct lexbuf *buf)
953
16.4k
{
954
16.4k
  fz_css_selector *head, *tail;
955
956
16.4k
  head = tail = parse_selector(buf);
957
19.6k
  while (accept(buf, ','))
958
3.22k
  {
959
3.22k
    white(buf);
960
3.22k
    tail = tail->next = parse_selector(buf);
961
3.22k
  }
962
16.4k
  return head;
963
16.4k
}
964
965
static void parse_recover(struct lexbuf *buf)
966
129
{
967
129
  fz_rethrow_unless(buf->ctx, FZ_ERROR_SYNTAX);
968
129
  fz_report_error(buf->ctx);
969
1.41k
  while (buf->lookahead != EOF)
970
1.39k
  {
971
1.39k
    if (accept(buf, '}') && buf->nest <= 0)
972
114
    {
973
114
      buf->nest = 0;
974
114
      white(buf);
975
114
      break;
976
114
    }
977
1.28k
    next(buf);
978
1.28k
  }
979
129
}
980
981
static fz_css_rule *parse_ruleset(struct lexbuf *buf)
982
16.4k
{
983
16.4k
  fz_css_selector *s = NULL;
984
16.4k
  fz_css_property *p = NULL;
985
986
32.8k
  fz_try(buf->ctx)
987
32.8k
  {
988
16.4k
    s = parse_selector_list(buf);
989
16.4k
    expect(buf, '{');
990
16.4k
    p = parse_declaration_list(buf);
991
16.4k
    expect(buf, '}');
992
16.4k
    white(buf);
993
16.4k
  }
994
32.8k
  fz_catch(buf->ctx)
995
129
  {
996
129
    parse_recover(buf);
997
129
    return NULL;
998
129
  }
999
1000
16.2k
  return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1001
16.4k
}
1002
1003
static fz_css_rule *parse_at_page(struct lexbuf *buf)
1004
584
{
1005
584
  fz_css_selector *s = NULL;
1006
584
  fz_css_property *p = NULL;
1007
1008
1.16k
  fz_try(buf->ctx)
1009
1.16k
  {
1010
584
    white(buf);
1011
584
    if (accept(buf, ':'))
1012
0
    {
1013
0
      expect(buf, CSS_KEYWORD);
1014
0
      white(buf);
1015
0
    }
1016
584
    expect(buf, '{');
1017
584
    p = parse_declaration_list(buf);
1018
584
    expect(buf, '}');
1019
584
    white(buf);
1020
584
  }
1021
1.16k
  fz_catch(buf->ctx)
1022
0
  {
1023
0
    parse_recover(buf);
1024
0
    return NULL;
1025
0
  }
1026
1027
584
  s = fz_new_css_selector(buf->ctx, buf->pool, "@page");
1028
584
  return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1029
584
}
1030
1031
static fz_css_rule *parse_at_font_face(struct lexbuf *buf)
1032
0
{
1033
0
  fz_css_selector *s = NULL;
1034
0
  fz_css_property *p = NULL;
1035
1036
0
  fz_try(buf->ctx)
1037
0
  {
1038
0
    white(buf);
1039
0
    expect(buf, '{');
1040
0
    p = parse_declaration_list(buf);
1041
0
    expect(buf, '}');
1042
0
    white(buf);
1043
0
  }
1044
0
  fz_catch(buf->ctx)
1045
0
  {
1046
0
    parse_recover(buf);
1047
0
    return NULL;
1048
0
  }
1049
1050
0
  s = fz_new_css_selector(buf->ctx, buf->pool, "@font-face");
1051
0
  return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1052
0
}
1053
1054
static void parse_at_rule(struct lexbuf *buf)
1055
0
{
1056
0
  expect(buf, CSS_KEYWORD);
1057
1058
  /* skip until '{' or ';' */
1059
0
  while (buf->lookahead != EOF)
1060
0
  {
1061
0
    if (accept(buf, ';'))
1062
0
    {
1063
0
      white(buf);
1064
0
      return;
1065
0
    }
1066
0
    if (accept(buf, '{'))
1067
0
    {
1068
0
      int depth = 1;
1069
0
      while (buf->lookahead != EOF && depth > 0)
1070
0
      {
1071
0
        if (accept(buf, '{'))
1072
0
          ++depth;
1073
0
        else if (accept(buf, '}'))
1074
0
          --depth;
1075
0
        else
1076
0
          next(buf);
1077
0
      }
1078
0
      white(buf);
1079
0
      return;
1080
0
    }
1081
0
    next(buf);
1082
0
  }
1083
0
}
1084
1085
/*
 * Parse a whole stylesheet and append its rules to 'chain'.
 *
 * "@page" and "@font-face" rules and ordinary rule sets become rules;
 * other at-rules are skipped. Rules whose parser returns NULL are left
 * out. Returns the head of the combined list: 'chain' when it was
 * non-NULL, otherwise the first rule parsed, or NULL if there were none.
 */
static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain)
{
  fz_css_rule *head = chain;
  fz_css_rule **link = &head; /* slot where the next rule is attached */
  fz_css_rule *x;

  /* start appending after the last rule already in the chain */
  while (*link)
    link = &(*link)->next;

  white(buf);

  while (buf->lookahead != EOF)
  {
    x = NULL;

    if (accept(buf, '@'))
    {
      if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "page"))
      {
        next(buf);
        x = parse_at_page(buf);
      }
      else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "font-face"))
      {
        next(buf);
        x = parse_at_font_face(buf);
      }
      else
      {
        parse_at_rule(buf);
      }
    }
    else
    {
      x = parse_ruleset(buf);
    }

    if (x)
    {
      *link = x;
      link = &x->next;
    }

    white(buf);
  }

  return head;
}
1146
1147
const char *fz_css_property_name(int key)
1148
0
{
1149
0
  const char *name = "unknown";
1150
0
  size_t i;
1151
0
  for (i = 0; i < nelem(css_property_list); ++i)
1152
0
    if (*css_property_list[i].name && css_property_list[i].key == key)
1153
0
      name = css_property_list[i].name;
1154
0
  return name;
1155
0
}
1156
1157
/*
 * Parse a bare declaration list from 'source' and return the properties,
 * allocated from 'pool'. Errors are reported against the file name
 * "<inline>".
 */
fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source)
{
  struct lexbuf lex;

  css_lex_init(ctx, &lex, pool, source, "<inline>");
  next(&lex); /* prime the lookahead token */
  return parse_declaration_list(&lex);
}
1164
1165
/*
 * Parse the stylesheet text 'source' and append its rules to 'css'.
 * 'file' is the name used in syntax error messages. New nodes are
 * allocated from the stylesheet's own pool.
 */
void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file)
{
  struct lexbuf lex;

  css_lex_init(ctx, &lex, css->pool, source, file);
  next(&lex); /* prime the lookahead token */
  css->rule = parse_stylesheet(&lex, css->rule);
}