Coverage Report

Created: 2025-11-07 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/html/css-parse.c
Line
Count
Source
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "html-imp.h"
25
26
#include <string.h>
27
28
#include "css-properties.h"
29
30
struct lexbuf
31
{
32
  fz_context *ctx;
33
  fz_pool *pool;
34
  const unsigned char *start;
35
  const unsigned char *s;
36
  const char *file;
37
  int line;
38
  int nest;
39
  int lookahead;
40
  int c;
41
  int string_len;
42
  char string[1024];
43
};
44
45
static fz_css_value *parse_expr(struct lexbuf *buf);
46
static fz_css_selector *parse_selector(struct lexbuf *buf);
47
48
FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg)
49
0
{
50
0
#define PRE_POST_SIZE 30
51
0
  unsigned char text[PRE_POST_SIZE * 2 + 4];
52
0
  unsigned char *d = text;
53
0
  const unsigned char *s = buf->start;
54
0
  int n;
55
56
  /* We want to make a helpful fragment for the error message.
57
   * We want err_pos to be the point at which we just tripped
58
   * the error. err_pos needs to be at least 1 byte behind
59
   * our read pointer, as we've read that char. */
60
0
  const unsigned char *err_pos = buf->s;
61
0
  n = 1;
62
63
  /* And if we're using lookahead, it's further behind. */
64
0
  if (buf->lookahead >= CSS_KEYWORD)
65
0
    n += buf->string_len;
66
0
  else if (buf->lookahead != EOF)
67
0
    n += 1;
68
69
  /* But it can't be before the start of the buffer */
70
0
  n = fz_mini(n, err_pos - buf->start);
71
0
  err_pos -= n;
72
73
  /* We're going to try to output:
74
   * <section prior to the error> ">" <the char that tripped> "<" <section after the error>
75
   */
76
  /* Is the section prior to the error too long? If so, truncate it with an ellipsis. */
77
0
  n = sizeof(text)-1;
78
0
  if (err_pos - s > n-PRE_POST_SIZE - 3)
79
0
  {
80
0
    *d++ = '.';
81
0
    *d++ = '.';
82
0
    *d++ = '.';
83
0
    n -= 3;
84
0
    s = err_pos - (n-PRE_POST_SIZE - 3);
85
0
  }
86
87
  /* Copy the prefix (if there is one) */
88
0
  if (err_pos > s)
89
0
  {
90
0
    n = err_pos - s;
91
0
    while (n)
92
0
    {
93
0
      unsigned char c = *s++;
94
0
      *d++ = (c < 32 || c > 127) ? ' ' : c;
95
0
      n--;
96
0
    }
97
0
  }
98
99
  /* Marker, char, end marker */
100
0
  *d++ = '>', n--;
101
0
  if (*err_pos)
102
0
    *d++ = *err_pos++, n--;
103
0
  *d++ = '<', n--;
104
105
  /* Postfix */
106
0
  n = (int)strlen((const char *)err_pos);
107
0
  if (n <= PRE_POST_SIZE)
108
0
  {
109
0
    while (n > 0)
110
0
    {
111
0
      unsigned char c = *err_pos++;
112
0
      *d++ =  (c < 32 || c > 127) ? ' ' : c;
113
0
      n--;
114
0
    }
115
0
  }
116
0
  else
117
0
  {
118
0
    for (n = PRE_POST_SIZE-3; n > 0; n--)
119
0
    {
120
0
      unsigned char c = *err_pos++;
121
0
      *d++ =  (c < 32 || c > 127) ? ' ' : c;
122
0
    }
123
124
0
    *d++ = '.';
125
0
    *d++ = '.';
126
0
    *d++ = '.';
127
0
  }
128
0
  *d = 0;
129
130
0
  fz_throw(buf->ctx, FZ_ERROR_SYNTAX, "css syntax error: %s (%s:%d) (%s)", msg, buf->file, buf->line, text);
131
0
}
132
133
/*
 * Create an empty stylesheet backed by a freshly created pool.
 * If allocating the stylesheet throws, the pool is dropped and the error
 * is rethrown.
 */
fz_css *fz_new_css(fz_context *ctx)
{
  fz_css *sheet = NULL;
  fz_pool *pool = fz_new_pool(ctx);

  fz_try(ctx)
  {
    sheet = fz_pool_alloc(ctx, pool, sizeof *sheet);
    sheet->rule = NULL;
    sheet->pool = pool;
  }
  fz_catch(ctx)
  {
    fz_drop_pool(ctx, pool);
    fz_rethrow(ctx);
  }

  return sheet;
}
152
153
/* Drop the pool held by the stylesheet; a NULL css is ignored. */
void fz_drop_css(fz_context *ctx, fz_css *css)
{
  if (css == NULL)
    return;
  fz_drop_pool(ctx, css->pool);
}
158
159
/* Allocate an unlinked rule in the pool that pairs a selector list with a declaration list. */
static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_pool *pool, fz_css_selector *selector, fz_css_property *declaration)
{
  fz_css_rule *r = fz_pool_alloc(ctx, pool, sizeof *r);

  r->next = NULL;
  r->declaration = declaration;
  r->selector = selector;
  return r;
}
167
168
/*
 * Allocate a selector node in the pool with no combinator, no conditions
 * and no links. The name is duplicated into the pool; NULL stays NULL.
 */
static fz_css_selector *fz_new_css_selector(fz_context *ctx, fz_pool *pool, const char *name)
{
  fz_css_selector *node = fz_pool_alloc(ctx, pool, sizeof *node);

  if (name)
    node->name = fz_pool_strdup(ctx, pool, name);
  else
    node->name = NULL;
  node->combine = 0;
  node->cond = NULL;
  node->left = node->right = NULL;
  node->next = NULL;
  return node;
}
179
180
/*
 * Allocate an unlinked condition node of the given type in the pool.
 * key and val are each duplicated into the pool when non-NULL.
 */
static fz_css_condition *fz_new_css_condition(fz_context *ctx, fz_pool *pool, int type, const char *key, const char *val)
{
  fz_css_condition *node = fz_pool_alloc(ctx, pool, sizeof *node);

  node->type = type;
  node->key = NULL;
  if (key)
    node->key = fz_pool_strdup(ctx, pool, key);
  node->val = NULL;
  if (val)
    node->val = fz_pool_strdup(ctx, pool, val);
  node->next = NULL;
  return node;
}
189
190
/*
 * Allocate a property node for a known property name.
 * Returns NULL, without allocating, when css_property_lookup does not
 * recognise the name. The node stores the looked-up key, not the string.
 */
static fz_css_property *fz_new_css_property(fz_context *ctx, fz_pool *pool, const char *name, fz_css_value *value, int spec)
{
  fz_css_property *node;
  struct css_property_info *known = css_property_lookup(name, strlen(name));

  if (!known)
    return NULL;

  node = fz_pool_alloc(ctx, pool, sizeof *node);
  node->name = known->key;
  node->value = value;
  node->spec = spec;
  node->important = 0;
  node->next = NULL;
  return node;
}
205
206
/* Allocate a value node of the given type with no data, no args and no successor. */
static fz_css_value *fz_new_css_value_x(fz_context *ctx, fz_pool *pool, int type)
{
  fz_css_value *node = fz_pool_alloc(ctx, pool, sizeof *node);

  node->next = NULL;
  node->args = NULL;
  node->data = NULL;
  node->type = type;
  return node;
}
215
216
/* Allocate a value node of the given type whose data is a pool-owned copy of the given string. */
static fz_css_value *fz_new_css_value(fz_context *ctx, fz_pool *pool, int type, const char *data)
{
  fz_css_value *node = fz_pool_alloc(ctx, pool, sizeof *node);

  node->type = type;
  node->args = NULL;
  node->next = NULL;
  node->data = fz_pool_strdup(ctx, pool, data);
  return node;
}
225
226
static void css_lex_next(struct lexbuf *buf)
227
0
{
228
0
  if (buf->c == 0)
229
0
    return;
230
0
  buf->s += fz_chartorune(&buf->c, (const char *)buf->s);
231
0
  if (buf->c == '{')
232
0
    ++buf->nest;
233
0
  else if (buf->c == '}')
234
0
    --buf->nest;
235
0
  else if (buf->c == '\n')
236
0
    ++buf->line;
237
0
  buf->lookahead = EOF;
238
0
}
239
240
/*
 * Prepare buf for lexing the text s, reporting errors against file,
 * and load the first character into buf->c.
 */
static void css_lex_init(fz_context *ctx, struct lexbuf *buf, fz_pool *pool, const char *s, const char *file)
{
  const unsigned char *text = (const unsigned char *)s;

  buf->ctx = ctx;
  buf->pool = pool;
  buf->file = file;
  buf->start = text;
  buf->s = text;
  buf->line = 1;
  buf->nest = 0;
  buf->string_len = 0;
  buf->lookahead = EOF;

  /* Any non-zero value works here: css_lex_next refuses to read once c is 0. */
  buf->c = -1;
  css_lex_next(buf);
}
255
256
/* Return 1 for space, tab, carriage return, newline or form feed; 0 otherwise. */
static inline int iswhite(int c)
{
  switch (c)
  {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
  case '\f':
    return 1;
  default:
    return 0;
  }
}
260
261
static int isnmstart(int c)
262
0
{
263
0
  return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
264
0
    (c >= 128 && c <= UCS_MAX);
265
0
}
266
267
static int isnmchar(int c)
268
0
{
269
0
  return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
270
0
    (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= UCS_MAX);
271
0
}
272
273
static void css_push_char(struct lexbuf *buf, int c)
274
0
{
275
0
  char out[4];
276
0
  int n = fz_runetochar(out, c);
277
0
  if (buf->string_len + n >= (int)nelem(buf->string))
278
0
    fz_css_error(buf, "token too long");
279
0
  memcpy(buf->string + buf->string_len, out, n);
280
0
  buf->string_len += n;
281
0
}
282
283
static void css_push_zero(struct lexbuf *buf)
284
0
{
285
0
  if (buf->string_len + 1 >= (int)nelem(buf->string))
286
0
    fz_css_error(buf, "token too long");
287
0
  buf->string[buf->string_len] = 0;
288
0
  buf->string_len += 1;
289
0
}
290
291
static int css_lex_accept(struct lexbuf *buf, int t)
292
0
{
293
0
  if (buf->c == t)
294
0
  {
295
0
    css_lex_next(buf);
296
0
    return 1;
297
0
  }
298
0
  return 0;
299
0
}
300
301
/* Consume the character t, or raise "unexpected character" if the current character is something else. */
static void css_lex_expect(struct lexbuf *buf, int t)
{
  if (css_lex_accept(buf, t))
    return;
  fz_css_error(buf, "unexpected character");
}
306
307
static int css_lex_number(struct lexbuf *buf)
308
0
{
309
0
  while (buf->c >= '0' && buf->c <= '9')
310
0
  {
311
0
    css_push_char(buf, buf->c);
312
0
    css_lex_next(buf);
313
0
  }
314
315
0
  if (css_lex_accept(buf, '.'))
316
0
  {
317
0
    css_push_char(buf, '.');
318
0
    while (buf->c >= '0' && buf->c <= '9')
319
0
    {
320
0
      css_push_char(buf, buf->c);
321
0
      css_lex_next(buf);
322
0
    }
323
0
  }
324
325
0
  if (css_lex_accept(buf, '%'))
326
0
  {
327
0
    css_push_char(buf, '%');
328
0
    css_push_zero(buf);
329
0
    return CSS_PERCENT;
330
0
  }
331
332
0
  if (isnmstart(buf->c))
333
0
  {
334
0
    css_push_char(buf, buf->c);
335
0
    css_lex_next(buf);
336
0
    while (isnmchar(buf->c))
337
0
    {
338
0
      css_push_char(buf, buf->c);
339
0
      css_lex_next(buf);
340
0
    }
341
0
    css_push_zero(buf);
342
0
    return CSS_LENGTH;
343
0
  }
344
345
0
  css_push_zero(buf);
346
0
  return CSS_NUMBER;
347
0
}
348
349
static int css_lex_keyword(struct lexbuf *buf)
350
0
{
351
0
  while (isnmchar(buf->c))
352
0
  {
353
0
    css_push_char(buf, buf->c);
354
0
    css_lex_next(buf);
355
0
  }
356
0
  css_push_zero(buf);
357
0
  return CSS_KEYWORD;
358
0
}
359
360
static int css_lex_hash(struct lexbuf *buf)
361
0
{
362
0
  while (isnmchar(buf->c))
363
0
  {
364
0
    css_push_char(buf, buf->c);
365
0
    css_lex_next(buf);
366
0
  }
367
0
  css_push_zero(buf);
368
0
  return CSS_HASH;
369
0
}
370
371
static int css_lex_string(struct lexbuf *buf, int q)
372
0
{
373
0
  while (buf->c && buf->c != q)
374
0
  {
375
0
    if (css_lex_accept(buf, '\\'))
376
0
    {
377
0
      if (css_lex_accept(buf, 'n'))
378
0
        css_push_char(buf, '\n');
379
0
      else if (css_lex_accept(buf, 'r'))
380
0
        css_push_char(buf, '\r');
381
0
      else if (css_lex_accept(buf, 'f'))
382
0
        css_push_char(buf, '\f');
383
0
      else if (css_lex_accept(buf, '\f'))
384
0
        /* line continuation */ ;
385
0
      else if (css_lex_accept(buf, '\n'))
386
0
        /* line continuation */ ;
387
0
      else if (css_lex_accept(buf, '\r'))
388
0
        css_lex_accept(buf, '\n');
389
0
      else
390
0
      {
391
0
        css_push_char(buf, buf->c);
392
0
        css_lex_next(buf);
393
0
      }
394
0
    }
395
0
    else
396
0
    {
397
0
      css_push_char(buf, buf->c);
398
0
      css_lex_next(buf);
399
0
    }
400
0
  }
401
0
  css_lex_expect(buf, q);
402
0
  css_push_zero(buf);
403
0
  return CSS_STRING;
404
0
}
405
406
static void css_lex_uri(struct lexbuf *buf)
407
0
{
408
0
  while (buf->c && buf->c != ')' && !iswhite(buf->c))
409
0
  {
410
0
    if (css_lex_accept(buf, '\\'))
411
0
    {
412
0
      if (css_lex_accept(buf, 'n'))
413
0
        css_push_char(buf, '\n');
414
0
      else if (css_lex_accept(buf, 'r'))
415
0
        css_push_char(buf, '\r');
416
0
      else if (css_lex_accept(buf, 'f'))
417
0
        css_push_char(buf, '\f');
418
0
      else
419
0
      {
420
0
        css_push_char(buf, buf->c);
421
0
        css_lex_next(buf);
422
0
      }
423
0
    }
424
0
    else if (buf->c == '!' || buf->c == '#' || buf->c == '$' || buf->c == '%' || buf->c == '&' ||
425
0
        (buf->c >= '*' && buf->c <= '[') ||
426
0
        (buf->c >= ']' && buf->c <= '~') ||
427
0
        buf->c > 159)
428
0
    {
429
0
      css_push_char(buf, buf->c);
430
0
      css_lex_next(buf);
431
0
    }
432
0
    else
433
0
      fz_css_error(buf, "unexpected character in url");
434
0
  }
435
0
  css_push_zero(buf);
436
0
}
437
438
static int css_lex(struct lexbuf *buf)
439
0
{
440
0
  int t;
441
442
  // TODO: keyword escape sequences
443
444
0
  buf->string_len = 0;
445
446
0
restart:
447
0
  if (buf->c == 0)
448
0
    return EOF;
449
450
0
  if (iswhite(buf->c))
451
0
  {
452
0
    while (iswhite(buf->c))
453
0
      css_lex_next(buf);
454
0
    return ' ';
455
0
  }
456
457
0
  if (css_lex_accept(buf, '/'))
458
0
  {
459
0
    if (css_lex_accept(buf, '*'))
460
0
    {
461
0
      while (buf->c)
462
0
      {
463
0
        if (css_lex_accept(buf, '*'))
464
0
        {
465
0
          while (buf->c == '*')
466
0
            css_lex_next(buf);
467
0
          if (css_lex_accept(buf, '/'))
468
0
            goto restart;
469
0
        }
470
0
        css_lex_next(buf);
471
0
      }
472
0
      fz_css_error(buf, "unterminated comment");
473
0
    }
474
0
    return '/';
475
0
  }
476
477
0
  if (css_lex_accept(buf, '<'))
478
0
  {
479
0
    if (css_lex_accept(buf, '!'))
480
0
    {
481
0
      css_lex_expect(buf, '-');
482
0
      css_lex_expect(buf, '-');
483
0
      goto restart; /* ignore CDO */
484
0
    }
485
0
    return '<';
486
0
  }
487
488
0
  if (css_lex_accept(buf, '-'))
489
0
  {
490
0
    if (css_lex_accept(buf, '-'))
491
0
    {
492
0
      if (css_lex_accept(buf, '>'))
493
0
        goto restart; /* ignore CDC */
494
0
    }
495
0
    if (isnmstart(buf->c))
496
0
    {
497
0
      css_push_char(buf, '-');
498
0
      return css_lex_keyword(buf);
499
0
    }
500
0
    return '-';
501
0
  }
502
503
0
  if (css_lex_accept(buf, '.'))
504
0
  {
505
0
    if (buf->c >= '0' && buf->c <= '9')
506
0
    {
507
0
      css_push_char(buf, '.');
508
0
      return css_lex_number(buf);
509
0
    }
510
0
    return '.';
511
0
  }
512
513
0
  if (css_lex_accept(buf, '#'))
514
0
  {
515
0
    if (isnmchar(buf->c))
516
0
      return css_lex_hash(buf);
517
0
    return '#';
518
0
  }
519
520
0
  if (css_lex_accept(buf, '"'))
521
0
    return css_lex_string(buf, '"');
522
0
  if (css_lex_accept(buf, '\''))
523
0
    return css_lex_string(buf, '\'');
524
525
0
  if (buf->c >= '0' && buf->c <= '9')
526
0
    return css_lex_number(buf);
527
528
0
  if (css_lex_accept(buf, 'u'))
529
0
  {
530
0
    if (css_lex_accept(buf, 'r'))
531
0
    {
532
0
      if (css_lex_accept(buf, 'l'))
533
0
      {
534
0
        if (css_lex_accept(buf, '('))
535
0
        {
536
0
          while (iswhite(buf->c))
537
0
            css_lex_next(buf);
538
0
          if (css_lex_accept(buf, '"'))
539
0
            css_lex_string(buf, '"');
540
0
          else if (css_lex_accept(buf, '\''))
541
0
            css_lex_string(buf, '\'');
542
0
          else
543
0
            css_lex_uri(buf);
544
0
          while (iswhite(buf->c))
545
0
            css_lex_next(buf);
546
0
          css_lex_expect(buf, ')');
547
0
          return CSS_URI;
548
0
        }
549
0
        css_push_char(buf, 'u');
550
0
        css_push_char(buf, 'r');
551
0
        css_push_char(buf, 'l');
552
0
        return css_lex_keyword(buf);
553
0
      }
554
0
      css_push_char(buf, 'u');
555
0
      css_push_char(buf, 'r');
556
0
      return css_lex_keyword(buf);
557
0
    }
558
0
    css_push_char(buf, 'u');
559
0
    return css_lex_keyword(buf);
560
0
  }
561
562
0
  if (isnmstart(buf->c))
563
0
  {
564
0
    css_push_char(buf, buf->c);
565
0
    css_lex_next(buf);
566
0
    return css_lex_keyword(buf);
567
0
  }
568
569
0
  t = buf->c;
570
0
  css_lex_next(buf);
571
0
  return t;
572
0
}
573
574
static void next(struct lexbuf *buf)
575
0
{
576
0
  buf->lookahead = css_lex(buf);
577
0
}
578
579
static int accept(struct lexbuf *buf, int t)
580
0
{
581
0
  if (buf->lookahead == t)
582
0
  {
583
0
    next(buf);
584
0
    return 1;
585
0
  }
586
0
  return 0;
587
0
}
588
589
/* Consume the token t, or raise "unexpected token" if the lookahead is something else. */
static void expect(struct lexbuf *buf, int t)
{
  if (!accept(buf, t))
    fz_css_error(buf, "unexpected token");
}
595
596
static void white(struct lexbuf *buf)
597
0
{
598
0
  while (buf->lookahead == ' ')
599
0
    next(buf);
600
0
}
601
602
static int iscond(int t)
603
0
{
604
0
  return t == ':' || t == '.' || t == '[' || t == CSS_HASH;
605
0
}
606
607
static fz_css_value *parse_term(struct lexbuf *buf)
608
0
{
609
0
  fz_css_value *v;
610
611
0
  if (buf->lookahead == '+' || buf->lookahead == '-')
612
0
  {
613
0
    float sign = buf->lookahead == '-' ? -1 : 1;
614
0
    next(buf);
615
0
    if (buf->lookahead != CSS_NUMBER && buf->lookahead != CSS_LENGTH && buf->lookahead != CSS_PERCENT)
616
0
      fz_css_error(buf, "expected number");
617
0
    if (sign < 0)
618
0
    {
619
0
      v = fz_new_css_value_x(buf->ctx, buf->pool, buf->lookahead);
620
0
      v->data = fz_pool_alloc(buf->ctx, buf->pool, strlen(buf->string) + 2);
621
0
      v->data[0] = '-';
622
0
      strcpy(v->data + 1, buf->string);
623
0
    }
624
0
    else
625
0
    {
626
0
      v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
627
0
    }
628
0
    next(buf);
629
0
    white(buf);
630
0
    return v;
631
0
  }
632
633
0
  if (buf->lookahead == CSS_KEYWORD)
634
0
  {
635
0
    v = fz_new_css_value(buf->ctx, buf->pool, CSS_KEYWORD, buf->string);
636
0
    next(buf);
637
0
    if (accept(buf, '('))
638
0
    {
639
0
      white(buf);
640
0
      v->type = '(';
641
0
      v->args = parse_expr(buf);
642
0
      expect(buf, ')');
643
0
    }
644
0
    white(buf);
645
0
    return v;
646
0
  }
647
648
0
  switch (buf->lookahead)
649
0
  {
650
0
  case CSS_HASH:
651
0
  case CSS_STRING:
652
0
  case CSS_URI:
653
0
  case CSS_NUMBER:
654
0
  case CSS_LENGTH:
655
0
  case CSS_PERCENT:
656
0
    v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
657
0
    next(buf);
658
0
    white(buf);
659
0
    return v;
660
0
  }
661
662
0
  fz_css_error(buf, "expected value");
663
0
}
664
665
static fz_css_value *parse_expr(struct lexbuf *buf)
666
0
{
667
0
  fz_css_value *head, *tail;
668
669
0
  head = tail = parse_term(buf);
670
671
0
  while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' &&
672
0
      buf->lookahead != ')' && buf->lookahead != EOF)
673
0
  {
674
0
    if (accept(buf, ','))
675
0
    {
676
0
      white(buf);
677
0
      if (buf->lookahead != ';')
678
0
      {
679
0
        tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, ',', ",");
680
0
        tail = tail->next = parse_term(buf);
681
0
      }
682
0
    }
683
0
    else if (accept(buf, '/'))
684
0
    {
685
0
      white(buf);
686
0
      tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, '/', "/");
687
0
      tail = tail->next = parse_term(buf);
688
0
    }
689
0
    else
690
0
    {
691
0
      tail = tail->next = parse_term(buf);
692
0
    }
693
0
  }
694
695
0
  return head;
696
0
}
697
698
static fz_css_property *parse_declaration(struct lexbuf *buf)
699
0
{
700
0
  fz_css_property *p;
701
702
0
  if (buf->lookahead != CSS_KEYWORD)
703
0
    fz_css_error(buf, "expected keyword in property");
704
0
  p = fz_new_css_property(buf->ctx, buf->pool, buf->string, NULL, 0);
705
0
  next(buf);
706
707
0
  white(buf);
708
0
  expect(buf, ':');
709
0
  white(buf);
710
711
0
  if (p)
712
0
    p->value = parse_expr(buf);
713
0
  else
714
0
    (void) parse_expr(buf);
715
716
  /* !important */
717
0
  if (accept(buf, '!'))
718
0
  {
719
0
    white(buf);
720
0
    if (buf->lookahead != CSS_KEYWORD || strcmp(buf->string, "important"))
721
0
      fz_css_error(buf, "expected keyword 'important' after '!'");
722
0
    if (p)
723
0
      p->important = 1;
724
0
    next(buf);
725
0
    white(buf);
726
0
  }
727
728
0
  return p;
729
0
}
730
731
static fz_css_property *parse_declaration_list(struct lexbuf *buf)
732
0
{
733
0
  fz_css_property *head, *tail = NULL, *p;
734
735
0
  white(buf);
736
737
0
  if (buf->lookahead == '}' || buf->lookahead == EOF)
738
0
    return NULL;
739
740
0
  p = parse_declaration(buf);
741
0
  if (p)
742
0
    tail = p;
743
0
  head = tail;
744
745
0
  while (accept(buf, ';'))
746
0
  {
747
0
    white(buf);
748
749
0
    if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF)
750
0
    {
751
0
      p = parse_declaration(buf);
752
0
      if (p)
753
0
      {
754
0
        if (!head)
755
0
          head = tail = p;
756
0
        else
757
0
          tail = tail->next = p;
758
0
      }
759
0
    }
760
0
  }
761
762
0
  return head;
763
0
}
764
765
static char *parse_attrib_value(struct lexbuf *buf)
766
0
{
767
0
  char *s;
768
769
0
  if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING)
770
0
  {
771
0
    s = fz_pool_strdup(buf->ctx, buf->pool, buf->string);
772
0
    next(buf);
773
0
    white(buf);
774
0
    return s;
775
0
  }
776
777
0
  fz_css_error(buf, "expected attribute value");
778
0
}
779
780
static char *css_lex_pseudo_expr(struct lexbuf *buf)
781
0
{
782
0
  buf->string_len = 0;
783
0
  for (;;)
784
0
  {
785
0
    if (buf->c == 0)
786
0
      return NULL;
787
0
    if (css_lex_accept(buf, ')'))
788
0
    {
789
0
      css_push_zero(buf);
790
0
      return fz_pool_strdup(buf->ctx, buf->pool, buf->string);
791
0
    }
792
0
    css_push_char(buf, buf->c);
793
0
    css_lex_next(buf);
794
0
  }
795
0
}
796
797
static fz_css_condition *parse_condition(struct lexbuf *buf)
798
0
{
799
0
  fz_css_condition *c;
800
801
0
  if (accept(buf, ':'))
802
0
  {
803
0
    (void)accept(buf, ':'); /* swallow css3 :: syntax and pretend it's a normal pseudo-class */
804
0
    if (buf->lookahead != CSS_KEYWORD)
805
0
      fz_css_error(buf, "expected keyword after ':'");
806
0
    c = fz_new_css_condition(buf->ctx, buf->pool, ':', buf->string, NULL);
807
0
    next(buf);
808
809
    // TODO -- parse :is, :not, :where, :has logical combinations with selector list as argument
810
811
0
    if (buf->lookahead == '(')
812
0
    {
813
0
      c->val = css_lex_pseudo_expr(buf);
814
0
      next(buf);
815
0
    }
816
0
    return c;
817
0
  }
818
819
0
  if (accept(buf, '.'))
820
0
  {
821
0
    if (buf->lookahead != CSS_KEYWORD)
822
0
      fz_css_error(buf, "expected keyword after '.'");
823
0
    c = fz_new_css_condition(buf->ctx, buf->pool, '.', "class", buf->string);
824
0
    next(buf);
825
0
    return c;
826
0
  }
827
828
0
  if (accept(buf, '['))
829
0
  {
830
0
    white(buf);
831
832
0
    if (buf->lookahead != CSS_KEYWORD)
833
0
      fz_css_error(buf, "expected keyword after '['");
834
0
    c = fz_new_css_condition(buf->ctx, buf->pool, '[', buf->string, NULL);
835
0
    next(buf);
836
837
0
    white(buf);
838
839
0
    if (accept(buf, '='))
840
0
    {
841
0
      c->type = '=';
842
0
      c->val = parse_attrib_value(buf);
843
0
    }
844
0
    else if (accept(buf, '|'))
845
0
    {
846
0
      expect(buf, '=');
847
0
      c->type = '|';
848
0
      c->val = parse_attrib_value(buf);
849
0
    }
850
0
    else if (accept(buf, '~'))
851
0
    {
852
0
      expect(buf, '=');
853
0
      c->type = '~';
854
0
      c->val = parse_attrib_value(buf);
855
0
    }
856
857
0
    expect(buf, ']');
858
859
0
    return c;
860
0
  }
861
862
0
  if (buf->lookahead == CSS_HASH)
863
0
  {
864
0
    c = fz_new_css_condition(buf->ctx, buf->pool, '#', "id", buf->string);
865
0
    next(buf);
866
0
    return c;
867
0
  }
868
869
0
  fz_css_error(buf, "expected condition");
870
0
}
871
872
static fz_css_condition *parse_condition_list(struct lexbuf *buf)
873
0
{
874
0
  fz_css_condition *head, *tail;
875
876
0
  head = tail = parse_condition(buf);
877
0
  while (iscond(buf->lookahead))
878
0
  {
879
0
    tail = tail->next = parse_condition(buf);
880
0
  }
881
0
  return head;
882
0
}
883
884
static fz_css_selector *parse_simple_selector(struct lexbuf *buf)
885
0
{
886
0
  fz_css_selector *s;
887
888
0
  if (accept(buf, '*'))
889
0
  {
890
0
    s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
891
0
    if (iscond(buf->lookahead))
892
0
      s->cond = parse_condition_list(buf);
893
0
    return s;
894
0
  }
895
0
  else if (buf->lookahead == CSS_KEYWORD)
896
0
  {
897
0
    s = fz_new_css_selector(buf->ctx, buf->pool, buf->string);
898
0
    next(buf);
899
0
    if (iscond(buf->lookahead))
900
0
      s->cond = parse_condition_list(buf);
901
0
    return s;
902
0
  }
903
0
  else if (iscond(buf->lookahead))
904
0
  {
905
0
    s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
906
0
    s->cond = parse_condition_list(buf);
907
0
    return s;
908
0
  }
909
910
0
  fz_css_error(buf, "expected selector");
911
0
}
912
913
/*
 * Parse the right-hand simple selector of a combinator and return a new
 * node that joins the left operand a to it using combinator c.
 */
static fz_css_selector *parse_combinator(struct lexbuf *buf, int c, fz_css_selector *a)
{
  fz_css_selector *rhs;
  fz_css_selector *node;

  white(buf);
  rhs = parse_simple_selector(buf);

  node = fz_new_css_selector(buf->ctx, buf->pool, NULL);
  node->left = a;
  node->right = rhs;
  node->combine = c;
  return node;
}
924
925
static fz_css_selector *parse_selector(struct lexbuf *buf)
926
0
{
927
0
  fz_css_selector *sel = parse_simple_selector(buf);
928
0
  for (;;)
929
0
  {
930
0
    if (accept(buf, ' '))
931
0
    {
932
0
      white(buf);
933
0
      if (accept(buf, '+'))
934
0
        sel = parse_combinator(buf, '+', sel);
935
0
      else if (accept(buf, '>'))
936
0
        sel = parse_combinator(buf, '>', sel);
937
0
      else if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF)
938
0
        sel = parse_combinator(buf, ' ', sel);
939
0
      else
940
0
        break;
941
0
    }
942
0
    else if (accept(buf, '+'))
943
0
      sel = parse_combinator(buf, '+', sel);
944
0
    else if (accept(buf, '>'))
945
0
      sel = parse_combinator(buf, '>', sel);
946
0
    else
947
0
      break;
948
0
  }
949
0
  return sel;
950
0
}
951
952
static fz_css_selector *parse_selector_list(struct lexbuf *buf)
953
0
{
954
0
  fz_css_selector *head, *tail;
955
956
0
  head = tail = parse_selector(buf);
957
0
  while (accept(buf, ','))
958
0
  {
959
0
    white(buf);
960
0
    tail = tail->next = parse_selector(buf);
961
0
  }
962
0
  return head;
963
0
}
964
965
static void parse_recover(struct lexbuf *buf)
966
0
{
967
0
  fz_rethrow_unless(buf->ctx, FZ_ERROR_SYNTAX);
968
0
  fz_report_error(buf->ctx);
969
0
  while (buf->lookahead != EOF)
970
0
  {
971
0
    if (accept(buf, '}') && buf->nest <= 0)
972
0
    {
973
0
      buf->nest = 0;
974
0
      white(buf);
975
0
      break;
976
0
    }
977
0
    next(buf);
978
0
  }
979
0
}
980
981
static fz_css_rule *parse_ruleset(struct lexbuf *buf)
982
0
{
983
0
  fz_css_selector *s = NULL;
984
0
  fz_css_property *p = NULL;
985
986
0
  fz_try(buf->ctx)
987
0
  {
988
0
    s = parse_selector_list(buf);
989
0
    expect(buf, '{');
990
0
    p = parse_declaration_list(buf);
991
0
    expect(buf, '}');
992
0
    white(buf);
993
0
  }
994
0
  fz_catch(buf->ctx)
995
0
  {
996
0
    parse_recover(buf);
997
0
    return NULL;
998
0
  }
999
1000
0
  return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1001
0
}
1002
1003
static fz_css_rule *parse_at_page(struct lexbuf *buf)
1004
0
{
1005
0
  fz_css_selector *s = NULL;
1006
0
  fz_css_property *p = NULL;
1007
1008
0
  fz_try(buf->ctx)
1009
0
  {
1010
0
    white(buf);
1011
0
    if (accept(buf, ':'))
1012
0
    {
1013
0
      expect(buf, CSS_KEYWORD);
1014
0
      white(buf);
1015
0
    }
1016
0
    expect(buf, '{');
1017
0
    p = parse_declaration_list(buf);
1018
0
    expect(buf, '}');
1019
0
    white(buf);
1020
0
  }
1021
0
  fz_catch(buf->ctx)
1022
0
  {
1023
0
    parse_recover(buf);
1024
0
    return NULL;
1025
0
  }
1026
1027
0
  s = fz_new_css_selector(buf->ctx, buf->pool, "@page");
1028
0
  return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1029
0
}
1030
1031
static fz_css_rule *parse_at_font_face(struct lexbuf *buf)
1032
0
{
1033
0
  fz_css_selector *s = NULL;
1034
0
  fz_css_property *p = NULL;
1035
1036
0
  fz_try(buf->ctx)
1037
0
  {
1038
0
    white(buf);
1039
0
    expect(buf, '{');
1040
0
    p = parse_declaration_list(buf);
1041
0
    expect(buf, '}');
1042
0
    white(buf);
1043
0
  }
1044
0
  fz_catch(buf->ctx)
1045
0
  {
1046
0
    parse_recover(buf);
1047
0
    return NULL;
1048
0
  }
1049
1050
0
  s = fz_new_css_selector(buf->ctx, buf->pool, "@font-face");
1051
0
  return fz_new_css_rule(buf->ctx, buf->pool, s, p);
1052
0
}
1053
1054
static void parse_at_rule(struct lexbuf *buf)
1055
0
{
1056
0
  expect(buf, CSS_KEYWORD);
1057
1058
  /* skip until '{' or ';' */
1059
0
  while (buf->lookahead != EOF)
1060
0
  {
1061
0
    if (accept(buf, ';'))
1062
0
    {
1063
0
      white(buf);
1064
0
      return;
1065
0
    }
1066
0
    if (accept(buf, '{'))
1067
0
    {
1068
0
      int depth = 1;
1069
0
      while (buf->lookahead != EOF && depth > 0)
1070
0
      {
1071
0
        if (accept(buf, '{'))
1072
0
          ++depth;
1073
0
        else if (accept(buf, '}'))
1074
0
          --depth;
1075
0
        else
1076
0
          next(buf);
1077
0
      }
1078
0
      white(buf);
1079
0
      return;
1080
0
    }
1081
0
    next(buf);
1082
0
  }
1083
0
}
1084
1085
/*
 * Parse a whole stylesheet and append its rules to chain.
 *
 * @page and @font-face are turned into rules, other at-rules are
 * skipped, and everything else is parsed as a ruleset. Rules that failed
 * to parse are left out. Returns chain if it was non-NULL, otherwise the
 * first rule parsed (NULL if there was none).
 */
static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain)
{
  fz_css_rule *head = chain;
  fz_css_rule **link = &head; /* where the next rule gets attached */
  fz_css_rule *x;

  /* start appending after the last rule of the existing chain */
  while (*link)
    link = &(*link)->next;

  white(buf);

  while (buf->lookahead != EOF)
  {
    x = NULL;

    if (!accept(buf, '@'))
    {
      x = parse_ruleset(buf);
    }
    else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "page"))
    {
      next(buf);
      x = parse_at_page(buf);
    }
    else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "font-face"))
    {
      next(buf);
      x = parse_at_font_face(buf);
    }
    else
    {
      parse_at_rule(buf);
    }

    if (x)
    {
      *link = x;
      link = &x->next;
    }

    white(buf);
  }

  return head;
}
1146
1147
const char *fz_css_property_name(int key)
1148
0
{
1149
0
  const char *name = "unknown";
1150
0
  size_t i;
1151
0
  for (i = 0; i < nelem(css_property_list); ++i)
1152
0
    if (*css_property_list[i].name && css_property_list[i].key == key)
1153
0
      name = css_property_list[i].name;
1154
0
  return name;
1155
0
}
1156
1157
/*
 * Parse a bare declaration list from source, reported as "<inline>" in
 * error messages. Nodes are allocated from pool. Errors are not caught
 * here, so they propagate to the caller.
 */
fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source)
{
  struct lexbuf lex;

  css_lex_init(ctx, &lex, pool, source, "<inline>");
  next(&lex); /* prime the token lookahead */
  return parse_declaration_list(&lex);
}
1164
1165
/*
 * Parse the stylesheet text in source and append its rules to css.
 * file is the name used in error messages. Nodes are allocated from the
 * stylesheet's own pool.
 */
void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file)
{
  struct lexbuf lex;

  css_lex_init(ctx, &lex, css->pool, source, file);
  next(&lex); /* prime the token lookahead */
  css->rule = parse_stylesheet(&lex, css->rule);
}