Coverage Report

Created: 2023-06-07 06:20

/src/mupdf/source/pdf/pdf-lex.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
/*
 * Character classes for use in switch statements.
 *
 * Every macro expands to a run of case labels with the leading "case"
 * and the trailing ':' left off, so that it is written as
 *
 *	case IS_WHITE:
 *
 * at the point of use.
 */

/* Plain digit / letter ranges. */
#define RANGE_0_7 \
	'0': case '1': case '2': case '3': case '4': case '5': case '6': case '7'
#define RANGE_0_9 \
	'0': case '1': case '2': case '3': case '4': \
	case '5': case '6': case '7': case '8': case '9'
#define RANGE_a_f \
	'a': case 'b': case 'c': case 'd': case 'e': case 'f'
#define RANGE_A_F \
	'A': case 'B': case 'C': case 'D': case 'E': case 'F'

/* Characters that can start a number: a sign, a decimal point or a digit. */
#define IS_NUMBER \
	'+': case '-': case '.': case RANGE_0_9

/* NUL, TAB, LF, FF, CR and space. */
#define IS_WHITE \
	'\x00': case '\x09': case '\x0a': case '\x0c': case '\x0d': case '\x20'

/* Hexadecimal digits in either letter case. */
#define IS_HEX \
	RANGE_0_9: case RANGE_A_F: case RANGE_a_f

/* Characters that end a token without being whitespace. */
#define IS_DELIM \
	'(': case ')': case '<': case '>': case '[': case ']': \
	case '{': case '}': case '/': case '%'
51
52
/*
 * Define DUMP_LEXER_STREAM to have every byte the lexer reads written
 * to the output returned by fz_stdout() as it is consumed.
 */
/* #define DUMP_LEXER_STREAM */

#ifndef DUMP_LEXER_STREAM
/* Normal build: reading a byte for the lexer is just fz_read_byte. */
#define lex_byte(C,S) fz_read_byte(C,S)
#else
/*
 * Debug build: read one byte from stm, print it, and return it.
 * EOF is printed as "<EOF>", bytes in [32, 128) as themselves and
 * everything else as a two digit hex value in angle brackets.
 */
static inline int lex_byte(fz_context *ctx, fz_stream *stm)
{
	int c = fz_read_byte(ctx, stm);

	if (c == EOF)
		fz_write_printf(ctx, fz_stdout(ctx), "<EOF>");
	else if (c < 32 || c >= 128)
		fz_write_printf(ctx, fz_stdout(ctx), "<%02x>", c);
	else
		fz_write_printf(ctx, fz_stdout(ctx), "%c", c);
	return c;
}
#endif
69
70
/*
 * Return 1 if ch is one of the six whitespace bytes recognised by the
 * lexer (NUL, TAB, LF, FF, CR, space), 0 otherwise.
 */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case 0x00: /* NUL */
	case 0x09: /* TAB */
	case 0x0a: /* LF */
	case 0x0c: /* FF */
	case 0x0d: /* CR */
	case 0x20: /* space */
		return 1;
	default:
		return 0;
	}
}
80
81
/* Return 1 if ch lies between ' ' and '~' inclusive, 0 otherwise. */
static inline int fz_isprint(int ch)
{
	if (ch < ' ')
		return 0;
	return ch <= '~';
}
85
86
/*
 * Convert a hexadecimal digit (either letter case) to its value 0..15.
 * Anything that is not a hex digit maps to 0.
 */
static inline int unhex(int ch)
{
	int value = 0;

	if (ch >= 'a' && ch <= 'f')
		value = 10 + (ch - 'a');
	else if (ch >= 'A' && ch <= 'F')
		value = 10 + (ch - 'A');
	else if (ch >= '0' && ch <= '9')
		value = ch - '0';

	return value;
}
93
94
static void
95
lex_white(fz_context *ctx, fz_stream *f)
96
77.8M
{
97
77.8M
  int c;
98
93.9M
  do {
99
93.9M
    c = lex_byte(ctx, f);
100
93.9M
  } while ((c <= 32) && (iswhite(c)));
101
77.8M
  if (c != EOF)
102
77.8M
    fz_unread_byte(ctx, f);
103
77.8M
}
104
105
static void
106
lex_comment(fz_context *ctx, fz_stream *f)
107
306k
{
108
306k
  int c;
109
152M
  do {
110
152M
    c = lex_byte(ctx, f);
111
152M
  } while ((c != '\012') && (c != '\015') && (c != EOF));
112
306k
}
113
114
/*
 * Fast(ish) but inaccurate strtof, with Adobe overflow handling.
 *
 * Accepts any number of leading '-' signs (one or more makes the result
 * negative), then any number of '+' signs, then decimal digits, then an
 * optional '.' followed by more decimal digits. Parsing stops at the
 * first character that does not fit; no error is reported.
 *
 * The integer part is accumulated modulo 2^N (N = width of unsigned int)
 * and then reinterpreted as a signed int, i.e. overflow silently wraps.
 * The accumulation is done in unsigned arithmetic so that the wrap is
 * well defined rather than relying on signed overflow.
 */
static float acrobat_compatible_atof(char *s)
{
	int neg = 0;
	unsigned int u = 0;

	while (*s == '-')
	{
		neg = 1;
		++s;
	}
	while (*s == '+')
	{
		++s;
	}

	while (*s >= '0' && *s <= '9')
	{
		/* We deliberately ignore overflow here.
		 * Tests show that Acrobat handles overflows in exactly the same way we do:
		 * 123450000000000000000678 is read as 678.
		 */
		u = u * 10u + (unsigned int)(*s - '0');
		++s;
	}

	if (*s == '.')
	{
		float v = (float)(int)u;
		float n = 0;	/* fractional digits read as an integer */
		float d = 1;	/* 10 ^ (number of fractional digits) */

		++s;
		while (*s >= '0' && *s <= '9')
		{
			n = 10 * n + (*s - '0');
			d = 10 * d;
			++s;
		}
		v += n / d;
		return neg ? -v : v;
	}

	/* No fraction: negate as an integer (so "-0" stays +0) without
	 * tripping over the most negative int. */
	if (neg)
		u = 0u - u;
	return (float)(int)u;
}
160
161
/*
 * Fast but inaccurate atoi.
 *
 * Accepts any number of leading '-' signs (one or more makes the result
 * negative), then any number of '+' signs, then decimal digits. Parsing
 * stops at the first non-digit; no error is reported.
 *
 * Values that do not fit in an int wrap around. The accumulation and the
 * negation are done in unsigned arithmetic so that the wrap is well
 * defined instead of being signed overflow, and so that the most
 * negative int parses correctly.
 */
static int fast_atoi(char *s)
{
	int neg = 0;
	unsigned int u = 0;

	while (*s == '-')
	{
		neg = 1;
		++s;
	}
	while (*s == '+')
	{
		++s;
	}

	while (*s >= '0' && *s <= '9')
	{
		/* We deliberately ignore overflow here. */
		u = u * 10u + (unsigned int)(*s - '0');
		++s;
	}

	if (neg)
		u = 0u - u;
	return (int)u;
}
186
187
static int
188
lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
189
46.8M
{
190
46.8M
  char *s = buf->scratch;
191
46.8M
  char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
192
46.8M
  char *isreal = (c == '.' ? s : NULL);
193
46.8M
  int neg = (c == '-');
194
46.8M
  int isbad = 0;
195
196
46.8M
  *s++ = c;
197
198
46.8M
  c = lex_byte(ctx, f);
199
200
  /* skip extra '-' signs at start of number */
201
46.8M
  if (neg)
202
2.96M
  {
203
2.98M
    while (c == '-')
204
17.0k
      c = lex_byte(ctx, f);
205
2.96M
  }
206
207
180M
  while (s < e)
208
180M
  {
209
180M
    switch (c)
210
180M
    {
211
48.5M
    case IS_WHITE:
212
48.5M
    case IS_DELIM:
213
46.8M
      fz_unread_byte(ctx, f);
214
46.8M
      goto end;
215
6.98k
    case EOF:
216
6.98k
      goto end;
217
13.7M
    case '.':
218
13.7M
      if (isreal)
219
256k
        isbad = 1;
220
13.7M
      isreal = s;
221
13.7M
      *s++ = c;
222
13.7M
      break;
223
97.9k
    case '-':
224
      /* Bug 703248: Some PDFs (particularly those
225
       * generated by google docs) apparently have
226
       * numbers like 0.000000000000-5684342 in them.
227
       * We'll stop our interpretation at the -, but
228
       * keep reading to skip over the trailing
229
       * digits so they aren't parsed later. */
230
97.9k
      *s++ = '\0';
231
97.9k
      break;
232
116M
    case RANGE_0_9:
233
116M
      *s++ = c;
234
116M
      break;
235
2.60M
    default:
236
2.60M
      isbad = 1;
237
2.60M
      *s++ = c;
238
2.60M
      break;
239
180M
    }
240
133M
    c = lex_byte(ctx, f);
241
133M
  }
242
243
46.8M
end:
244
46.8M
  *s = '\0';
245
46.8M
  if (isbad)
246
772k
    return PDF_TOK_KEYWORD;
247
46.0M
  if (isreal)
248
13.4M
  {
249
    /* We'd like to use the fastest possible atof
250
     * routine, but we'd rather match acrobats
251
     * handling of broken numbers. As such, we
252
     * spot common broken cases and call an
253
     * acrobat compatible routine where required. */
254
13.4M
    if (neg > 1 || isreal - buf->scratch >= 10)
255
11.8k
      buf->f = acrobat_compatible_atof(buf->scratch);
256
13.4M
    else
257
13.4M
      buf->f = fz_atof(buf->scratch);
258
13.4M
    return PDF_TOK_REAL;
259
13.4M
  }
260
32.5M
  else
261
32.5M
  {
262
32.5M
    buf->i = fast_atoi(buf->scratch);
263
32.5M
    return PDF_TOK_INT;
264
32.5M
  }
265
46.0M
}
266
267
static void
268
lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
269
38.8M
{
270
38.8M
  char *s = lb->scratch;
271
38.8M
  char *e = s + fz_minz(127, lb->size);
272
38.8M
  int c;
273
274
294M
  while (1)
275
294M
  {
276
294M
    if (s == e)
277
11.5k
    {
278
11.5k
      if (e - lb->scratch < 127)
279
0
      {
280
0
        s += pdf_lexbuf_grow(ctx, lb);
281
0
        e = lb->scratch + fz_minz(127, lb->size);
282
0
      }
283
11.5k
      else
284
11.5k
      {
285
        /* truncate names that are too long */
286
11.5k
        fz_warn(ctx, "name is too long");
287
11.5k
        *s = 0;
288
11.5k
        lb->len = s - lb->scratch;
289
11.5k
        s = NULL;
290
11.5k
      }
291
11.5k
    }
292
294M
    c = lex_byte(ctx, f);
293
294M
    switch (c)
294
294M
    {
295
78.9M
    case IS_WHITE:
296
78.9M
    case IS_DELIM:
297
38.8M
      fz_unread_byte(ctx, f);
298
38.8M
      goto end;
299
12.1k
    case EOF:
300
12.1k
      goto end;
301
112k
    case '#':
302
112k
    {
303
112k
      int hex[2];
304
112k
      int i;
305
183k
      for (i = 0; i < 2; i++)
306
153k
      {
307
153k
        c = fz_peek_byte(ctx, f);
308
153k
        switch (c)
309
153k
        {
310
325k
        case RANGE_0_9:
311
325k
          if (i == 1 && c == '0' && hex[0] == 0)
312
40
            goto illegal;
313
44.3k
          hex[i] = lex_byte(ctx, f) - '0';
314
44.3k
          break;
315
18.0k
        case RANGE_a_f:
316
18.0k
          hex[i] = lex_byte(ctx, f) - 'a' + 10;
317
18.0k
          break;
318
8.89k
        case RANGE_A_F:
319
8.89k
          hex[i] = lex_byte(ctx, f) - 'A' + 10;
320
8.89k
          break;
321
82.4k
        default:
322
82.5k
        case EOF:
323
82.5k
          goto illegal;
324
153k
        }
325
153k
      }
326
29.8k
      if (s) *s++ = (hex[0] << 4) + hex[1];
327
29.8k
      break;
328
82.5k
illegal:
329
82.5k
      if (i == 1)
330
11.6k
        fz_unread_byte(ctx, f);
331
82.5k
      if (s) *s++ = '#';
332
82.5k
      continue;
333
112k
    }
334
255M
    default:
335
255M
      if (s) *s++ = c;
336
255M
      break;
337
294M
    }
338
294M
  }
339
38.8M
end:
340
38.8M
  if (s)
341
38.8M
  {
342
38.8M
    *s = '\0';
343
38.8M
    lb->len = s - lb->scratch;
344
38.8M
  }
345
38.8M
}
346
347
static int
348
lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
349
1.75M
{
350
1.75M
  char *s = lb->scratch;
351
1.75M
  char *e = s + lb->size;
352
1.75M
  int bal = 1;
353
1.75M
  int oct;
354
1.75M
  int c;
355
356
359M
  while (1)
357
359M
  {
358
359M
    if (s == e)
359
5.74k
    {
360
5.74k
      s += pdf_lexbuf_grow(ctx, lb);
361
5.74k
      e = lb->scratch + lb->size;
362
5.74k
    }
363
359M
    c = lex_byte(ctx, f);
364
359M
    switch (c)
365
359M
    {
366
5.25k
    case EOF:
367
5.25k
      return PDF_TOK_ERROR;
368
488k
    case '(':
369
488k
      bal++;
370
488k
      *s++ = c;
371
488k
      break;
372
2.18M
    case ')':
373
2.18M
      bal --;
374
2.18M
      if (bal == 0)
375
1.74M
        goto end;
376
436k
      *s++ = c;
377
436k
      break;
378
597k
    case '\\':
379
597k
      c = lex_byte(ctx, f);
380
597k
      switch (c)
381
597k
      {
382
8
      case EOF:
383
8
        return PDF_TOK_ERROR;
384
5.54k
      case 'n':
385
5.54k
        *s++ = '\n';
386
5.54k
        break;
387
7.51k
      case 'r':
388
7.51k
        *s++ = '\r';
389
7.51k
        break;
390
4.70k
      case 't':
391
4.70k
        *s++ = '\t';
392
4.70k
        break;
393
2.12k
      case 'b':
394
2.12k
        *s++ = '\b';
395
2.12k
        break;
396
2.22k
      case 'f':
397
2.22k
        *s++ = '\f';
398
2.22k
        break;
399
21.8k
      case '(':
400
21.8k
        *s++ = '(';
401
21.8k
        break;
402
21.6k
      case ')':
403
21.6k
        *s++ = ')';
404
21.6k
        break;
405
72.3k
      case '\\':
406
72.3k
        *s++ = '\\';
407
72.3k
        break;
408
139k
      case RANGE_0_7:
409
139k
        oct = c - '0';
410
139k
        c = lex_byte(ctx, f);
411
139k
        if (c >= '0' && c <= '7')
412
134k
        {
413
134k
          oct = oct * 8 + (c - '0');
414
134k
          c = lex_byte(ctx, f);
415
134k
          if (c >= '0' && c <= '7')
416
133k
            oct = oct * 8 + (c - '0');
417
1.12k
          else if (c != EOF)
418
1.12k
            fz_unread_byte(ctx, f);
419
134k
        }
420
5.76k
        else if (c != EOF)
421
5.71k
          fz_unread_byte(ctx, f);
422
139k
        *s++ = oct;
423
139k
        break;
424
384
      case '\n':
425
384
        break;
426
2.53k
      case '\r':
427
2.53k
        c = lex_byte(ctx, f);
428
2.53k
        if ((c != '\n') && (c != EOF))
429
2.45k
          fz_unread_byte(ctx, f);
430
2.53k
        break;
431
317k
      default:
432
317k
        *s++ = c;
433
597k
      }
434
597k
      break;
435
356M
    default:
436
356M
      *s++ = c;
437
356M
      break;
438
359M
    }
439
359M
  }
440
1.74M
end:
441
1.74M
  lb->len = s - lb->scratch;
442
1.74M
  return PDF_TOK_STRING;
443
1.75M
}
444
445
static int
446
lex_hex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
447
1.28M
{
448
1.28M
  char *s = lb->scratch;
449
1.28M
  char *e = s + lb->size;
450
1.28M
  int a = 0, x = 0;
451
1.28M
  int c;
452
453
51.4M
  while (1)
454
51.4M
  {
455
51.4M
    if (s == e)
456
1.69k
    {
457
1.69k
      s += pdf_lexbuf_grow(ctx, lb);
458
1.69k
      e = lb->scratch + lb->size;
459
1.69k
    }
460
51.4M
    c = lex_byte(ctx, f);
461
51.4M
    switch (c)
462
51.4M
    {
463
4.77M
    case IS_WHITE:
464
4.77M
      break;
465
31.1M
    default:
466
31.1M
      fz_warn(ctx, "invalid character in hex string");
467
      /* fall through */
468
45.3M
    case IS_HEX:
469
45.3M
      if (x)
470
22.6M
      {
471
22.6M
        *s++ = a * 16 + unhex(c);
472
22.6M
        x = !x;
473
22.6M
      }
474
22.7M
      else
475
22.7M
      {
476
22.7M
        a = unhex(c);
477
22.7M
        x = !x;
478
22.7M
      }
479
45.3M
      break;
480
1.28M
    case '>':
481
1.28M
      if (x)
482
91.6k
      {
483
91.6k
        *s++ = a * 16; /* pad truncated string with '0' */
484
91.6k
      }
485
1.28M
      goto end;
486
4.06k
    case EOF:
487
4.06k
      return PDF_TOK_ERROR;
488
51.4M
    }
489
51.4M
  }
490
1.28M
end:
491
1.28M
  lb->len = s - lb->scratch;
492
1.28M
  return PDF_TOK_STRING;
493
1.28M
}
494
495
static pdf_token
496
pdf_token_from_keyword(char *key)
497
25.4M
{
498
25.4M
  switch (*key)
499
25.4M
  {
500
6.15M
  case 'R':
501
6.15M
    if (!strcmp(key, "R")) return PDF_TOK_R;
502
45.1k
    break;
503
208k
  case 't':
504
208k
    if (!strcmp(key, "true")) return PDF_TOK_TRUE;
505
65.4k
    if (!strcmp(key, "trailer")) return PDF_TOK_TRAILER;
506
60.5k
    break;
507
338k
  case 'f':
508
338k
    if (!strcmp(key, "false")) return PDF_TOK_FALSE;
509
215k
    break;
510
506k
  case 'n':
511
506k
    if (!strcmp(key, "null")) return PDF_TOK_NULL;
512
387k
    if (!strcmp(key, "newobj")) return PDF_TOK_NEWOBJ;
513
387k
    break;
514
635k
  case 'o':
515
635k
    if (!strcmp(key, "obj")) return PDF_TOK_OBJ;
516
13.7k
    break;
517
518k
  case 'e':
518
518k
    if (!strcmp(key, "endobj")) return PDF_TOK_ENDOBJ;
519
136k
    if (!strcmp(key, "endstream")) return PDF_TOK_ENDSTREAM;
520
76.1k
    break;
521
375k
  case 's':
522
375k
    if (!strcmp(key, "stream")) return PDF_TOK_STREAM;
523
152k
    if (!strcmp(key, "startxref")) return PDF_TOK_STARTXREF;
524
147k
    break;
525
147k
  case 'x':
526
14.2k
    if (!strcmp(key, "xref")) return PDF_TOK_XREF;
527
10.6k
    break;
528
25.4M
  }
529
530
35.8M
  while (*key)
531
25.1M
  {
532
25.1M
    if (!fz_isprint(*key))
533
6.90M
      return PDF_TOK_ERROR;
534
18.2M
    ++key;
535
18.2M
  }
536
537
10.7M
  return PDF_TOK_KEYWORD;
538
17.6M
}
539
540
/*
 * Set up a lex buffer so that it uses its own embedded storage.
 *
 * size is recorded as both the current and the base size; while the two
 * are equal, scratch points at the embedded buffer.
 */
void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lb, int size)
{
	lb->base_size = size;
	lb->size = lb->base_size;
	lb->scratch = lb->buffer;
	lb->len = 0;
}
546
547
/*
 * Release whatever a lex buffer allocated.
 *
 * A NULL lb is accepted. Scratch storage is freed only when the buffer
 * has been grown, i.e. when its size no longer equals its base size.
 */
void pdf_lexbuf_fin(fz_context *ctx, pdf_lexbuf *lb)
{
	if (!lb)
		return;
	if (lb->size == lb->base_size)
		return;
	fz_free(ctx, lb->scratch);
}
552
553
/*
 * Double the capacity of a lex buffer.
 *
 * The first time this is called (size still equal to base_size) fresh
 * storage is allocated and the contents of the embedded buffer are
 * copied into it; after that the existing allocation is resized.
 *
 * Returns the distance from the old scratch pointer to the new one, so
 * that a caller holding a pointer into the old storage can adjust it by
 * adding the result.
 */
ptrdiff_t pdf_lexbuf_grow(fz_context *ctx, pdf_lexbuf *lb)
{
	char *previous = lb->scratch;
	size_t doubled = lb->size * 2;

	if (lb->size != lb->base_size)
	{
		lb->scratch = fz_realloc(ctx, lb->scratch, doubled);
	}
	else
	{
		lb->scratch = Memento_label(fz_malloc(ctx, doubled), "pdf_lexbuf");
		memcpy(lb->scratch, lb->buffer, lb->size);
	}

	lb->size = doubled;
	return lb->scratch - previous;
}
569
570
pdf_token
571
pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
572
91.8M
{
573
167M
  while (1)
574
167M
  {
575
167M
    int c = lex_byte(ctx, f);
576
167M
    switch (c)
577
167M
    {
578
95.7k
    case EOF:
579
95.7k
      return PDF_TOK_EOF;
580
75.3M
    case IS_WHITE:
581
75.3M
      lex_white(ctx, f);
582
75.3M
      break;
583
254k
    case '%':
584
254k
      lex_comment(ctx, f);
585
254k
      break;
586
13.2M
    case '/':
587
13.2M
      lex_name(ctx, f, buf);
588
13.2M
      return PDF_TOK_NAME;
589
1.75M
    case '(':
590
1.75M
      return lex_string(ctx, f, buf);
591
47.1k
    case ')':
592
47.1k
      return PDF_TOK_ERROR;
593
2.80M
    case '<':
594
2.80M
      c = lex_byte(ctx, f);
595
2.80M
      if (c == '<')
596
1.51M
        return PDF_TOK_OPEN_DICT;
597
1.28M
      if (c != EOF)
598
1.28M
        fz_unread_byte(ctx, f);
599
1.28M
      return lex_hex_string(ctx, f, buf);
600
1.52M
    case '>':
601
1.52M
      c = lex_byte(ctx, f);
602
1.52M
      if (c == '>')
603
1.38M
        return PDF_TOK_CLOSE_DICT;
604
141k
      if (c != EOF)
605
140k
        fz_unread_byte(ctx, f);
606
141k
      return PDF_TOK_ERROR;
607
1.42M
    case '[':
608
1.42M
      return PDF_TOK_OPEN_ARRAY;
609
1.35M
    case ']':
610
1.35M
      return PDF_TOK_CLOSE_ARRAY;
611
39.6k
    case '{':
612
39.6k
      return PDF_TOK_OPEN_BRACE;
613
56.1k
    case '}':
614
56.1k
      return PDF_TOK_CLOSE_BRACE;
615
45.4M
    case IS_NUMBER:
616
45.4M
      return lex_number(ctx, f, buf, c);
617
24.0M
    default: /* isregular: !isdelim && !iswhite && c != EOF */
618
24.0M
      fz_unread_byte(ctx, f);
619
24.0M
      lex_name(ctx, f, buf);
620
24.0M
      return pdf_token_from_keyword(buf->scratch);
621
167M
    }
622
167M
  }
623
91.8M
}
624
625
pdf_token
626
pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
627
3.35M
{
628
5.91M
  while (1)
629
5.91M
  {
630
5.91M
    int c = lex_byte(ctx, f);
631
5.91M
    switch (c)
632
5.91M
    {
633
3.58k
    case EOF:
634
3.58k
      return PDF_TOK_EOF;
635
2.49M
    case IS_WHITE:
636
2.49M
      lex_white(ctx, f);
637
2.49M
      break;
638
52.0k
    case '%':
639
52.0k
      lex_comment(ctx, f);
640
52.0k
      break;
641
95.6k
    case '/':
642
95.6k
      lex_name(ctx, f, buf);
643
95.6k
      return PDF_TOK_NAME;
644
40.8k
    case '(':
645
40.8k
      return PDF_TOK_ERROR; /* no strings allowed */
646
41.2k
    case ')':
647
41.2k
      return PDF_TOK_ERROR; /* no strings allowed */
648
59.3k
    case '<':
649
59.3k
      c = lex_byte(ctx, f);
650
59.3k
      if (c == '<')
651
8.19k
        return PDF_TOK_OPEN_DICT;
652
51.1k
      if (c != EOF)
653
51.1k
        fz_unread_byte(ctx, f);
654
51.1k
      return PDF_TOK_ERROR; /* no strings allowed */
655
93.7k
    case '>':
656
93.7k
      c = lex_byte(ctx, f);
657
93.7k
      if (c == '>')
658
10.5k
        return PDF_TOK_CLOSE_DICT;
659
83.2k
      if (c != EOF)
660
83.2k
        fz_unread_byte(ctx, f);
661
83.2k
      return PDF_TOK_ERROR;
662
74.1k
    case '[':
663
74.1k
      return PDF_TOK_OPEN_ARRAY;
664
68.8k
    case ']':
665
68.8k
      return PDF_TOK_CLOSE_ARRAY;
666
40.0k
    case '{':
667
40.0k
      return PDF_TOK_OPEN_BRACE;
668
36.6k
    case '}':
669
36.6k
      return PDF_TOK_CLOSE_BRACE;
670
1.38M
    case IS_NUMBER:
671
1.38M
      return lex_number(ctx, f, buf, c);
672
1.41M
    default: /* isregular: !isdelim && !iswhite && c != EOF */
673
1.41M
      fz_unread_byte(ctx, f);
674
1.41M
      lex_name(ctx, f, buf);
675
1.41M
      return pdf_token_from_keyword(buf->scratch);
676
5.91M
    }
677
5.91M
  }
678
3.35M
}
679
680
/*
 * Append the textual form of a token to fzbuf.
 *
 * tok selects the form; buf supplies the value for tokens that carry
 * one (scratch/len for names and strings, i for integers, f for reals).
 * Any token without a dedicated case is written as the raw buf->len
 * bytes found in buf->scratch.
 */
void pdf_append_token(fz_context *ctx, fz_buffer *fzbuf, int tok, pdf_lexbuf *buf)
{
	switch (tok)
	{
	/* Punctuation. */
	case PDF_TOK_OPEN_ARRAY:
		fz_append_byte(ctx, fzbuf, '[');
		break;
	case PDF_TOK_CLOSE_ARRAY:
		fz_append_byte(ctx, fzbuf, ']');
		break;
	case PDF_TOK_OPEN_BRACE:
		fz_append_byte(ctx, fzbuf, '{');
		break;
	case PDF_TOK_CLOSE_BRACE:
		fz_append_byte(ctx, fzbuf, '}');
		break;
	case PDF_TOK_OPEN_DICT:
		fz_append_string(ctx, fzbuf, "<<");
		break;
	case PDF_TOK_CLOSE_DICT:
		fz_append_string(ctx, fzbuf, ">>");
		break;

	/* Numbers. */
	case PDF_TOK_INT:
		fz_append_printf(ctx, fzbuf, "%ld", buf->i);
		break;
	case PDF_TOK_REAL:
		fz_append_printf(ctx, fzbuf, "%g", buf->f);
		break;

	/* Tokens whose text lives in the scratch buffer. */
	case PDF_TOK_NAME:
		fz_append_printf(ctx, fzbuf, "/%s", buf->scratch);
		break;
	case PDF_TOK_STRING:
		/* The string lexers do not zero terminate; make room for
		 * a terminator if the buffer is full, then add one. */
		if (buf->len >= buf->size)
			pdf_lexbuf_grow(ctx, buf);
		buf->scratch[buf->len] = 0;
		fz_append_pdf_string(ctx, fzbuf, buf->scratch);
		break;

	default:
		fz_append_data(ctx, fzbuf, buf->scratch, buf->len);
		break;
	}
}