Coverage Report

Created: 2025-01-11 06:55

/src/mupdf/source/pdf/pdf-lex.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
/*
 * Character-class macros for use as switch labels.
 *
 * Each macro expands to a list of case labels with the leading "case"
 * keyword and the trailing ':' left off, so the intended usage is
 *
 *   switch (c) { case IS_WHITE: ...; }
 *
 * The composite classes below are assembled from the RANGE_* macros.
 */

/* Decimal digits. */
#define RANGE_0_9 \
  '0': case '1': case '2': case '3': case '4': \
  case '5': case '6': case '7': case '8': case '9'

/* Lower-case hexadecimal letters. */
#define RANGE_a_f \
  'a': case 'b': case 'c': case 'd': case 'e': case 'f'

/* Upper-case hexadecimal letters. */
#define RANGE_A_F \
  'A': case 'B': case 'C': case 'D': case 'E': case 'F'

/* Octal digits. */
#define RANGE_0_7 \
  '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7'

/* Characters that can start a number: sign, decimal point or digit. */
#define IS_NUMBER \
  '+': case '-': case '.': case RANGE_0_9

/* NUL, tab, line feed, form feed, carriage return and space. */
#define IS_WHITE \
  '\0': case '\t': case '\n': case '\f': case '\r': case ' '

/* Any hexadecimal digit, in either case. */
#define IS_HEX \
  RANGE_0_9: case RANGE_A_F: case RANGE_a_f

/* Delimiter characters. */
#define IS_DELIM \
  '(': case ')': case '<': case '>': case '[': case ']': \
  case '{': case '}': case '/': case '%'
51
52
/* #define DUMP_LEXER_STREAM */
53
#ifndef DUMP_LEXER_STREAM
/* Normal build: lex_byte is simply fz_read_byte. */
#define lex_byte(C,S) fz_read_byte(C,S)
#else
/*
 * Debug build: read one byte with fz_read_byte and echo it to the
 * fz_stdout() output before returning it. EOF is printed as "<EOF>",
 * bytes in the range 32..127 as themselves, anything else as "<xx>" hex.
 */
static inline int lex_byte(fz_context *ctx, fz_stream *stm)
{
  int byte = fz_read_byte(ctx, stm);

  if (byte == EOF)
  {
    fz_write_printf(ctx, fz_stdout(ctx), "<EOF>");
    return byte;
  }

  if (byte < 32 || byte >= 128)
    fz_write_printf(ctx, fz_stdout(ctx), "<%02x>", byte);
  else
    fz_write_printf(ctx, fz_stdout(ctx), "%c", byte);

  return byte;
}
#endif
69
70
/*
 * Return 1 if ch is NUL, tab, line feed, form feed, carriage return or
 * space; return 0 for every other value.
 */
static inline int iswhite(int ch)
{
  switch (ch)
  {
  case '\0':
  case '\t':
  case '\n':
  case '\f':
  case '\r':
  case ' ':
    return 1;
  default:
    return 0;
  }
}
80
81
/* Return 1 if ch lies in the range ' ' .. '~' inclusive, otherwise 0. */
static inline int fz_isprint(int ch)
{
  return !(ch < ' ' || ch > '~');
}
85
86
/*
 * Convert one hexadecimal digit character (either case) to its value
 * 0..15. Any character that is not a hexadecimal digit maps to 0.
 */
static inline int unhex(int ch)
{
  int value = 0;

  if ('0' <= ch && ch <= '9')
    value = ch - '0';
  else if ('A' <= ch && ch <= 'F')
    value = 10 + (ch - 'A');
  else if ('a' <= ch && ch <= 'f')
    value = 10 + (ch - 'a');

  return value;
}
93
94
static void
95
lex_white(fz_context *ctx, fz_stream *f)
96
47.5M
{
97
47.5M
  int c;
98
55.0M
  do {
99
55.0M
    c = lex_byte(ctx, f);
100
55.0M
  } while ((c <= 32) && (iswhite(c)));
101
47.5M
  if (c != EOF)
102
47.4M
    fz_unread_byte(ctx, f);
103
47.5M
}
104
105
static void
106
lex_comment(fz_context *ctx, fz_stream *f)
107
249k
{
108
249k
  int c;
109
5.77M
  do {
110
5.77M
    c = lex_byte(ctx, f);
111
5.77M
  } while ((c != '\012') && (c != '\015') && (c != EOF));
112
249k
}
113
114
/*
 * Fast(ish) but inaccurate strtof, with Adobe overflow handling.
 *
 * Accepted syntax: any number of '-' characters (one or more makes the
 * result negative), then any number of '+' characters, then decimal
 * digits, then optionally a '.' followed by more decimal digits.
 * Parsing stops at the first character that does not fit this pattern.
 *
 * s: zero-terminated string to parse; it is not modified.
 * Returns the parsed value as a float.
 */
static float acrobat_compatible_atof(char *s)
{
  /* Fraction digits beyond this count are ignored. For IEEE single
   * precision, 10^38 is the largest power of ten that is still finite,
   * so stopping here keeps the fraction accumulators from overflowing
   * to infinity (which used to drop the fraction or yield NaN). */
  enum { MAX_FRACTION_DIGITS = 38 };

  int neg = 0;
  unsigned int u = 0;

  while (*s == '-')
  {
    neg = 1;
    ++s;
  }
  while (*s == '+')
  {
    ++s;
  }

  while (*s >= '0' && *s <= '9')
  {
    /* We deliberately ignore overflow here.
     * Tests show that Acrobat handles overflows in exactly the same way we do:
     * 123450000000000000000678 is read as 678.
     *
     * The accumulation is done in unsigned arithmetic so that the
     * wraparound is well defined; overflowing a signed int would be
     * undefined behaviour.
     */
    u = u * 10u + (unsigned int)(*s - '0');
    ++s;
  }

  if (*s == '.')
  {
    /* Reinterpret the wrapped value as a signed int, as before. */
    float v = (float)(int)u;
    float n = 0;
    float d = 1;
    int digits = 0;
    ++s;
    while (*s >= '0' && *s <= '9' && digits < MAX_FRACTION_DIGITS)
    {
      n = 10 * n + (*s - '0');
      d = 10 * d;
      ++s;
      ++digits;
    }
    v += n / d;
    return neg ? -v : v;
  }
  else
  {
    /* Negate while still unsigned so that the most negative int
     * does not trigger a signed overflow. */
    return (float)(int)(neg ? 0u - u : u);
  }
}
160
161
/*
 * Fast but inaccurate atoi.
 *
 * Accepted syntax: any number of '-' characters (one or more makes the
 * result negative), then any number of '+' characters, then decimal
 * digits. Parsing stops at the first non-digit.
 *
 * s: zero-terminated string to parse; it is not modified.
 * Returns the parsed value. Values that do not fit wrap around modulo
 * 2^64 rather than being reported as errors.
 */
static int64_t fast_atoi(char *s)
{
  int neg = 0;
  uint64_t u = 0;

  while (*s == '-')
  {
    neg = 1;
    ++s;
  }
  while (*s == '+')
  {
    ++s;
  }

  while (*s >= '0' && *s <= '9')
  {
    /* We deliberately ignore overflow here. The accumulation is
     * unsigned so the wraparound is well defined; overflowing a
     * signed integer would be undefined behaviour. */
    u = u * 10u + (uint64_t)(*s - '0');
    ++s;
  }

  /* Negate while still unsigned so that INT64_MIN is handled safely. */
  if (neg)
    u = 0u - u;

  return (int64_t)u;
}
186
187
static int
188
lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
189
30.7M
{
190
30.7M
  char *s = buf->scratch;
191
30.7M
  char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
192
30.7M
  char *isreal = (c == '.' ? s : NULL);
193
30.7M
  int neg = (c == '-');
194
30.7M
  int isbad = 0;
195
196
30.7M
  *s++ = c;
197
198
30.7M
  c = lex_byte(ctx, f);
199
200
  /* skip extra '-' signs at start of number */
201
30.7M
  if (neg)
202
1.86M
  {
203
1.87M
    while (c == '-')
204
4.93k
      c = lex_byte(ctx, f);
205
1.86M
  }
206
207
129M
  while (s < e)
208
129M
  {
209
129M
    switch (c)
210
129M
    {
211
31.4M
    case IS_WHITE:
212
31.4M
    case IS_DELIM:
213
30.7M
      fz_unread_byte(ctx, f);
214
30.7M
      goto end;
215
3.87k
    case EOF:
216
3.87k
      goto end;
217
10.1M
    case '.':
218
10.1M
      if (isreal)
219
458k
        isbad = 1;
220
10.1M
      isreal = s;
221
10.1M
      *s++ = c;
222
10.1M
      break;
223
21.0k
    case '-':
224
      /* Bug 703248: Some PDFs (particularly those
225
       * generated by google docs) apparently have
226
       * numbers like 0.000000000000-5684342 in them.
227
       * We'll stop our interpretation at the -, but
228
       * keep reading to skip over the trailing
229
       * digits so they aren't parsed later. */
230
21.0k
      *s++ = '\0';
231
21.0k
      break;
232
85.2M
    case RANGE_0_9:
233
85.2M
      *s++ = c;
234
85.2M
      break;
235
3.50M
    default:
236
3.50M
      isbad = 1;
237
3.50M
      *s++ = c;
238
3.50M
      break;
239
129M
    }
240
98.9M
    c = lex_byte(ctx, f);
241
98.9M
  }
242
243
30.7M
end:
244
30.7M
  *s = '\0';
245
30.7M
  if (isbad)
246
459k
    return PDF_TOK_KEYWORD;
247
30.2M
  if (isreal)
248
9.60M
  {
249
    /* We'd like to use the fastest possible atof
250
     * routine, but we'd rather match acrobats
251
     * handling of broken numbers. As such, we
252
     * spot common broken cases and call an
253
     * acrobat compatible routine where required. */
254
9.60M
    if (neg > 1 || isreal - buf->scratch >= 10)
255
12.1k
      buf->f = acrobat_compatible_atof(buf->scratch);
256
9.59M
    else
257
9.59M
      buf->f = fz_atof(buf->scratch);
258
9.60M
    return PDF_TOK_REAL;
259
9.60M
  }
260
20.6M
  else
261
20.6M
  {
262
20.6M
    buf->i = fast_atoi(buf->scratch);
263
20.6M
    return PDF_TOK_INT;
264
20.6M
  }
265
30.2M
}
266
267
static void
268
lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
269
19.7M
{
270
19.7M
  char *s = lb->scratch;
271
19.7M
  char *e = s + fz_minz(127, lb->size);
272
19.7M
  int c;
273
274
97.5M
  while (1)
275
97.5M
  {
276
97.5M
    if (s == e)
277
2.73k
    {
278
2.73k
      if (e - lb->scratch < 127)
279
0
      {
280
0
        s += pdf_lexbuf_grow(ctx, lb);
281
0
        e = lb->scratch + fz_minz(127, lb->size);
282
0
      }
283
2.73k
      else
284
2.73k
      {
285
        /* truncate names that are too long */
286
2.73k
        fz_warn(ctx, "name is too long");
287
2.73k
        *s = 0;
288
2.73k
        lb->len = s - lb->scratch;
289
2.73k
        s = NULL;
290
2.73k
      }
291
2.73k
    }
292
97.5M
    c = lex_byte(ctx, f);
293
97.5M
    switch (c)
294
97.5M
    {
295
33.2M
    case IS_WHITE:
296
33.2M
    case IS_DELIM:
297
19.7M
      fz_unread_byte(ctx, f);
298
19.7M
      goto end;
299
23.3k
    case EOF:
300
23.3k
      goto end;
301
82.8k
    case '#':
302
82.8k
    {
303
82.8k
      int hex[2];
304
82.8k
      int i;
305
130k
      for (i = 0; i < 2; i++)
306
111k
      {
307
111k
        c = fz_peek_byte(ctx, f);
308
111k
        switch (c)
309
111k
        {
310
266k
        case RANGE_0_9:
311
266k
          if (i == 1 && c == '0' && hex[0] == 0)
312
45
            goto illegal;
313
34.9k
          hex[i] = lex_byte(ctx, f) - '0';
314
34.9k
          break;
315
5.69k
        case RANGE_a_f:
316
5.69k
          hex[i] = lex_byte(ctx, f) - 'a' + 10;
317
5.69k
          break;
318
6.74k
        case RANGE_A_F:
319
6.74k
          hex[i] = lex_byte(ctx, f) - 'A' + 10;
320
6.74k
          break;
321
64.0k
        default:
322
64.0k
          goto illegal;
323
64.0k
        case EOF:
324
13
          goto illegal_eof;
325
111k
        }
326
111k
      }
327
18.7k
      if (s) *s++ = (hex[0] << 4) + hex[1];
328
18.7k
      break;
329
64.1k
illegal:
330
64.1k
      if (i == 1)
331
9.93k
        fz_unread_byte(ctx, f);
332
64.1k
illegal_eof:
333
64.1k
      if (s) *s++ = '#';
334
64.1k
      continue;
335
64.1k
    }
336
77.7M
    default:
337
77.7M
      if (s) *s++ = c;
338
77.7M
      break;
339
97.5M
    }
340
97.5M
  }
341
19.7M
end:
342
19.7M
  if (s)
343
19.7M
  {
344
19.7M
    *s = '\0';
345
19.7M
    lb->len = s - lb->scratch;
346
19.7M
  }
347
19.7M
}
348
349
static int
350
lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
351
1.42M
{
352
1.42M
  char *s = lb->scratch;
353
1.42M
  char *e = s + lb->size;
354
1.42M
  int bal = 1;
355
1.42M
  int oct;
356
1.42M
  int c;
357
358
42.2M
  while (1)
359
42.2M
  {
360
42.2M
    if (s == e)
361
3.37k
    {
362
3.37k
      s += pdf_lexbuf_grow(ctx, lb);
363
3.37k
      e = lb->scratch + lb->size;
364
3.37k
    }
365
42.2M
    c = lex_byte(ctx, f);
366
42.2M
    switch (c)
367
42.2M
    {
368
4.14k
    case EOF:
369
4.14k
      return PDF_TOK_ERROR;
370
148k
    case '(':
371
148k
      bal++;
372
148k
      *s++ = c;
373
148k
      break;
374
1.52M
    case ')':
375
1.52M
      bal --;
376
1.52M
      if (bal == 0)
377
1.42M
        goto end;
378
105k
      *s++ = c;
379
105k
      break;
380
354k
    case '\\':
381
354k
      c = lex_byte(ctx, f);
382
354k
      switch (c)
383
354k
      {
384
7
      case EOF:
385
7
        return PDF_TOK_ERROR;
386
12.8k
      case 'n':
387
12.8k
        *s++ = '\n';
388
12.8k
        break;
389
10.4k
      case 'r':
390
10.4k
        *s++ = '\r';
391
10.4k
        break;
392
2.48k
      case 't':
393
2.48k
        *s++ = '\t';
394
2.48k
        break;
395
2.43k
      case 'b':
396
2.43k
        *s++ = '\b';
397
2.43k
        break;
398
3.44k
      case 'f':
399
3.44k
        *s++ = '\f';
400
3.44k
        break;
401
19.0k
      case '(':
402
19.0k
        *s++ = '(';
403
19.0k
        break;
404
17.0k
      case ')':
405
17.0k
        *s++ = ')';
406
17.0k
        break;
407
2.60k
      case '\\':
408
2.60k
        *s++ = '\\';
409
2.60k
        break;
410
210k
      case RANGE_0_7:
411
210k
        oct = c - '0';
412
210k
        c = lex_byte(ctx, f);
413
210k
        if (c >= '0' && c <= '7')
414
203k
        {
415
203k
          oct = oct * 8 + (c - '0');
416
203k
          c = lex_byte(ctx, f);
417
203k
          if (c >= '0' && c <= '7')
418
199k
            oct = oct * 8 + (c - '0');
419
3.81k
          else if (c != EOF)
420
3.81k
            fz_unread_byte(ctx, f);
421
203k
        }
422
7.33k
        else if (c != EOF)
423
7.32k
          fz_unread_byte(ctx, f);
424
210k
        *s++ = oct;
425
210k
        break;
426
857
      case '\n':
427
857
        break;
428
4.04k
      case '\r':
429
4.04k
        c = lex_byte(ctx, f);
430
4.04k
        if ((c != '\n') && (c != EOF))
431
4.04k
          fz_unread_byte(ctx, f);
432
4.04k
        break;
433
68.5k
      default:
434
68.5k
        *s++ = c;
435
354k
      }
436
354k
      break;
437
40.1M
    default:
438
40.1M
      *s++ = c;
439
40.1M
      break;
440
42.2M
    }
441
42.2M
  }
442
1.42M
end:
443
1.42M
  lb->len = s - lb->scratch;
444
1.42M
  return PDF_TOK_STRING;
445
1.42M
}
446
447
static int
448
lex_hex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
449
918k
{
450
918k
  char *s = lb->scratch;
451
918k
  char *e = s + lb->size;
452
918k
  int a = 0, x = 0;
453
918k
  int c;
454
455
13.4M
  while (1)
456
13.4M
  {
457
13.4M
    if (s == e)
458
349
    {
459
349
      s += pdf_lexbuf_grow(ctx, lb);
460
349
      e = lb->scratch + lb->size;
461
349
    }
462
13.4M
    c = lex_byte(ctx, f);
463
13.4M
    switch (c)
464
13.4M
    {
465
699k
    case IS_WHITE:
466
699k
      break;
467
2.66M
    default:
468
2.66M
      fz_warn(ctx, "invalid character in hex string");
469
      /* fall through */
470
11.7M
    case IS_HEX:
471
11.7M
      if (x)
472
5.87M
      {
473
5.87M
        *s++ = a * 16 + unhex(c);
474
5.87M
        x = !x;
475
5.87M
      }
476
5.90M
      else
477
5.90M
      {
478
5.90M
        a = unhex(c);
479
5.90M
        x = !x;
480
5.90M
      }
481
11.7M
      break;
482
916k
    case '>':
483
916k
      if (x)
484
32.1k
      {
485
32.1k
        *s++ = a * 16; /* pad truncated string with '0' */
486
32.1k
      }
487
916k
      goto end;
488
2.02k
    case EOF:
489
2.02k
      return PDF_TOK_ERROR;
490
13.4M
    }
491
13.4M
  }
492
916k
end:
493
916k
  lb->len = s - lb->scratch;
494
916k
  return PDF_TOK_STRING;
495
918k
}
496
497
static pdf_token
498
pdf_token_from_keyword(char *key)
499
11.4M
{
500
11.4M
  switch (*key)
501
11.4M
  {
502
1.95M
  case 'R':
503
1.95M
    if (!strcmp(key, "R")) return PDF_TOK_R;
504
41.2k
    break;
505
169k
  case 't':
506
169k
    if (!strcmp(key, "true")) return PDF_TOK_TRUE;
507
19.3k
    if (!strcmp(key, "trailer")) return PDF_TOK_TRAILER;
508
15.3k
    break;
509
286k
  case 'f':
510
286k
    if (!strcmp(key, "false")) return PDF_TOK_FALSE;
511
131k
    break;
512
313k
  case 'n':
513
313k
    if (!strcmp(key, "null")) return PDF_TOK_NULL;
514
297k
    if (!strcmp(key, "newobj")) return PDF_TOK_NEWOBJ;
515
297k
    break;
516
536k
  case 'o':
517
536k
    if (!strcmp(key, "obj")) return PDF_TOK_OBJ;
518
10.4k
    break;
519
404k
  case 'e':
520
404k
    if (!strcmp(key, "endobj")) return PDF_TOK_ENDOBJ;
521
98.2k
    if (!strcmp(key, "endstream")) return PDF_TOK_ENDSTREAM;
522
49.3k
    break;
523
290k
  case 's':
524
290k
    if (!strcmp(key, "stream")) return PDF_TOK_STREAM;
525
129k
    if (!strcmp(key, "startxref")) return PDF_TOK_STARTXREF;
526
126k
    break;
527
126k
  case 'x':
528
13.5k
    if (!strcmp(key, "xref")) return PDF_TOK_XREF;
529
10.1k
    break;
530
11.4M
  }
531
532
21.2M
  while (*key)
533
13.8M
  {
534
13.8M
    if (!fz_isprint(*key))
535
759k
      return PDF_TOK_ERROR;
536
13.1M
    ++key;
537
13.1M
  }
538
539
7.36M
  return PDF_TOK_KEYWORD;
540
8.12M
}
541
542
/*
 * Initialise a lexer buffer: point scratch at the buffer embedded in the
 * structure, record size as both the base size and the current size, and
 * reset the stored length to zero.
 */
void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lb, int size)
{
  lb->scratch = lb->buffer;
  lb->len = 0;
  lb->base_size = size;
  lb->size = lb->base_size;
}
548
549
/*
 * Release the storage of a lexer buffer. The scratch area is freed only
 * when the current size differs from the base size, which is the state
 * pdf_lexbuf_grow leaves behind after moving scratch to allocated memory.
 * A NULL lb is accepted and ignored.
 */
void pdf_lexbuf_fin(fz_context *ctx, pdf_lexbuf *lb)
{
  if (!lb)
    return;
  if (lb->size == lb->base_size)
    return;
  fz_free(ctx, lb->scratch);
}
554
555
/*
 * Double the capacity of a lexer buffer.
 *
 * The first time this is called (size still equal to base_size) new
 * storage is allocated and the contents of the embedded buffer are copied
 * into it; on later calls the existing allocation is resized.
 *
 * Returns the difference between the new and the old scratch address.
 * Callers add this to pointers into the old scratch area to re-base them.
 */
ptrdiff_t pdf_lexbuf_grow(fz_context *ctx, pdf_lexbuf *lb)
{
  char *before = lb->scratch;
  size_t doubled = lb->size * 2;

  if (lb->size != lb->base_size)
  {
    lb->scratch = fz_realloc(ctx, lb->scratch, doubled);
  }
  else
  {
    lb->scratch = Memento_label(fz_malloc(ctx, doubled), "pdf_lexbuf");
    memcpy(lb->scratch, lb->buffer, lb->size);
  }

  lb->size = doubled;
  return lb->scratch - before;
}
571
572
pdf_token
573
pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
574
55.4M
{
575
101M
  while (1)
576
101M
  {
577
101M
    int c = lex_byte(ctx, f);
578
101M
    switch (c)
579
101M
    {
580
78.9k
    case EOF:
581
78.9k
      return PDF_TOK_EOF;
582
45.6M
    case IS_WHITE:
583
45.6M
      lex_white(ctx, f);
584
45.6M
      break;
585
208k
    case '%':
586
208k
      lex_comment(ctx, f);
587
208k
      break;
588
8.23M
    case '/':
589
8.23M
      lex_name(ctx, f, buf);
590
8.23M
      return PDF_TOK_NAME;
591
1.42M
    case '(':
592
1.42M
      return lex_string(ctx, f, buf);
593
21.1k
    case ')':
594
21.1k
      return PDF_TOK_ERROR;
595
2.11M
    case '<':
596
2.11M
      c = lex_byte(ctx, f);
597
2.11M
      if (c == '<')
598
1.19M
        return PDF_TOK_OPEN_DICT;
599
918k
      if (c != EOF)
600
918k
        fz_unread_byte(ctx, f);
601
918k
      return lex_hex_string(ctx, f, buf);
602
1.17M
    case '>':
603
1.17M
      c = lex_byte(ctx, f);
604
1.17M
      if (c == '>')
605
1.06M
        return PDF_TOK_CLOSE_DICT;
606
117k
      if (c != EOF)
607
115k
        fz_unread_byte(ctx, f);
608
117k
      return PDF_TOK_ERROR;
609
1.13M
    case '[':
610
1.13M
      return PDF_TOK_OPEN_ARRAY;
611
1.11M
    case ']':
612
1.11M
      return PDF_TOK_CLOSE_ARRAY;
613
12.8k
    case '{':
614
12.8k
      return PDF_TOK_OPEN_BRACE;
615
12.8k
    case '}':
616
12.8k
      return PDF_TOK_CLOSE_BRACE;
617
29.7M
    case IS_NUMBER:
618
29.7M
      return lex_number(ctx, f, buf, c);
619
10.4M
    default: /* isregular: !isdelim && !iswhite && c != EOF */
620
10.4M
      fz_unread_byte(ctx, f);
621
10.4M
      lex_name(ctx, f, buf);
622
10.4M
      return pdf_token_from_keyword(buf->scratch);
623
101M
    }
624
101M
  }
625
55.4M
}
626
627
pdf_token
628
pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
629
2.41M
{
630
4.25M
  while (1)
631
4.25M
  {
632
4.25M
    int c = lex_byte(ctx, f);
633
4.25M
    switch (c)
634
4.25M
    {
635
3.61k
    case EOF:
636
3.61k
      return PDF_TOK_EOF;
637
1.80M
    case IS_WHITE:
638
1.80M
      lex_white(ctx, f);
639
1.80M
      break;
640
40.8k
    case '%':
641
40.8k
      lex_comment(ctx, f);
642
40.8k
      break;
643
100k
    case '/':
644
100k
      lex_name(ctx, f, buf);
645
100k
      return PDF_TOK_NAME;
646
31.9k
    case '(':
647
31.9k
      return PDF_TOK_ERROR; /* no strings allowed */
648
34.4k
    case ')':
649
34.4k
      return PDF_TOK_ERROR; /* no strings allowed */
650
51.6k
    case '<':
651
51.6k
      c = lex_byte(ctx, f);
652
51.6k
      if (c == '<')
653
7.85k
        return PDF_TOK_OPEN_DICT;
654
43.8k
      if (c != EOF)
655
43.8k
        fz_unread_byte(ctx, f);
656
43.8k
      return PDF_TOK_ERROR; /* no strings allowed */
657
51.6k
    case '>':
658
51.6k
      c = lex_byte(ctx, f);
659
51.6k
      if (c == '>')
660
11.3k
        return PDF_TOK_CLOSE_DICT;
661
40.3k
      if (c != EOF)
662
40.3k
        fz_unread_byte(ctx, f);
663
40.3k
      return PDF_TOK_ERROR;
664
42.1k
    case '[':
665
42.1k
      return PDF_TOK_OPEN_ARRAY;
666
58.4k
    case ']':
667
58.4k
      return PDF_TOK_CLOSE_ARRAY;
668
23.1k
    case '{':
669
23.1k
      return PDF_TOK_OPEN_BRACE;
670
22.5k
    case '}':
671
22.5k
      return PDF_TOK_CLOSE_BRACE;
672
1.02M
    case IS_NUMBER:
673
1.02M
      return lex_number(ctx, f, buf, c);
674
968k
    default: /* isregular: !isdelim && !iswhite && c != EOF */
675
968k
      fz_unread_byte(ctx, f);
676
968k
      lex_name(ctx, f, buf);
677
968k
      return pdf_token_from_keyword(buf->scratch);
678
4.25M
    }
679
4.25M
  }
680
2.41M
}
681
682
/*
 * Append the textual form of a token to fzbuf.
 *
 * tok selects the form; buf supplies the token's text or value:
 *   - names are written as '/' followed by buf->scratch;
 *   - strings are zero terminated in place (growing buf if there is no
 *     room for the terminator) and handed to fz_append_pdf_string;
 *   - dictionary, array and brace tokens are written as their literal
 *     punctuation;
 *   - integers and reals are formatted from buf->i and buf->f;
 *   - every other token is written as the first buf->len bytes of
 *     buf->scratch.
 */
void pdf_append_token(fz_context *ctx, fz_buffer *fzbuf, int tok, pdf_lexbuf *buf)
{
  switch (tok)
  {
  case PDF_TOK_OPEN_DICT:
    fz_append_string(ctx, fzbuf, "<<");
    return;
  case PDF_TOK_CLOSE_DICT:
    fz_append_string(ctx, fzbuf, ">>");
    return;

  case PDF_TOK_OPEN_ARRAY:
    fz_append_byte(ctx, fzbuf, '[');
    return;
  case PDF_TOK_CLOSE_ARRAY:
    fz_append_byte(ctx, fzbuf, ']');
    return;
  case PDF_TOK_OPEN_BRACE:
    fz_append_byte(ctx, fzbuf, '{');
    return;
  case PDF_TOK_CLOSE_BRACE:
    fz_append_byte(ctx, fzbuf, '}');
    return;

  case PDF_TOK_INT:
    fz_append_printf(ctx, fzbuf, "%ld", buf->i);
    return;
  case PDF_TOK_REAL:
    fz_append_printf(ctx, fzbuf, "%g", buf->f);
    return;

  case PDF_TOK_NAME:
    fz_append_printf(ctx, fzbuf, "/%s", buf->scratch);
    return;

  case PDF_TOK_STRING:
    /* Make sure there is room for the terminator written below. */
    if (buf->len >= buf->size)
      pdf_lexbuf_grow(ctx, buf);
    buf->scratch[buf->len] = 0;
    fz_append_pdf_string(ctx, fzbuf, buf->scratch);
    return;

  default:
    fz_append_data(ctx, fzbuf, buf->scratch, buf->len);
    return;
  }
}