Coverage Report

Created: 2025-09-04 06:50

/src/mupdf/source/pdf/pdf-lex.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
/*
 * Character-class macros for use as switch labels.
 *
 * Each macro expands to a list of character constants joined by `:case`,
 * without the leading `case` keyword and without the trailing colon. Writing
 * `case IS_WHITE:` in a switch therefore yields one case label per listed
 * character.
 *
 *   IS_NUMBER  - '+', '-', '.', and the decimal digits
 *   IS_WHITE   - bytes 0x00, 0x09, 0x0a, 0x0c, 0x0d and 0x20
 *   IS_HEX     - decimal digits plus 'A'-'F' and 'a'-'f'
 *   IS_DELIM   - ( ) < > [ ] { } / %
 *   RANGE_0_9, RANGE_a_f, RANGE_A_F, RANGE_0_7 - the named character ranges
 */
#define IS_NUMBER \
29
101k
  '+':case'-':case'.':case'0':case'1':case'2':case'3':\
30
160k
  case'4':case'5':case'6':case'7':case'8':case'9'
31
#define IS_WHITE \
32
807k
  '\x00':case'\x09':case'\x0a':case'\x0c':case'\x0d':case'\x20'
33
#define IS_HEX \
34
209k
  '0':case'1':case'2':case'3':case'4':case'5':case'6':\
35
264k
  case'7':case'8':case'9':case'A':case'B':case'C':\
36
293k
  case'D':case'E':case'F':case'a':case'b':case'c':\
37
293k
  case'd':case'e':case'f'
38
#define IS_DELIM \
39
405k
  '(':case')':case'<':case'>':case'[':case']':case'{':\
40
425k
  case'}':case'/':case'%'
41
42
#define RANGE_0_9 \
43
131k
  '0':case'1':case'2':case'3':case'4':case'5':\
44
205k
  case'6':case'7':case'8':case'9'
45
#define RANGE_a_f \
46
120
  'a':case'b':case'c':case'd':case'e':case'f'
47
#define RANGE_A_F \
48
91
  'A':case'B':case'C':case'D':case'E':case'F'
49
#define RANGE_0_7 \
50
0
  '0':case'1':case'2':case'3':case'4':case'5':case'6':case'7'
51
52
/* #define DUMP_LEXER_STREAM */
53
#ifdef DUMP_LEXER_STREAM
/*
 * Debug variant of lex_byte: reads one byte from the stream and echoes it
 * to the stdout output before handing it back to the caller. EOF is shown
 * as "<EOF>", values in [32, 128) as the character itself, and everything
 * else as a two-digit hex escape.
 */
static inline int lex_byte(fz_context *ctx, fz_stream *stm)
{
  int byte = fz_read_byte(ctx, stm);

  if (byte == EOF)
    fz_write_printf(ctx, fz_stdout(ctx), "<EOF>");
  else if (byte < 32 || byte >= 128)
    fz_write_printf(ctx, fz_stdout(ctx), "<%02x>", byte);
  else
    fz_write_printf(ctx, fz_stdout(ctx), "%c", byte);

  return byte;
}
#else
/* Without DUMP_LEXER_STREAM, lex_byte is a plain alias for fz_read_byte. */
#define lex_byte(C,S) fz_read_byte(C,S)
#endif
69
70
/*
 * Return 1 if ch is one of the six whitespace bytes recognised by the lexer
 * (NUL, TAB, LF, FF, CR, SPACE), otherwise 0.
 */
static inline int iswhite(int ch)
{
  switch (ch)
  {
  case '\0':
  case '\t':
  case '\n':
  case '\f':
  case '\r':
  case ' ':
    return 1;
  default:
    return 0;
  }
}
80
81
/* Return 1 if ch lies in the range ' ' through '~' inclusive, otherwise 0. */
static inline int fz_isprint(int ch)
{
  return !(ch < ' ' || ch > '~');
}
85
86
/*
 * Convert one hexadecimal digit character to its value (0-15).
 * Any character that is not a hex digit yields 0.
 */
static inline int unhex(int ch)
{
  /* Setting bit 5 maps 'A'-'F' onto 'a'-'f' and leaves 'a'-'f' unchanged. */
  int folded = ch | 0x20;

  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('a' <= folded && folded <= 'f')
    return 10 + (folded - 'a');
  return 0;
}
93
94
static void
95
lex_white(fz_context *ctx, fz_stream *f)
96
441k
{
97
441k
  int c;
98
838k
  do {
99
838k
    c = lex_byte(ctx, f);
100
838k
  } while ((c <= 32) && (iswhite(c)));
101
441k
  if (c != EOF)
102
441k
    fz_unread_byte(ctx, f);
103
441k
}
104
105
static void
106
lex_comment(fz_context *ctx, fz_stream *f)
107
4.67k
{
108
4.67k
  int c;
109
1.10M
  do {
110
1.10M
    c = lex_byte(ctx, f);
111
1.10M
  } while ((c != '\012') && (c != '\015') && (c != EOF));
112
4.67k
}
113
114
/*
 * Fast(ish) but inaccurate strtof, with Adobe overflow handling.
 *
 * Accepted syntax: any number of '-' signs (one or more makes the result
 * negative), then any number of '+' signs, then decimal digits, then an
 * optional '.' followed by more decimal digits. Parsing stops at the first
 * character that does not fit; no error is reported.
 *
 * Overflow of the integer part is deliberately not detected: the value
 * wraps modulo 2^N, where N is the width of unsigned int. The accumulation
 * is done in unsigned arithmetic so that this wrap is well defined (signed
 * overflow would be undefined behaviour). The final conversion back to int
 * is implementation-defined for values above INT_MAX; on the usual two's
 * complement targets it simply reinterprets the bits.
 *
 * The original author's note states that tests showed Acrobat handling
 * overflow the same way, e.g. reading 123450000000000000000678 as 678.
 * NOTE(review): that example has not been re-verified against this code.
 */
static float acrobat_compatible_atof(const char *s)
{
  int neg = 0;
  unsigned int whole = 0;

  while (*s == '-')
  {
    neg = 1;
    ++s;
  }
  while (*s == '+')
  {
    ++s;
  }

  while (*s >= '0' && *s <= '9')
  {
    /* Unsigned arithmetic: wraps on overflow instead of invoking UB. */
    whole = whole * 10u + (unsigned int)(*s - '0');
    ++s;
  }

  if (*s == '.')
  {
    float v = (int)whole;
    float n = 0;
    float d = 1;
    ++s;
    while (*s >= '0' && *s <= '9')
    {
      n = 10 * n + (*s - '0');
      d = 10 * d;
      ++s;
    }
    v += n / d;
    return neg ? -v : v;
  }

  /* Negate while still unsigned so that INT_MIN does not overflow. */
  if (neg)
    whole = 0u - whole;
  return (float)(int)whole;
}
160
161
/*
 * Fast but inaccurate atoi.
 *
 * Accepted syntax: any number of '-' signs (one or more makes the result
 * negative), then any number of '+' signs, then decimal digits. Parsing
 * stops at the first non-digit; no error is reported and an input without
 * digits yields 0.
 *
 * Overflow is deliberately ignored: the value wraps modulo 2^64. The
 * accumulation and the negation are done in uint64_t so that the wrap is
 * well defined (signed overflow would be undefined behaviour). Converting
 * the result back to int64_t is implementation-defined for values above
 * INT64_MAX; on two's complement targets it reinterprets the bits.
 */
static int64_t fast_atoi(const char *s)
{
  int neg = 0;
  uint64_t acc = 0;

  while (*s == '-')
  {
    neg = 1;
    ++s;
  }
  while (*s == '+')
  {
    ++s;
  }

  while (*s >= '0' && *s <= '9')
  {
    acc = acc * 10u + (uint64_t)(*s - '0');
    ++s;
  }

  if (neg)
    acc = 0u - acc;
  return (int64_t)acc;
}
186
187
static int
188
lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
189
160k
{
190
160k
  char *s = buf->scratch;
191
160k
  char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
192
160k
  char *isreal = (c == '.' ? s : NULL);
193
160k
  int neg = (c == '-');
194
160k
  int isbad = 0;
195
196
160k
  *s++ = c;
197
198
160k
  c = lex_byte(ctx, f);
199
200
  /* skip extra '-' signs at start of number */
201
160k
  if (neg)
202
802
  {
203
804
    while (c == '-')
204
2
      c = lex_byte(ctx, f);
205
802
  }
206
207
417k
  while (s < e)
208
417k
  {
209
417k
    switch (c)
210
417k
    {
211
291k
    case IS_WHITE:
212
291k
    case IS_DELIM:
213
160k
      fz_unread_byte(ctx, f);
214
160k
      goto end;
215
0
    case EOF:
216
0
      goto end;
217
2.38k
    case '.':
218
2.38k
      if (isreal)
219
89
        isbad = 1;
220
2.38k
      isreal = s;
221
2.38k
      *s++ = c;
222
2.38k
      break;
223
236
    case '-':
224
      /* Bug 703248: Some PDFs (particularly those
225
       * generated by google docs) apparently have
226
       * numbers like 0.000000000000-5684342 in them.
227
       * We'll stop our interpretation at the -, but
228
       * keep reading to skip over the trailing
229
       * digits so they aren't parsed later. */
230
236
      *s++ = '\0';
231
236
      break;
232
204k
    case RANGE_0_9:
233
204k
      *s++ = c;
234
204k
      break;
235
50.6k
    default:
236
50.6k
      isbad = 1;
237
50.6k
      *s++ = c;
238
50.6k
      break;
239
417k
    }
240
257k
    c = lex_byte(ctx, f);
241
257k
  }
242
243
160k
end:
244
160k
  *s = '\0';
245
160k
  if (isbad)
246
3.51k
    return PDF_TOK_KEYWORD;
247
156k
  if (isreal)
248
6.43k
  {
249
    /* We'd like to use the fastest possible atof
250
     * routine, but we'd rather match acrobats
251
     * handling of broken numbers. As such, we
252
     * spot common broken cases and call an
253
     * acrobat compatible routine where required. */
254
6.43k
    if (neg > 1 || isreal - buf->scratch >= 10)
255
0
      buf->f = acrobat_compatible_atof(buf->scratch);
256
6.43k
    else
257
6.43k
      buf->f = fz_atof(buf->scratch);
258
6.43k
    return PDF_TOK_REAL;
259
6.43k
  }
260
150k
  else
261
150k
  {
262
150k
    buf->i = fast_atoi(buf->scratch);
263
150k
    return PDF_TOK_INT;
264
150k
  }
265
156k
}
266
267
static void
268
lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
269
265k
{
270
265k
  char *s = lb->scratch;
271
265k
  char *e = s + fz_minz(127, lb->size);
272
265k
  int c;
273
274
1.87M
  while (1)
275
1.87M
  {
276
1.87M
    if (s == e)
277
30
    {
278
30
      if (e - lb->scratch < 127)
279
0
      {
280
0
        s += pdf_lexbuf_grow(ctx, lb);
281
0
        e = lb->scratch + fz_minz(127, lb->size);
282
0
      }
283
30
      else
284
30
      {
285
        /* truncate names that are too long */
286
30
        fz_warn(ctx, "name is too long");
287
30
        *s = 0;
288
30
        lb->len = s - lb->scratch;
289
30
        s = NULL;
290
30
      }
291
30
    }
292
1.87M
    c = lex_byte(ctx, f);
293
1.87M
    switch (c)
294
1.87M
    {
295
823k
    case IS_WHITE:
296
823k
    case IS_DELIM:
297
265k
      fz_unread_byte(ctx, f);
298
265k
      goto end;
299
2
    case EOF:
300
2
      goto end;
301
6.64k
    case '#':
302
6.64k
    {
303
6.64k
      int hex[2];
304
6.64k
      int i;
305
8.29k
      for (i = 0; i < 2; i++)
306
8.27k
      {
307
8.27k
        c = fz_peek_byte(ctx, f);
308
8.27k
        switch (c)
309
8.27k
        {
310
12.4k
        case RANGE_0_9:
311
12.4k
          if (i == 1 && c == '0' && hex[0] == 0)
312
0
            goto illegal;
313
1.44k
          hex[i] = lex_byte(ctx, f) - '0';
314
1.44k
          break;
315
120
        case RANGE_a_f:
316
120
          hex[i] = lex_byte(ctx, f) - 'a' + 10;
317
120
          break;
318
91
        case RANGE_A_F:
319
91
          hex[i] = lex_byte(ctx, f) - 'A' + 10;
320
91
          break;
321
6.62k
        default:
322
6.62k
          goto illegal;
323
6.62k
        case EOF:
324
0
          goto illegal_eof;
325
8.27k
        }
326
8.27k
      }
327
21
      if (s) *s++ = (hex[0] << 4) + hex[1];
328
21
      break;
329
6.62k
illegal:
330
6.62k
      if (i == 1)
331
1.61k
        fz_unread_byte(ctx, f);
332
6.62k
illegal_eof:
333
6.62k
      if (s) *s++ = '#';
334
6.62k
      continue;
335
6.62k
    }
336
1.60M
    default:
337
1.60M
      if (s) *s++ = c;
338
1.60M
      break;
339
1.87M
    }
340
1.87M
  }
341
265k
end:
342
265k
  if (s)
343
265k
  {
344
265k
    *s = '\0';
345
265k
    lb->len = s - lb->scratch;
346
265k
  }
347
265k
}
348
349
static int
350
lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
351
289
{
352
289
  char *s = lb->scratch;
353
289
  char *e = s + lb->size;
354
289
  int bal = 1;
355
289
  int oct;
356
289
  int c;
357
358
3.81k
  while (1)
359
3.81k
  {
360
3.81k
    if (s == e)
361
4
    {
362
4
      s += pdf_lexbuf_grow(ctx, lb);
363
4
      e = lb->scratch + lb->size;
364
4
    }
365
3.81k
    c = lex_byte(ctx, f);
366
3.81k
    switch (c)
367
3.81k
    {
368
2
    case EOF:
369
2
      return PDF_TOK_ERROR;
370
94
    case '(':
371
94
      bal++;
372
94
      *s++ = c;
373
94
      break;
374
339
    case ')':
375
339
      bal --;
376
339
      if (bal == 0)
377
287
        goto end;
378
52
      *s++ = c;
379
52
      break;
380
2
    case '\\':
381
2
      c = lex_byte(ctx, f);
382
2
      switch (c)
383
2
      {
384
0
      case EOF:
385
0
        return PDF_TOK_ERROR;
386
0
      case 'n':
387
0
        *s++ = '\n';
388
0
        break;
389
0
      case 'r':
390
0
        *s++ = '\r';
391
0
        break;
392
0
      case 't':
393
0
        *s++ = '\t';
394
0
        break;
395
0
      case 'b':
396
0
        *s++ = '\b';
397
0
        break;
398
0
      case 'f':
399
0
        *s++ = '\f';
400
0
        break;
401
1
      case '(':
402
1
        *s++ = '(';
403
1
        break;
404
1
      case ')':
405
1
        *s++ = ')';
406
1
        break;
407
0
      case '\\':
408
0
        *s++ = '\\';
409
0
        break;
410
0
      case RANGE_0_7:
411
0
        oct = c - '0';
412
0
        c = lex_byte(ctx, f);
413
0
        if (c >= '0' && c <= '7')
414
0
        {
415
0
          oct = oct * 8 + (c - '0');
416
0
          c = lex_byte(ctx, f);
417
0
          if (c >= '0' && c <= '7')
418
0
            oct = oct * 8 + (c - '0');
419
0
          else if (c != EOF)
420
0
            fz_unread_byte(ctx, f);
421
0
        }
422
0
        else if (c != EOF)
423
0
          fz_unread_byte(ctx, f);
424
0
        *s++ = oct;
425
0
        break;
426
0
      case '\n':
427
0
        break;
428
0
      case '\r':
429
0
        c = lex_byte(ctx, f);
430
0
        if ((c != '\n') && (c != EOF))
431
0
          fz_unread_byte(ctx, f);
432
0
        break;
433
0
      default:
434
0
        *s++ = c;
435
2
      }
436
2
      break;
437
    /* Bug 708256: PDF 32000-1 says that any occurence of \n, \r, or \r\n in a
438
     * (unless escaped with a '\') should be interpreted as a single 0x0a byte. */
439
13
    case '\n':
440
13
      *s++ = 0x0a;
441
13
      break;
442
77
    case '\r':
443
77
      *s++ = 0x0a;
444
77
      c = lex_byte(ctx, f);
445
77
      if ((c != '\n') && (c != EOF))
446
3
        fz_unread_byte(ctx, f);
447
77
      break;
448
3.28k
    default:
449
3.28k
      *s++ = c;
450
3.28k
      break;
451
3.81k
    }
452
3.81k
  }
453
287
end:
454
287
  lb->len = s - lb->scratch;
455
287
  return PDF_TOK_STRING;
456
289
}
457
458
static int
459
lex_hex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
460
66.8k
{
461
66.8k
  char *s = lb->scratch;
462
66.8k
  char *e = s + lb->size;
463
66.8k
  int a = 0, x = 0;
464
66.8k
  int c;
465
466
360k
  while (1)
467
360k
  {
468
360k
    if (s == e)
469
0
    {
470
0
      s += pdf_lexbuf_grow(ctx, lb);
471
0
      e = lb->scratch + lb->size;
472
0
    }
473
360k
    c = lex_byte(ctx, f);
474
360k
    switch (c)
475
360k
    {
476
0
    case IS_WHITE:
477
0
      break;
478
0
    default:
479
0
      fz_warn(ctx, "invalid character in hex string");
480
      /* fall through */
481
293k
    case IS_HEX:
482
293k
      if (x)
483
146k
      {
484
146k
        *s++ = a * 16 + unhex(c);
485
146k
        x = !x;
486
146k
      }
487
146k
      else
488
146k
      {
489
146k
        a = unhex(c);
490
146k
        x = !x;
491
146k
      }
492
293k
      break;
493
66.8k
    case '>':
494
66.8k
      if (x)
495
1
      {
496
1
        *s++ = a * 16; /* pad truncated string with '0' */
497
1
      }
498
66.8k
      goto end;
499
0
    case EOF:
500
0
      return PDF_TOK_ERROR;
501
360k
    }
502
360k
  }
503
66.8k
end:
504
66.8k
  lb->len = s - lb->scratch;
505
66.8k
  return PDF_TOK_STRING;
506
66.8k
}
507
508
static pdf_token
509
pdf_token_from_keyword(char *key)
510
218k
{
511
218k
  switch (*key)
512
218k
  {
513
22.1k
  case 'R':
514
22.1k
    if (!strcmp(key, "R")) return PDF_TOK_R;
515
265
    break;
516
6.83k
  case 't':
517
6.83k
    if (!strcmp(key, "true")) return PDF_TOK_TRUE;
518
6.80k
    if (!strcmp(key, "trailer")) return PDF_TOK_TRAILER;
519
6.80k
    break;
520
6.80k
  case 'f':
521
1.35k
    if (!strcmp(key, "false")) return PDF_TOK_FALSE;
522
946
    break;
523
4.66k
  case 'n':
524
4.66k
    if (!strcmp(key, "null")) return PDF_TOK_NULL;
525
4.66k
    if (!strcmp(key, "newobj")) return PDF_TOK_NEWOBJ;
526
4.66k
    break;
527
7.30k
  case 'o':
528
7.30k
    if (!strcmp(key, "obj")) return PDF_TOK_OBJ;
529
5.97k
    break;
530
8.77k
  case 'e':
531
8.77k
    if (!strcmp(key, "endobj")) return PDF_TOK_ENDOBJ;
532
7.80k
    if (!strcmp(key, "endstream")) return PDF_TOK_ENDSTREAM;
533
7.51k
    break;
534
8.58k
  case 's':
535
8.58k
    if (!strcmp(key, "stream")) return PDF_TOK_STREAM;
536
8.10k
    if (!strcmp(key, "startxref")) return PDF_TOK_STARTXREF;
537
8.10k
    break;
538
8.10k
  case 'x':
539
579
    if (!strcmp(key, "xref")) return PDF_TOK_XREF;
540
579
    break;
541
218k
  }
542
543
397k
  while (*key)
544
283k
  {
545
283k
    if (!fz_isprint(*key))
546
79.2k
      return PDF_TOK_ERROR;
547
204k
    ++key;
548
204k
  }
549
550
114k
  return PDF_TOK_KEYWORD;
551
193k
}
552
553
/*
 * Initialise a lexer buffer: scratch points at the buffer embedded in the
 * structure, the length is zero, and both the current and base sizes are
 * set to size.
 */
void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lb, int size)
{
  lb->scratch = &lb->buffer[0];
  lb->len = 0;
  lb->base_size = size;
  lb->size = lb->base_size;
}
559
560
/*
 * Release the scratch storage of a lexer buffer. Only a buffer whose size
 * differs from its base size (i.e. one that pdf_lexbuf_grow has enlarged)
 * has anything to free. A NULL lb is accepted and ignored.
 */
void pdf_lexbuf_fin(fz_context *ctx, pdf_lexbuf *lb)
{
  if (lb == NULL)
    return;
  if (lb->size == lb->base_size)
    return;

  fz_free(ctx, lb->scratch);
}
565
566
/*
 * Double the capacity of a lexer buffer.
 *
 * The first growth (size still equal to base_size) allocates a new block
 * and copies the embedded buffer into it; later growths reallocate the
 * existing block. Returns the distance from the old scratch pointer to the
 * new one, which callers add to pointers they hold into the old storage.
 */
ptrdiff_t pdf_lexbuf_grow(fz_context *ctx, pdf_lexbuf *lb)
{
  char *before = lb->scratch;
  size_t doubled = lb->size * 2;
  char *grown;

  if (lb->size != lb->base_size)
  {
    grown = fz_realloc(ctx, lb->scratch, doubled);
  }
  else
  {
    grown = Memento_label(fz_malloc(ctx, doubled), "pdf_lexbuf");
    memcpy(grown, lb->buffer, lb->size);
  }

  lb->scratch = grown;
  lb->size = doubled;
  return lb->scratch - before;
}
582
583
pdf_token
584
pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
585
240k
{
586
464k
  while (1)
587
464k
  {
588
464k
    int c = lex_byte(ctx, f);
589
464k
    switch (c)
590
464k
    {
591
27
    case EOF:
592
27
      return PDF_TOK_EOF;
593
223k
    case IS_WHITE:
594
223k
      lex_white(ctx, f);
595
223k
      break;
596
26
    case '%':
597
26
      lex_comment(ctx, f);
598
26
      break;
599
39.4k
    case '/':
600
39.4k
      lex_name(ctx, f, buf);
601
39.4k
      return PDF_TOK_NAME;
602
289
    case '(':
603
289
      return lex_string(ctx, f, buf);
604
213
    case ')':
605
213
      return PDF_TOK_ERROR;
606
69.8k
    case '<':
607
69.8k
      c = lex_byte(ctx, f);
608
69.8k
      if (c == '<')
609
2.96k
        return PDF_TOK_OPEN_DICT;
610
66.8k
      if (c != EOF)
611
66.8k
        fz_unread_byte(ctx, f);
612
66.8k
      return lex_hex_string(ctx, f, buf);
613
2.59k
    case '>':
614
2.59k
      c = lex_byte(ctx, f);
615
2.59k
      if (c == '>')
616
2.59k
        return PDF_TOK_CLOSE_DICT;
617
0
      if (c != EOF)
618
0
        fz_unread_byte(ctx, f);
619
0
      return PDF_TOK_ERROR;
620
1.75k
    case '[':
621
1.75k
      return PDF_TOK_OPEN_ARRAY;
622
1.71k
    case ']':
623
1.71k
      return PDF_TOK_CLOSE_ARRAY;
624
0
    case '{':
625
0
      return PDF_TOK_OPEN_BRACE;
626
0
    case '}':
627
0
      return PDF_TOK_CLOSE_BRACE;
628
99.3k
    case IS_NUMBER:
629
99.3k
      return lex_number(ctx, f, buf, c);
630
25.5k
    default: /* isregular: !isdelim && !iswhite && c != EOF */
631
25.5k
      fz_unread_byte(ctx, f);
632
25.5k
      lex_name(ctx, f, buf);
633
25.5k
      return pdf_token_from_keyword(buf->scratch);
634
464k
    }
635
464k
  }
636
240k
}
637
638
pdf_token
639
pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
640
305k
{
641
527k
  while (1)
642
527k
  {
643
527k
    int c = lex_byte(ctx, f);
644
527k
    switch (c)
645
527k
    {
646
3
    case EOF:
647
3
      return PDF_TOK_EOF;
648
217k
    case IS_WHITE:
649
217k
      lex_white(ctx, f);
650
217k
      break;
651
4.64k
    case '%':
652
4.64k
      lex_comment(ctx, f);
653
4.64k
      break;
654
7.20k
    case '/':
655
7.20k
      lex_name(ctx, f, buf);
656
7.20k
      return PDF_TOK_NAME;
657
4.61k
    case '(':
658
4.61k
      return PDF_TOK_ERROR; /* no strings allowed */
659
4.64k
    case ')':
660
4.64k
      return PDF_TOK_ERROR; /* no strings allowed */
661
7.33k
    case '<':
662
7.33k
      c = lex_byte(ctx, f);
663
7.33k
      if (c == '<')
664
16
        return PDF_TOK_OPEN_DICT;
665
7.31k
      if (c != EOF)
666
7.31k
        fz_unread_byte(ctx, f);
667
7.31k
      return PDF_TOK_ERROR; /* no strings allowed */
668
9.05k
    case '>':
669
9.05k
      c = lex_byte(ctx, f);
670
9.05k
      if (c == '>')
671
50
        return PDF_TOK_CLOSE_DICT;
672
9.00k
      if (c != EOF)
673
9.00k
        fz_unread_byte(ctx, f);
674
9.00k
      return PDF_TOK_ERROR;
675
4.55k
    case '[':
676
4.55k
      return PDF_TOK_OPEN_ARRAY;
677
4.67k
    case ']':
678
4.67k
      return PDF_TOK_CLOSE_ARRAY;
679
4.56k
    case '{':
680
4.56k
      return PDF_TOK_OPEN_BRACE;
681
4.60k
    case '}':
682
4.60k
      return PDF_TOK_CLOSE_BRACE;
683
60.7k
    case IS_NUMBER:
684
60.7k
      return lex_number(ctx, f, buf, c);
685
193k
    default: /* isregular: !isdelim && !iswhite && c != EOF */
686
193k
      fz_unread_byte(ctx, f);
687
193k
      lex_name(ctx, f, buf);
688
193k
      return pdf_token_from_keyword(buf->scratch);
689
527k
    }
690
527k
  }
691
305k
}
692
693
/*
 * Append the textual form of a token to fzbuf.
 *
 * tok selects the form; buf supplies the token's text (scratch/len) or
 * value (i/f) where one is needed. Token kinds without a dedicated form
 * are written as the raw bytes held in buf->scratch.
 */
void pdf_append_token(fz_context *ctx, fz_buffer *fzbuf, int tok, pdf_lexbuf *buf)
{
  switch (tok)
  {
  /* Single-character punctuation. */
  case PDF_TOK_OPEN_ARRAY:
    fz_append_byte(ctx, fzbuf, '[');
    return;
  case PDF_TOK_CLOSE_ARRAY:
    fz_append_byte(ctx, fzbuf, ']');
    return;
  case PDF_TOK_OPEN_BRACE:
    fz_append_byte(ctx, fzbuf, '{');
    return;
  case PDF_TOK_CLOSE_BRACE:
    fz_append_byte(ctx, fzbuf, '}');
    return;

  /* Dictionary brackets. */
  case PDF_TOK_OPEN_DICT:
    fz_append_string(ctx, fzbuf, "<<");
    return;
  case PDF_TOK_CLOSE_DICT:
    fz_append_string(ctx, fzbuf, ">>");
    return;

  /* Numbers. */
  case PDF_TOK_INT:
    fz_append_printf(ctx, fzbuf, "%ld", buf->i);
    return;
  case PDF_TOK_REAL:
    fz_append_printf(ctx, fzbuf, "%g", buf->f);
    return;

  /* Text-carrying tokens. */
  case PDF_TOK_NAME:
    fz_append_printf(ctx, fzbuf, "/%s", buf->scratch);
    return;
  case PDF_TOK_STRING:
    /* Make room for, and write, a zero terminator after the string bytes. */
    if (buf->len >= buf->size)
      pdf_lexbuf_grow(ctx, buf);
    buf->scratch[buf->len] = 0;
    fz_append_pdf_string(ctx, fzbuf, buf->scratch);
    return;

  default:
    fz_append_data(ctx, fzbuf, buf->scratch, buf->len);
    return;
  }
}