Coverage Report

Created: 2025-12-31 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/pdf/pdf-lex.c
Line
Count
Source
1
// Copyright (C) 2004-2024 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
#include "mupdf/pdf.h"
25
26
#include <string.h>
27
28
/*
 * Character-class macros for use as switch labels. Each one expands to a
 * list of character constants joined by ":case", so it must be written
 * as "case IS_WHITE:" (and so on) inside a switch statement.
 */

/* Decimal digits. */
#define RANGE_0_9 \
	'0':case'1':case'2':case'3':case'4':case'5':\
	case'6':case'7':case'8':case'9'
/* Lower-case hexadecimal letters. */
#define RANGE_a_f \
	'a':case'b':case'c':case'd':case'e':case'f'
/* Upper-case hexadecimal letters. */
#define RANGE_A_F \
	'A':case'B':case'C':case'D':case'E':case'F'
/* Octal digits. */
#define RANGE_0_7 \
	'0':case'1':case'2':case'3':case'4':case'5':case'6':case'7'

/* Bytes that may start a number: a sign, a decimal point or a digit. */
#define IS_NUMBER \
	'+':case'-':case'.':case RANGE_0_9
/* Whitespace bytes: NUL, TAB, LF, FF, CR and SPACE. */
#define IS_WHITE \
	'\x00':case'\x09':case'\x0a':case'\x0c':case'\x0d':case'\x20'
/* Hexadecimal digits in either case. */
#define IS_HEX \
	RANGE_0_9:case RANGE_a_f:case RANGE_A_F
/* Delimiter bytes. */
#define IS_DELIM \
	'(':case')':case'<':case'>':case'[':case']':case'{':\
	case'}':case'/':case'%'
51
52
/* Define DUMP_LEXER_STREAM to echo every byte the lexer reads to stdout. */
/* #define DUMP_LEXER_STREAM */
#ifdef DUMP_LEXER_STREAM
/*
 * Debug variant: read one byte from stm, print it, and return it.
 * EOF is printed as "<EOF>", bytes in the range 32..127 verbatim and
 * every other byte as a two-digit hex escape.
 */
static inline int lex_byte(fz_context *ctx, fz_stream *stm)
{
	int byte = fz_read_byte(ctx, stm);

	if (byte == EOF)
	{
		fz_write_printf(ctx, fz_stdout(ctx), "<EOF>");
		return byte;
	}

	if (byte < 32 || byte >= 128)
		fz_write_printf(ctx, fz_stdout(ctx), "<%02x>", byte);
	else
		fz_write_printf(ctx, fz_stdout(ctx), "%c", byte);
	return byte;
}
#else
/* Normal variant: a plain alias for fz_read_byte. */
#define lex_byte(C,S) fz_read_byte(C,S)
#endif
69
70
/* Return 1 if ch is NUL, TAB, LF, FF, CR or SPACE, and 0 otherwise. */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
80
81
/* Return 1 if ch lies in the range ' ' .. '~' inclusive, and 0 otherwise. */
static inline int fz_isprint(int ch)
{
	if (ch < ' ')
		return 0;
	return ch <= '~';
}
85
86
/*
 * Convert one hexadecimal digit (either case) to its value 0..15.
 * Any other input yields 0.
 */
static inline int unhex(int ch)
{
	int value = 0;

	if (ch >= 'a' && ch <= 'f')
		value = ch - 'a' + 0xA;
	else if (ch >= 'A' && ch <= 'F')
		value = ch - 'A' + 0xA;
	else if (ch >= '0' && ch <= '9')
		value = ch - '0';

	return value;
}
93
94
static void
95
lex_white(fz_context *ctx, fz_stream *f)
96
631k
{
97
631k
  int c;
98
1.03M
  do {
99
1.03M
    c = lex_byte(ctx, f);
100
1.03M
  } while ((c <= 32) && (iswhite(c)));
101
631k
  if (c != EOF)
102
631k
    fz_unread_byte(ctx, f);
103
631k
}
104
105
static void
106
lex_comment(fz_context *ctx, fz_stream *f)
107
1.96k
{
108
1.96k
  int c;
109
759k
  do {
110
759k
    c = lex_byte(ctx, f);
111
759k
  } while ((c != '\012') && (c != '\015') && (c != EOF));
112
1.96k
}
113
114
/* Fast(ish) but inaccurate strtof, with Adobe overflow handling. */
115
static float acrobat_compatible_atof(char *s)
116
0
{
117
0
  int neg = 0;
118
0
  int i = 0;
119
120
0
  while (*s == '-')
121
0
  {
122
0
    neg = 1;
123
0
    ++s;
124
0
  }
125
0
  while (*s == '+')
126
0
  {
127
0
    ++s;
128
0
  }
129
130
0
  while (*s >= '0' && *s <= '9')
131
0
  {
132
    /* We deliberately ignore overflow here.
133
     * Tests show that Acrobat handles * overflows in exactly the same way we do:
134
     * 123450000000000000000678 is read as 678.
135
     */
136
0
    i = i * 10 + (*s - '0');
137
0
    ++s;
138
0
  }
139
140
0
  if (*s == '.')
141
0
  {
142
0
    float v = i;
143
0
    float n = 0;
144
0
    float d = 1;
145
0
    ++s;
146
0
    while (*s >= '0' && *s <= '9')
147
0
    {
148
0
      n = 10 * n + (*s - '0');
149
0
      d = 10 * d;
150
0
      ++s;
151
0
    }
152
0
    v += n / d;
153
0
    return neg ? -v : v;
154
0
  }
155
0
  else
156
0
  {
157
0
    return neg ? -i : i;
158
0
  }
159
0
}
160
161
/*
 * Fast but inaccurate atoi.
 *
 * Accepts any number of leading '-' signs followed by any number of '+'
 * signs, then decimal digits. Parsing stops at the first non-digit.
 * Values that do not fit in 64 bits wrap around instead of saturating.
 */
static int64_t fast_atoi(char *s)
{
	int neg = 0;
	/* Accumulate in unsigned so that the deliberate wraparound is well
	 * defined; signed overflow would be undefined behaviour. */
	uint64_t acc = 0;

	while (*s == '-')
	{
		neg = 1;
		++s;
	}
	while (*s == '+')
	{
		++s;
	}

	while (*s >= '0' && *s <= '9')
	{
		/* We deliberately ignore overflow here. */
		acc = acc * 10u + (uint64_t)(*s - '0');
		++s;
	}

	/* Negating in unsigned arithmetic also covers the most negative
	 * value, which cannot be negated as a signed integer. */
	if (neg)
		acc = (uint64_t)0 - acc;
	return (int64_t)acc;
}
186
187
static int
188
lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
189
264k
{
190
264k
  char *s = buf->scratch;
191
264k
  char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
192
264k
  char *isreal = (c == '.' ? s : NULL);
193
264k
  int neg = (c == '-');
194
264k
  int isbad = 0;
195
196
264k
  *s++ = c;
197
198
264k
  c = lex_byte(ctx, f);
199
200
  /* skip extra '-' signs at start of number */
201
264k
  if (neg)
202
675
  {
203
709
    while (c == '-')
204
34
      c = lex_byte(ctx, f);
205
675
  }
206
207
1.36M
  while (s < e)
208
1.36M
  {
209
1.36M
    switch (c)
210
1.36M
    {
211
427k
    case IS_WHITE:
212
427k
    case IS_DELIM:
213
264k
      fz_unread_byte(ctx, f);
214
264k
      goto end;
215
1
    case EOF:
216
1
      goto end;
217
310k
    case '.':
218
310k
      if (isreal)
219
202k
        isbad = 1;
220
310k
      isreal = s;
221
310k
      *s++ = c;
222
310k
      break;
223
4.37k
    case '-':
224
      /* Bug 703248: Some PDFs (particularly those
225
       * generated by google docs) apparently have
226
       * numbers like 0.000000000000-5684342 in them.
227
       * We'll stop our interpretation at the -, but
228
       * keep reading to skip over the trailing
229
       * digits so they aren't parsed later. */
230
4.37k
      *s++ = '\0';
231
4.37k
      break;
232
582k
    case RANGE_0_9:
233
582k
      *s++ = c;
234
582k
      break;
235
201k
    default:
236
201k
      isbad = 1;
237
201k
      *s++ = c;
238
201k
      break;
239
1.36M
    }
240
1.09M
    c = lex_byte(ctx, f);
241
1.09M
  }
242
243
264k
end:
244
264k
  *s = '\0';
245
264k
  if (isbad)
246
106k
    return PDF_TOK_KEYWORD;
247
157k
  if (isreal)
248
6.49k
  {
249
    /* We'd like to use the fastest possible atof
250
     * routine, but we'd rather match acrobats
251
     * handling of broken numbers. As such, we
252
     * spot common broken cases and call an
253
     * acrobat compatible routine where required. */
254
6.49k
    if (neg > 1 || isreal - buf->scratch >= 10)
255
0
      buf->f = acrobat_compatible_atof(buf->scratch);
256
6.49k
    else
257
6.49k
      buf->f = fz_atof(buf->scratch);
258
6.49k
    return PDF_TOK_REAL;
259
6.49k
  }
260
150k
  else
261
150k
  {
262
150k
    buf->i = fast_atoi(buf->scratch);
263
150k
    return PDF_TOK_INT;
264
150k
  }
265
157k
}
266
267
static void
268
lex_name(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
269
327k
{
270
327k
  char *s = lb->scratch;
271
327k
  char *e = s + fz_minz(127, lb->size);
272
327k
  int c;
273
274
3.25M
  while (1)
275
3.25M
  {
276
3.25M
    if (s == e)
277
20
    {
278
20
      if (e - lb->scratch < 127)
279
0
      {
280
0
        s += pdf_lexbuf_grow(ctx, lb);
281
0
        e = lb->scratch + fz_minz(127, lb->size);
282
0
      }
283
20
      else
284
20
      {
285
        /* truncate names that are too long */
286
20
        fz_warn(ctx, "name is too long");
287
20
        *s = 0;
288
20
        lb->len = s - lb->scratch;
289
20
        s = NULL;
290
20
      }
291
20
    }
292
3.25M
    c = lex_byte(ctx, f);
293
3.25M
    switch (c)
294
3.25M
    {
295
1.12M
    case IS_WHITE:
296
1.12M
    case IS_DELIM:
297
327k
      fz_unread_byte(ctx, f);
298
327k
      goto end;
299
2
    case EOF:
300
2
      goto end;
301
12.3k
    case '#':
302
12.3k
    {
303
12.3k
      int hex[2];
304
12.3k
      int i;
305
13.7k
      for (i = 0; i < 2; i++)
306
13.7k
      {
307
13.7k
        c = fz_peek_byte(ctx, f);
308
13.7k
        switch (c)
309
13.7k
        {
310
12.0k
        case RANGE_0_9:
311
12.0k
          if (i == 1 && c == '0' && hex[0] == 0)
312
0
            goto illegal;
313
1.35k
          hex[i] = lex_byte(ctx, f) - '0';
314
1.35k
          break;
315
59
        case RANGE_a_f:
316
59
          hex[i] = lex_byte(ctx, f) - 'a' + 10;
317
59
          break;
318
26
        case RANGE_A_F:
319
26
          hex[i] = lex_byte(ctx, f) - 'A' + 10;
320
26
          break;
321
12.3k
        default:
322
12.3k
          goto illegal;
323
12.3k
        case EOF:
324
0
          goto illegal_eof;
325
13.7k
        }
326
13.7k
      }
327
15
      if (s) *s++ = (hex[0] << 4) + hex[1];
328
15
      break;
329
12.3k
illegal:
330
12.3k
      if (i == 1)
331
1.40k
        fz_unread_byte(ctx, f);
332
12.3k
illegal_eof:
333
12.3k
      if (s) *s++ = '#';
334
12.3k
      continue;
335
12.3k
    }
336
2.91M
    default:
337
2.91M
      if (s) *s++ = c;
338
2.91M
      break;
339
3.25M
    }
340
3.25M
  }
341
327k
end:
342
327k
  if (s)
343
327k
  {
344
327k
    *s = '\0';
345
327k
    lb->len = s - lb->scratch;
346
327k
  }
347
327k
}
348
349
static int
350
lex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
351
289
{
352
289
  char *s = lb->scratch;
353
289
  char *e = s + lb->size;
354
289
  int bal = 1;
355
289
  int oct;
356
289
  int c;
357
358
3.81k
  while (1)
359
3.81k
  {
360
3.81k
    if (s == e)
361
4
    {
362
4
      s += pdf_lexbuf_grow(ctx, lb);
363
4
      e = lb->scratch + lb->size;
364
4
    }
365
3.81k
    c = lex_byte(ctx, f);
366
3.81k
    switch (c)
367
3.81k
    {
368
2
    case EOF:
369
2
      return PDF_TOK_ERROR;
370
94
    case '(':
371
94
      bal++;
372
94
      *s++ = c;
373
94
      break;
374
339
    case ')':
375
339
      bal --;
376
339
      if (bal == 0)
377
287
        goto end;
378
52
      *s++ = c;
379
52
      break;
380
2
    case '\\':
381
2
      c = lex_byte(ctx, f);
382
2
      switch (c)
383
2
      {
384
0
      case EOF:
385
0
        return PDF_TOK_ERROR;
386
0
      case 'n':
387
0
        *s++ = '\n';
388
0
        break;
389
0
      case 'r':
390
0
        *s++ = '\r';
391
0
        break;
392
0
      case 't':
393
0
        *s++ = '\t';
394
0
        break;
395
0
      case 'b':
396
0
        *s++ = '\b';
397
0
        break;
398
0
      case 'f':
399
0
        *s++ = '\f';
400
0
        break;
401
1
      case '(':
402
1
        *s++ = '(';
403
1
        break;
404
1
      case ')':
405
1
        *s++ = ')';
406
1
        break;
407
0
      case '\\':
408
0
        *s++ = '\\';
409
0
        break;
410
0
      case RANGE_0_7:
411
0
        oct = c - '0';
412
0
        c = lex_byte(ctx, f);
413
0
        if (c >= '0' && c <= '7')
414
0
        {
415
0
          oct = oct * 8 + (c - '0');
416
0
          c = lex_byte(ctx, f);
417
0
          if (c >= '0' && c <= '7')
418
0
            oct = oct * 8 + (c - '0');
419
0
          else if (c != EOF)
420
0
            fz_unread_byte(ctx, f);
421
0
        }
422
0
        else if (c != EOF)
423
0
          fz_unread_byte(ctx, f);
424
0
        *s++ = oct;
425
0
        break;
426
0
      case '\n':
427
0
        break;
428
0
      case '\r':
429
0
        c = lex_byte(ctx, f);
430
0
        if ((c != '\n') && (c != EOF))
431
0
          fz_unread_byte(ctx, f);
432
0
        break;
433
0
      default:
434
0
        *s++ = c;
435
2
      }
436
2
      break;
437
    /* Bug 708256: PDF 32000-1 says that any occurence of \n, \r, or \r\n in a
438
     * (unless escaped with a '\') should be interpreted as a single 0x0a byte. */
439
13
    case '\n':
440
13
      *s++ = 0x0a;
441
13
      break;
442
77
    case '\r':
443
77
      *s++ = 0x0a;
444
77
      c = lex_byte(ctx, f);
445
77
      if ((c != '\n') && (c != EOF))
446
3
        fz_unread_byte(ctx, f);
447
77
      break;
448
3.28k
    default:
449
3.28k
      *s++ = c;
450
3.28k
      break;
451
3.81k
    }
452
3.81k
  }
453
287
end:
454
287
  lb->len = s - lb->scratch;
455
287
  return PDF_TOK_STRING;
456
289
}
457
458
static int
459
lex_hex_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *lb)
460
66.9k
{
461
66.9k
  char *s = lb->scratch;
462
66.9k
  char *e = s + lb->size;
463
66.9k
  int a = 0, x = 0;
464
66.9k
  int c;
465
466
360k
  while (1)
467
360k
  {
468
360k
    if (s == e)
469
0
    {
470
0
      s += pdf_lexbuf_grow(ctx, lb);
471
0
      e = lb->scratch + lb->size;
472
0
    }
473
360k
    c = lex_byte(ctx, f);
474
360k
    switch (c)
475
360k
    {
476
0
    case IS_WHITE:
477
0
      break;
478
0
    default:
479
0
      fz_warn(ctx, "invalid character in hex string");
480
      /* fall through */
481
293k
    case IS_HEX:
482
293k
      if (x)
483
146k
      {
484
146k
        *s++ = a * 16 + unhex(c);
485
146k
        x = !x;
486
146k
      }
487
146k
      else
488
146k
      {
489
146k
        a = unhex(c);
490
146k
        x = !x;
491
146k
      }
492
293k
      break;
493
66.9k
    case '>':
494
66.9k
      if (x)
495
1
      {
496
1
        *s++ = a * 16; /* pad truncated string with '0' */
497
1
      }
498
66.9k
      goto end;
499
0
    case EOF:
500
0
      return PDF_TOK_ERROR;
501
360k
    }
502
360k
  }
503
66.9k
end:
504
66.9k
  lb->len = s - lb->scratch;
505
66.9k
  return PDF_TOK_STRING;
506
66.9k
}
507
508
static pdf_token
509
pdf_token_from_keyword(char *key)
510
282k
{
511
282k
  switch (*key)
512
282k
  {
513
22.1k
  case 'R':
514
22.1k
    if (!strcmp(key, "R")) return PDF_TOK_R;
515
132
    break;
516
10.0k
  case 't':
517
10.0k
    if (!strcmp(key, "true")) return PDF_TOK_TRUE;
518
10.0k
    if (!strcmp(key, "trailer")) return PDF_TOK_TRAILER;
519
10.0k
    break;
520
10.0k
  case 'f':
521
2.67k
    if (!strcmp(key, "false")) return PDF_TOK_FALSE;
522
2.26k
    break;
523
5.98k
  case 'n':
524
5.98k
    if (!strcmp(key, "null")) return PDF_TOK_NULL;
525
5.98k
    if (!strcmp(key, "newobj")) return PDF_TOK_NEWOBJ;
526
5.98k
    break;
527
8.37k
  case 'o':
528
8.37k
    if (!strcmp(key, "obj")) return PDF_TOK_OBJ;
529
7.01k
    break;
530
11.8k
  case 'e':
531
11.8k
    if (!strcmp(key, "endobj")) return PDF_TOK_ENDOBJ;
532
10.8k
    if (!strcmp(key, "endstream")) return PDF_TOK_ENDSTREAM;
533
10.5k
    break;
534
17.3k
  case 's':
535
17.3k
    if (!strcmp(key, "stream")) return PDF_TOK_STREAM;
536
16.8k
    if (!strcmp(key, "startxref")) return PDF_TOK_STARTXREF;
537
16.8k
    break;
538
16.8k
  case 'x':
539
758
    if (!strcmp(key, "xref")) return PDF_TOK_XREF;
540
758
    break;
541
282k
  }
542
543
2.37M
  while (*key)
544
2.16M
  {
545
2.16M
    if (!fz_isprint(*key))
546
43.8k
      return PDF_TOK_ERROR;
547
2.12M
    ++key;
548
2.12M
  }
549
550
213k
  return PDF_TOK_KEYWORD;
551
257k
}
552
553
/*
 * Initialise lb to use its embedded buffer, recording size as both the
 * current and the base capacity, with no token stored yet.
 */
void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lb, int size)
{
	lb->base_size = size;
	lb->size = lb->base_size;
	lb->len = 0;
	lb->scratch = lb->buffer;
}
559
560
/*
 * Release the scratch storage of lb if it has been grown beyond its
 * base size. A NULL lb, or one that was never grown, is left alone.
 */
void pdf_lexbuf_fin(fz_context *ctx, pdf_lexbuf *lb)
{
	if (lb == NULL)
		return;
	if (lb->size == lb->base_size)
		return;
	fz_free(ctx, lb->scratch);
}
565
566
/*
 * Double the capacity of lb's scratch buffer.
 *
 * On the first growth (size still equal to base_size) a new block is
 * allocated and the embedded buffer's contents are copied into it;
 * later growths reallocate the existing block.
 *
 * Returns the offset from the old scratch address to the new one, so
 * callers can rebase pointers they hold into the buffer.
 */
ptrdiff_t pdf_lexbuf_grow(fz_context *ctx, pdf_lexbuf *lb)
{
	char *before = lb->scratch;
	size_t doubled = lb->size * 2;

	if (lb->size != lb->base_size)
	{
		lb->scratch = fz_realloc(ctx, lb->scratch, doubled);
	}
	else
	{
		lb->scratch = Memento_label(fz_malloc(ctx, doubled), "pdf_lexbuf");
		memcpy(lb->scratch, lb->buffer, lb->size);
	}

	lb->size = doubled;
	return lb->scratch - before;
}
582
583
pdf_token
584
pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
585
242k
{
586
466k
  while (1)
587
466k
  {
588
466k
    int c = lex_byte(ctx, f);
589
466k
    switch (c)
590
466k
    {
591
30
    case EOF:
592
30
      return PDF_TOK_EOF;
593
223k
    case IS_WHITE:
594
223k
      lex_white(ctx, f);
595
223k
      break;
596
34
    case '%':
597
34
      lex_comment(ctx, f);
598
34
      break;
599
39.8k
    case '/':
600
39.8k
      lex_name(ctx, f, buf);
601
39.8k
      return PDF_TOK_NAME;
602
289
    case '(':
603
289
      return lex_string(ctx, f, buf);
604
213
    case ')':
605
213
      return PDF_TOK_ERROR;
606
69.9k
    case '<':
607
69.9k
      c = lex_byte(ctx, f);
608
69.9k
      if (c == '<')
609
3.03k
        return PDF_TOK_OPEN_DICT;
610
66.9k
      if (c != EOF)
611
66.9k
        fz_unread_byte(ctx, f);
612
66.9k
      return lex_hex_string(ctx, f, buf);
613
2.64k
    case '>':
614
2.64k
      c = lex_byte(ctx, f);
615
2.64k
      if (c == '>')
616
2.64k
        return PDF_TOK_CLOSE_DICT;
617
0
      if (c != EOF)
618
0
        fz_unread_byte(ctx, f);
619
0
      return PDF_TOK_ERROR;
620
1.79k
    case '[':
621
1.79k
      return PDF_TOK_OPEN_ARRAY;
622
1.75k
    case ']':
623
1.75k
      return PDF_TOK_CLOSE_ARRAY;
624
0
    case '{':
625
0
      return PDF_TOK_OPEN_BRACE;
626
0
    case '}':
627
0
      return PDF_TOK_CLOSE_BRACE;
628
99.7k
    case IS_NUMBER:
629
99.7k
      return lex_number(ctx, f, buf, c);
630
25.7k
    default: /* isregular: !isdelim && !iswhite && c != EOF */
631
25.7k
      fz_unread_byte(ctx, f);
632
25.7k
      lex_name(ctx, f, buf);
633
25.7k
      return pdf_token_from_keyword(buf->scratch);
634
466k
    }
635
466k
  }
636
242k
}
637
638
pdf_token
639
pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
640
462k
{
641
872k
  while (1)
642
872k
  {
643
872k
    int c = lex_byte(ctx, f);
644
872k
    switch (c)
645
872k
    {
646
5
    case EOF:
647
5
      return PDF_TOK_EOF;
648
407k
    case IS_WHITE:
649
407k
      lex_white(ctx, f);
650
407k
      break;
651
1.93k
    case '%':
652
1.93k
      lex_comment(ctx, f);
653
1.93k
      break;
654
5.19k
    case '/':
655
5.19k
      lex_name(ctx, f, buf);
656
5.19k
      return PDF_TOK_NAME;
657
1.92k
    case '(':
658
1.92k
      return PDF_TOK_ERROR; /* no strings allowed */
659
1.90k
    case ')':
660
1.90k
      return PDF_TOK_ERROR; /* no strings allowed */
661
4.58k
    case '<':
662
4.58k
      c = lex_byte(ctx, f);
663
4.58k
      if (c == '<')
664
19
        return PDF_TOK_OPEN_DICT;
665
4.56k
      if (c != EOF)
666
4.56k
        fz_unread_byte(ctx, f);
667
4.56k
      return PDF_TOK_ERROR; /* no strings allowed */
668
6.35k
    case '>':
669
6.35k
      c = lex_byte(ctx, f);
670
6.35k
      if (c == '>')
671
72
        return PDF_TOK_CLOSE_DICT;
672
6.28k
      if (c != EOF)
673
6.28k
        fz_unread_byte(ctx, f);
674
6.28k
      return PDF_TOK_ERROR;
675
8.73k
    case '[':
676
8.73k
      return PDF_TOK_OPEN_ARRAY;
677
8.82k
    case ']':
678
8.82k
      return PDF_TOK_CLOSE_ARRAY;
679
1.84k
    case '{':
680
1.84k
      return PDF_TOK_OPEN_BRACE;
681
1.82k
    case '}':
682
1.82k
      return PDF_TOK_CLOSE_BRACE;
683
164k
    case IS_NUMBER:
684
164k
      return lex_number(ctx, f, buf, c);
685
257k
    default: /* isregular: !isdelim && !iswhite && c != EOF */
686
257k
      fz_unread_byte(ctx, f);
687
257k
      lex_name(ctx, f, buf);
688
257k
      return pdf_token_from_keyword(buf->scratch);
689
872k
    }
690
872k
  }
691
462k
}
692
693
/*
 * Append the textual form of token tok to fzbuf.
 *
 * Names, strings, integers and reals take their content from buf
 * (scratch, i or f respectively). Structural tokens are written as
 * their fixed spelling. Every other token is written as the raw
 * buf->len bytes of buf->scratch.
 */
void pdf_append_token(fz_context *ctx, fz_buffer *fzbuf, int tok, pdf_lexbuf *buf)
{
	if (tok == PDF_TOK_NAME)
	{
		fz_append_printf(ctx, fzbuf, "/%s", buf->scratch);
	}
	else if (tok == PDF_TOK_STRING)
	{
		/* The string lexers do not zero terminate, so make room for a
		 * terminator and add one before handing over a C string. */
		if (buf->len >= buf->size)
			pdf_lexbuf_grow(ctx, buf);
		buf->scratch[buf->len] = 0;
		fz_append_pdf_string(ctx, fzbuf, buf->scratch);
	}
	else if (tok == PDF_TOK_OPEN_DICT)
	{
		fz_append_string(ctx, fzbuf, "<<");
	}
	else if (tok == PDF_TOK_CLOSE_DICT)
	{
		fz_append_string(ctx, fzbuf, ">>");
	}
	else if (tok == PDF_TOK_OPEN_ARRAY)
	{
		fz_append_byte(ctx, fzbuf, '[');
	}
	else if (tok == PDF_TOK_CLOSE_ARRAY)
	{
		fz_append_byte(ctx, fzbuf, ']');
	}
	else if (tok == PDF_TOK_OPEN_BRACE)
	{
		fz_append_byte(ctx, fzbuf, '{');
	}
	else if (tok == PDF_TOK_CLOSE_BRACE)
	{
		fz_append_byte(ctx, fzbuf, '}');
	}
	else if (tok == PDF_TOK_INT)
	{
		fz_append_printf(ctx, fzbuf, "%ld", buf->i);
	}
	else if (tok == PDF_TOK_REAL)
	{
		fz_append_printf(ctx, fzbuf, "%g", buf->f);
	}
	else
	{
		fz_append_data(ctx, fzbuf, buf->scratch, buf->len);
	}
}