Coverage Report

Created: 2025-06-24 07:01

/src/ghostpdl/pdf/pdf_xref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2018-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* xref parsing */
17
18
#include "pdf_int.h"
19
#include "pdf_stack.h"
20
#include "pdf_xref.h"
21
#include "pdf_file.h"
22
#include "pdf_loop_detect.h"
23
#include "pdf_dict.h"
24
#include "pdf_array.h"
25
#include "pdf_repair.h"
26
27
static int resize_xref(pdf_context *ctx, uint64_t new_size)
28
19.3k
{
29
19.3k
    xref_entry *new_xrefs;
30
31
    /* Although we can technically handle object numbers larger than this, on some systems (32-bit Windows)
32
     * memset is limited to a (signed!) integer for the size of memory to clear. We could deal
33
     * with this by clearing the memory in blocks, but really, this is almost certainly a
34
     * corrupted file or something.
35
     */
36
19.3k
    if (new_size >= (0x7ffffff / sizeof(xref_entry)))
37
20
        return_error(gs_error_rangecheck);
38
39
19.3k
    new_xrefs = (xref_entry *)gs_alloc_bytes(ctx->memory, (new_size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
40
19.3k
    if (new_xrefs == NULL){
41
0
        pdfi_countdown(ctx->xref_table);
42
0
        ctx->xref_table = NULL;
43
0
        return_error(gs_error_VMerror);
44
0
    }
45
19.3k
    memset(new_xrefs, 0x00, (new_size) * sizeof(xref_entry));
46
19.3k
    memcpy(new_xrefs, ctx->xref_table->xref, ctx->xref_table->xref_size * sizeof(xref_entry));
47
19.3k
    gs_free_object(ctx->memory, ctx->xref_table->xref, "reallocated xref entries");
48
19.3k
    ctx->xref_table->xref = new_xrefs;
49
19.3k
    ctx->xref_table->xref_size = new_size;
50
19.3k
    return 0;
51
19.3k
}
52
53
static int read_xref_stream_entries(pdf_context *ctx, pdf_c_stream *s, int64_t first, int64_t last, int64_t *W)
54
16.7k
{
55
16.7k
    uint i, j;
56
16.7k
    uint64_t field_width = 0;
57
16.7k
    uint32_t type = 0;
58
16.7k
    uint64_t objnum = 0, gen = 0;
59
16.7k
    byte *Buffer;
60
16.7k
    int64_t bytes = 0;
61
16.7k
    xref_entry *entry;
62
63
    /* Find max number of bytes to be read */
64
16.7k
    field_width = W[0];
65
16.7k
    if (W[1] > field_width)
66
16.6k
        field_width = W[1];
67
16.7k
    if (W[2] > field_width)
68
18
        field_width = W[2];
69
70
16.7k
    Buffer = gs_alloc_bytes(ctx->memory, field_width, "read_xref_stream_entry working buffer");
71
16.7k
    if (Buffer == NULL)
72
0
        return_error(gs_error_VMerror);
73
74
710k
    for (i=first;i<=last; i++){
75
        /* Defaults if W[n] = 0 */
76
694k
        type = 1;
77
694k
        objnum = gen = 0;
78
79
694k
        if (W[0] != 0) {
80
693k
            type = 0;
81
693k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[0], s);
82
693k
            if (bytes < W[0]){
83
129
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)");
84
129
                return_error(gs_error_ioerror);
85
129
            }
86
1.38M
            for (j=0;j<W[0];j++)
87
693k
                type = (type << 8) + Buffer[j];
88
693k
        }
89
90
694k
        if (W[1] != 0) {
91
693k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[1], s);
92
693k
            if (bytes < W[1]){
93
32
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry free working buffer (error)");
94
32
                return_error(gs_error_ioerror);
95
32
            }
96
2.38M
            for (j=0;j<W[1];j++)
97
1.68M
                objnum = (objnum << 8) + Buffer[j];
98
693k
        }
99
100
693k
        if (W[2] != 0) {
101
674k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[2], s);
102
674k
            if (bytes < W[2]){
103
40
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)");
104
40
                return_error(gs_error_ioerror);
105
40
            }
106
1.36M
            for (j=0;j<W[2];j++)
107
689k
                gen = (gen << 8) + Buffer[j];
108
674k
        }
109
110
693k
        entry = &ctx->xref_table->xref[i];
111
693k
        if (entry->object_num != 0 && !entry->free)
112
3.92k
            continue;
113
114
690k
        entry->compressed = false;
115
690k
        entry->free = false;
116
690k
        entry->object_num = i;
117
690k
        entry->cache = NULL;
118
119
690k
        switch(type) {
120
20.1k
            case 0:
121
20.1k
                entry->free = true;
122
20.1k
                entry->u.uncompressed.offset = objnum;         /* For free objects we use the offset to store the object number of the next free object */
123
20.1k
                entry->u.uncompressed.generation_num = gen;    /* And the generation number is the numebr to use if this object is used again */
124
20.1k
                break;
125
203k
            case 1:
126
203k
                entry->u.uncompressed.offset = objnum;
127
203k
                entry->u.uncompressed.generation_num = gen;
128
203k
                break;
129
466k
            case 2:
130
466k
                entry->compressed = true;
131
466k
                entry->u.compressed.compressed_stream_num = objnum;   /* The object number of the compressed stream */
132
466k
                entry->u.compressed.object_index = gen;               /* And the index of the object within the stream */
133
466k
                break;
134
146
            default:
135
146
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer");
136
146
                return_error(gs_error_rangecheck);
137
0
                break;
138
690k
        }
139
690k
    }
140
16.4k
    gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer");
141
16.4k
    return 0;
142
16.7k
}
143
144
/* Forward definition */
145
static int read_xref(pdf_context *ctx, pdf_c_stream *s);
146
static int pdfi_check_xref_stream(pdf_context *ctx);
147
/* These two routines are recursive.... */
148
static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num);
149
150
static int pdfi_process_xref_stream(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *s)
151
12.9k
{
152
12.9k
    pdf_c_stream *XRefStrm;
153
12.9k
    int code, i;
154
12.9k
    pdf_dict *sdict = NULL;
155
12.9k
    pdf_name *n;
156
12.9k
    pdf_array *a;
157
12.9k
    int64_t size;
158
12.9k
    int64_t num;
159
12.9k
    int64_t W[3] = {0, 0, 0};
160
12.9k
    int objnum;
161
12.9k
    bool known = false;
162
163
12.9k
    if (pdfi_type_of(stream_obj) != PDF_STREAM)
164
0
        return_error(gs_error_typecheck);
165
166
12.9k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)stream_obj, &sdict);
167
12.9k
    if (code < 0)
168
0
        return code;
169
170
12.9k
    code = pdfi_dict_get_type(ctx, sdict, "Type", PDF_NAME, (pdf_obj **)&n);
171
12.9k
    if (code < 0)
172
60
        return code;
173
174
12.8k
    if (n->length != 4 || memcmp(n->data, "XRef", 4) != 0) {
175
21
        pdfi_countdown(n);
176
21
        return_error(gs_error_syntaxerror);
177
21
    }
178
12.8k
    pdfi_countdown(n);
179
180
12.8k
    code = pdfi_dict_get_int(ctx, sdict, "Size", &size);
181
12.8k
    if (code < 0)
182
11
        return code;
183
12.8k
    if (size < 1)
184
11
        return 0;
185
186
12.8k
    if (size < 0 || size > floor((double)ARCH_MAX_SIZE_T / (double)sizeof(xref_entry)))
187
0
        return_error(gs_error_rangecheck);
188
189
    /* If this is the first xref stream then allocate the xref table and store the trailer */
190
12.8k
    if (ctx->xref_table == NULL) {
191
7.90k
        ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table");
192
7.90k
        if (ctx->xref_table == NULL) {
193
0
            return_error(gs_error_VMerror);
194
0
        }
195
7.90k
        memset(ctx->xref_table, 0x00, sizeof(xref_table_t));
196
7.90k
        ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
197
7.90k
        if (ctx->xref_table->xref == NULL){
198
3
            gs_free_object(ctx->memory, ctx->xref_table, "failed to allocate xref table entries");
199
3
            ctx->xref_table = NULL;
200
3
            return_error(gs_error_VMerror);
201
3
        }
202
7.90k
        memset(ctx->xref_table->xref, 0x00, size * sizeof(xref_entry));
203
7.90k
        ctx->xref_table->ctx = ctx;
204
7.90k
        ctx->xref_table->type = PDF_XREF_TABLE;
205
7.90k
        ctx->xref_table->xref_size = size;
206
#if REFCNT_DEBUG
207
        ctx->xref_table->UID = ctx->ref_UID++;
208
        outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID);
209
#endif
210
7.90k
        pdfi_countup(ctx->xref_table);
211
212
7.90k
        pdfi_countdown(ctx->Trailer);
213
214
7.90k
        ctx->Trailer = sdict;
215
7.90k
        pdfi_countup(sdict);
216
7.90k
    } else {
217
4.91k
        if (size > ctx->xref_table->xref_size)
218
4
            return_error(gs_error_rangecheck);
219
220
4.90k
        code = pdfi_merge_dicts(ctx, ctx->Trailer, sdict);
221
4.90k
        if (code < 0 && (code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "pdfi_process_xref_stream", NULL)) < 0) {
222
0
            goto exit;
223
0
        }
224
4.90k
    }
225
226
12.8k
    pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, stream_obj), SEEK_SET);
227
228
    /* Bug #691220 has a PDF file with a compressed XRef, the stream dictionary has
229
     * a /DecodeParms entry for the stream, which has a /Colors value of 5, which makes
230
     * *no* sense whatever. If we try to apply a Predictor then we end up in a loop trying
231
     * to read 5 colour samples. Rather than meddles with more parameters to the filter
232
     * code, we'll just remove the Colors entry from the DecodeParms dictionary,
233
     * because it is nonsense. This means we'll get the (sensible) default value of 1.
234
     */
235
12.8k
    code = pdfi_dict_known(ctx, sdict, "DecodeParms", &known);
236
12.8k
    if (code < 0)
237
0
        return code;
238
239
12.8k
    if (known) {
240
11.8k
        pdf_dict *DP;
241
11.8k
        double f;
242
11.8k
        pdf_obj *name;
243
244
11.8k
        code = pdfi_dict_get_type(ctx, sdict, "DecodeParms", PDF_DICT, (pdf_obj **)&DP);
245
11.8k
        if (code < 0)
246
1
            return code;
247
248
11.8k
        code = pdfi_dict_knownget_number(ctx, DP, "Colors", &f);
249
11.8k
        if (code < 0) {
250
0
            pdfi_countdown(DP);
251
0
            return code;
252
0
        }
253
11.8k
        if (code > 0 && f != (double)1)
254
0
        {
255
0
            code = pdfi_name_alloc(ctx, (byte *)"Colors", 6, &name);
256
0
            if (code < 0) {
257
0
                pdfi_countdown(DP);
258
0
                return code;
259
0
            }
260
0
            pdfi_countup(name);
261
262
0
            code = pdfi_dict_delete_pair(ctx, DP, (pdf_name *)name);
263
0
            pdfi_countdown(name);
264
0
            if (code < 0) {
265
0
                pdfi_countdown(DP);
266
0
                return code;
267
0
            }
268
0
        }
269
11.8k
        pdfi_countdown(DP);
270
11.8k
    }
271
272
12.8k
    code = pdfi_filter_no_decryption(ctx, stream_obj, s, &XRefStrm, false);
273
12.8k
    if (code < 0) {
274
45
        pdfi_countdown(ctx->xref_table);
275
45
        ctx->xref_table = NULL;
276
45
        return code;
277
45
    }
278
279
12.7k
    code = pdfi_dict_get_type(ctx, sdict, "W", PDF_ARRAY, (pdf_obj **)&a);
280
12.7k
    if (code < 0) {
281
8
        pdfi_close_file(ctx, XRefStrm);
282
8
        pdfi_countdown(ctx->xref_table);
283
8
        ctx->xref_table = NULL;
284
8
        return code;
285
8
    }
286
287
12.7k
    if (pdfi_array_size(a) != 3) {
288
11
        pdfi_countdown(a);
289
11
        pdfi_close_file(ctx, XRefStrm);
290
11
        pdfi_countdown(ctx->xref_table);
291
11
        ctx->xref_table = NULL;
292
11
        return_error(gs_error_rangecheck);
293
11
    }
294
50.8k
    for (i=0;i<3;i++) {
295
38.1k
        code = pdfi_array_get_int(ctx, a, (uint64_t)i, (int64_t *)&W[i]);
296
38.1k
        if (code < 0 || W[i] < 0) {
297
34
            pdfi_countdown(a);
298
34
            pdfi_close_file(ctx, XRefStrm);
299
34
            pdfi_countdown(ctx->xref_table);
300
34
            ctx->xref_table = NULL;
301
34
            if (W[i] < 0)
302
9
                code = gs_note_error(gs_error_rangecheck);
303
34
            return code;
304
34
        }
305
38.1k
    }
306
12.7k
    pdfi_countdown(a);
307
308
    /* W[0] is either:
309
     * 0 (no type field) or a single byte with the type.
310
     * W[1] is either:
311
     * The object number of the next free object, the byte offset of this object in the file or the object5 number of the object stream where this object is stored.
312
     * W[2] is either:
313
     * The generation number to use if this object is used again, the generation number of the object or the index of this object within the object stream.
314
     *
315
     * Object and generation numbers are limited to unsigned 64-bit values, as are bytes offsets in the file, indexes of objects within the stream likewise (actually
316
     * most of these are generally 32-bit max). So we can limit the field widths to 8 bytes, enough to hold a 64-bit number.
317
     * Even if a later version of the spec makes these larger (which seems unlikely!) we still cna't cope with integers > 64-bits.
318
     */
319
12.7k
    if (W[0] > 1 || W[1] > 8 || W[2] > 8) {
320
29
        pdfi_close_file(ctx, XRefStrm);
321
29
        pdfi_countdown(ctx->xref_table);
322
29
        ctx->xref_table = NULL;
323
29
        return code;
324
29
    }
325
326
12.6k
    code = pdfi_dict_get_type(ctx, sdict, "Index", PDF_ARRAY, (pdf_obj **)&a);
327
12.6k
    if (code == gs_error_undefined) {
328
4.63k
        code = read_xref_stream_entries(ctx, XRefStrm, 0, size - 1, W);
329
4.63k
        if (code < 0) {
330
113
            pdfi_close_file(ctx, XRefStrm);
331
113
            pdfi_countdown(ctx->xref_table);
332
113
            ctx->xref_table = NULL;
333
113
            return code;
334
113
        }
335
8.04k
    } else {
336
8.04k
        int64_t start, size;
337
338
8.04k
        if (code < 0) {
339
2
            pdfi_close_file(ctx, XRefStrm);
340
2
            pdfi_countdown(ctx->xref_table);
341
2
            ctx->xref_table = NULL;
342
2
            return code;
343
2
        }
344
345
8.04k
        if (pdfi_array_size(a) & 1) {
346
9
            pdfi_countdown(a);
347
9
            pdfi_close_file(ctx, XRefStrm);
348
9
            pdfi_countdown(ctx->xref_table);
349
9
            ctx->xref_table = NULL;
350
9
            return_error(gs_error_rangecheck);
351
9
        }
352
353
19.9k
        for (i=0;i < pdfi_array_size(a);i+=2){
354
12.1k
            code = pdfi_array_get_int(ctx, a, (uint64_t)i, &start);
355
12.1k
            if (code < 0 || start < 0) {
356
18
                pdfi_countdown(a);
357
18
                pdfi_close_file(ctx, XRefStrm);
358
18
                pdfi_countdown(ctx->xref_table);
359
18
                ctx->xref_table = NULL;
360
18
                return code;
361
18
            }
362
363
12.1k
            code = pdfi_array_get_int(ctx, a, (uint64_t)i+1, &size);
364
12.1k
            if (code < 0) {
365
13
                pdfi_countdown(a);
366
13
                pdfi_close_file(ctx, XRefStrm);
367
13
                pdfi_countdown(ctx->xref_table);
368
13
                ctx->xref_table = NULL;
369
13
                return code;
370
13
            }
371
372
12.1k
            if (size < 1)
373
11
                continue;
374
375
12.1k
            if (start + size >= ctx->xref_table->xref_size) {
376
7.22k
                code = resize_xref(ctx, start + size);
377
7.22k
                if (code < 0) {
378
7
                    pdfi_countdown(a);
379
7
                    pdfi_close_file(ctx, XRefStrm);
380
7
                    pdfi_countdown(ctx->xref_table);
381
7
                    ctx->xref_table = NULL;
382
7
                    return code;
383
7
                }
384
7.22k
            }
385
386
12.1k
            code = read_xref_stream_entries(ctx, XRefStrm, start, start + size - 1, W);
387
12.1k
            if (code < 0) {
388
234
                pdfi_countdown(a);
389
234
                pdfi_close_file(ctx, XRefStrm);
390
234
                pdfi_countdown(ctx->xref_table);
391
234
                ctx->xref_table = NULL;
392
234
                return code;
393
234
            }
394
12.1k
        }
395
8.03k
    }
396
12.2k
    pdfi_countdown(a);
397
398
12.2k
    pdfi_close_file(ctx, XRefStrm);
399
400
12.2k
    code = pdfi_dict_get_int(ctx, sdict, "Prev", &num);
401
12.2k
    if (code == gs_error_undefined)
402
5.12k
        return 0;
403
404
7.16k
    if (code < 0)
405
12
        return code;
406
407
7.15k
    if (num < 0 || num > ctx->main_stream_length)
408
1.76k
        return_error(gs_error_rangecheck);
409
410
5.38k
    if (pdfi_loop_detector_check_object(ctx, num) == true)
411
22
        return_error(gs_error_circular_reference);
412
5.36k
    else {
413
5.36k
        code = pdfi_loop_detector_add_object(ctx, num);
414
5.36k
        if (code < 0)
415
0
            return code;
416
5.36k
    }
417
418
5.36k
    if(ctx->args.pdfdebug)
419
0
        outprintf(ctx->memory, "%% Reading /Prev xref\n");
420
421
5.36k
    pdfi_seek(ctx, s, num, SEEK_SET);
422
423
5.36k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &objnum);
424
5.36k
    if (code == 1) {
425
4.92k
        if (pdfi_check_xref_stream(ctx))
426
4.89k
            return pdfi_read_xref_stream_dict(ctx, s, objnum);
427
4.92k
    }
428
429
471
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
430
471
    if (code < 0)
431
0
        return code;
432
471
    if (code == TOKEN_XREF) {
433
53
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_process_xref_stream", NULL)) < 0) {
434
0
            goto exit;
435
0
        }
436
        /* Read old-style xref table */
437
53
        return(read_xref(ctx, ctx->main_stream));
438
53
    }
439
418
exit:
440
418
    return_error(gs_error_syntaxerror);
441
471
}
442
443
static int pdfi_check_xref_stream(pdf_context *ctx)
444
16.3k
{
445
16.3k
    gs_offset_t offset;
446
16.3k
    int gen_num, code = 0;
447
448
16.3k
    offset = pdfi_unread_tell(ctx);
449
450
16.3k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num);
451
16.3k
    if (code <= 0) {
452
1.16k
        code = 0;
453
1.16k
        goto exit;
454
1.16k
    }
455
456
    /* Try to read 'obj' */
457
15.1k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
458
15.1k
    if (code <= 0) {
459
0
        code = 0;
460
0
        goto exit;
461
0
    }
462
463
    /* Third element must be obj, or it's not a valid xref */
464
15.1k
    if (code != TOKEN_OBJ)
465
1.75k
        code = 0;
466
13.4k
    else
467
13.4k
        code = 1;
468
469
16.3k
exit:
470
16.3k
    pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
471
16.3k
    return code;
472
15.1k
}
473
474
static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num)
475
13.6k
{
476
13.6k
    int code;
477
13.6k
    int gen_num;
478
479
13.6k
    if (ctx->args.pdfdebug)
480
0
        outprintf(ctx->memory, "\n%% Reading PDF 1.5+ xref stream\n");
481
482
    /* We have the obj_num. Lets try for obj_num gen obj as a XRef stream */
483
13.6k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num);
484
13.6k
    if (code <= 0) {
485
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", "")) < 0) {
486
0
            return code;
487
0
        }
488
0
        return(pdfi_repair_file(ctx));
489
0
    }
490
491
    /* Try to read 'obj' */
492
13.6k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
493
13.6k
    if (code < 0)
494
0
        return code;
495
13.6k
    if (code == 0)
496
0
        return_error(gs_error_syntaxerror);
497
498
    /* Third element must be obj, or it's not a valid xref */
499
13.6k
    if (code != TOKEN_OBJ) {
500
0
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BAD_XREFSTMOFFSET, "pdfi_read_xref_stream_dict", "")) < 0) {
501
0
            return code;
502
0
        }
503
0
        return(pdfi_repair_file(ctx));
504
0
    }
505
506
556k
    do {
507
556k
        code = pdfi_read_token(ctx, ctx->main_stream, obj_num, gen_num);
508
556k
        if (code <= 0) {
509
548
            if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
510
0
                return code;
511
0
            }
512
548
            return pdfi_repair_file(ctx);
513
548
        }
514
515
556k
        if (pdfi_count_stack(ctx) >= 2 && pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
516
14.8k
            uintptr_t keyword = (uintptr_t)ctx->stack_top[-1];
517
14.8k
            if (keyword == TOKEN_STREAM) {
518
12.9k
                pdf_dict *dict;
519
12.9k
                pdf_stream *sdict = NULL;
520
12.9k
                int64_t Length;
521
522
                /* Remove the 'stream' token from the stack, should leave a dictionary object on the stack */
523
12.9k
                pdfi_pop(ctx, 1);
524
12.9k
                if (pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) {
525
22
                    if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
526
0
                        return code;
527
0
                    }
528
22
                    return pdfi_repair_file(ctx);
529
22
                }
530
12.9k
                dict = (pdf_dict *)ctx->stack_top[-1];
531
532
                /* Convert the dict into a stream (sdict comes back with at least one ref) */
533
12.9k
                code = pdfi_obj_dict_to_stream(ctx, dict, &sdict, true);
534
                /* Pop off the dict */
535
12.9k
                pdfi_pop(ctx, 1);
536
12.9k
                if (code < 0) {
537
0
                    if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
538
0
                        return code;
539
0
                    }
540
                    /* TODO: should I return code instead of trying to repair?
541
                     * Normally the above routine should not fail so something is
542
                     * probably seriously fubar.
543
                     */
544
0
                    return pdfi_repair_file(ctx);
545
0
                }
546
12.9k
                dict = NULL;
547
548
                /* Init the stuff for the stream */
549
12.9k
                sdict->stream_offset = pdfi_unread_tell(ctx);
550
12.9k
                sdict->object_num = obj_num;
551
12.9k
                sdict->generation_num = gen_num;
552
553
12.9k
                code = pdfi_dict_get_int(ctx, sdict->stream_dict, "Length", &Length);
554
12.9k
                if (code < 0) {
555
                    /* TODO: Not positive this will actually have a length -- just use 0 */
556
63
                    (void)pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num);
557
63
                    code = 0;
558
63
                    Length = 0;
559
63
                }
560
12.9k
                sdict->Length = Length;
561
12.9k
                sdict->length_valid = true;
562
563
12.9k
                code = pdfi_process_xref_stream(ctx, sdict, ctx->main_stream);
564
12.9k
                pdfi_countdown(sdict);
565
12.9k
                if (code < 0) {
566
3.06k
                    pdfi_set_error(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_read_xref_stream_dict", NULL);
567
3.06k
                    return code;
568
3.06k
                }
569
9.85k
                break;
570
12.9k
            } else if (keyword == TOKEN_ENDOBJ) {
571
                /* Something went wrong, this is not a stream dictionary */
572
159
                if ((code = pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num)) < 0) {
573
0
                    return code;
574
0
                }
575
159
                return(pdfi_repair_file(ctx));
576
159
            }
577
14.8k
        }
578
556k
    } while(1);
579
9.85k
    return 0;
580
13.6k
}
581
582
static int skip_to_digit(pdf_context *ctx, pdf_c_stream *s, unsigned int limit)
583
3.23k
{
584
3.23k
    int c, read = 0;
585
586
11.9k
    do {
587
11.9k
        c = pdfi_read_byte(ctx, s);
588
11.9k
        if (c < 0)
589
0
            return_error(gs_error_ioerror);
590
11.9k
        if (c >= '0' && c <= '9') {
591
2.93k
            pdfi_unread_byte(ctx, s, (byte)c);
592
2.93k
            return read;
593
2.93k
        }
594
8.96k
        read++;
595
8.96k
    } while (read < limit);
596
597
297
    return read;
598
3.23k
}
599
600
static int read_digits(pdf_context *ctx, pdf_c_stream *s, byte *Buffer, int limit)
601
3.23k
{
602
3.23k
    int c, read = 0;
603
604
    /* Since the "limit" is a value calculated by the caller,
605
       it's easier to check it in one place (here) than before
606
       every call.
607
     */
608
3.23k
    if (limit <= 0)
609
309
        return_error(gs_error_syntaxerror);
610
611
    /* We assume that Buffer always has limit+1 bytes available, so we can
612
     * safely terminate it. */
613
614
17.5k
    do {
615
17.5k
        c = pdfi_read_byte(ctx, s);
616
17.5k
        if (c < 0)
617
0
            return_error(gs_error_ioerror);
618
17.5k
        if (c < '0' || c > '9') {
619
1.27k
            pdfi_unread_byte(ctx, s, c);
620
1.27k
            break;
621
1.27k
        }
622
16.2k
        *Buffer++ = (byte)c;
623
16.2k
        read++;
624
16.2k
    } while (read < limit);
625
2.92k
    *Buffer = 0;
626
627
2.92k
    return read;
628
2.92k
}
629
630
631
static int read_xref_entry_slow(pdf_context *ctx, pdf_c_stream *s, gs_offset_t *offset, uint32_t *generation_num, unsigned char *free)
632
1.64k
{
633
1.64k
    byte Buffer[20];
634
1.64k
    int c, code, read = 0;
635
636
    /* First off, find a number. If we don't find one, and read 20 bytes, throw an error */
637
1.64k
    code = skip_to_digit(ctx, s, 20);
638
1.64k
    if (code < 0)
639
0
        return code;
640
1.64k
    read += code;
641
642
    /* Now read a number */
643
1.64k
    code = read_digits(ctx, s, (byte *)&Buffer,  (read > 10 ? 20 - read : 10));
644
1.64k
    if (code < 0)
645
49
        return code;
646
1.59k
    read += code;
647
648
1.59k
    *offset = atol((const char *)Buffer);
649
650
    /* find next number */
651
1.59k
    code = skip_to_digit(ctx, s, 20 - read);
652
1.59k
    if (code < 0)
653
0
        return code;
654
1.59k
    read += code;
655
656
    /* and read it */
657
1.59k
    code = read_digits(ctx, s, (byte *)&Buffer, (read > 15 ? 20 - read : 5));
658
1.59k
    if (code < 0)
659
260
        return code;
660
1.33k
    read += code;
661
662
1.33k
    *generation_num = atol((const char *)Buffer);
663
664
2.30k
    do {
665
2.30k
        c = pdfi_read_byte(ctx, s);
666
2.30k
        if (c < 0)
667
0
            return_error(gs_error_ioerror);
668
2.30k
        read ++;
669
2.30k
        if (c == 0x09 || c == 0x20)
670
993
            continue;
671
1.30k
        if (c == 'n' || c == 'f') {
672
737
            *free = (unsigned char)c;
673
737
            break;
674
737
        } else {
675
570
            return_error(gs_error_syntaxerror);
676
570
        }
677
1.30k
    } while (read < 20);
678
762
    if (read >= 20)
679
33
        return_error(gs_error_syntaxerror);
680
681
1.83k
    do {
682
1.83k
        c = pdfi_read_byte(ctx, s);
683
1.83k
        if (c < 0)
684
0
            return_error(gs_error_syntaxerror);
685
1.83k
        read++;
686
1.83k
        if (c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a)
687
868
            continue;
688
1.83k
    } while (read < 20);
689
729
    return 0;
690
729
}
691
692
static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char free)
693
729
{
694
729
    byte b[20], *ptr = B;
695
729
    int index = 0;
696
697
729
    gs_snprintf((char *)b, sizeof(b), "%"PRIdOFFSET"", o);
698
729
    if (strlen((const char *)b) > 10)
699
0
        return_error(gs_error_rangecheck);
700
5.98k
    for(index=0;index < 10 - strlen((const char *)b); index++) {
701
5.25k
        *ptr++ = 0x30;
702
5.25k
    }
703
729
    memcpy(ptr, b, strlen((const char *)b));
704
729
    ptr += strlen((const char *)b);
705
729
    *ptr++ = 0x20;
706
707
729
    gs_snprintf((char *)b, sizeof(b), "%d", g);
708
729
    if (strlen((const char *)b) > 5)
709
0
        return_error(gs_error_rangecheck);
710
3.29k
    for(index=0;index < 5 - strlen((const char *)b);index++) {
711
2.56k
        *ptr++ = 0x30;
712
2.56k
    }
713
729
    memcpy(ptr, b, strlen((const char *)b));
714
729
    ptr += strlen((const char *)b);
715
729
    *ptr++ = 0x20;
716
729
    *ptr++ = free;
717
729
    *ptr++ = 0x20;
718
729
    *ptr++ = 0x0d;
719
729
    return 0;
720
729
}
721
722
static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size)
723
36.4k
{
724
36.4k
    int code = 0, i, j;
725
36.4k
    int start = 0;
726
36.4k
    int size = 0;
727
36.4k
    int64_t bytes = 0;
728
36.4k
    char Buffer[21];
729
730
36.4k
    *section_start = *section_size = 0;
731
732
36.4k
    if (ctx->args.pdfdebug)
733
0
        outprintf(ctx->memory, "\n%% Reading xref section\n");
734
735
36.4k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &start);
736
36.4k
    if (code < 0) {
737
        /* Not an int, might be a keyword */
738
9.76k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
739
9.76k
        if (code < 0)
740
0
            return code;
741
742
9.76k
        if (code != TOKEN_TRAILER) {
743
            /* element is not an integer, and not a keyword - not a valid xref */
744
150
            return_error(gs_error_typecheck);
745
150
        }
746
9.61k
        return 1;
747
9.76k
    }
748
749
26.6k
    if (start < 0)
750
23
        return_error(gs_error_rangecheck);
751
752
26.6k
    *section_start = start;
753
754
26.6k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &size);
755
26.6k
    if (code < 0)
756
20
        return code;
757
26.6k
    if (code == 0)
758
46
        return_error(gs_error_syntaxerror);
759
760
    /* Zero sized xref sections are valid; see the file attached to
761
     * bug 704947 for an example. */
762
26.5k
    if (size < 0)
763
12
        return_error(gs_error_rangecheck);
764
765
26.5k
    *section_size = size;
766
767
26.5k
    if (ctx->args.pdfdebug)
768
0
        outprintf(ctx->memory, "\n%% Section starts at %d and has %d entries\n", (unsigned int) start, (unsigned int)size);
769
770
26.5k
    if (size > 0) {
771
26.1k
        if (ctx->xref_table == NULL) {
772
9.43k
            ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table");
773
9.43k
            if (ctx->xref_table == NULL)
774
0
                return_error(gs_error_VMerror);
775
9.43k
            memset(ctx->xref_table, 0x00, sizeof(xref_table_t));
776
777
9.43k
            ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, (start + size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
778
9.43k
            if (ctx->xref_table->xref == NULL){
779
27
                gs_free_object(ctx->memory, ctx->xref_table, "free xref table on error allocating entries");
780
27
                ctx->xref_table = NULL;
781
27
                return_error(gs_error_VMerror);
782
27
            }
783
#if REFCNT_DEBUG
784
            ctx->xref_table->UID = ctx->ref_UID++;
785
            outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID);
786
#endif
787
788
9.41k
            memset(ctx->xref_table->xref, 0x00, (start + size) * sizeof(xref_entry));
789
9.41k
            ctx->xref_table->ctx = ctx;
790
9.41k
            ctx->xref_table->type = PDF_XREF_TABLE;
791
9.41k
            ctx->xref_table->xref_size = start + size;
792
9.41k
            pdfi_countup(ctx->xref_table);
793
16.6k
        } else {
794
16.6k
            if (start + size > ctx->xref_table->xref_size) {
795
12.1k
                code = resize_xref(ctx, start + size);
796
12.1k
                if (code < 0)
797
13
                    return code;
798
12.1k
            }
799
16.6k
        }
800
26.1k
    }
801
802
26.5k
    pdfi_skip_white(ctx, s);
803
475k
    for (i=0;i< size;i++){
804
449k
        xref_entry *entry = &ctx->xref_table->xref[i + start];
805
449k
        unsigned char free;
806
449k
        gs_offset_t off;
807
449k
        unsigned int gen;
808
809
449k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 20, s);
810
449k
        if (bytes < 20)
811
2
            return_error(gs_error_ioerror);
812
449k
        j = 19;
813
449k
        if ((Buffer[19] != 0x0a && Buffer[19] != 0x0d) || (Buffer[18] != 0x0d && Buffer[18] != 0x0a && Buffer[18] != 0x20))
814
18.9k
            pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_SIZE, "read_xref_section", NULL);
815
472k
        while (Buffer[j] != 0x0D && Buffer[j] != 0x0A) {
816
23.3k
            pdfi_unread_byte(ctx, s, (byte)Buffer[j]);
817
23.3k
            if (--j < 0) {
818
783
                pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_NO_EOL, "read_xref_section", NULL);
819
783
                outprintf(ctx->memory, "Invalid xref entry, line terminator missing.\n");
820
783
                code = read_xref_entry_slow(ctx, s, &off, &gen, &free);
821
783
                if (code < 0)
822
402
                    return code;
823
381
                code = write_offset((byte *)Buffer, off, gen, free);
824
381
                if (code < 0)
825
0
                    return code;
826
381
                j = 19;
827
381
                break;
828
381
            }
829
23.3k
        }
830
449k
        Buffer[j] = 0x00;
831
449k
        if (entry->object_num != 0)
832
9.03k
            continue;
833
834
440k
        if (sscanf(Buffer, "%"PRIdOFFSET" %d %c", &entry->u.uncompressed.offset, &entry->u.uncompressed.generation_num, &free) != 3) {
835
858
            pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_FORMAT, "read_xref_section", NULL);
836
858
            outprintf(ctx->memory, "Invalid xref entry, incorrect format.\n");
837
858
            pdfi_unread(ctx, s, (byte *)Buffer, 20);
838
858
            code = read_xref_entry_slow(ctx, s, &off, &gen, &free);
839
858
            if (code < 0)
840
510
                return code;
841
348
            code = write_offset((byte *)Buffer, off, gen, free);
842
348
            if (code < 0)
843
0
                return code;
844
348
        }
845
846
439k
        entry->compressed = false;
847
439k
        entry->object_num = i + start;
848
439k
        if (free == 'f')
849
134k
            entry->free = true;
850
439k
        if(free == 'n')
851
304k
            entry->free = false;
852
439k
        if (entry->object_num == 0) {
853
6.35k
            if (!entry->free) {
854
78
                pdfi_set_warning(ctx, 0, NULL, W_PDF_XREF_OBJECT0_NOT_FREE, "read_xref_section", NULL);
855
78
            }
856
6.35k
        }
857
439k
    }
858
859
25.6k
    return 0;
860
26.5k
}
861
862
static int read_xref(pdf_context *ctx, pdf_c_stream *s)
863
10.8k
{
864
10.8k
    int code = 0;
865
10.8k
    pdf_dict *d = NULL;
866
10.8k
    uint64_t max_obj = 0;
867
10.8k
    int64_t num, XRefStm = 0;
868
10.8k
    int obj_num;
869
10.8k
    bool known = false;
870
871
10.8k
    if (ctx->repaired)
872
5
        return 0;
873
874
36.4k
    do {
875
36.4k
        uint64_t section_start, section_size;
876
877
36.4k
        code = read_xref_section(ctx, s, &section_start, &section_size);
878
36.4k
        if (code < 0)
879
1.20k
            return code;
880
881
35.2k
        if (section_size > 0 && section_start + section_size - 1 > max_obj)
882
22.8k
            max_obj = section_start + section_size - 1;
883
884
        /* code == 1 => read_xref_section ended with a trailer. */
885
35.2k
    } while (code != 1);
886
887
9.61k
    code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0);
888
9.61k
    if (code < 0)
889
177
        return code;
890
891
9.43k
    d = (pdf_dict *)ctx->stack_top[-1];
892
9.43k
    if (pdfi_type_of(d) != PDF_DICT) {
893
12
        pdfi_pop(ctx, 1);
894
12
        return_error(gs_error_typecheck);
895
12
    }
896
9.42k
    pdfi_countup(d);
897
9.42k
    pdfi_pop(ctx, 1);
898
899
    /* We don't want to pollute the Trailer dictionary with any XRefStm key/value pairs
900
     * which will happen when we do pdfi_merge_dicts(). So we get any XRefStm here and
901
     * if there was one, remove it from the dictionary before we merge with the
902
     * primary trailer.
903
     */
904
9.42k
    code = pdfi_dict_get_int(ctx, d, "XRefStm", &XRefStm);
905
9.42k
    if (code < 0 && code != gs_error_undefined)
906
2
        goto error;
907
908
9.42k
    if (code == 0) {
909
413
        code = pdfi_dict_delete(ctx, d, "XRefStm");
910
413
        if (code < 0)
911
0
            goto error;
912
413
    }
913
914
9.42k
    if (ctx->Trailer == NULL) {
915
8.24k
        ctx->Trailer = d;
916
8.24k
        pdfi_countup(d);
917
8.24k
    } else {
918
1.18k
        code = pdfi_merge_dicts(ctx, ctx->Trailer, d);
919
1.18k
        if (code < 0) {
920
0
            if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "read_xref", "")) < 0) {
921
0
                return code;
922
0
            }
923
0
        }
924
1.18k
    }
925
926
    /* Check if the highest subsection + size exceeds the /Size in the
927
     * trailer dictionary and set a warning flag if it does
928
     */
929
9.42k
    code = pdfi_dict_get_int(ctx, d, "Size", &num);
930
9.42k
    if (code < 0)
931
17
        goto error;
932
933
9.40k
    if (max_obj >= num)
934
561
        pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_SIZE, "read_xref", NULL);
935
936
    /* Check if this is a modified file and has any
937
     * previous xref entries.
938
     */
939
9.40k
    code = pdfi_dict_known(ctx, d, "Prev", &known);
940
9.40k
    if (known) {
941
4.17k
        code = pdfi_dict_get_int(ctx, d, "Prev", &num);
942
4.17k
        if (code < 0)
943
17
            goto error;
944
945
4.16k
        if (num < 0 || num > ctx->main_stream_length) {
946
1.31k
            code = gs_note_error(gs_error_rangecheck);
947
1.31k
            goto error;
948
1.31k
        }
949
950
2.84k
        if (pdfi_loop_detector_check_object(ctx, num) == true) {
951
7
            code = gs_note_error(gs_error_circular_reference);
952
7
            goto error;
953
7
        }
954
2.83k
        else {
955
2.83k
            code = pdfi_loop_detector_add_object(ctx, num);
956
2.83k
            if (code < 0)
957
0
                goto error;
958
2.83k
        }
959
960
2.83k
        code = pdfi_seek(ctx, s, num, SEEK_SET);
961
2.83k
        if (code < 0)
962
0
            goto error;
963
964
2.83k
        if (!ctx->repaired) {
965
2.83k
            code = pdfi_read_token(ctx, ctx->main_stream, 0, 0);
966
2.83k
            if (code < 0)
967
118
                goto error;
968
969
2.72k
            if (code == 0) {
970
2
                code = gs_note_error(gs_error_syntaxerror);
971
2
                goto error;
972
2
            }
973
2.72k
        } else {
974
0
            code = 0;
975
0
            goto error;
976
0
        }
977
978
2.71k
        if ((intptr_t)(ctx->stack_top[-1]) == (intptr_t)TOKEN_XREF) {
979
            /* Read old-style xref table */
980
1.24k
            pdfi_pop(ctx, 1);
981
1.24k
            code = read_xref(ctx, ctx->main_stream);
982
1.24k
            if (code < 0)
983
223
                goto error;
984
1.47k
        } else {
985
1.47k
            pdfi_pop(ctx, 1);
986
1.47k
            code = gs_note_error(gs_error_typecheck);
987
1.47k
            goto error;
988
1.47k
        }
989
2.71k
    }
990
991
    /* Now check if this is a hybrid file. */
992
6.25k
    if (XRefStm != 0) {
993
213
        ctx->is_hybrid = true;
994
995
213
        if (ctx->args.pdfdebug)
996
0
            outprintf(ctx->memory, "%% File is a hybrid, containing xref table and xref stream. Reading the stream.\n");
997
998
999
213
        if (pdfi_loop_detector_check_object(ctx, XRefStm) == true) {
1000
0
            code = gs_note_error(gs_error_circular_reference);
1001
0
            goto error;
1002
0
        }
1003
213
        else {
1004
213
            code = pdfi_loop_detector_add_object(ctx, XRefStm);
1005
213
            if (code < 0)
1006
0
                goto error;
1007
213
        }
1008
1009
213
        code = pdfi_loop_detector_mark(ctx);
1010
213
        if (code < 0)
1011
0
            goto error;
1012
1013
        /* Because of the way the code works when we read a file which is a pure
1014
         * xref stream file, we need to read the first integer of 'x y obj'
1015
         * because the xref stream decoding code expects that to be on the stack.
1016
         */
1017
213
        pdfi_seek(ctx, s, XRefStm, SEEK_SET);
1018
1019
213
        code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num);
1020
213
        if (code < 0) {
1021
0
            pdfi_set_error(ctx, 0, NULL, E_PDF_BADXREFSTREAM, "read_xref", "");
1022
0
            pdfi_loop_detector_cleartomark(ctx);
1023
0
            goto error;
1024
0
        }
1025
1026
213
        code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num);
1027
        /* We could just fall through to the exit here, but choose not to in order to avoid possible mistakes in future */
1028
213
        if (code < 0) {
1029
25
            pdfi_loop_detector_cleartomark(ctx);
1030
25
            goto error;
1031
25
        }
1032
1033
188
        pdfi_loop_detector_cleartomark(ctx);
1034
188
    } else
1035
6.03k
        code = 0;
1036
1037
9.42k
error:
1038
9.42k
    pdfi_countdown(d);
1039
9.42k
    return code;
1040
6.25k
}
1041
1042
int pdfi_read_xref(pdf_context *ctx)
1043
94.5k
{
1044
94.5k
    int code = 0;
1045
94.5k
    int obj_num;
1046
1047
94.5k
    code = pdfi_loop_detector_mark(ctx);
1048
94.5k
    if (code < 0)
1049
0
        return code;
1050
1051
94.5k
    if (ctx->startxref == 0)
1052
53.6k
        goto repair;
1053
1054
40.8k
    code = pdfi_loop_detector_add_object(ctx, ctx->startxref);
1055
40.8k
    if (code < 0)
1056
0
        goto exit;
1057
1058
40.8k
    if (ctx->args.pdfdebug)
1059
0
        outprintf(ctx->memory, "%% Trying to read 'xref' token for xref table, or 'int int obj' for an xref stream\n");
1060
1061
40.8k
    if (ctx->startxref > ctx->main_stream_length - 5) {
1062
10.4k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is beyond end of file")) < 0)
1063
0
            goto exit;
1064
1065
10.4k
        goto repair;
1066
10.4k
    }
1067
30.3k
    if (ctx->startxref < 0) {
1068
485
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is before start of file")) < 0)
1069
0
            goto exit;
1070
1071
485
        goto repair;
1072
485
    }
1073
1074
    /* Read the xref(s) */
1075
29.8k
    pdfi_seek(ctx, ctx->main_stream, ctx->startxref, SEEK_SET);
1076
1077
    /* If it starts with an int, it's an xref stream dict */
1078
29.8k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num);
1079
29.8k
    if (code == 1) {
1080
11.4k
        if (pdfi_check_xref_stream(ctx)) {
1081
8.54k
            code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num);
1082
8.54k
            if (code < 0)
1083
2.86k
                goto repair;
1084
8.54k
        } else
1085
2.89k
            goto repair;
1086
18.4k
    } else {
1087
        /* If not, it had better start 'xref', and be an old-style xref table */
1088
18.4k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
1089
18.4k
        if (code != TOKEN_XREF) {
1090
8.92k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"Failed to read any token at the startxref location")) < 0)
1091
0
                goto exit;
1092
1093
8.92k
            goto repair;
1094
8.92k
        }
1095
1096
9.52k
        code = read_xref(ctx, ctx->main_stream);
1097
9.52k
        if (code < 0)
1098
4.33k
            goto repair;
1099
9.52k
    }
1100
1101
10.8k
    if(ctx->args.pdfdebug && ctx->xref_table) {
1102
0
        int i, j;
1103
0
        xref_entry *entry;
1104
0
        char Buffer[32];
1105
1106
0
        outprintf(ctx->memory, "\n%% Dumping xref table\n");
1107
0
        for (i=0;i < ctx->xref_table->xref_size;i++) {
1108
0
            entry = &ctx->xref_table->xref[i];
1109
0
            if(entry->compressed) {
1110
0
                outprintf(ctx->memory, "*");
1111
0
                gs_snprintf(Buffer, sizeof(Buffer), "%"PRId64"", entry->object_num);
1112
0
                j = 10 - strlen(Buffer);
1113
0
                while(j--) {
1114
0
                    outprintf(ctx->memory, " ");
1115
0
                }
1116
0
                outprintf(ctx->memory, "%s ", Buffer);
1117
1118
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.compressed_stream_num);
1119
0
                j = 10 - strlen(Buffer);
1120
0
                while(j--) {
1121
0
                    outprintf(ctx->memory, " ");
1122
0
                }
1123
0
                outprintf(ctx->memory, "%s ", Buffer);
1124
1125
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.object_index);
1126
0
                j = 10 - strlen(Buffer);
1127
0
                while(j--) {
1128
0
                    outprintf(ctx->memory, " ");
1129
0
                }
1130
0
                outprintf(ctx->memory, "%s ", Buffer);
1131
0
            }
1132
0
            else {
1133
0
                outprintf(ctx->memory, " ");
1134
1135
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->object_num);
1136
0
                j = 10 - strlen(Buffer);
1137
0
                while(j--) {
1138
0
                    outprintf(ctx->memory, " ");
1139
0
                }
1140
0
                outprintf(ctx->memory, "%s ", Buffer);
1141
1142
0
                gs_snprintf(Buffer, sizeof(Buffer), "%"PRIdOFFSET"", entry->u.uncompressed.offset);
1143
0
                j = 10 - strlen(Buffer);
1144
0
                while(j--) {
1145
0
                    outprintf(ctx->memory, " ");
1146
0
                }
1147
0
                outprintf(ctx->memory, "%s ", Buffer);
1148
1149
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.uncompressed.generation_num);
1150
0
                j = 10 - strlen(Buffer);
1151
0
                while(j--) {
1152
0
                    outprintf(ctx->memory, " ");
1153
0
                }
1154
0
                outprintf(ctx->memory, "%s ", Buffer);
1155
0
            }
1156
0
            if (entry->free)
1157
0
                outprintf(ctx->memory, "f\n");
1158
0
            else
1159
0
                outprintf(ctx->memory, "n\n");
1160
0
        }
1161
0
    }
1162
10.8k
    if (ctx->args.pdfdebug)
1163
0
        outprintf(ctx->memory, "\n");
1164
1165
10.8k
 exit:
1166
10.8k
    (void)pdfi_loop_detector_cleartomark(ctx);
1167
1168
10.8k
    if (code < 0)
1169
0
        return code;
1170
1171
10.8k
    return 0;
1172
1173
83.6k
repair:
1174
83.6k
    (void)pdfi_loop_detector_cleartomark(ctx);
1175
83.6k
    if (!ctx->repaired && !ctx->args.pdfstoponerror)
1176
83.5k
        return(pdfi_repair_file(ctx));
1177
63
    return 0;
1178
83.6k
}