Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/pdf/pdf_xref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2018-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* xref parsing */
17
18
#include "pdf_int.h"
19
#include "pdf_stack.h"
20
#include "pdf_xref.h"
21
#include "pdf_file.h"
22
#include "pdf_loop_detect.h"
23
#include "pdf_dict.h"
24
#include "pdf_array.h"
25
#include "pdf_repair.h"
26
27
static int resize_xref(pdf_context *ctx, uint64_t new_size)
28
19.0k
{
29
19.0k
    xref_entry *new_xrefs;
30
31
    /* Although we can technically handle object numbers larger than this, on some systems (32-bit Windows)
32
     * memset is limited to a (signed!) integer for the size of memory to clear. We could deal
33
     * with this by clearing the memory in blocks, but really, this is almost certainly a
34
     * corrupted file or something.
35
     */
36
19.0k
    if (new_size >= (0x7ffffff / sizeof(xref_entry)))
37
18
        return_error(gs_error_rangecheck);
38
39
19.0k
    new_xrefs = (xref_entry *)gs_alloc_bytes(ctx->memory, (new_size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
40
19.0k
    if (new_xrefs == NULL){
41
0
        pdfi_countdown(ctx->xref_table);
42
0
        ctx->xref_table = NULL;
43
0
        return_error(gs_error_VMerror);
44
0
    }
45
19.0k
    memset(new_xrefs, 0x00, (new_size) * sizeof(xref_entry));
46
19.0k
    memcpy(new_xrefs, ctx->xref_table->xref, ctx->xref_table->xref_size * sizeof(xref_entry));
47
19.0k
    gs_free_object(ctx->memory, ctx->xref_table->xref, "reallocated xref entries");
48
19.0k
    ctx->xref_table->xref = new_xrefs;
49
19.0k
    ctx->xref_table->xref_size = new_size;
50
19.0k
    return 0;
51
19.0k
}
52
53
static int read_xref_stream_entries(pdf_context *ctx, pdf_c_stream *s, int64_t first, int64_t last, int64_t *W)
54
16.5k
{
55
16.5k
    uint i, j;
56
16.5k
    uint64_t field_width = 0;
57
16.5k
    uint32_t type = 0;
58
16.5k
    uint64_t objnum = 0, gen = 0;
59
16.5k
    byte *Buffer;
60
16.5k
    int64_t bytes = 0;
61
16.5k
    xref_entry *entry;
62
63
    /* Find max number of bytes to be read */
64
16.5k
    field_width = W[0];
65
16.5k
    if (W[1] > field_width)
66
16.4k
        field_width = W[1];
67
16.5k
    if (W[2] > field_width)
68
17
        field_width = W[2];
69
70
16.5k
    Buffer = gs_alloc_bytes(ctx->memory, field_width, "read_xref_stream_entry working buffer");
71
16.5k
    if (Buffer == NULL)
72
0
        return_error(gs_error_VMerror);
73
74
702k
    for (i=first;i<=last; i++){
75
        /* Defaults if W[n] = 0 */
76
686k
        type = 1;
77
686k
        objnum = gen = 0;
78
79
686k
        if (W[0] != 0) {
80
685k
            type = 0;
81
685k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[0], s);
82
685k
            if (bytes < W[0]){
83
125
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)");
84
125
                return_error(gs_error_ioerror);
85
125
            }
86
1.37M
            for (j=0;j<W[0];j++)
87
685k
                type = (type << 8) + Buffer[j];
88
685k
        }
89
90
686k
        if (W[1] != 0) {
91
685k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[1], s);
92
685k
            if (bytes < W[1]){
93
23
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry free working buffer (error)");
94
23
                return_error(gs_error_ioerror);
95
23
            }
96
2.34M
            for (j=0;j<W[1];j++)
97
1.66M
                objnum = (objnum << 8) + Buffer[j];
98
685k
        }
99
100
686k
        if (W[2] != 0) {
101
667k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[2], s);
102
667k
            if (bytes < W[2]){
103
37
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)");
104
37
                return_error(gs_error_ioerror);
105
37
            }
106
1.34M
            for (j=0;j<W[2];j++)
107
682k
                gen = (gen << 8) + Buffer[j];
108
666k
        }
109
110
685k
        entry = &ctx->xref_table->xref[i];
111
685k
        if (entry->object_num != 0 && !entry->free)
112
3.94k
            continue;
113
114
682k
        entry->compressed = false;
115
682k
        entry->free = false;
116
682k
        entry->object_num = i;
117
682k
        entry->cache = NULL;
118
119
682k
        switch(type) {
120
20.4k
            case 0:
121
20.4k
                entry->free = true;
122
20.4k
                entry->u.uncompressed.offset = objnum;         /* For free objects we use the offset to store the object number of the next free object */
123
20.4k
                entry->u.uncompressed.generation_num = gen;    /* And the generation number is the numebr to use if this object is used again */
124
20.4k
                break;
125
199k
            case 1:
126
199k
                entry->u.uncompressed.offset = objnum;
127
199k
                entry->u.uncompressed.generation_num = gen;
128
199k
                break;
129
461k
            case 2:
130
461k
                entry->compressed = true;
131
461k
                entry->u.compressed.compressed_stream_num = objnum;   /* The object number of the compressed stream */
132
461k
                entry->u.compressed.object_index = gen;               /* And the index of the object within the stream */
133
461k
                break;
134
124
            default:
135
124
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer");
136
124
                return_error(gs_error_rangecheck);
137
0
                break;
138
682k
        }
139
682k
    }
140
16.2k
    gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer");
141
16.2k
    return 0;
142
16.5k
}
143
144
/* Forward definition */
145
static int read_xref(pdf_context *ctx, pdf_c_stream *s);
146
static int pdfi_check_xref_stream(pdf_context *ctx);
147
/* These two routines are recursive.... */
148
static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num);
149
150
static int pdfi_process_xref_stream(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *s)
151
12.7k
{
152
12.7k
    pdf_c_stream *XRefStrm;
153
12.7k
    int code, i;
154
12.7k
    pdf_dict *sdict = NULL;
155
12.7k
    pdf_name *n;
156
12.7k
    pdf_array *a;
157
12.7k
    int64_t size;
158
12.7k
    int64_t num;
159
12.7k
    int64_t W[3] = {0, 0, 0};
160
12.7k
    int objnum;
161
12.7k
    bool known = false;
162
163
12.7k
    if (pdfi_type_of(stream_obj) != PDF_STREAM)
164
0
        return_error(gs_error_typecheck);
165
166
12.7k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)stream_obj, &sdict);
167
12.7k
    if (code < 0)
168
0
        return code;
169
170
12.7k
    code = pdfi_dict_get_type(ctx, sdict, "Type", PDF_NAME, (pdf_obj **)&n);
171
12.7k
    if (code < 0)
172
54
        return code;
173
174
12.6k
    if (n->length != 4 || memcmp(n->data, "XRef", 4) != 0) {
175
17
        pdfi_countdown(n);
176
17
        return_error(gs_error_syntaxerror);
177
17
    }
178
12.6k
    pdfi_countdown(n);
179
180
12.6k
    code = pdfi_dict_get_int(ctx, sdict, "Size", &size);
181
12.6k
    if (code < 0)
182
11
        return code;
183
12.6k
    if (size < 1)
184
10
        return 0;
185
186
12.6k
    if (size < 0 || size > floor((double)ARCH_MAX_SIZE_T / (double)sizeof(xref_entry)))
187
0
        return_error(gs_error_rangecheck);
188
189
    /* If this is the first xref stream then allocate the xref table and store the trailer */
190
12.6k
    if (ctx->xref_table == NULL) {
191
7.79k
        ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table");
192
7.79k
        if (ctx->xref_table == NULL) {
193
0
            return_error(gs_error_VMerror);
194
0
        }
195
7.79k
        memset(ctx->xref_table, 0x00, sizeof(xref_table_t));
196
7.79k
        ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
197
7.79k
        if (ctx->xref_table->xref == NULL){
198
3
            gs_free_object(ctx->memory, ctx->xref_table, "failed to allocate xref table entries");
199
3
            ctx->xref_table = NULL;
200
3
            return_error(gs_error_VMerror);
201
3
        }
202
7.79k
        memset(ctx->xref_table->xref, 0x00, size * sizeof(xref_entry));
203
7.79k
        ctx->xref_table->ctx = ctx;
204
7.79k
        ctx->xref_table->type = PDF_XREF_TABLE;
205
7.79k
        ctx->xref_table->xref_size = size;
206
#if REFCNT_DEBUG
207
        ctx->xref_table->UID = ctx->ref_UID++;
208
        outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID);
209
#endif
210
7.79k
        pdfi_countup(ctx->xref_table);
211
212
7.79k
        pdfi_countdown(ctx->Trailer);
213
214
7.79k
        ctx->Trailer = sdict;
215
7.79k
        pdfi_countup(sdict);
216
7.79k
    } else {
217
4.82k
        if (size > ctx->xref_table->xref_size)
218
4
            return_error(gs_error_rangecheck);
219
220
4.82k
        code = pdfi_merge_dicts(ctx, ctx->Trailer, sdict);
221
4.82k
        if (code < 0 && (code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "pdfi_process_xref_stream", NULL)) < 0) {
222
0
            goto exit;
223
0
        }
224
4.82k
    }
225
226
12.6k
    pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, stream_obj), SEEK_SET);
227
228
    /* Bug #691220 has a PDF file with a compressed XRef, the stream dictionary has
229
     * a /DecodeParms entry for the stream, which has a /Colors value of 5, which makes
230
     * *no* sense whatever. If we try to apply a Predictor then we end up in a loop trying
231
     * to read 5 colour samples. Rather than meddles with more parameters to the filter
232
     * code, we'll just remove the Colors entry from the DecodeParms dictionary,
233
     * because it is nonsense. This means we'll get the (sensible) default value of 1.
234
     */
235
12.6k
    code = pdfi_dict_known(ctx, sdict, "DecodeParms", &known);
236
12.6k
    if (code < 0)
237
0
        return code;
238
239
12.6k
    if (known) {
240
11.6k
        pdf_dict *DP;
241
11.6k
        double f;
242
11.6k
        pdf_obj *name;
243
244
11.6k
        code = pdfi_dict_get_type(ctx, sdict, "DecodeParms", PDF_DICT, (pdf_obj **)&DP);
245
11.6k
        if (code < 0)
246
1
            return code;
247
248
11.6k
        code = pdfi_dict_knownget_number(ctx, DP, "Colors", &f);
249
11.6k
        if (code < 0) {
250
0
            pdfi_countdown(DP);
251
0
            return code;
252
0
        }
253
11.6k
        if (code > 0 && f != (double)1)
254
0
        {
255
0
            code = pdfi_name_alloc(ctx, (byte *)"Colors", 6, &name);
256
0
            if (code < 0) {
257
0
                pdfi_countdown(DP);
258
0
                return code;
259
0
            }
260
0
            pdfi_countup(name);
261
262
0
            code = pdfi_dict_delete_pair(ctx, DP, (pdf_name *)name);
263
0
            pdfi_countdown(name);
264
0
            if (code < 0) {
265
0
                pdfi_countdown(DP);
266
0
                return code;
267
0
            }
268
0
        }
269
11.6k
        pdfi_countdown(DP);
270
11.6k
    }
271
272
12.6k
    code = pdfi_filter_no_decryption(ctx, stream_obj, s, &XRefStrm, false);
273
12.6k
    if (code < 0) {
274
47
        pdfi_countdown(ctx->xref_table);
275
47
        ctx->xref_table = NULL;
276
47
        return code;
277
47
    }
278
279
12.5k
    code = pdfi_dict_get_type(ctx, sdict, "W", PDF_ARRAY, (pdf_obj **)&a);
280
12.5k
    if (code < 0) {
281
7
        pdfi_close_file(ctx, XRefStrm);
282
7
        pdfi_countdown(ctx->xref_table);
283
7
        ctx->xref_table = NULL;
284
7
        return code;
285
7
    }
286
287
12.5k
    if (pdfi_array_size(a) != 3) {
288
10
        pdfi_countdown(a);
289
10
        pdfi_close_file(ctx, XRefStrm);
290
10
        pdfi_countdown(ctx->xref_table);
291
10
        ctx->xref_table = NULL;
292
10
        return_error(gs_error_rangecheck);
293
10
    }
294
50.1k
    for (i=0;i<3;i++) {
295
37.6k
        code = pdfi_array_get_int(ctx, a, (uint64_t)i, (int64_t *)&W[i]);
296
37.6k
        if (code < 0 || W[i] < 0) {
297
32
            pdfi_countdown(a);
298
32
            pdfi_close_file(ctx, XRefStrm);
299
32
            pdfi_countdown(ctx->xref_table);
300
32
            ctx->xref_table = NULL;
301
32
            if (W[i] < 0)
302
7
                code = gs_note_error(gs_error_rangecheck);
303
32
            return code;
304
32
        }
305
37.6k
    }
306
12.5k
    pdfi_countdown(a);
307
308
    /* W[0] is either:
309
     * 0 (no type field) or a single byte with the type.
310
     * W[1] is either:
311
     * The object number of the next free object, the byte offset of this object in the file or the object5 number of the object stream where this object is stored.
312
     * W[2] is either:
313
     * The generation number to use if this object is used again, the generation number of the object or the index of this object within the object stream.
314
     *
315
     * Object and generation numbers are limited to unsigned 64-bit values, as are bytes offsets in the file, indexes of objects within the stream likewise (actually
316
     * most of these are generally 32-bit max). So we can limit the field widths to 8 bytes, enough to hold a 64-bit number.
317
     * Even if a later version of the spec makes these larger (which seems unlikely!) we still cna't cope with integers > 64-bits.
318
     */
319
12.5k
    if (W[0] > 1 || W[1] > 8 || W[2] > 8) {
320
29
        pdfi_close_file(ctx, XRefStrm);
321
29
        pdfi_countdown(ctx->xref_table);
322
29
        ctx->xref_table = NULL;
323
29
        return code;
324
29
    }
325
326
12.4k
    code = pdfi_dict_get_type(ctx, sdict, "Index", PDF_ARRAY, (pdf_obj **)&a);
327
12.4k
    if (code == gs_error_undefined) {
328
4.54k
        code = read_xref_stream_entries(ctx, XRefStrm, 0, size - 1, W);
329
4.54k
        if (code < 0) {
330
104
            pdfi_close_file(ctx, XRefStrm);
331
104
            pdfi_countdown(ctx->xref_table);
332
104
            ctx->xref_table = NULL;
333
104
            return code;
334
104
        }
335
7.94k
    } else {
336
7.94k
        int64_t start, size;
337
338
7.94k
        if (code < 0) {
339
2
            pdfi_close_file(ctx, XRefStrm);
340
2
            pdfi_countdown(ctx->xref_table);
341
2
            ctx->xref_table = NULL;
342
2
            return code;
343
2
        }
344
345
7.94k
        if (pdfi_array_size(a) & 1) {
346
9
            pdfi_countdown(a);
347
9
            pdfi_close_file(ctx, XRefStrm);
348
9
            pdfi_countdown(ctx->xref_table);
349
9
            ctx->xref_table = NULL;
350
9
            return_error(gs_error_rangecheck);
351
9
        }
352
353
19.7k
        for (i=0;i < pdfi_array_size(a);i+=2){
354
12.0k
            code = pdfi_array_get_int(ctx, a, (uint64_t)i, &start);
355
12.0k
            if (code < 0 || start < 0) {
356
15
                pdfi_countdown(a);
357
15
                pdfi_close_file(ctx, XRefStrm);
358
15
                pdfi_countdown(ctx->xref_table);
359
15
                ctx->xref_table = NULL;
360
15
                return code;
361
15
            }
362
363
12.0k
            code = pdfi_array_get_int(ctx, a, (uint64_t)i+1, &size);
364
12.0k
            if (code < 0) {
365
13
                pdfi_countdown(a);
366
13
                pdfi_close_file(ctx, XRefStrm);
367
13
                pdfi_countdown(ctx->xref_table);
368
13
                ctx->xref_table = NULL;
369
13
                return code;
370
13
            }
371
372
12.0k
            if (size < 1)
373
11
                continue;
374
375
12.0k
            if (start + size >= ctx->xref_table->xref_size) {
376
7.12k
                code = resize_xref(ctx, start + size);
377
7.12k
                if (code < 0) {
378
5
                    pdfi_countdown(a);
379
5
                    pdfi_close_file(ctx, XRefStrm);
380
5
                    pdfi_countdown(ctx->xref_table);
381
5
                    ctx->xref_table = NULL;
382
5
                    return code;
383
5
                }
384
7.12k
            }
385
386
12.0k
            code = read_xref_stream_entries(ctx, XRefStrm, start, start + size - 1, W);
387
12.0k
            if (code < 0) {
388
205
                pdfi_countdown(a);
389
205
                pdfi_close_file(ctx, XRefStrm);
390
205
                pdfi_countdown(ctx->xref_table);
391
205
                ctx->xref_table = NULL;
392
205
                return code;
393
205
            }
394
12.0k
        }
395
7.93k
    }
396
12.1k
    pdfi_countdown(a);
397
398
12.1k
    pdfi_close_file(ctx, XRefStrm);
399
400
12.1k
    code = pdfi_dict_get_int(ctx, sdict, "Prev", &num);
401
12.1k
    if (code == gs_error_undefined)
402
5.05k
        return 0;
403
404
7.08k
    if (code < 0)
405
12
        return code;
406
407
7.07k
    if (num < 0 || num > ctx->main_stream_length)
408
1.79k
        return_error(gs_error_rangecheck);
409
410
5.27k
    if (pdfi_loop_detector_check_object(ctx, num) == true)
411
20
        return_error(gs_error_circular_reference);
412
5.25k
    else {
413
5.25k
        code = pdfi_loop_detector_add_object(ctx, num);
414
5.25k
        if (code < 0)
415
0
            return code;
416
5.25k
    }
417
418
5.25k
    if(ctx->args.pdfdebug)
419
0
        outprintf(ctx->memory, "%% Reading /Prev xref\n");
420
421
5.25k
    pdfi_seek(ctx, s, num, SEEK_SET);
422
423
5.25k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &objnum);
424
5.25k
    if (code == 1) {
425
4.83k
        if (pdfi_check_xref_stream(ctx))
426
4.80k
            return pdfi_read_xref_stream_dict(ctx, s, objnum);
427
4.83k
    }
428
429
455
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
430
455
    if (code < 0)
431
0
        return code;
432
455
    if (code == TOKEN_XREF) {
433
54
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_process_xref_stream", NULL)) < 0) {
434
0
            goto exit;
435
0
        }
436
        /* Read old-style xref table */
437
54
        return(read_xref(ctx, ctx->main_stream));
438
54
    }
439
401
exit:
440
401
    return_error(gs_error_syntaxerror);
441
455
}
442
443
static int pdfi_check_xref_stream(pdf_context *ctx)
444
16.0k
{
445
16.0k
    gs_offset_t offset;
446
16.0k
    int gen_num, code = 0;
447
448
16.0k
    offset = pdfi_unread_tell(ctx);
449
450
16.0k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num);
451
16.0k
    if (code <= 0) {
452
1.13k
        code = 0;
453
1.13k
        goto exit;
454
1.13k
    }
455
456
    /* Try to read 'obj' */
457
14.9k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
458
14.9k
    if (code <= 0) {
459
0
        code = 0;
460
0
        goto exit;
461
0
    }
462
463
    /* Third element must be obj, or it's not a valid xref */
464
14.9k
    if (code != TOKEN_OBJ)
465
1.75k
        code = 0;
466
13.1k
    else
467
13.1k
        code = 1;
468
469
16.0k
exit:
470
16.0k
    pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
471
16.0k
    return code;
472
14.9k
}
473
474
static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num)
475
13.4k
{
476
13.4k
    int code;
477
13.4k
    int gen_num;
478
479
13.4k
    if (ctx->args.pdfdebug)
480
0
        outprintf(ctx->memory, "\n%% Reading PDF 1.5+ xref stream\n");
481
482
    /* We have the obj_num. Lets try for obj_num gen obj as a XRef stream */
483
13.4k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num);
484
13.4k
    if (code <= 0) {
485
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", "")) < 0) {
486
0
            return code;
487
0
        }
488
0
        return(pdfi_repair_file(ctx));
489
0
    }
490
491
    /* Try to read 'obj' */
492
13.4k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
493
13.4k
    if (code < 0)
494
0
        return code;
495
13.4k
    if (code == 0)
496
0
        return_error(gs_error_syntaxerror);
497
498
    /* Third element must be obj, or it's not a valid xref */
499
13.4k
    if (code != TOKEN_OBJ) {
500
0
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BAD_XREFSTMOFFSET, "pdfi_read_xref_stream_dict", "")) < 0) {
501
0
            return code;
502
0
        }
503
0
        return(pdfi_repair_file(ctx));
504
0
    }
505
506
547k
    do {
507
547k
        code = pdfi_read_token(ctx, ctx->main_stream, obj_num, gen_num);
508
547k
        if (code <= 0) {
509
517
            if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
510
0
                return code;
511
0
            }
512
517
            return pdfi_repair_file(ctx);
513
517
        }
514
515
546k
        if (pdfi_count_stack(ctx) >= 2 && pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
516
14.4k
            uintptr_t keyword = (uintptr_t)ctx->stack_top[-1];
517
14.4k
            if (keyword == TOKEN_STREAM) {
518
12.7k
                pdf_dict *dict;
519
12.7k
                pdf_stream *sdict = NULL;
520
12.7k
                int64_t Length;
521
522
                /* Remove the 'stream' token from the stack, should leave a dictionary object on the stack */
523
12.7k
                pdfi_pop(ctx, 1);
524
12.7k
                if (pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) {
525
22
                    if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
526
0
                        return code;
527
0
                    }
528
22
                    return pdfi_repair_file(ctx);
529
22
                }
530
12.7k
                dict = (pdf_dict *)ctx->stack_top[-1];
531
532
                /* Convert the dict into a stream (sdict comes back with at least one ref) */
533
12.7k
                code = pdfi_obj_dict_to_stream(ctx, dict, &sdict, true);
534
                /* Pop off the dict */
535
12.7k
                pdfi_pop(ctx, 1);
536
12.7k
                if (code < 0) {
537
0
                    if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
538
0
                        return code;
539
0
                    }
540
                    /* TODO: should I return code instead of trying to repair?
541
                     * Normally the above routine should not fail so something is
542
                     * probably seriously fubar.
543
                     */
544
0
                    return pdfi_repair_file(ctx);
545
0
                }
546
12.7k
                dict = NULL;
547
548
                /* Init the stuff for the stream */
549
12.7k
                sdict->stream_offset = pdfi_unread_tell(ctx);
550
12.7k
                sdict->object_num = obj_num;
551
12.7k
                sdict->generation_num = gen_num;
552
553
12.7k
                code = pdfi_dict_get_int(ctx, sdict->stream_dict, "Length", &Length);
554
12.7k
                if (code < 0) {
555
                    /* TODO: Not positive this will actually have a length -- just use 0 */
556
51
                    (void)pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num);
557
51
                    code = 0;
558
51
                    Length = 0;
559
51
                }
560
12.7k
                sdict->Length = Length;
561
12.7k
                sdict->length_valid = true;
562
563
12.7k
                code = pdfi_process_xref_stream(ctx, sdict, ctx->main_stream);
564
12.7k
                pdfi_countdown(sdict);
565
12.7k
                if (code < 0) {
566
3.00k
                    pdfi_set_error(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_read_xref_stream_dict", NULL);
567
3.00k
                    return code;
568
3.00k
                }
569
9.71k
                break;
570
12.7k
            } else if (keyword == TOKEN_ENDOBJ) {
571
                /* Something went wrong, this is not a stream dictionary */
572
152
                if ((code = pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num)) < 0) {
573
0
                    return code;
574
0
                }
575
152
                return(pdfi_repair_file(ctx));
576
152
            }
577
14.4k
        }
578
546k
    } while(1);
579
9.71k
    return 0;
580
13.4k
}
581
582
static int skip_to_digit(pdf_context *ctx, pdf_c_stream *s, unsigned int limit)
583
3.10k
{
584
3.10k
    int c, read = 0;
585
586
11.1k
    do {
587
11.1k
        c = pdfi_read_byte(ctx, s);
588
11.1k
        if (c < 0)
589
0
            return_error(gs_error_ioerror);
590
11.1k
        if (c >= '0' && c <= '9') {
591
2.83k
            pdfi_unread_byte(ctx, s, (byte)c);
592
2.83k
            return read;
593
2.83k
        }
594
8.36k
        read++;
595
8.36k
    } while (read < limit);
596
597
270
    return read;
598
3.10k
}
599
600
static int read_digits(pdf_context *ctx, pdf_c_stream *s, byte *Buffer, int limit)
601
3.10k
{
602
3.10k
    int c, read = 0;
603
604
    /* Since the "limit" is a value calculated by the caller,
605
       it's easier to check it in one place (here) than before
606
       every call.
607
     */
608
3.10k
    if (limit <= 0)
609
283
        return_error(gs_error_syntaxerror);
610
611
    /* We assume that Buffer always has limit+1 bytes available, so we can
612
     * safely terminate it. */
613
614
17.1k
    do {
615
17.1k
        c = pdfi_read_byte(ctx, s);
616
17.1k
        if (c < 0)
617
0
            return_error(gs_error_ioerror);
618
17.1k
        if (c < '0' || c > '9') {
619
1.20k
            pdfi_unread_byte(ctx, s, c);
620
1.20k
            break;
621
1.20k
        }
622
15.9k
        *Buffer++ = (byte)c;
623
15.9k
        read++;
624
15.9k
    } while (read < limit);
625
2.82k
    *Buffer = 0;
626
627
2.82k
    return read;
628
2.82k
}
629
630
631
static int read_xref_entry_slow(pdf_context *ctx, pdf_c_stream *s, gs_offset_t *offset, uint32_t *generation_num, unsigned char *free)
632
1.57k
{
633
1.57k
    byte Buffer[20];
634
1.57k
    int c, code, read = 0;
635
636
    /* First off, find a number. If we don't find one, and read 20 bytes, throw an error */
637
1.57k
    code = skip_to_digit(ctx, s, 20);
638
1.57k
    if (code < 0)
639
0
        return code;
640
1.57k
    read += code;
641
642
    /* Now read a number */
643
1.57k
    code = read_digits(ctx, s, (byte *)&Buffer,  (read > 10 ? 20 - read : 10));
644
1.57k
    if (code < 0)
645
49
        return code;
646
1.52k
    read += code;
647
648
1.52k
    *offset = atol((const char *)Buffer);
649
650
    /* find next number */
651
1.52k
    code = skip_to_digit(ctx, s, 20 - read);
652
1.52k
    if (code < 0)
653
0
        return code;
654
1.52k
    read += code;
655
656
    /* and read it */
657
1.52k
    code = read_digits(ctx, s, (byte *)&Buffer, (read > 15 ? 20 - read : 5));
658
1.52k
    if (code < 0)
659
234
        return code;
660
1.29k
    read += code;
661
662
1.29k
    *generation_num = atol((const char *)Buffer);
663
664
2.25k
    do {
665
2.25k
        c = pdfi_read_byte(ctx, s);
666
2.25k
        if (c < 0)
667
0
            return_error(gs_error_ioerror);
668
2.25k
        read ++;
669
2.25k
        if (c == 0x09 || c == 0x20)
670
987
            continue;
671
1.27k
        if (c == 'n' || c == 'f') {
672
719
            *free = (unsigned char)c;
673
719
            break;
674
719
        } else {
675
551
            return_error(gs_error_syntaxerror);
676
551
        }
677
1.27k
    } while (read < 20);
678
743
    if (read >= 20)
679
31
        return_error(gs_error_syntaxerror);
680
681
1.69k
    do {
682
1.69k
        c = pdfi_read_byte(ctx, s);
683
1.69k
        if (c < 0)
684
0
            return_error(gs_error_syntaxerror);
685
1.69k
        read++;
686
1.69k
        if (c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a)
687
857
            continue;
688
1.69k
    } while (read < 20);
689
712
    return 0;
690
712
}
691
692
static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char free)
693
712
{
694
712
    byte b[20], *ptr = B;
695
712
    int index = 0;
696
697
712
    gs_snprintf((char *)b, sizeof(b), "%"PRIdOFFSET"", o);
698
712
    if (strlen((const char *)b) > 10)
699
0
        return_error(gs_error_rangecheck);
700
5.85k
    for(index=0;index < 10 - strlen((const char *)b); index++) {
701
5.14k
        *ptr++ = 0x30;
702
5.14k
    }
703
712
    memcpy(ptr, b, strlen((const char *)b));
704
712
    ptr += strlen((const char *)b);
705
712
    *ptr++ = 0x20;
706
707
712
    gs_snprintf((char *)b, sizeof(b), "%d", g);
708
712
    if (strlen((const char *)b) > 5)
709
0
        return_error(gs_error_rangecheck);
710
3.18k
    for(index=0;index < 5 - strlen((const char *)b);index++) {
711
2.47k
        *ptr++ = 0x30;
712
2.47k
    }
713
712
    memcpy(ptr, b, strlen((const char *)b));
714
712
    ptr += strlen((const char *)b);
715
712
    *ptr++ = 0x20;
716
712
    *ptr++ = free;
717
712
    *ptr++ = 0x20;
718
712
    *ptr++ = 0x0d;
719
712
    return 0;
720
712
}
721
722
static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size)
723
36.3k
{
724
36.3k
    int code = 0, i, j;
725
36.3k
    int start = 0;
726
36.3k
    int size = 0;
727
36.3k
    int64_t bytes = 0;
728
36.3k
    char Buffer[21];
729
730
36.3k
    *section_start = *section_size = 0;
731
732
36.3k
    if (ctx->args.pdfdebug)
733
0
        outprintf(ctx->memory, "\n%% Reading xref section\n");
734
735
36.3k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &start);
736
36.3k
    if (code < 0) {
737
        /* Not an int, might be a keyword */
738
10.0k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
739
10.0k
        if (code < 0)
740
0
            return code;
741
742
10.0k
        if (code != TOKEN_TRAILER) {
743
            /* element is not an integer, and not a keyword - not a valid xref */
744
136
            return_error(gs_error_typecheck);
745
136
        }
746
9.87k
        return 1;
747
10.0k
    }
748
749
26.3k
    if (start < 0)
750
23
        return_error(gs_error_rangecheck);
751
752
26.3k
    *section_start = start;
753
754
26.3k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &size);
755
26.3k
    if (code < 0)
756
20
        return code;
757
26.3k
    if (code == 0)
758
43
        return_error(gs_error_syntaxerror);
759
760
    /* Zero sized xref sections are valid; see the file attached to
761
     * bug 704947 for an example. */
762
26.3k
    if (size < 0)
763
12
        return_error(gs_error_rangecheck);
764
765
26.2k
    *section_size = size;
766
767
26.2k
    if (ctx->args.pdfdebug)
768
0
        outprintf(ctx->memory, "\n%% Section starts at %d and has %d entries\n", (unsigned int) start, (unsigned int)size);
769
770
26.2k
    if (size > 0) {
771
25.8k
        if (ctx->xref_table == NULL) {
772
9.61k
            ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table");
773
9.61k
            if (ctx->xref_table == NULL)
774
0
                return_error(gs_error_VMerror);
775
9.61k
            memset(ctx->xref_table, 0x00, sizeof(xref_table_t));
776
777
9.61k
            ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, (start + size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
778
9.61k
            if (ctx->xref_table->xref == NULL){
779
27
                gs_free_object(ctx->memory, ctx->xref_table, "free xref table on error allocating entries");
780
27
                ctx->xref_table = NULL;
781
27
                return_error(gs_error_VMerror);
782
27
            }
783
#if REFCNT_DEBUG
784
            ctx->xref_table->UID = ctx->ref_UID++;
785
            outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID);
786
#endif
787
788
9.59k
            memset(ctx->xref_table->xref, 0x00, (start + size) * sizeof(xref_entry));
789
9.59k
            ctx->xref_table->ctx = ctx;
790
9.59k
            ctx->xref_table->type = PDF_XREF_TABLE;
791
9.59k
            ctx->xref_table->xref_size = start + size;
792
9.59k
            pdfi_countup(ctx->xref_table);
793
16.2k
        } else {
794
16.2k
            if (start + size > ctx->xref_table->xref_size) {
795
11.9k
                code = resize_xref(ctx, start + size);
796
11.9k
                if (code < 0)
797
13
                    return code;
798
11.9k
            }
799
16.2k
        }
800
25.8k
    }
801
802
26.2k
    pdfi_skip_white(ctx, s);
803
485k
    for (i=0;i< size;i++){
804
459k
        xref_entry *entry = &ctx->xref_table->xref[i + start];
805
459k
        unsigned char free;
806
459k
        gs_offset_t off;
807
459k
        unsigned int gen;
808
809
459k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 20, s);
810
459k
        if (bytes < 20)
811
2
            return_error(gs_error_ioerror);
812
459k
        j = 19;
813
459k
        if ((Buffer[19] != 0x0a && Buffer[19] != 0x0d) || (Buffer[18] != 0x0d && Buffer[18] != 0x0a && Buffer[18] != 0x20))
814
19.5k
            pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_SIZE, "read_xref_section", NULL);
815
481k
        while (Buffer[j] != 0x0D && Buffer[j] != 0x0A) {
816
22.4k
            pdfi_unread_byte(ctx, s, (byte)Buffer[j]);
817
22.4k
            if (--j < 0) {
818
769
                pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_NO_EOL, "read_xref_section", NULL);
819
769
                outprintf(ctx->memory, "Invalid xref entry, line terminator missing.\n");
820
769
                code = read_xref_entry_slow(ctx, s, &off, &gen, &free);
821
769
                if (code < 0)
822
383
                    return code;
823
386
                code = write_offset((byte *)Buffer, off, gen, free);
824
386
                if (code < 0)
825
0
                    return code;
826
386
                j = 19;
827
386
                break;
828
386
            }
829
22.4k
        }
830
459k
        Buffer[j] = 0x00;
831
459k
        if (entry->object_num != 0)
832
8.53k
            continue;
833
834
450k
        if (sscanf(Buffer, "%"PRIdOFFSET" %d %c", &entry->u.uncompressed.offset, &entry->u.uncompressed.generation_num, &free) != 3) {
835
808
            pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_FORMAT, "read_xref_section", NULL);
836
808
            outprintf(ctx->memory, "Invalid xref entry, incorrect format.\n");
837
808
            pdfi_unread(ctx, s, (byte *)Buffer, 20);
838
808
            code = read_xref_entry_slow(ctx, s, &off, &gen, &free);
839
808
            if (code < 0)
840
482
                return code;
841
326
            code = write_offset((byte *)Buffer, off, gen, free);
842
326
            if (code < 0)
843
0
                return code;
844
326
        }
845
846
450k
        entry->compressed = false;
847
450k
        entry->object_num = i + start;
848
450k
        if (free == 'f')
849
138k
            entry->free = true;
850
450k
        if(free == 'n')
851
311k
            entry->free = false;
852
450k
        if (entry->object_num == 0) {
853
6.47k
            if (!entry->free) {
854
68
                pdfi_set_warning(ctx, 0, NULL, W_PDF_XREF_OBJECT0_NOT_FREE, "read_xref_section", NULL);
855
68
            }
856
6.47k
        }
857
450k
    }
858
859
25.3k
    return 0;
860
26.2k
}
861
862
static int read_xref(pdf_context *ctx, pdf_c_stream *s)
863
11.0k
{
864
11.0k
    int code = 0;
865
11.0k
    pdf_dict *d = NULL;
866
11.0k
    uint64_t max_obj = 0;
867
11.0k
    int64_t num, XRefStm = 0;
868
11.0k
    int obj_num;
869
11.0k
    bool known = false;
870
871
11.0k
    if (ctx->repaired)
872
4
        return 0;
873
874
36.3k
    do {
875
36.3k
        uint64_t section_start, section_size;
876
877
36.3k
        code = read_xref_section(ctx, s, &section_start, &section_size);
878
36.3k
        if (code < 0)
879
1.14k
            return code;
880
881
35.2k
        if (section_size > 0 && section_start + section_size - 1 > max_obj)
882
22.8k
            max_obj = section_start + section_size - 1;
883
884
        /* code == 1 => read_xref_section ended with a trailer. */
885
35.2k
    } while (code != 1);
886
887
9.87k
    code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0);
888
9.87k
    if (code < 0)
889
172
        return code;
890
891
9.70k
    d = (pdf_dict *)ctx->stack_top[-1];
892
9.70k
    if (pdfi_type_of(d) != PDF_DICT) {
893
11
        pdfi_pop(ctx, 1);
894
11
        return_error(gs_error_typecheck);
895
11
    }
896
9.69k
    pdfi_countup(d);
897
9.69k
    pdfi_pop(ctx, 1);
898
899
    /* We don't want to pollute the Trailer dictionary with any XRefStm key/value pairs
900
     * which will happen when we do pdfi_merge_dicts(). So we get any XRefStm here and
901
     * if there was one, remove it from the dictionary before we merge with the
902
     * primary trailer.
903
     */
904
9.69k
    code = pdfi_dict_get_int(ctx, d, "XRefStm", &XRefStm);
905
9.69k
    if (code < 0 && code != gs_error_undefined)
906
2
        goto error;
907
908
9.68k
    if (code == 0) {
909
413
        code = pdfi_dict_delete(ctx, d, "XRefStm");
910
413
        if (code < 0)
911
0
            goto error;
912
413
    }
913
914
9.68k
    if (ctx->Trailer == NULL) {
915
8.47k
        ctx->Trailer = d;
916
8.47k
        pdfi_countup(d);
917
8.47k
    } else {
918
1.21k
        code = pdfi_merge_dicts(ctx, ctx->Trailer, d);
919
1.21k
        if (code < 0) {
920
0
            if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "read_xref", "")) < 0) {
921
0
                return code;
922
0
            }
923
0
        }
924
1.21k
    }
925
926
    /* Check if the highest subsection + size exceeds the /Size in the
927
     * trailer dictionary and set a warning flag if it does
928
     */
929
9.68k
    code = pdfi_dict_get_int(ctx, d, "Size", &num);
930
9.68k
    if (code < 0)
931
16
        goto error;
932
933
9.67k
    if (max_obj >= num)
934
589
        pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_SIZE, "read_xref", NULL);
935
936
    /* Check if this is a modified file and has any
937
     * previous xref entries.
938
     */
939
9.67k
    code = pdfi_dict_known(ctx, d, "Prev", &known);
940
9.67k
    if (known) {
941
4.26k
        code = pdfi_dict_get_int(ctx, d, "Prev", &num);
942
4.26k
        if (code < 0)
943
16
            goto error;
944
945
4.24k
        if (num < 0 || num > ctx->main_stream_length) {
946
1.36k
            code = gs_note_error(gs_error_rangecheck);
947
1.36k
            goto error;
948
1.36k
        }
949
950
2.88k
        if (pdfi_loop_detector_check_object(ctx, num) == true) {
951
6
            code = gs_note_error(gs_error_circular_reference);
952
6
            goto error;
953
6
        }
954
2.87k
        else {
955
2.87k
            code = pdfi_loop_detector_add_object(ctx, num);
956
2.87k
            if (code < 0)
957
0
                goto error;
958
2.87k
        }
959
960
2.87k
        code = pdfi_seek(ctx, s, num, SEEK_SET);
961
2.87k
        if (code < 0)
962
0
            goto error;
963
964
2.87k
        if (!ctx->repaired) {
965
2.87k
            code = pdfi_read_token(ctx, ctx->main_stream, 0, 0);
966
2.87k
            if (code < 0)
967
125
                goto error;
968
969
2.75k
            if (code == 0) {
970
1
                code = gs_note_error(gs_error_syntaxerror);
971
1
                goto error;
972
1
            }
973
2.75k
        } else {
974
0
            code = 0;
975
0
            goto error;
976
0
        }
977
978
2.75k
        if ((intptr_t)(ctx->stack_top[-1]) == (intptr_t)TOKEN_XREF) {
979
            /* Read old-style xref table */
980
1.26k
            pdfi_pop(ctx, 1);
981
1.26k
            code = read_xref(ctx, ctx->main_stream);
982
1.26k
            if (code < 0)
983
211
                goto error;
984
1.48k
        } else {
985
1.48k
            pdfi_pop(ctx, 1);
986
1.48k
            code = gs_note_error(gs_error_typecheck);
987
1.48k
            goto error;
988
1.48k
        }
989
2.75k
    }
990
991
    /* Now check if this is a hybrid file. */
992
6.46k
    if (XRefStm != 0) {
993
212
        ctx->is_hybrid = true;
994
995
212
        if (ctx->args.pdfdebug)
996
0
            outprintf(ctx->memory, "%% File is a hybrid, containing xref table and xref stream. Reading the stream.\n");
997
998
999
212
        if (pdfi_loop_detector_check_object(ctx, XRefStm) == true) {
1000
0
            code = gs_note_error(gs_error_circular_reference);
1001
0
            goto error;
1002
0
        }
1003
212
        else {
1004
212
            code = pdfi_loop_detector_add_object(ctx, XRefStm);
1005
212
            if (code < 0)
1006
0
                goto error;
1007
212
        }
1008
1009
212
        code = pdfi_loop_detector_mark(ctx);
1010
212
        if (code < 0)
1011
0
            goto error;
1012
1013
        /* Because of the way the code works when we read a file which is a pure
1014
         * xref stream file, we need to read the first integer of 'x y obj'
1015
         * because the xref stream decoding code expects that to be on the stack.
1016
         */
1017
212
        pdfi_seek(ctx, s, XRefStm, SEEK_SET);
1018
1019
212
        code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num);
1020
212
        if (code < 0) {
1021
0
            pdfi_set_error(ctx, 0, NULL, E_PDF_BADXREFSTREAM, "read_xref", "");
1022
0
            pdfi_loop_detector_cleartomark(ctx);
1023
0
            goto error;
1024
0
        }
1025
1026
212
        code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num);
1027
        /* We could just fall through to the exit here, but choose not to in order to avoid possible mistakes in future */
1028
212
        if (code < 0) {
1029
22
            pdfi_loop_detector_cleartomark(ctx);
1030
22
            goto error;
1031
22
        }
1032
1033
190
        pdfi_loop_detector_cleartomark(ctx);
1034
190
    } else
1035
6.25k
        code = 0;
1036
1037
9.69k
error:
1038
9.69k
    pdfi_countdown(d);
1039
9.69k
    return code;
1040
6.46k
}
1041
1042
int pdfi_read_xref(pdf_context *ctx)
1043
92.3k
{
1044
92.3k
    int code = 0;
1045
92.3k
    int obj_num;
1046
1047
92.3k
    code = pdfi_loop_detector_mark(ctx);
1048
92.3k
    if (code < 0)
1049
0
        return code;
1050
1051
92.3k
    if (ctx->startxref == 0)
1052
51.9k
        goto repair;
1053
1054
40.4k
    code = pdfi_loop_detector_add_object(ctx, ctx->startxref);
1055
40.4k
    if (code < 0)
1056
0
        goto exit;
1057
1058
40.4k
    if (ctx->args.pdfdebug)
1059
0
        outprintf(ctx->memory, "%% Trying to read 'xref' token for xref table, or 'int int obj' for an xref stream\n");
1060
1061
40.4k
    if (ctx->startxref > ctx->main_stream_length - 5) {
1062
10.3k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is beyond end of file")) < 0)
1063
0
            goto exit;
1064
1065
10.3k
        goto repair;
1066
10.3k
    }
1067
30.1k
    if (ctx->startxref < 0) {
1068
414
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is before start of file")) < 0)
1069
0
            goto exit;
1070
1071
414
        goto repair;
1072
414
    }
1073
1074
    /* Read the xref(s) */
1075
29.7k
    pdfi_seek(ctx, ctx->main_stream, ctx->startxref, SEEK_SET);
1076
1077
    /* If it starts with an int, it's an xref stream dict */
1078
29.7k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num);
1079
29.7k
    if (code == 1) {
1080
11.2k
        if (pdfi_check_xref_stream(ctx)) {
1081
8.39k
            code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num);
1082
8.39k
            if (code < 0)
1083
2.82k
                goto repair;
1084
8.39k
        } else
1085
2.85k
            goto repair;
1086
18.4k
    } else {
1087
        /* If not, it had better start 'xref', and be an old-style xref table */
1088
18.4k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
1089
18.4k
        if (code != TOKEN_XREF) {
1090
8.79k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"Failed to read any token at the startxref location")) < 0)
1091
0
                goto exit;
1092
1093
8.79k
            goto repair;
1094
8.79k
        }
1095
1096
9.69k
        code = read_xref(ctx, ctx->main_stream);
1097
9.69k
        if (code < 0)
1098
4.31k
            goto repair;
1099
9.69k
    }
1100
1101
10.9k
    if(ctx->args.pdfdebug && ctx->xref_table) {
1102
0
        int i, j;
1103
0
        xref_entry *entry;
1104
0
        char Buffer[32];
1105
1106
0
        outprintf(ctx->memory, "\n%% Dumping xref table\n");
1107
0
        for (i=0;i < ctx->xref_table->xref_size;i++) {
1108
0
            entry = &ctx->xref_table->xref[i];
1109
0
            if(entry->compressed) {
1110
0
                outprintf(ctx->memory, "*");
1111
0
                gs_snprintf(Buffer, sizeof(Buffer), "%"PRId64"", entry->object_num);
1112
0
                j = 10 - strlen(Buffer);
1113
0
                while(j--) {
1114
0
                    outprintf(ctx->memory, " ");
1115
0
                }
1116
0
                outprintf(ctx->memory, "%s ", Buffer);
1117
1118
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.compressed_stream_num);
1119
0
                j = 10 - strlen(Buffer);
1120
0
                while(j--) {
1121
0
                    outprintf(ctx->memory, " ");
1122
0
                }
1123
0
                outprintf(ctx->memory, "%s ", Buffer);
1124
1125
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.object_index);
1126
0
                j = 10 - strlen(Buffer);
1127
0
                while(j--) {
1128
0
                    outprintf(ctx->memory, " ");
1129
0
                }
1130
0
                outprintf(ctx->memory, "%s ", Buffer);
1131
0
            }
1132
0
            else {
1133
0
                outprintf(ctx->memory, " ");
1134
1135
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->object_num);
1136
0
                j = 10 - strlen(Buffer);
1137
0
                while(j--) {
1138
0
                    outprintf(ctx->memory, " ");
1139
0
                }
1140
0
                outprintf(ctx->memory, "%s ", Buffer);
1141
1142
0
                gs_snprintf(Buffer, sizeof(Buffer), "%"PRIdOFFSET"", entry->u.uncompressed.offset);
1143
0
                j = 10 - strlen(Buffer);
1144
0
                while(j--) {
1145
0
                    outprintf(ctx->memory, " ");
1146
0
                }
1147
0
                outprintf(ctx->memory, "%s ", Buffer);
1148
1149
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.uncompressed.generation_num);
1150
0
                j = 10 - strlen(Buffer);
1151
0
                while(j--) {
1152
0
                    outprintf(ctx->memory, " ");
1153
0
                }
1154
0
                outprintf(ctx->memory, "%s ", Buffer);
1155
0
            }
1156
0
            if (entry->free)
1157
0
                outprintf(ctx->memory, "f\n");
1158
0
            else
1159
0
                outprintf(ctx->memory, "n\n");
1160
0
        }
1161
0
    }
1162
10.9k
    if (ctx->args.pdfdebug)
1163
0
        outprintf(ctx->memory, "\n");
1164
1165
10.9k
 exit:
1166
10.9k
    (void)pdfi_loop_detector_cleartomark(ctx);
1167
1168
10.9k
    if (code < 0)
1169
0
        return code;
1170
1171
10.9k
    return 0;
1172
1173
81.4k
repair:
1174
81.4k
    (void)pdfi_loop_detector_cleartomark(ctx);
1175
81.4k
    if (!ctx->repaired && !ctx->args.pdfstoponerror)
1176
81.3k
        return(pdfi_repair_file(ctx));
1177
60
    return 0;
1178
81.4k
}