Coverage Report

Created: 2025-08-28 07:06

/src/ghostpdl/pdf/pdf_xref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2018-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* xref parsing */
17
18
#include "pdf_int.h"
19
#include "pdf_stack.h"
20
#include "pdf_xref.h"
21
#include "pdf_file.h"
22
#include "pdf_loop_detect.h"
23
#include "pdf_dict.h"
24
#include "pdf_array.h"
25
#include "pdf_repair.h"
26
27
static int resize_xref(pdf_context *ctx, uint64_t new_size)
28
21.6k
{
29
21.6k
    xref_entry *new_xrefs;
30
31
    /* Although we can technically handle object numbers larger than this, on some systems (32-bit Windows)
32
     * memset is limited to a (signed!) integer for the size of memory to clear. We could deal
33
     * with this by clearing the memory in blocks, but really, this is almost certainly a
34
     * corrupted file or something.
35
     */
36
21.6k
    if (new_size >= (0x7ffffff / sizeof(xref_entry)))
37
20
        return_error(gs_error_rangecheck);
38
39
21.6k
    new_xrefs = (xref_entry *)gs_alloc_bytes(ctx->memory, (size_t)(new_size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
40
21.6k
    if (new_xrefs == NULL){
41
0
        pdfi_countdown(ctx->xref_table);
42
0
        ctx->xref_table = NULL;
43
0
        return_error(gs_error_VMerror);
44
0
    }
45
21.6k
    memset(new_xrefs, 0x00, (new_size) * sizeof(xref_entry));
46
21.6k
    memcpy(new_xrefs, ctx->xref_table->xref, ctx->xref_table->xref_size * sizeof(xref_entry));
47
21.6k
    gs_free_object(ctx->memory, ctx->xref_table->xref, "reallocated xref entries");
48
21.6k
    ctx->xref_table->xref = new_xrefs;
49
21.6k
    ctx->xref_table->xref_size = new_size;
50
21.6k
    return 0;
51
21.6k
}
52
53
static int read_xref_stream_entries(pdf_context *ctx, pdf_c_stream *s, int64_t first, int64_t last, int64_t *W)
54
19.1k
{
55
19.1k
    uint i, j;
56
19.1k
    uint64_t field_width = 0;
57
19.1k
    uint32_t type = 0;
58
19.1k
    uint64_t objnum = 0, gen = 0;
59
19.1k
    byte *Buffer;
60
19.1k
    int64_t bytes = 0;
61
19.1k
    xref_entry *entry;
62
63
    /* Find max number of bytes to be read */
64
19.1k
    field_width = W[0];
65
19.1k
    if (W[1] > field_width)
66
19.0k
        field_width = W[1];
67
19.1k
    if (W[2] > field_width)
68
19
        field_width = W[2];
69
70
19.1k
    Buffer = gs_alloc_bytes(ctx->memory, field_width, "read_xref_stream_entry working buffer");
71
19.1k
    if (Buffer == NULL)
72
0
        return_error(gs_error_VMerror);
73
74
970k
    for (i=first;i<=last; i++){
75
        /* Defaults if W[n] = 0 */
76
951k
        type = 1;
77
951k
        objnum = gen = 0;
78
79
951k
        if (W[0] != 0) {
80
949k
            type = 0;
81
949k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[0], s);
82
949k
            if (bytes < W[0]){
83
149
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)");
84
149
                return_error(gs_error_ioerror);
85
149
            }
86
1.89M
            for (j=0;j<W[0];j++)
87
949k
                type = (type << 8) + Buffer[j];
88
949k
        }
89
90
951k
        if (W[1] != 0) {
91
951k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[1], s);
92
951k
            if (bytes < W[1]){
93
36
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry free working buffer (error)");
94
36
                return_error(gs_error_ioerror);
95
36
            }
96
3.30M
            for (j=0;j<W[1];j++)
97
2.35M
                objnum = (objnum << 8) + Buffer[j];
98
951k
        }
99
100
951k
        if (W[2] != 0) {
101
934k
            bytes = pdfi_read_bytes(ctx, Buffer, 1, W[2], s);
102
934k
            if (bytes < W[2]){
103
42
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)");
104
42
                return_error(gs_error_ioerror);
105
42
            }
106
1.88M
            for (j=0;j<W[2];j++)
107
954k
                gen = (gen << 8) + Buffer[j];
108
934k
        }
109
110
951k
        entry = &ctx->xref_table->xref[i];
111
951k
        if (entry->object_num != 0 && !entry->free)
112
4.49k
            continue;
113
114
946k
        entry->compressed = false;
115
946k
        entry->free = false;
116
946k
        entry->object_num = i;
117
946k
        entry->cache = NULL;
118
119
946k
        switch(type) {
120
17.4k
            case 0:
121
17.4k
                entry->free = true;
122
17.4k
                entry->u.uncompressed.offset = objnum;         /* For free objects we use the offset to store the object number of the next free object */
123
17.4k
                entry->u.uncompressed.generation_num = gen;    /* And the generation number is the numebr to use if this object is used again */
124
17.4k
                break;
125
280k
            case 1:
126
280k
                entry->u.uncompressed.offset = objnum;
127
280k
                entry->u.uncompressed.generation_num = gen;
128
280k
                break;
129
648k
            case 2:
130
648k
                entry->compressed = true;
131
648k
                entry->u.compressed.compressed_stream_num = objnum;   /* The object number of the compressed stream */
132
648k
                entry->u.compressed.object_index = gen;               /* And the index of the object within the stream */
133
648k
                break;
134
164
            default:
135
164
                gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer");
136
164
                return_error(gs_error_rangecheck);
137
0
                break;
138
946k
        }
139
946k
    }
140
18.7k
    gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer");
141
18.7k
    return 0;
142
19.1k
}
143
144
/* Forward definition */
145
static int read_xref(pdf_context *ctx, pdf_c_stream *s);
146
static int pdfi_check_xref_stream(pdf_context *ctx);
147
/* These two routines are recursive.... */
148
static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num);
149
150
static int pdfi_process_xref_stream(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *s)
151
14.8k
{
152
14.8k
    pdf_c_stream *XRefStrm;
153
14.8k
    int code, i;
154
14.8k
    pdf_dict *sdict = NULL;
155
14.8k
    pdf_name *n;
156
14.8k
    pdf_array *a;
157
14.8k
    int64_t size;
158
14.8k
    int64_t num;
159
14.8k
    int64_t W[3] = {0, 0, 0};
160
14.8k
    int objnum;
161
14.8k
    bool known = false;
162
163
14.8k
    if (pdfi_type_of(stream_obj) != PDF_STREAM)
164
0
        return_error(gs_error_typecheck);
165
166
14.8k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)stream_obj, &sdict);
167
14.8k
    if (code < 0)
168
0
        return code;
169
170
14.8k
    code = pdfi_dict_get_type(ctx, sdict, "Type", PDF_NAME, (pdf_obj **)&n);
171
14.8k
    if (code < 0)
172
62
        return code;
173
174
14.7k
    if (n->length != 4 || memcmp(n->data, "XRef", 4) != 0) {
175
27
        pdfi_countdown(n);
176
27
        return_error(gs_error_syntaxerror);
177
27
    }
178
14.7k
    pdfi_countdown(n);
179
180
14.7k
    code = pdfi_dict_get_int(ctx, sdict, "Size", &size);
181
14.7k
    if (code < 0)
182
14
        return code;
183
14.7k
    if (size < 1)
184
12
        return 0;
185
186
14.7k
    if (size < 0 || size > floor((double)ARCH_MAX_SIZE_T / (double)sizeof(xref_entry)))
187
0
        return_error(gs_error_rangecheck);
188
189
    /* If this is the first xref stream then allocate the xref table and store the trailer */
190
14.7k
    if (ctx->xref_table == NULL) {
191
9.39k
        ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table");
192
9.39k
        if (ctx->xref_table == NULL) {
193
0
            return_error(gs_error_VMerror);
194
0
        }
195
9.39k
        memset(ctx->xref_table, 0x00, sizeof(xref_table_t));
196
9.39k
        ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, (size_t)size * sizeof(xref_entry), "read_xref_stream allocate xref table entries");
197
9.39k
        if (ctx->xref_table->xref == NULL){
198
4
            gs_free_object(ctx->memory, ctx->xref_table, "failed to allocate xref table entries");
199
4
            ctx->xref_table = NULL;
200
4
            return_error(gs_error_VMerror);
201
4
        }
202
9.38k
        memset(ctx->xref_table->xref, 0x00, size * sizeof(xref_entry));
203
9.38k
        ctx->xref_table->ctx = ctx;
204
9.38k
        ctx->xref_table->type = PDF_XREF_TABLE;
205
9.38k
        ctx->xref_table->xref_size = size;
206
#if REFCNT_DEBUG
207
        ctx->xref_table->UID = ctx->ref_UID++;
208
        outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID);
209
#endif
210
9.38k
        pdfi_countup(ctx->xref_table);
211
212
9.38k
        pdfi_countdown(ctx->Trailer);
213
214
9.38k
        ctx->Trailer = sdict;
215
9.38k
        pdfi_countup(sdict);
216
9.38k
    } else {
217
5.34k
        if (size > ctx->xref_table->xref_size)
218
4
            return_error(gs_error_rangecheck);
219
220
5.34k
        code = pdfi_merge_dicts(ctx, ctx->Trailer, sdict);
221
5.34k
        if (code < 0 && (code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "pdfi_process_xref_stream", NULL)) < 0) {
222
0
            goto exit;
223
0
        }
224
5.34k
    }
225
226
14.7k
    pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, stream_obj), SEEK_SET);
227
228
    /* Bug #691220 has a PDF file with a compressed XRef, the stream dictionary has
229
     * a /DecodeParms entry for the stream, which has a /Colors value of 5, which makes
230
     * *no* sense whatever. If we try to apply a Predictor then we end up in a loop trying
231
     * to read 5 colour samples. Rather than meddles with more parameters to the filter
232
     * code, we'll just remove the Colors entry from the DecodeParms dictionary,
233
     * because it is nonsense. This means we'll get the (sensible) default value of 1.
234
     */
235
14.7k
    code = pdfi_dict_known(ctx, sdict, "DecodeParms", &known);
236
14.7k
    if (code < 0)
237
0
        return code;
238
239
14.7k
    if (known) {
240
13.6k
        pdf_dict *DP;
241
13.6k
        double f;
242
13.6k
        pdf_obj *name;
243
244
13.6k
        code = pdfi_dict_get_type(ctx, sdict, "DecodeParms", PDF_DICT, (pdf_obj **)&DP);
245
13.6k
        if (code < 0)
246
1
            return code;
247
248
13.6k
        code = pdfi_dict_knownget_number(ctx, DP, "Colors", &f);
249
13.6k
        if (code < 0) {
250
0
            pdfi_countdown(DP);
251
0
            return code;
252
0
        }
253
13.6k
        if (code > 0 && f != (double)1)
254
0
        {
255
0
            code = pdfi_name_alloc(ctx, (byte *)"Colors", 6, &name);
256
0
            if (code < 0) {
257
0
                pdfi_countdown(DP);
258
0
                return code;
259
0
            }
260
0
            pdfi_countup(name);
261
262
0
            code = pdfi_dict_delete_pair(ctx, DP, (pdf_name *)name);
263
0
            pdfi_countdown(name);
264
0
            if (code < 0) {
265
0
                pdfi_countdown(DP);
266
0
                return code;
267
0
            }
268
0
        }
269
13.6k
        pdfi_countdown(DP);
270
13.6k
    }
271
272
14.7k
    code = pdfi_filter_no_decryption(ctx, stream_obj, s, &XRefStrm, false);
273
14.7k
    if (code < 0) {
274
57
        pdfi_countdown(ctx->xref_table);
275
57
        ctx->xref_table = NULL;
276
57
        return code;
277
57
    }
278
279
14.6k
    code = pdfi_dict_get_type(ctx, sdict, "W", PDF_ARRAY, (pdf_obj **)&a);
280
14.6k
    if (code < 0) {
281
9
        pdfi_close_file(ctx, XRefStrm);
282
9
        pdfi_countdown(ctx->xref_table);
283
9
        ctx->xref_table = NULL;
284
9
        return code;
285
9
    }
286
287
14.6k
    if (pdfi_array_size(a) != 3) {
288
13
        pdfi_countdown(a);
289
13
        pdfi_close_file(ctx, XRefStrm);
290
13
        pdfi_countdown(ctx->xref_table);
291
13
        ctx->xref_table = NULL;
292
13
        return_error(gs_error_rangecheck);
293
13
    }
294
58.5k
    for (i=0;i<3;i++) {
295
43.9k
        code = pdfi_array_get_int(ctx, a, (uint64_t)i, (int64_t *)&W[i]);
296
43.9k
        if (code < 0 || W[i] < 0) {
297
43
            pdfi_countdown(a);
298
43
            pdfi_close_file(ctx, XRefStrm);
299
43
            pdfi_countdown(ctx->xref_table);
300
43
            ctx->xref_table = NULL;
301
43
            if (W[i] < 0)
302
11
                code = gs_note_error(gs_error_rangecheck);
303
43
            return code;
304
43
        }
305
43.9k
    }
306
14.6k
    pdfi_countdown(a);
307
308
    /* W[0] is either:
309
     * 0 (no type field) or a single byte with the type.
310
     * W[1] is either:
311
     * The object number of the next free object, the byte offset of this object in the file or the object5 number of the object stream where this object is stored.
312
     * W[2] is either:
313
     * The generation number to use if this object is used again, the generation number of the object or the index of this object within the object stream.
314
     *
315
     * Object and generation numbers are limited to unsigned 64-bit values, as are bytes offsets in the file, indexes of objects within the stream likewise (actually
316
     * most of these are generally 32-bit max). So we can limit the field widths to 8 bytes, enough to hold a 64-bit number.
317
     * Even if a later version of the spec makes these larger (which seems unlikely!) we still cna't cope with integers > 64-bits.
318
     */
319
14.6k
    if (W[0] > 1 || W[1] > 8 || W[2] > 8) {
320
32
        pdfi_close_file(ctx, XRefStrm);
321
32
        pdfi_countdown(ctx->xref_table);
322
32
        ctx->xref_table = NULL;
323
32
        return code;
324
32
    }
325
326
14.5k
    code = pdfi_dict_get_type(ctx, sdict, "Index", PDF_ARRAY, (pdf_obj **)&a);
327
14.5k
    if (code == gs_error_undefined) {
328
5.02k
        code = read_xref_stream_entries(ctx, XRefStrm, 0, size - 1, W);
329
5.02k
        if (code < 0) {
330
133
            pdfi_close_file(ctx, XRefStrm);
331
133
            pdfi_countdown(ctx->xref_table);
332
133
            ctx->xref_table = NULL;
333
133
            return code;
334
133
        }
335
9.55k
    } else {
336
9.55k
        int64_t start, size;
337
338
9.55k
        if (code < 0) {
339
2
            pdfi_close_file(ctx, XRefStrm);
340
2
            pdfi_countdown(ctx->xref_table);
341
2
            ctx->xref_table = NULL;
342
2
            return code;
343
2
        }
344
345
9.55k
        if (pdfi_array_size(a) & 1) {
346
12
            pdfi_countdown(a);
347
12
            pdfi_close_file(ctx, XRefStrm);
348
12
            pdfi_countdown(ctx->xref_table);
349
12
            ctx->xref_table = NULL;
350
12
            return_error(gs_error_rangecheck);
351
12
        }
352
353
23.4k
        for (i=0;i < pdfi_array_size(a);i+=2){
354
14.2k
            code = pdfi_array_get_int(ctx, a, (uint64_t)i, &start);
355
14.2k
            if (code < 0 || start < 0) {
356
21
                pdfi_countdown(a);
357
21
                pdfi_close_file(ctx, XRefStrm);
358
21
                pdfi_countdown(ctx->xref_table);
359
21
                ctx->xref_table = NULL;
360
21
                return code;
361
21
            }
362
363
14.1k
            code = pdfi_array_get_int(ctx, a, (uint64_t)i+1, &size);
364
14.1k
            if (code < 0) {
365
15
                pdfi_countdown(a);
366
15
                pdfi_close_file(ctx, XRefStrm);
367
15
                pdfi_countdown(ctx->xref_table);
368
15
                ctx->xref_table = NULL;
369
15
                return code;
370
15
            }
371
372
14.1k
            if (size < 1)
373
12
                continue;
374
375
14.1k
            if (start + size >= ctx->xref_table->xref_size) {
376
8.66k
                code = resize_xref(ctx, start + size);
377
8.66k
                if (code < 0) {
378
7
                    pdfi_countdown(a);
379
7
                    pdfi_close_file(ctx, XRefStrm);
380
7
                    pdfi_countdown(ctx->xref_table);
381
7
                    ctx->xref_table = NULL;
382
7
                    return code;
383
7
                }
384
8.66k
            }
385
386
14.1k
            code = read_xref_stream_entries(ctx, XRefStrm, start, start + size - 1, W);
387
14.1k
            if (code < 0) {
388
258
                pdfi_countdown(a);
389
258
                pdfi_close_file(ctx, XRefStrm);
390
258
                pdfi_countdown(ctx->xref_table);
391
258
                ctx->xref_table = NULL;
392
258
                return code;
393
258
            }
394
14.1k
        }
395
9.54k
    }
396
14.1k
    pdfi_countdown(a);
397
398
14.1k
    pdfi_close_file(ctx, XRefStrm);
399
400
14.1k
    code = pdfi_dict_get_int(ctx, sdict, "Prev", &num);
401
14.1k
    if (code == gs_error_undefined)
402
5.53k
        return 0;
403
404
8.59k
    if (code < 0)
405
14
        return code;
406
407
8.58k
    if (num < 0 || num > ctx->main_stream_length)
408
2.71k
        return_error(gs_error_rangecheck);
409
410
5.86k
    if (pdfi_loop_detector_check_object(ctx, num) == true)
411
26
        return_error(gs_error_circular_reference);
412
5.83k
    else {
413
5.83k
        code = pdfi_loop_detector_add_object(ctx, num);
414
5.83k
        if (code < 0)
415
0
            return code;
416
5.83k
    }
417
418
5.83k
    if(ctx->args.pdfdebug)
419
0
        outprintf(ctx->memory, "%% Reading /Prev xref\n");
420
421
5.83k
    pdfi_seek(ctx, s, num, SEEK_SET);
422
423
5.83k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &objnum);
424
5.83k
    if (code == 1) {
425
5.35k
        if (pdfi_check_xref_stream(ctx))
426
5.31k
            return pdfi_read_xref_stream_dict(ctx, s, objnum);
427
5.35k
    }
428
429
525
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
430
525
    if (code < 0)
431
0
        return code;
432
525
    if (code == TOKEN_XREF) {
433
59
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_process_xref_stream", NULL)) < 0) {
434
0
            goto exit;
435
0
        }
436
        /* Read old-style xref table */
437
59
        return(read_xref(ctx, ctx->main_stream));
438
59
    }
439
466
exit:
440
466
    return_error(gs_error_syntaxerror);
441
525
}
442
443
static int pdfi_check_xref_stream(pdf_context *ctx)
444
18.7k
{
445
18.7k
    gs_offset_t offset;
446
18.7k
    int gen_num, code = 0;
447
448
18.7k
    offset = pdfi_unread_tell(ctx);
449
450
18.7k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num);
451
18.7k
    if (code <= 0) {
452
1.32k
        code = 0;
453
1.32k
        goto exit;
454
1.32k
    }
455
456
    /* Try to read 'obj' */
457
17.4k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
458
17.4k
    if (code <= 0) {
459
0
        code = 0;
460
0
        goto exit;
461
0
    }
462
463
    /* Third element must be obj, or it's not a valid xref */
464
17.4k
    if (code != TOKEN_OBJ)
465
2.05k
        code = 0;
466
15.4k
    else
467
15.4k
        code = 1;
468
469
18.7k
exit:
470
18.7k
    pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
471
18.7k
    return code;
472
17.4k
}
473
474
static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num)
475
15.6k
{
476
15.6k
    int code;
477
15.6k
    int gen_num;
478
479
15.6k
    if (ctx->args.pdfdebug)
480
0
        outprintf(ctx->memory, "\n%% Reading PDF 1.5+ xref stream\n");
481
482
    /* We have the obj_num. Lets try for obj_num gen obj as a XRef stream */
483
15.6k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num);
484
15.6k
    if (code <= 0) {
485
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", "")) < 0) {
486
0
            return code;
487
0
        }
488
0
        return(pdfi_repair_file(ctx));
489
0
    }
490
491
    /* Try to read 'obj' */
492
15.6k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
493
15.6k
    if (code < 0)
494
0
        return code;
495
15.6k
    if (code == 0)
496
0
        return_error(gs_error_syntaxerror);
497
498
    /* Third element must be obj, or it's not a valid xref */
499
15.6k
    if (code != TOKEN_OBJ) {
500
0
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BAD_XREFSTMOFFSET, "pdfi_read_xref_stream_dict", "")) < 0) {
501
0
            return code;
502
0
        }
503
0
        return(pdfi_repair_file(ctx));
504
0
    }
505
506
641k
    do {
507
641k
        code = pdfi_read_token(ctx, ctx->main_stream, obj_num, gen_num);
508
641k
        if (code <= 0) {
509
587
            if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
510
0
                return code;
511
0
            }
512
587
            return pdfi_repair_file(ctx);
513
587
        }
514
515
641k
        if (pdfi_count_stack(ctx) >= 2 && pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
516
17.1k
            uintptr_t keyword = (uintptr_t)ctx->stack_top[-1];
517
17.1k
            if (keyword == TOKEN_STREAM) {
518
14.8k
                pdf_dict *dict;
519
14.8k
                pdf_stream *sdict = NULL;
520
14.8k
                int64_t Length;
521
522
                /* Remove the 'stream' token from the stack, should leave a dictionary object on the stack */
523
14.8k
                pdfi_pop(ctx, 1);
524
14.8k
                if (pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) {
525
23
                    if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
526
0
                        return code;
527
0
                    }
528
23
                    return pdfi_repair_file(ctx);
529
23
                }
530
14.8k
                dict = (pdf_dict *)ctx->stack_top[-1];
531
532
                /* Convert the dict into a stream (sdict comes back with at least one ref) */
533
14.8k
                code = pdfi_obj_dict_to_stream(ctx, dict, &sdict, true);
534
                /* Pop off the dict */
535
14.8k
                pdfi_pop(ctx, 1);
536
14.8k
                if (code < 0) {
537
0
                    if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) {
538
0
                        return code;
539
0
                    }
540
                    /* TODO: should I return code instead of trying to repair?
541
                     * Normally the above routine should not fail so something is
542
                     * probably seriously fubar.
543
                     */
544
0
                    return pdfi_repair_file(ctx);
545
0
                }
546
14.8k
                dict = NULL;
547
548
                /* Init the stuff for the stream */
549
14.8k
                sdict->stream_offset = pdfi_unread_tell(ctx);
550
14.8k
                sdict->object_num = obj_num;
551
14.8k
                sdict->generation_num = gen_num;
552
553
14.8k
                code = pdfi_dict_get_int(ctx, sdict->stream_dict, "Length", &Length);
554
14.8k
                if (code < 0) {
555
                    /* TODO: Not positive this will actually have a length -- just use 0 */
556
61
                    (void)pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num);
557
61
                    code = 0;
558
61
                    Length = 0;
559
61
                }
560
14.8k
                sdict->Length = Length;
561
14.8k
                sdict->length_valid = true;
562
563
14.8k
                code = pdfi_process_xref_stream(ctx, sdict, ctx->main_stream);
564
14.8k
                pdfi_countdown(sdict);
565
14.8k
                if (code < 0) {
566
4.19k
                    pdfi_set_error(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_read_xref_stream_dict", NULL);
567
4.19k
                    return code;
568
4.19k
                }
569
10.6k
                break;
570
14.8k
            } else if (keyword == TOKEN_ENDOBJ) {
571
                /* Something went wrong, this is not a stream dictionary */
572
166
                if ((code = pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num)) < 0) {
573
0
                    return code;
574
0
                }
575
166
                return(pdfi_repair_file(ctx));
576
166
            }
577
17.1k
        }
578
641k
    } while(1);
579
10.6k
    return 0;
580
15.6k
}
581
582
static int skip_to_digit(pdf_context *ctx, pdf_c_stream *s, unsigned int limit)
583
3.75k
{
584
3.75k
    int c, read = 0;
585
586
13.8k
    do {
587
13.8k
        c = pdfi_read_byte(ctx, s);
588
13.8k
        if (c < 0)
589
0
            return_error(gs_error_ioerror);
590
13.8k
        if (c >= '0' && c <= '9') {
591
3.41k
            pdfi_unread_byte(ctx, s, (byte)c);
592
3.41k
            return read;
593
3.41k
        }
594
10.4k
        read++;
595
10.4k
    } while (read < limit);
596
597
343
    return read;
598
3.75k
}
599
600
static int read_digits(pdf_context *ctx, pdf_c_stream *s, byte *Buffer, int limit)
601
3.75k
{
602
3.75k
    int c, read = 0;
603
604
    /* Since the "limit" is a value calculated by the caller,
605
       it's easier to check it in one place (here) than before
606
       every call.
607
     */
608
3.75k
    if (limit <= 0)
609
356
        return_error(gs_error_syntaxerror);
610
611
    /* We assume that Buffer always has limit+1 bytes available, so we can
612
     * safely terminate it. */
613
614
20.3k
    do {
615
20.3k
        c = pdfi_read_byte(ctx, s);
616
20.3k
        if (c < 0)
617
0
            return_error(gs_error_ioerror);
618
20.3k
        if (c < '0' || c > '9') {
619
1.50k
            pdfi_unread_byte(ctx, s, c);
620
1.50k
            break;
621
1.50k
        }
622
18.8k
        *Buffer++ = (byte)c;
623
18.8k
        read++;
624
18.8k
    } while (read < limit);
625
3.39k
    *Buffer = 0;
626
627
3.39k
    return read;
628
3.39k
}
629
630
631
static int read_xref_entry_slow(pdf_context *ctx, pdf_c_stream *s, gs_offset_t *offset, uint32_t *generation_num, unsigned char *free)
632
1.90k
{
633
1.90k
    byte Buffer[20];
634
1.90k
    int c, code, read = 0;
635
636
    /* First off, find a number. If we don't find one, and read 20 bytes, throw an error */
637
1.90k
    code = skip_to_digit(ctx, s, 20);
638
1.90k
    if (code < 0)
639
0
        return code;
640
1.90k
    read += code;
641
642
    /* Now read a number */
643
1.90k
    code = read_digits(ctx, s, (byte *)&Buffer,  (read > 10 ? 20 - read : 10));
644
1.90k
    if (code < 0)
645
58
        return code;
646
1.84k
    read += code;
647
648
1.84k
    *offset = atol((const char *)Buffer);
649
650
    /* find next number */
651
1.84k
    code = skip_to_digit(ctx, s, 20 - read);
652
1.84k
    if (code < 0)
653
0
        return code;
654
1.84k
    read += code;
655
656
    /* and read it */
657
1.84k
    code = read_digits(ctx, s, (byte *)&Buffer, (read > 15 ? 20 - read : 5));
658
1.84k
    if (code < 0)
659
298
        return code;
660
1.55k
    read += code;
661
662
1.55k
    *generation_num = atol((const char *)Buffer);
663
664
2.68k
    do {
665
2.68k
        c = pdfi_read_byte(ctx, s);
666
2.68k
        if (c < 0)
667
0
            return_error(gs_error_ioerror);
668
2.68k
        read ++;
669
2.68k
        if (c == 0x09 || c == 0x20)
670
1.16k
            continue;
671
1.51k
        if (c == 'n' || c == 'f') {
672
872
            *free = (unsigned char)c;
673
872
            break;
674
872
        } else {
675
647
            return_error(gs_error_syntaxerror);
676
647
        }
677
1.51k
    } while (read < 20);
678
903
    if (read >= 20)
679
40
        return_error(gs_error_syntaxerror);
680
681
2.17k
    do {
682
2.17k
        c = pdfi_read_byte(ctx, s);
683
2.17k
        if (c < 0)
684
0
            return_error(gs_error_syntaxerror);
685
2.17k
        read++;
686
2.17k
        if (c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a)
687
1.01k
            continue;
688
2.17k
    } while (read < 20);
689
863
    return 0;
690
863
}
691
692
static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char free)
693
863
{
694
863
    byte b[20], *ptr = B;
695
863
    int index = 0;
696
697
863
    gs_snprintf((char *)b, sizeof(b), "%"PRIdOFFSET"", o);
698
863
    if (strlen((const char *)b) > 10)
699
0
        return_error(gs_error_rangecheck);
700
7.08k
    for(index=0;index < 10 - strlen((const char *)b); index++) {
701
6.22k
        *ptr++ = 0x30;
702
6.22k
    }
703
863
    memcpy(ptr, b, strlen((const char *)b));
704
863
    ptr += strlen((const char *)b);
705
863
    *ptr++ = 0x20;
706
707
863
    gs_snprintf((char *)b, sizeof(b), "%d", g);
708
863
    if (strlen((const char *)b) > 5)
709
0
        return_error(gs_error_rangecheck);
710
3.89k
    for(index=0;index < 5 - strlen((const char *)b);index++) {
711
3.02k
        *ptr++ = 0x30;
712
3.02k
    }
713
863
    memcpy(ptr, b, strlen((const char *)b));
714
863
    ptr += strlen((const char *)b);
715
863
    *ptr++ = 0x20;
716
863
    *ptr++ = free;
717
863
    *ptr++ = 0x20;
718
863
    *ptr++ = 0x0d;
719
863
    return 0;
720
863
}
721
722
static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size)
723
41.0k
{
724
41.0k
    int code = 0, i, j;
725
41.0k
    int start = 0;
726
41.0k
    int size = 0;
727
41.0k
    int64_t bytes = 0;
728
41.0k
    char Buffer[21];
729
730
41.0k
    *section_start = *section_size = 0;
731
732
41.0k
    if (ctx->args.pdfdebug)
733
0
        outprintf(ctx->memory, "\n%% Reading xref section\n");
734
735
41.0k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &start);
736
41.0k
    if (code < 0) {
737
        /* Not an int, might be a keyword */
738
11.3k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
739
11.3k
        if (code < 0)
740
0
            return code;
741
742
11.3k
        if (code != TOKEN_TRAILER) {
743
            /* element is not an integer, and not a keyword - not a valid xref */
744
163
            return_error(gs_error_typecheck);
745
163
        }
746
11.1k
        return 1;
747
11.3k
    }
748
749
29.6k
    if (start < 0)
750
27
        return_error(gs_error_rangecheck);
751
752
29.6k
    *section_start = start;
753
754
29.6k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &size);
755
29.6k
    if (code < 0)
756
25
        return code;
757
29.6k
    if (code == 0)
758
56
        return_error(gs_error_syntaxerror);
759
760
    /* Zero sized xref sections are valid; see the file attached to
761
     * bug 704947 for an example. */
762
29.5k
    if (size < 0)
763
14
        return_error(gs_error_rangecheck);
764
765
29.5k
    *section_size = size;
766
767
29.5k
    if (ctx->args.pdfdebug)
768
0
        outprintf(ctx->memory, "\n%% Section starts at %d and has %d entries\n", (unsigned int) start, (unsigned int)size);
769
770
29.5k
    if (size > 0) {
771
29.0k
        if (ctx->xref_table == NULL) {
772
10.9k
            ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table");
773
10.9k
            if (ctx->xref_table == NULL)
774
0
                return_error(gs_error_VMerror);
775
10.9k
            memset(ctx->xref_table, 0x00, sizeof(xref_table_t));
776
777
10.9k
            ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, ((size_t)start + (size_t)size) * (size_t)sizeof(xref_entry), "read_xref_stream allocate xref table entries");
778
10.9k
            if (ctx->xref_table->xref == NULL){
779
30
                gs_free_object(ctx->memory, ctx->xref_table, "free xref table on error allocating entries");
780
30
                ctx->xref_table = NULL;
781
30
                return_error(gs_error_VMerror);
782
30
            }
783
#if REFCNT_DEBUG
784
            ctx->xref_table->UID = ctx->ref_UID++;
785
            outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID);
786
#endif
787
788
10.9k
            memset(ctx->xref_table->xref, 0x00, (start + size) * sizeof(xref_entry));
789
10.9k
            ctx->xref_table->ctx = ctx;
790
10.9k
            ctx->xref_table->type = PDF_XREF_TABLE;
791
10.9k
            ctx->xref_table->xref_size = start + size;
792
10.9k
            pdfi_countup(ctx->xref_table);
793
18.0k
        } else {
794
18.0k
            if (start + size > ctx->xref_table->xref_size) {
795
13.0k
                code = resize_xref(ctx, start + size);
796
13.0k
                if (code < 0)
797
13
                    return code;
798
13.0k
            }
799
18.0k
        }
800
29.0k
    }
801
802
29.5k
    pdfi_skip_white(ctx, s);
803
501k
    for (i=0;i< size;i++){
804
473k
        xref_entry *entry = &ctx->xref_table->xref[i + start];
805
473k
        unsigned char free;
806
473k
        gs_offset_t off;
807
473k
        unsigned int gen;
808
809
473k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 20, s);
810
473k
        if (bytes < 20)
811
8
            return_error(gs_error_ioerror);
812
473k
        j = 19;
813
473k
        if ((Buffer[19] != 0x0a && Buffer[19] != 0x0d) || (Buffer[18] != 0x0d && Buffer[18] != 0x0a && Buffer[18] != 0x20))
814
18.0k
            pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_SIZE, "read_xref_section", NULL);
815
499k
        while (Buffer[j] != 0x0D && Buffer[j] != 0x0A) {
816
27.3k
            pdfi_unread_byte(ctx, s, (byte)Buffer[j]);
817
27.3k
            if (--j < 0) {
818
899
                pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_NO_EOL, "read_xref_section", NULL);
819
899
                outprintf(ctx->memory, "Invalid xref entry, line terminator missing.\n");
820
899
                code = read_xref_entry_slow(ctx, s, &off, &gen, &free);
821
899
                if (code < 0)
822
459
                    return code;
823
440
                code = write_offset((byte *)Buffer, off, gen, free);
824
440
                if (code < 0)
825
0
                    return code;
826
440
                j = 19;
827
440
                break;
828
440
            }
829
27.3k
        }
830
472k
        Buffer[j] = 0x00;
831
472k
        if (entry->object_num != 0)
832
9.65k
            continue;
833
834
463k
        if (sscanf(Buffer, "%"PRIdOFFSET" %d %c", &entry->u.uncompressed.offset, &entry->u.uncompressed.generation_num, &free) != 3) {
835
1.00k
            pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_FORMAT, "read_xref_section", NULL);
836
1.00k
            outprintf(ctx->memory, "Invalid xref entry, incorrect format.\n");
837
1.00k
            pdfi_unread(ctx, s, (byte *)Buffer, 20);
838
1.00k
            code = read_xref_entry_slow(ctx, s, &off, &gen, &free);
839
1.00k
            if (code < 0)
840
584
                return code;
841
423
            code = write_offset((byte *)Buffer, off, gen, free);
842
423
            if (code < 0)
843
0
                return code;
844
423
        }
845
846
462k
        entry->compressed = false;
847
462k
        entry->object_num = i + start;
848
462k
        if (free == 'f')
849
134k
            entry->free = true;
850
462k
        if(free == 'n')
851
327k
            entry->free = false;
852
462k
        if (entry->object_num == 0) {
853
7.21k
            if (!entry->free) {
854
83
                pdfi_set_warning(ctx, 0, NULL, W_PDF_XREF_OBJECT0_NOT_FREE, "read_xref_section", NULL);
855
83
            }
856
7.21k
        }
857
462k
    }
858
859
28.4k
    return 0;
860
29.5k
}
861
862
static int read_xref(pdf_context *ctx, pdf_c_stream *s)
863
12.5k
{
864
12.5k
    int code = 0;
865
12.5k
    pdf_dict *d = NULL;
866
12.5k
    uint64_t max_obj = 0;
867
12.5k
    int64_t num, XRefStm = 0;
868
12.5k
    int obj_num;
869
12.5k
    bool known = false;
870
871
12.5k
    if (ctx->repaired)
872
5
        return 0;
873
874
41.0k
    do {
875
41.0k
        uint64_t section_start, section_size;
876
877
41.0k
        code = read_xref_section(ctx, s, &section_start, &section_size);
878
41.0k
        if (code < 0)
879
1.37k
            return code;
880
881
39.6k
        if (section_size > 0 && section_start + section_size - 1 > max_obj)
882
25.4k
            max_obj = section_start + section_size - 1;
883
884
        /* code == 1 => read_xref_section ended with a trailer. */
885
39.6k
    } while (code != 1);
886
887
11.1k
    code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0);
888
11.1k
    if (code < 0)
889
208
        return code;
890
891
10.9k
    d = (pdf_dict *)ctx->stack_top[-1];
892
10.9k
    if (pdfi_type_of(d) != PDF_DICT) {
893
17
        pdfi_pop(ctx, 1);
894
17
        return_error(gs_error_typecheck);
895
17
    }
896
10.9k
    pdfi_countup(d);
897
10.9k
    pdfi_pop(ctx, 1);
898
899
    /* We don't want to pollute the Trailer dictionary with any XRefStm key/value pairs
900
     * which will happen when we do pdfi_merge_dicts(). So we get any XRefStm here and
901
     * if there was one, remove it from the dictionary before we merge with the
902
     * primary trailer.
903
     */
904
10.9k
    code = pdfi_dict_get_int(ctx, d, "XRefStm", &XRefStm);
905
10.9k
    if (code < 0 && code != gs_error_undefined)
906
1
        goto error;
907
908
10.9k
    if (code == 0) {
909
438
        code = pdfi_dict_delete(ctx, d, "XRefStm");
910
438
        if (code < 0)
911
0
            goto error;
912
438
    }
913
914
10.9k
    if (ctx->Trailer == NULL) {
915
9.56k
        ctx->Trailer = d;
916
9.56k
        pdfi_countup(d);
917
9.56k
    } else {
918
1.37k
        code = pdfi_merge_dicts(ctx, ctx->Trailer, d);
919
1.37k
        if (code < 0) {
920
0
            if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "read_xref", "")) < 0) {
921
0
                return code;
922
0
            }
923
0
        }
924
1.37k
    }
925
926
    /* Check if the highest subsection + size exceeds the /Size in the
927
     * trailer dictionary and set a warning flag if it does
928
     */
929
10.9k
    code = pdfi_dict_get_int(ctx, d, "Size", &num);
930
10.9k
    if (code < 0)
931
23
        goto error;
932
933
10.9k
    if (max_obj >= num)
934
601
        pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_SIZE, "read_xref", NULL);
935
936
    /* Check if this is a modified file and has any
937
     * previous xref entries.
938
     */
939
10.9k
    code = pdfi_dict_known(ctx, d, "Prev", &known);
940
10.9k
    if (known) {
941
5.00k
        code = pdfi_dict_get_int(ctx, d, "Prev", &num);
942
5.00k
        if (code < 0)
943
19
            goto error;
944
945
4.98k
        if (num < 0 || num > ctx->main_stream_length) {
946
1.65k
            code = gs_note_error(gs_error_rangecheck);
947
1.65k
            goto error;
948
1.65k
        }
949
950
3.32k
        if (pdfi_loop_detector_check_object(ctx, num) == true) {
951
6
            code = gs_note_error(gs_error_circular_reference);
952
6
            goto error;
953
6
        }
954
3.32k
        else {
955
3.32k
            code = pdfi_loop_detector_add_object(ctx, num);
956
3.32k
            if (code < 0)
957
0
                goto error;
958
3.32k
        }
959
960
3.32k
        code = pdfi_seek(ctx, s, num, SEEK_SET);
961
3.32k
        if (code < 0)
962
0
            goto error;
963
964
3.32k
        if (!ctx->repaired) {
965
3.32k
            code = pdfi_read_token(ctx, ctx->main_stream, 0, 0);
966
3.32k
            if (code < 0)
967
148
                goto error;
968
969
3.17k
            if (code == 0) {
970
2
                code = gs_note_error(gs_error_syntaxerror);
971
2
                goto error;
972
2
            }
973
3.17k
        } else {
974
0
            code = 0;
975
0
            goto error;
976
0
        }
977
978
3.17k
        if ((intptr_t)(ctx->stack_top[-1]) == (intptr_t)TOKEN_XREF) {
979
            /* Read old-style xref table */
980
1.44k
            pdfi_pop(ctx, 1);
981
1.44k
            code = read_xref(ctx, ctx->main_stream);
982
1.44k
            if (code < 0)
983
246
                goto error;
984
1.72k
        } else {
985
1.72k
            pdfi_pop(ctx, 1);
986
1.72k
            code = gs_note_error(gs_error_typecheck);
987
1.72k
            goto error;
988
1.72k
        }
989
3.17k
    }
990
991
    /* Now check if this is a hybrid file. */
992
7.12k
    if (XRefStm != 0) {
993
231
        ctx->is_hybrid = true;
994
995
231
        if (ctx->args.pdfdebug)
996
0
            outprintf(ctx->memory, "%% File is a hybrid, containing xref table and xref stream. Reading the stream.\n");
997
998
999
231
        if (pdfi_loop_detector_check_object(ctx, XRefStm) == true) {
1000
0
            code = gs_note_error(gs_error_circular_reference);
1001
0
            goto error;
1002
0
        }
1003
231
        else {
1004
231
            code = pdfi_loop_detector_add_object(ctx, XRefStm);
1005
231
            if (code < 0)
1006
0
                goto error;
1007
231
        }
1008
1009
231
        code = pdfi_loop_detector_mark(ctx);
1010
231
        if (code < 0)
1011
0
            goto error;
1012
1013
        /* Because of the way the code works when we read a file which is a pure
1014
         * xref stream file, we need to read the first integer of 'x y obj'
1015
         * because the xref stream decoding code expects that to be on the stack.
1016
         */
1017
231
        pdfi_seek(ctx, s, XRefStm, SEEK_SET);
1018
1019
231
        code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num);
1020
231
        if (code < 0) {
1021
0
            pdfi_set_error(ctx, 0, NULL, E_PDF_BADXREFSTREAM, "read_xref", "");
1022
0
            pdfi_loop_detector_cleartomark(ctx);
1023
0
            goto error;
1024
0
        }
1025
1026
231
        code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num);
1027
        /* We could just fall through to the exit here, but choose not to in order to avoid possible mistakes in future */
1028
231
        if (code < 0) {
1029
29
            pdfi_loop_detector_cleartomark(ctx);
1030
29
            goto error;
1031
29
        }
1032
1033
202
        pdfi_loop_detector_cleartomark(ctx);
1034
202
    } else
1035
6.89k
        code = 0;
1036
1037
10.9k
error:
1038
10.9k
    pdfi_countdown(d);
1039
10.9k
    return code;
1040
7.12k
}
1041
1042
int pdfi_read_xref(pdf_context *ctx)
1043
108k
{
1044
108k
    int code = 0;
1045
108k
    int obj_num;
1046
1047
108k
    code = pdfi_loop_detector_mark(ctx);
1048
108k
    if (code < 0)
1049
0
        return code;
1050
1051
108k
    if (ctx->startxref == 0)
1052
60.3k
        goto repair;
1053
1054
48.5k
    code = pdfi_loop_detector_add_object(ctx, ctx->startxref);
1055
48.5k
    if (code < 0)
1056
0
        goto exit;
1057
1058
48.5k
    if (ctx->args.pdfdebug)
1059
0
        outprintf(ctx->memory, "%% Trying to read 'xref' token for xref table, or 'int int obj' for an xref stream\n");
1060
1061
48.5k
    if (ctx->startxref > ctx->main_stream_length - 5) {
1062
13.0k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is beyond end of file")) < 0)
1063
0
            goto exit;
1064
1065
13.0k
        goto repair;
1066
13.0k
    }
1067
35.5k
    if (ctx->startxref < 0) {
1068
534
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is before start of file")) < 0)
1069
0
            goto exit;
1070
1071
534
        goto repair;
1072
534
    }
1073
1074
    /* Read the xref(s) */
1075
34.9k
    pdfi_seek(ctx, ctx->main_stream, ctx->startxref, SEEK_SET);
1076
1077
    /* If it starts with an int, it's an xref stream dict */
1078
34.9k
    code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num);
1079
34.9k
    if (code == 1) {
1080
13.4k
        if (pdfi_check_xref_stream(ctx)) {
1081
10.0k
            code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num);
1082
10.0k
            if (code < 0)
1083
3.97k
                goto repair;
1084
10.0k
        } else
1085
3.33k
            goto repair;
1086
21.5k
    } else {
1087
        /* If not, it had better start 'xref', and be an old-style xref table */
1088
21.5k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
1089
21.5k
        if (code != TOKEN_XREF) {
1090
10.5k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"Failed to read any token at the startxref location")) < 0)
1091
0
                goto exit;
1092
1093
10.5k
            goto repair;
1094
10.5k
        }
1095
1096
11.0k
        code = read_xref(ctx, ctx->main_stream);
1097
11.0k
        if (code < 0)
1098
5.16k
            goto repair;
1099
11.0k
    }
1100
1101
11.9k
    if(ctx->args.pdfdebug && ctx->xref_table) {
1102
0
        int i, j;
1103
0
        xref_entry *entry;
1104
0
        char Buffer[32];
1105
1106
0
        outprintf(ctx->memory, "\n%% Dumping xref table\n");
1107
0
        for (i=0;i < ctx->xref_table->xref_size;i++) {
1108
0
            entry = &ctx->xref_table->xref[i];
1109
0
            if(entry->compressed) {
1110
0
                outprintf(ctx->memory, "*");
1111
0
                gs_snprintf(Buffer, sizeof(Buffer), "%"PRId64"", entry->object_num);
1112
0
                j = 10 - strlen(Buffer);
1113
0
                while(j--) {
1114
0
                    outprintf(ctx->memory, " ");
1115
0
                }
1116
0
                outprintf(ctx->memory, "%s ", Buffer);
1117
1118
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.compressed_stream_num);
1119
0
                j = 10 - strlen(Buffer);
1120
0
                while(j--) {
1121
0
                    outprintf(ctx->memory, " ");
1122
0
                }
1123
0
                outprintf(ctx->memory, "%s ", Buffer);
1124
1125
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.object_index);
1126
0
                j = 10 - strlen(Buffer);
1127
0
                while(j--) {
1128
0
                    outprintf(ctx->memory, " ");
1129
0
                }
1130
0
                outprintf(ctx->memory, "%s ", Buffer);
1131
0
            }
1132
0
            else {
1133
0
                outprintf(ctx->memory, " ");
1134
1135
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->object_num);
1136
0
                j = 10 - strlen(Buffer);
1137
0
                while(j--) {
1138
0
                    outprintf(ctx->memory, " ");
1139
0
                }
1140
0
                outprintf(ctx->memory, "%s ", Buffer);
1141
1142
0
                gs_snprintf(Buffer, sizeof(Buffer), "%"PRIdOFFSET"", entry->u.uncompressed.offset);
1143
0
                j = 10 - strlen(Buffer);
1144
0
                while(j--) {
1145
0
                    outprintf(ctx->memory, " ");
1146
0
                }
1147
0
                outprintf(ctx->memory, "%s ", Buffer);
1148
1149
0
                gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.uncompressed.generation_num);
1150
0
                j = 10 - strlen(Buffer);
1151
0
                while(j--) {
1152
0
                    outprintf(ctx->memory, " ");
1153
0
                }
1154
0
                outprintf(ctx->memory, "%s ", Buffer);
1155
0
            }
1156
0
            if (entry->free)
1157
0
                outprintf(ctx->memory, "f\n");
1158
0
            else
1159
0
                outprintf(ctx->memory, "n\n");
1160
0
        }
1161
0
    }
1162
11.9k
    if (ctx->args.pdfdebug)
1163
0
        outprintf(ctx->memory, "\n");
1164
1165
11.9k
 exit:
1166
11.9k
    (void)pdfi_loop_detector_cleartomark(ctx);
1167
1168
11.9k
    if (code < 0)
1169
0
        return code;
1170
1171
11.9k
    return 0;
1172
1173
96.9k
repair:
1174
96.9k
    (void)pdfi_loop_detector_cleartomark(ctx);
1175
96.9k
    if (!ctx->repaired && !ctx->args.pdfstoponerror)
1176
96.8k
        return(pdfi_repair_file(ctx));
1177
71
    return 0;
1178
96.9k
}