Coverage Report

Created: 2025-06-24 07:01

/src/ghostpdl/pdf/pdf_deref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* Functions to deal with dereferencing indirect objects
17
 * for the PDF interpreter. In here we also keep the code
18
 * for dealing with the object cache, because the dereferencing
19
 * functions are currently the only place that deals with it.
20
 */
21
22
#include "pdf_int.h"
23
#include "pdf_stack.h"
24
#include "pdf_loop_detect.h"
25
#include "strmio.h"
26
#include "stream.h"
27
#include "pdf_file.h"
28
#include "pdf_misc.h"
29
#include "pdf_dict.h"
30
#include "pdf_array.h"
31
#include "pdf_deref.h"
32
#include "pdf_repair.h"
33
34
/* Start with the object caching functions */
35
/* Disable object caching (for easier debugging with reference counting)
36
 * by uncommenting the following line
37
 */
38
/*#define DISABLE CACHE*/
39
40
/* given an object, create a cache entry for it. If we have too many entries
41
 * then delete the leat-recently-used cache entry. Make the new entry be the
42
 * most-recently-used entry. The actual entries are attached to the xref table
43
 * (as well as being a double-linked list), because we detect an existing
44
 * cache entry by seeing that the xref table for the object number has a non-NULL
45
 * 'cache' member.
46
 * So we need to update the xref as well if we add or delete cache entries.
47
 */
48
static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
49
2.21M
{
50
2.21M
#ifndef DISABLE_CACHE
51
2.21M
    pdf_obj_cache_entry *entry;
52
53
2.21M
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
54
4.42k
        return 0;
55
56
2.21M
    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
57
#if DEBUG_CACHE
58
        outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
59
#endif
60
0
        return_error(gs_error_unknownerror);
61
0
    }
62
63
2.21M
    if (o->object_num > ctx->xref_table->xref_size)
64
0
        return_error(gs_error_rangecheck);
65
66
2.21M
    if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE)
67
469k
    {
68
#if DEBUG_CACHE
69
        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
70
#endif
71
469k
        if (ctx->cache_LRU) {
72
469k
            entry = ctx->cache_LRU;
73
469k
            ctx->cache_LRU = entry->next;
74
469k
            if (entry->next)
75
469k
                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
76
469k
            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
77
469k
            pdfi_countdown(entry->o);
78
469k
            ctx->cache_entries--;
79
469k
            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
80
469k
        } else
81
0
            return_error(gs_error_unknownerror);
82
469k
    }
83
2.21M
    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
84
2.21M
    if (entry == NULL)
85
0
        return_error(gs_error_VMerror);
86
87
2.21M
    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
88
89
2.21M
    entry->o = o;
90
2.21M
    pdfi_countup(o);
91
2.21M
    if (ctx->cache_MRU) {
92
2.13M
        entry->previous = ctx->cache_MRU;
93
2.13M
        ctx->cache_MRU->next = entry;
94
2.13M
    }
95
2.21M
    ctx->cache_MRU = entry;
96
2.21M
    if (ctx->cache_LRU == NULL)
97
75.8k
        ctx->cache_LRU = entry;
98
99
2.21M
    ctx->cache_entries++;
100
2.21M
    ctx->xref_table->xref[o->object_num].cache = entry;
101
2.21M
#endif
102
2.21M
    return 0;
103
2.21M
}
104
105
/* Given an existing cache entry, promote it to be the most-recently-used
106
 * cache entry.
107
 */
108
static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
109
4.30M
{
110
4.30M
#ifndef DISABLE_CACHE
111
4.30M
    if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
112
3.15M
        if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
113
3.15M
            ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
114
3.15M
        if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
115
3.14M
            ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
116
2.00k
        else {
117
            /* the existing entry is the current least recently used, we need to make the 'next'
118
             * cache entry into the LRU.
119
             */
120
2.00k
            ctx->cache_LRU = cache_entry->next;
121
2.00k
        }
122
3.15M
        cache_entry->next = NULL;
123
3.15M
        cache_entry->previous = ctx->cache_MRU;
124
3.15M
        ctx->cache_MRU->next = cache_entry;
125
3.15M
        ctx->cache_MRU = cache_entry;
126
3.15M
    }
127
4.30M
#endif
128
4.30M
    return;
129
4.30M
}
130
131
/* This one's a bit of an oddity, its used for fonts. When we build a PDF font object
132
 * we want the object cache to reference *that* object, not the dictionary which was
133
 * read out of the PDF file, so this allows us to replace the font dictionary in the
134
 * cache with the actual font object, so that later dereferences will get this font
135
 * object.
136
 */
137
int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
138
156k
{
139
156k
#ifndef DISABLE_CACHE
140
156k
    xref_entry *entry;
141
156k
    pdf_obj_cache_entry *cache_entry;
142
156k
    pdf_obj *old_cached_obj = NULL;
143
144
    /* Limited error checking here, we assume that things like the
145
     * validity of the object (eg not a free oobject) have already been handled.
146
     */
147
148
156k
    entry = &ctx->xref_table->xref[o->object_num];
149
156k
    cache_entry = entry->cache;
150
151
156k
    if (cache_entry == NULL) {
152
4.74k
        return(pdfi_add_to_cache(ctx, o));
153
152k
    } else {
154
        /* NOTE: We grab the object without decrementing, to avoid triggering
155
         * a warning message for freeing an object that's in the cache
156
         */
157
152k
        if (cache_entry->o != NULL)
158
152k
            old_cached_obj = cache_entry->o;
159
160
        /* Put new entry in the cache */
161
152k
        cache_entry->o = o;
162
152k
        pdfi_countup(o);
163
152k
        pdfi_promote_cache_entry(ctx, cache_entry);
164
165
        /* Now decrement the old cache entry, if any */
166
152k
        pdfi_countdown(old_cached_obj);
167
152k
    }
168
152k
#endif
169
152k
    return 0;
170
156k
}
171
172
/* Now the dereferencing functions */
173
174
/*
175
 * Technically we can accept a stream other than the main PDF file stream here. This is
176
 * really for the case of compressed objects where we read tokens from the compressed
177
 * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
178
 * on top of the main file stream, which may be useful. Note that this cannot work with
179
 * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
180
 * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
181
 * the stream we are using. See the comments below when keyword is PDF_STREAM.
182
 */
183
184
/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
185
 * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
186
 * Currently errors are inmpossible. This is only used by the decryption code
187
 * to determine if a string is in a compressed object stream, if it is then
188
 * it can't be used for decryption.
189
 */
190
int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
191
15.5k
{
192
15.5k
    xref_entry *entry;
193
194
    /* Can't possibly be a compressed object before we have finished reading
195
     * the xref.
196
     */
197
15.5k
    if (ctx->xref_table == NULL)
198
0
        return 0;
199
200
15.5k
    entry = &ctx->xref_table->xref[obj];
201
202
15.5k
    if (entry->compressed)
203
0
        return 1;
204
205
15.5k
    return 0;
206
15.5k
}
207
208
/* We should never read a 'stream' keyword from a compressed object stream
209
 * so this case should never end up here.
210
 */
211
static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
212
                                   uint32_t objnum, uint32_t gen)
213
678k
{
214
678k
    int code = 0;
215
678k
    int64_t i;
216
678k
    pdf_dict *dict = NULL;
217
678k
    gs_offset_t offset;
218
678k
    pdf_stream *stream_obj = NULL;
219
220
    /* Strange code time....
221
     * If we are using a stream which is *not* the PDF uncompressed main file stream
222
     * then doing stell on it will only tell us how many bytes have been read from
223
     * that stream, it won't tell us the underlying file position. So we add on the
224
     * 'unread' bytes, *and* we add on the position of the start of the stream in
225
     * the actual main file. This is all done so that we can check the /Length
226
     * of the object. Note that this will *only* work for regular objects it can
227
     * not be used for compressed object streams, but those don't need checking anyway
228
     * they have a different mechanism altogether and should never get here.
229
     */
230
678k
    if (s != ctx->main_stream) {
231
0
        offset = stell(s->s) - s->unread_size + stream_offset;
232
0
        code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
233
0
        if (code < 0)
234
0
            return_error(gs_error_ioerror);
235
678k
    } else {
236
678k
        offset = stell(s->s) - s->unread_size;
237
678k
    }
238
239
678k
    if (pdfi_count_stack(ctx) < 1)
240
0
        return_error(gs_error_stackunderflow);
241
242
678k
    dict = (pdf_dict *)ctx->stack_top[-1];
243
244
678k
    if (pdfi_type_of(dict) != PDF_DICT) {
245
7.07k
        pdfi_pop(ctx, 1);
246
7.07k
        return_error(gs_error_syntaxerror);
247
7.07k
    }
248
249
671k
    dict->indirect_num = dict->object_num = objnum;
250
671k
    dict->indirect_gen = dict->generation_num = gen;
251
252
    /* Convert the dict into a stream */
253
671k
    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
254
671k
    if (code < 0) {
255
0
        pdfi_pop(ctx, 1);
256
0
        return code;
257
0
    }
258
    /* Pop off the dict and push the stream */
259
671k
    pdfi_pop(ctx, 1);
260
671k
    dict = NULL;
261
671k
    pdfi_push(ctx, (pdf_obj *)stream_obj);
262
263
671k
    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
264
671k
    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
265
671k
    stream_obj->stream_offset = offset;
266
267
    /* Exceptional code. Normally we do not need to worry about detecting circular references
268
     * when reading objects, because we do not dereference any indirect objects. However streams
269
     * are a slight exception in that we do get the Length from the stream dictionay and if that
270
     * is an indirect reference, then we dereference it.
271
     * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect
272
     * reference to the same stream object, and leads to infinite recursion. So deal with that
273
     * possibility here.
274
     */
275
671k
    code = pdfi_loop_detector_mark(ctx);
276
671k
    if (code < 0) {
277
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
278
0
        return code;
279
0
    }
280
671k
    if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
281
127
        pdfi_countdown(stream_obj); /* get rid of extra ref */
282
127
        pdfi_loop_detector_cleartomark(ctx);
283
127
        return_error(gs_error_circular_reference);
284
127
    }
285
286
671k
    code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
287
671k
    if (code < 0) {
288
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
289
0
        pdfi_loop_detector_cleartomark(ctx);
290
0
        return code;
291
0
    }
292
293
    /* This code may be a performance overhead, it simply skips over the stream contents
294
     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
295
     * 'go faster' flag for users who are certain their PDF files are well-formed. This
296
     * could also allow us to skip all kinds of other checking.....
297
     */
298
299
671k
    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
300
671k
    if (code < 0) {
301
18.2k
        char extra_info[gp_file_name_sizeof];
302
303
18.2k
        (void)pdfi_loop_detector_cleartomark(ctx);
304
18.2k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
305
18.2k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
306
18.2k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
307
18.2k
        return code;
308
18.2k
    }
309
653k
    code = pdfi_loop_detector_cleartomark(ctx);
310
653k
    if (code < 0) {
311
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
312
0
        return code;
313
0
    }
314
315
653k
    if (i < 0 || (i + offset)> ctx->main_stream_length) {
316
37.2k
        char extra_info[gp_file_name_sizeof];
317
318
37.2k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
319
37.2k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) {
320
0
            pdfi_pop(ctx, 1);
321
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
322
0
            return code;
323
0
        }
324
616k
    } else {
325
616k
        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
326
616k
        if (code < 0) {
327
0
            pdfi_pop(ctx, 1);
328
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
329
0
            return code;
330
0
        }
331
332
616k
        stream_obj->Length = 0;
333
616k
        stream_obj->length_valid = false;
334
335
616k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
336
616k
        if (code == 0) {
337
0
            char extra_info[gp_file_name_sizeof];
338
339
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
340
0
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
341
            /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
342
             * happens give up.
343
             */
344
0
            if (pdfi_count_stack(ctx) == 0) {
345
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
346
0
                return code;
347
0
            }
348
616k
        } else if (code < 0) {
349
0
            char extra_info[gp_file_name_sizeof];
350
351
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
352
0
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) {
353
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
354
0
                return code;
355
0
            }
356
616k
        } else if (code != TOKEN_ENDSTREAM) {
357
71.5k
            char extra_info[gp_file_name_sizeof];
358
359
71.5k
            gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
360
71.5k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) {
361
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
362
0
                return code;
363
0
            }
364
544k
        } else {
365
            /* Cache the Length in the stream object and mark it valid */
366
544k
            stream_obj->Length = i;
367
544k
            stream_obj->length_valid = true;
368
544k
        }
369
616k
    }
370
371
    /* If we failed to find a valid object, or the object wasn't a keyword, or the
372
     * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct
373
     * Length for streams if we have encrypted files, because we must install a
374
     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
375
     * Rather than mess about checking for encryption, we'll choose to just correctly
376
     * calculate the Length of all streams. Although this takes time, it will only
377
     * happen for files which are invalid.
378
     */
379
653k
    if (stream_obj->length_valid != true) {
380
108k
        char Buffer[10];
381
108k
        unsigned int bytes, total = 0;
382
108k
        int c = 0;
383
384
108k
        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
385
108k
        if (code < 0) {
386
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
387
0
            pdfi_pop(ctx, 1);
388
0
            return code;
389
0
        }
390
108k
        memset(Buffer, 0x00, 10);
391
108k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
392
108k
        if (bytes < 9) {
393
382
            pdfi_countdown(stream_obj); /* get rid of extra ref */
394
382
            return_error(gs_error_ioerror);
395
382
        }
396
397
108k
        total = bytes;
398
1.90G
        do {
399
1.90G
            if (memcmp(Buffer, "endstream", 9) == 0) {
400
67.2k
                if (Buffer[9] != 0x00)
401
67.1k
                    total--;
402
67.2k
                stream_obj->Length = total - 9;
403
67.2k
                stream_obj->length_valid = true;
404
67.2k
                break;
405
67.2k
            }
406
1.90G
            if (memcmp(Buffer, "endobj", 6) == 0) {
407
7.32k
                if (Buffer[9] != 0x00)
408
7.23k
                    total--;
409
7.32k
                stream_obj->Length = total - 6;
410
7.32k
                stream_obj->length_valid = true;
411
7.32k
                break;
412
7.32k
            }
413
1.90G
            memmove(Buffer, Buffer+1, 9);
414
1.90G
            c = pdfi_read_byte(ctx, ctx->main_stream);
415
1.90G
            if (c < 0)
416
33.9k
                break;
417
1.90G
            Buffer[9] = (byte)c;
418
1.90G
            total++;
419
1.90G
        } while(1);
420
108k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
421
108k
        if (c < 0)
422
33.9k
            return_error(gs_error_ioerror);
423
74.5k
        return 0;
424
108k
    }
425
426
544k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
427
544k
    if (code < 0) {
428
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
429
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) {
430
0
            return code;
431
0
        }
432
        /* Something went wrong looking for endobj, but we found endstream, so assume
433
         * for now that will suffice.
434
         */
435
0
        return 0;
436
0
    }
437
438
544k
    if (code == 0) {
439
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
440
0
        return_error(gs_error_stackunderflow);
441
0
    }
442
443
544k
    if (code != TOKEN_ENDOBJ) {
444
1.69k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
445
1.69k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
446
        /* Didn't find an endobj, but we have an endstream, so assume
447
         * for now that will suffice
448
         */
449
1.69k
        return code;
450
1.69k
    }
451
542k
    pdfi_countdown(stream_obj); /* get rid of extra ref */
452
453
542k
    return 0;
454
544k
}
455
456
/* This reads an object *after* the x y obj keyword has been found. Its broken out
457
 * separately for the benefit of the repair code when reading the dictionary following
458
 * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
459
 * not have an 'endobj', we rely on the error handling to take care of that for us.
460
 */
461
int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
462
2.15M
{
463
2.15M
    int code = 0, initial_depth = 0;
464
2.15M
    pdf_key keyword;
465
2.15M
    gs_offset_t saved_offset[3];
466
2.15M
    pdf_obj_type type;
467
468
2.15M
    initial_depth = pdfi_count_stack(ctx);
469
2.15M
    saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
470
471
2.15M
    code = pdfi_read_token(ctx, s, objnum, gen);
472
2.15M
    if (code < 0)
473
4.67k
        return code;
474
475
2.14M
    if (code == 0)
476
        /* failed to read a token */
477
70
        return_error(gs_error_syntaxerror);
478
479
2.14M
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
480
24.5k
        keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
481
24.5k
        if (keyword == TOKEN_ENDOBJ) {
482
515
            ctx->stack_top[-1] = PDF_NULL_OBJ;
483
515
            return 0;
484
515
        }
485
24.5k
    }
486
487
70.5M
    do {
488
        /* move all the saved offsets up by one */
489
70.5M
        saved_offset[0] = saved_offset[1];
490
70.5M
        saved_offset[1] = saved_offset[2];
491
70.5M
        saved_offset[2] = pdfi_unread_tell(ctx);
492
493
70.5M
        code = pdfi_read_token(ctx, s, objnum, gen);
494
70.5M
        if (code < 0) {
495
201k
            pdfi_clearstack(ctx);
496
201k
            return code;
497
201k
        }
498
70.3M
        if (s->eof)
499
2.50k
            return_error(gs_error_syntaxerror);
500
70.3M
        code = 0;
501
70.3M
        type = pdfi_type_of(ctx->stack_top[-1]);
502
70.3M
        if (type == PDF_KEYWORD)
503
153k
            goto missing_endobj;
504
70.3M
    } while (type != PDF_FAST_KEYWORD);
505
506
1.78M
    keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
507
1.78M
    if (keyword == TOKEN_ENDOBJ) {
508
1.05M
        pdf_obj *o;
509
510
1.05M
        if (pdfi_count_stack(ctx) - initial_depth < 2) {
511
271
            pdfi_clearstack(ctx);
512
271
            return_error(gs_error_stackunderflow);
513
271
        }
514
515
1.05M
        o = ctx->stack_top[-2];
516
517
1.05M
        pdfi_pop(ctx, 1);
518
519
1.05M
        if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
520
1.05M
            o->indirect_num = o->object_num = objnum;
521
1.05M
            o->indirect_gen = o->generation_num = gen;
522
1.05M
        }
523
1.05M
        return code;
524
1.05M
    }
525
729k
    if (keyword == TOKEN_STREAM) {
526
678k
        pdfi_pop(ctx, 1);
527
678k
        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
528
678k
    }
529
50.7k
    if (keyword == TOKEN_OBJ) {
530
5.86k
        pdf_obj *o;
531
532
5.86k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) {
533
0
            return code;
534
0
        }
535
536
        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
537
5.86k
        if (pdfi_count_stack(ctx) - initial_depth < 4)
538
1.30k
            return_error(gs_error_stackunderflow);
539
540
        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
541
4.55k
        o = ctx->stack_top[-4];
542
543
4.55k
        pdfi_pop(ctx, 3);
544
545
4.55k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
546
4.53k
            o->indirect_num = o->object_num = objnum;
547
4.53k
            o->indirect_gen = o->generation_num = gen;
548
4.53k
        }
549
4.55k
        if (saved_offset[0] > 0)
550
4.55k
            (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
551
4.55k
        return 0;
552
5.86k
    }
553
554
198k
missing_endobj:
555
    /* Assume that any other keyword means a missing 'endobj' */
556
198k
    if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) {
557
198k
        pdf_obj *o;
558
559
198k
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
560
561
198k
        if (pdfi_count_stack(ctx) - initial_depth < 2)
562
3.82k
            return_error(gs_error_stackunderflow);
563
564
194k
        o = ctx->stack_top[-2];
565
566
194k
        pdfi_pop(ctx, 1);
567
568
194k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
569
192k
            o->indirect_num = o->object_num = objnum;
570
192k
            o->indirect_gen = o->generation_num = gen;
571
192k
        }
572
194k
        return code;
573
198k
    }
574
0
    pdfi_pop(ctx, 2);
575
0
    return_error(gs_error_syntaxerror);
576
198k
}
577
578
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
579
2.16M
{
580
2.16M
    int code = 0;
581
2.16M
    int objnum = 0, gen = 0;
582
583
    /* An object consists of 'num gen obj' followed by a token, follwed by an endobj
584
     * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we
585
     * want to deal with it specially by getting the Length, jumping to the end and checking
586
     * for an endobj. Or not, possibly, because it would be slow.
587
     */
588
2.16M
    code = pdfi_read_bare_int(ctx, s, &objnum);
589
2.16M
    if (code < 0)
590
47.8k
        return code;
591
2.11M
    if (code == 0)
592
14.9k
        return_error(gs_error_syntaxerror);
593
594
2.10M
    code = pdfi_read_bare_int(ctx, s, &gen);
595
2.10M
    if (code < 0)
596
2.88k
        return code;
597
2.09M
    if (code == 0)
598
2.84k
        return_error(gs_error_syntaxerror);
599
600
2.09M
    code = pdfi_read_bare_keyword(ctx, s);
601
2.09M
    if (code < 0)
602
0
        return code;
603
2.09M
    if (code == 0)
604
0
        return gs_note_error(gs_error_ioerror);
605
2.09M
    if (code != TOKEN_OBJ) {
606
6.19k
        return_error(gs_error_syntaxerror);
607
6.19k
    }
608
609
2.08M
    return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
610
2.09M
}
611
612
static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
613
                                 const xref_entry *entry, bool cache)
614
947k
{
615
947k
    int code = 0;
616
947k
    xref_entry *compressed_entry;
617
947k
    pdf_c_stream *compressed_stream = NULL;
618
947k
    pdf_c_stream *SubFile_stream = NULL;
619
947k
    pdf_c_stream *Object_stream = NULL;
620
947k
    int i = 0, object_length = 0;
621
947k
    int64_t num_entries;
622
947k
    int found_object;
623
947k
    int64_t Length, First;
624
947k
    gs_offset_t offset = 0;
625
947k
    pdf_stream *compressed_object = NULL;
626
947k
    pdf_dict *compressed_sdict = NULL; /* alias */
627
947k
    pdf_name *Type = NULL;
628
629
947k
    if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1)
630
2.37k
        return_error(gs_error_undefined);
631
632
944k
    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
633
634
944k
    if (ctx->args.pdfdebug) {
635
0
        outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj);
636
0
        outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num);
637
0
    }
638
639
944k
    if (compressed_entry->cache == NULL) {
640
#if CACHE_STATISTICS
641
        ctx->compressed_misses++;
642
#endif
643
61.9k
        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
644
61.9k
        if (code < 0)
645
0
            goto exit;
646
647
61.9k
        code = pdfi_read_object(ctx, ctx->main_stream, 0);
648
61.9k
        if (code < 0)
649
14.1k
            goto exit;
650
651
47.8k
        if (pdfi_count_stack(ctx) < 1) {
652
0
            code = gs_note_error(gs_error_stackunderflow);
653
0
            goto exit;
654
0
        }
655
656
47.8k
        if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) {
657
12.1k
            pdfi_pop(ctx, 1);
658
12.1k
            code = gs_note_error(gs_error_typecheck);
659
12.1k
            goto exit;
660
12.1k
        }
661
35.6k
        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
662
264
            pdfi_pop(ctx, 1);
663
            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
664
264
            code = gs_note_error(gs_error_undefined);
665
264
            goto exit;
666
264
        }
667
35.4k
        compressed_object = (pdf_stream *)ctx->stack_top[-1];
668
35.4k
        pdfi_countup(compressed_object);
669
35.4k
        pdfi_pop(ctx, 1);
670
35.4k
        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
671
35.4k
        if (code < 0)
672
0
            goto exit;
673
882k
    } else {
674
#if CACHE_STATISTICS
675
        ctx->compressed_hits++;
676
#endif
677
882k
        compressed_object = (pdf_stream *)compressed_entry->cache->o;
678
882k
        pdfi_countup(compressed_object);
679
882k
        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
680
882k
    }
681
918k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
682
918k
    if (code < 0)
683
13
        return code;
684
685
918k
    if (ctx->loop_detection != NULL) {
686
918k
        code = pdfi_loop_detector_mark(ctx);
687
918k
        if (code < 0)
688
0
            goto exit;
689
918k
        if (compressed_sdict->object_num != 0) {
690
918k
            if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) {
691
212
                code = gs_note_error(gs_error_circular_reference);
692
917k
            } else {
693
917k
                code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num);
694
917k
            }
695
918k
            if (code < 0) {
696
212
                (void)pdfi_loop_detector_cleartomark(ctx);
697
212
                goto exit;
698
212
            }
699
918k
        }
700
918k
    }
701
    /* Check its an ObjStm ! */
702
917k
    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
703
917k
    if (code < 0) {
704
194
        if (ctx->loop_detection != NULL)
705
194
            (void)pdfi_loop_detector_cleartomark(ctx);
706
194
        goto exit;
707
194
    }
708
709
917k
    if (!pdfi_name_is(Type, "ObjStm")){
710
1.19k
        if (ctx->loop_detection != NULL)
711
1.18k
            (void)pdfi_loop_detector_cleartomark(ctx);
712
1.19k
        code = gs_note_error(gs_error_syntaxerror);
713
1.19k
        goto exit;
714
1.19k
    }
715
716
    /* Need to check the /N entry to see if the object is actually in this stream! */
717
916k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
718
916k
    if (code < 0) {
719
240
        if (ctx->loop_detection != NULL)
720
240
            (void)pdfi_loop_detector_cleartomark(ctx);
721
240
        goto exit;
722
240
    }
723
724
916k
    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
725
49
        if (ctx->loop_detection != NULL)
726
49
            (void)pdfi_loop_detector_cleartomark(ctx);
727
49
        code = gs_note_error(gs_error_rangecheck);
728
49
        goto exit;
729
49
    }
730
731
916k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
732
916k
    if (code < 0) {
733
210k
        if (ctx->loop_detection != NULL)
734
210k
            (void)pdfi_loop_detector_cleartomark(ctx);
735
210k
        goto exit;
736
210k
    }
737
738
706k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First);
739
706k
    if (code < 0) {
740
4.58k
        if (ctx->loop_detection != NULL)
741
4.58k
            (void)pdfi_loop_detector_cleartomark(ctx);
742
4.58k
        goto exit;
743
4.58k
    }
744
745
701k
    if (ctx->loop_detection != NULL)
746
701k
        (void)pdfi_loop_detector_cleartomark(ctx);
747
748
701k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
749
701k
    if (code < 0)
750
0
        goto exit;
751
752
701k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
753
701k
    if (code < 0)
754
0
        goto exit;
755
756
701k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
757
701k
    if (code < 0)
758
1.37k
        goto exit;
759
760
41.0M
    for (i=0;i < num_entries;i++)
761
40.3M
    {
762
40.3M
        int new_offset;
763
40.3M
        code = pdfi_read_bare_int(ctx, compressed_stream, &found_object);
764
40.3M
        if (code < 0)
765
14.3k
            goto exit;
766
40.3M
        if (code == 0) {
767
1.51k
            code = gs_note_error(gs_error_syntaxerror);
768
1.51k
            goto exit;
769
1.51k
        }
770
40.3M
        code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset);
771
40.3M
        if (code < 0)
772
22.8k
            goto exit;
773
40.3M
        if (code == 0) {
774
854
            code = gs_note_error(gs_error_syntaxerror);
775
854
            goto exit;
776
854
        }
777
40.3M
        if (i == entry->u.compressed.object_index) {
778
676k
            if (found_object != obj) {
779
2.10k
                code = gs_note_error(gs_error_undefined);
780
2.10k
                goto exit;
781
2.10k
            }
782
673k
            offset = new_offset;
783
673k
        }
784
40.3M
        if (i == entry->u.compressed.object_index + 1)
785
646k
            object_length = new_offset - offset;
786
40.3M
    }
787
788
    /* Bug #705259 - The first object need not lie immediately after the initial
789
     * table of object numbers and offsets. The start of the first object is given
790
     * by the value of First. We don't know how many bytes we consumed getting to
791
     * the end of the table, unfortunately, so we close the stream, rewind the main
792
     * stream back to the beginning of the ObjStm, and then read and discard 'First'
793
     * bytes in order to get to the start of the first object. Then we read the
794
     * number of bytes required to get from there to the start of the object we
795
     * actually want.
796
     * If this ever looks like it's causing performance problems we could read the
797
     * initial table above manually instead of using the existing code, and track
798
     * how many bytes we'd read, which would avoid us having to tear down and
799
     * rebuild the stream.
800
     */
801
658k
    if (compressed_stream)
802
658k
        pdfi_close_file(ctx, compressed_stream);
803
658k
    if (SubFile_stream)
804
658k
        pdfi_close_file(ctx, SubFile_stream);
805
806
658k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
807
658k
    if (code < 0)
808
0
        goto exit;
809
810
    /* We already dereferenced this above, so we don't need the loop detection checking here */
811
658k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
812
658k
    if (code < 0)
813
0
        goto exit;
814
815
658k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
816
658k
    if (code < 0)
817
0
        goto exit;
818
819
658k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
820
658k
    if (code < 0)
821
0
        goto exit;
822
823
341M
    for (i=0;i < First;i++)
824
340M
    {
825
340M
        int c = pdfi_read_byte(ctx, compressed_stream);
826
340M
        if (c < 0) {
827
33
            code = gs_note_error(gs_error_ioerror);
828
33
            goto exit;
829
33
        }
830
340M
    }
831
832
    /* Skip to the offset of the object we want to read */
833
2.32G
    for (i=0;i < offset;i++)
834
2.32G
    {
835
2.32G
        int c = pdfi_read_byte(ctx, compressed_stream);
836
2.32G
        if (c < 0) {
837
64.5k
            code = gs_note_error(gs_error_ioerror);
838
64.5k
            goto exit;
839
64.5k
        }
840
2.32G
    }
841
842
    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
843
     * the number of bytes we read to the declared size of the object (difference between
844
     * the offsets of the object we want to read, and the next object). If it is 0 then
845
     * we're reading the last object in the stream, so we just rely on the SubFileDecode
846
     * we set up when we created compressed_stream to limit the bytes to the length of
847
     * that stream.
848
     */
849
593k
    if (object_length > 0) {
850
569k
        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
851
569k
        if (code < 0)
852
0
            goto exit;
853
569k
    } else {
854
24.7k
        Object_stream = compressed_stream;
855
24.7k
    }
856
857
593k
    code = pdfi_read_token(ctx, Object_stream, obj, gen);
858
593k
    if (code < 0)
859
5.89k
        goto exit;
860
587k
    if (code == 0) {
861
96
        code = gs_note_error(gs_error_syntaxerror);
862
96
        goto exit;
863
96
    }
864
587k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) {
865
575k
        int start_depth = pdfi_count_stack(ctx);
866
867
        /* Need to read all the elements from COS objects */
868
21.2M
        do {
869
21.2M
            code = pdfi_read_token(ctx, Object_stream, obj, gen);
870
21.2M
            if (code < 0)
871
25.7k
                goto exit;
872
21.1M
            if (code == 0) {
873
4.75k
                code = gs_note_error(gs_error_syntaxerror);
874
4.75k
                goto exit;
875
4.75k
            }
876
21.1M
            if (compressed_stream->eof == true) {
877
231
                code = gs_note_error(gs_error_ioerror);
878
231
                goto exit;
879
231
            }
880
21.1M
        } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
881
575k
    }
882
883
557k
    *object = ctx->stack_top[-1];
884
    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
885
     * the object number for uncompressed objects. So we need to do that here.
886
     */
887
557k
    if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
888
553k
        (*object)->indirect_num = (*object)->object_num = obj;
889
553k
        (*object)->indirect_gen = (*object)->generation_num = gen;
890
553k
        pdfi_countup(*object);
891
553k
    }
892
557k
    pdfi_pop(ctx, 1);
893
894
557k
    if (cache) {
895
526k
        code = pdfi_add_to_cache(ctx, *object);
896
526k
        if (code < 0) {
897
0
            pdfi_countdown(*object);
898
0
            goto exit;
899
0
        }
900
526k
    }
901
902
944k
 exit:
903
944k
    if (Object_stream)
904
593k
        pdfi_close_file(ctx, Object_stream);
905
944k
    if (Object_stream != compressed_stream)
906
675k
        if (compressed_stream)
907
675k
            pdfi_close_file(ctx, compressed_stream);
908
944k
    if (SubFile_stream)
909
701k
        pdfi_close_file(ctx, SubFile_stream);
910
944k
    pdfi_countdown(compressed_object);
911
944k
    pdfi_countdown(Type);
912
944k
    return code;
913
557k
}
914
915
/* pdf_dereference returns an object with a reference count of at least 1, this represents the
916
 * reference being held by the caller (in **object) when we return from this function.
917
 */
918
static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache)
919
8.37M
{
920
8.37M
    xref_entry *entry;
921
8.37M
    int code, stack_depth = pdfi_count_stack(ctx);
922
8.37M
    gs_offset_t saved_stream_offset;
923
8.37M
    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;
924
925
8.37M
    *object = NULL;
926
927
8.37M
    if (ctx->xref_table == NULL)
928
56
        return_error(gs_error_typecheck);
929
930
8.37M
    if (ctx->main_stream == NULL || ctx->main_stream->s == NULL)
931
0
        return_error(gs_error_ioerror);
932
933
8.37M
    if (obj >= ctx->xref_table->xref_size) {
934
275k
        char extra_info[gp_file_name_sizeof];
935
936
275k
        gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
937
275k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) {
938
0
            return code;
939
0
        }
940
941
275k
        code = pdfi_repair_file(ctx);
942
275k
        if (code < 0) {
943
275k
            *object = NULL;
944
275k
            return code;
945
275k
        }
946
32
        if (obj >= ctx->xref_table->xref_size) {
947
21
            *object = NULL;
948
21
            return_error(gs_error_rangecheck);
949
21
        }
950
32
    }
951
952
8.09M
    entry = &ctx->xref_table->xref[obj];
953
954
8.09M
    if(entry->object_num == 0) {
955
1.76M
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0");
956
1.76M
        return_error(gs_error_undefined);
957
1.76M
    }
958
959
6.33M
    if (entry->free) {
960
10.7k
        char extra_info[gp_file_name_sizeof];
961
962
10.7k
        gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
963
10.7k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
964
10.7k
        *object = PDF_NULL_OBJ;
965
10.7k
        return code;
966
6.32M
    }else {
967
6.32M
        if (!entry->compressed) {
968
4.90M
            if(entry->u.uncompressed.generation_num != gen)
969
4.77k
                pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", "");
970
4.90M
        }
971
6.32M
    }
972
973
6.32M
    if (ctx->loop_detection) {
974
6.07M
        if (pdfi_loop_detector_check_object(ctx, obj) == true)
975
1.74k
            return_error(gs_error_circular_reference);
976
6.07M
        if (entry->free) {
977
0
            code = pdfi_loop_detector_add_object(ctx, obj);
978
0
            if (code < 0)
979
0
                return code;
980
0
        }
981
6.07M
    }
982
6.31M
    if (entry->cache != NULL){
983
3.26M
        pdf_obj_cache_entry *cache_entry = entry->cache;
984
985
#if CACHE_STATISTICS
986
        ctx->hits++;
987
#endif
988
3.26M
        *object = cache_entry->o;
989
3.26M
        pdfi_countup(*object);
990
991
3.26M
        pdfi_promote_cache_entry(ctx, cache_entry);
992
3.26M
    } else {
993
3.04M
        saved_stream_offset = pdfi_unread_tell(ctx);
994
995
3.04M
        if (entry->compressed) {
996
            /* This is an object in a compressed object stream */
997
947k
            ctx->encryption.decrypt_strings = false;
998
999
947k
            code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache);
1000
947k
            if (code < 0 || *object == NULL)
1001
389k
                goto error;
1002
2.10M
        } else {
1003
#if CACHE_STATISTICS
1004
            ctx->misses++;
1005
#endif
1006
2.10M
            ctx->encryption.decrypt_strings = true;
1007
1008
2.10M
            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
1009
2.10M
            if (code < 0)
1010
105
                goto error;
1011
1012
2.10M
            code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset);
1013
1014
            /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it.
1015
             * reload the xref entry to be certain it is valid.
1016
             */
1017
2.10M
            entry = &ctx->xref_table->xref[obj];
1018
2.10M
            if (code < 0) {
1019
306k
                int code1 = 0;
1020
306k
                if (entry->free) {
1021
0
                    char extra_info[gp_file_name_sizeof];
1022
1023
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1024
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1025
0
                    *object = PDF_NULL_OBJ;
1026
0
                    if (code < 0)
1027
0
                        goto error;
1028
0
                    goto free_obj;
1029
0
                }
1030
306k
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
1031
306k
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1032
306k
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1033
1034
306k
                code1 = pdfi_repair_file(ctx);
1035
306k
                if (code1 == 0)
1036
1.49k
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1037
                /* Repair failed, just give up and return an error */
1038
304k
                goto error;
1039
306k
            }
1040
1041
            /* We only expect a single object back when dereferencing an indirect reference
1042
             * The only way (I think) we can end up with more than one is if the object initially
1043
             * appears to be a dictionary or array, but the object terminates (with endobj or
1044
             * simply reaching EOF) without terminating the array or dictionary. That's clearly
1045
             * an error. We might, as a future 'improvement' choose to walk back through
1046
             * the stack looking for unterminated dictionary or array markers, and closing them
1047
             * so that (hopefully!) we end up with a single 'repaired' object on the stack.
1048
             * But for now I'm simply going to treat these as errors. We will try a repair on the
1049
             * file to see if we end up using a different (hopefully intact) object from the file.
1050
             */
1051
1.79M
            if (pdfi_count_stack(ctx) - stack_depth > 1) {
1052
139k
                int code1 = 0;
1053
1054
139k
                code1 = pdfi_repair_file(ctx);
1055
139k
                if (code1 == 0)
1056
518
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1057
                /* Repair failed, just give up and return an error */
1058
138k
                code = gs_note_error(gs_error_syntaxerror);
1059
138k
                goto error;
1060
139k
            }
1061
1062
1.65M
            if (pdfi_count_stack(ctx) > 0 &&
1063
1.65M
                ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) &&
1064
1.65M
                (ctx->stack_top[-1])->object_num == obj)
1065
1.65M
                || ctx->stack_top[-1] == PDF_NULL_OBJ)) {
1066
1.65M
                *object = ctx->stack_top[-1];
1067
1.65M
                pdfi_countup(*object);
1068
1.65M
                pdfi_pop(ctx, 1);
1069
1.65M
                if (pdfi_type_of(*object) == PDF_INDIRECT) {
1070
0
                    pdf_indirect_ref *iref = (pdf_indirect_ref *)*object;
1071
1072
0
                    if (iref->ref_object_num == obj) {
1073
0
                        code = gs_note_error(gs_error_circular_reference);
1074
0
                        pdfi_countdown(*object);
1075
0
                        *object = NULL;
1076
0
                        goto error;
1077
0
                    }
1078
0
                }
1079
                /* There's really no point in caching an indirect reference and
1080
                 * I think it could be potentially confusing to later calls.
1081
                 */
1082
1.65M
                if (cache && pdfi_type_of(*object) != PDF_INDIRECT) {
1083
1.64M
                    code = pdfi_add_to_cache(ctx, *object);
1084
1.64M
                    if (code < 0) {
1085
0
                        pdfi_countdown(*object);
1086
0
                        goto error;
1087
0
                    }
1088
1.64M
                }
1089
1.65M
            } else {
1090
1.86k
                int code1 = 0;
1091
1092
1.86k
                if (pdfi_count_stack(ctx) > 0)
1093
1.76k
                    pdfi_pop(ctx, 1);
1094
1095
1.86k
                if (entry->free) {
1096
0
                    char extra_info[gp_file_name_sizeof];
1097
1098
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1099
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1100
0
                    *object = PDF_NULL_OBJ;
1101
0
                    if (code < 0)
1102
0
                        goto error;
1103
0
                    return code;
1104
0
                }
1105
1.86k
                code1 = pdfi_repair_file(ctx);
1106
1.86k
                if (code1 == 0)
1107
188
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1108
                /* Repair failed, just give up and return an error */
1109
1.67k
                code = gs_note_error(gs_error_undefined);
1110
1.67k
                goto error;
1111
1.86k
            }
1112
1.65M
        }
1113
2.21M
free_obj:
1114
2.21M
        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1115
2.21M
    }
1116
1117
5.48M
    if (ctx->loop_detection && pdf_object_num(*object) != 0) {
1118
5.23M
        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
1119
5.23M
        if (code < 0) {
1120
0
            ctx->encryption.decrypt_strings = saved_decrypt_strings;
1121
0
            return code;
1122
0
        }
1123
5.23M
    }
1124
5.48M
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1125
5.48M
    return 0;
1126
1127
835k
error:
1128
835k
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1129
835k
    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1130
    /* Return the stack to the state at entry */
1131
835k
    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1132
835k
    return code;
1133
5.48M
}
1134
1135
int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1136
8.32M
{
1137
8.32M
    return pdfi_dereference_main(ctx, obj, gen, object, true);
1138
8.32M
}
1139
1140
int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1141
41.9k
{
1142
41.9k
    return pdfi_dereference_main(ctx, obj, gen, object, false);
1143
41.9k
}
1144
1145
/* do a derefence with loop detection */
1146
int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1147
2.62M
{
1148
2.62M
    int code;
1149
1150
2.62M
    code = pdfi_loop_detector_mark(ctx);
1151
2.62M
    if (code < 0)
1152
0
        return code;
1153
1154
2.62M
    code = pdfi_dereference(ctx, obj, gen, object);
1155
2.62M
    (void)pdfi_loop_detector_cleartomark(ctx);
1156
2.62M
    return code;
1157
2.62M
}
1158
1159
int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1160
41.9k
{
1161
41.9k
    int code;
1162
1163
41.9k
    code = pdfi_loop_detector_mark(ctx);
1164
41.9k
    if (code < 0)
1165
0
        return code;
1166
1167
41.9k
    code = pdfi_dereference_nocache(ctx, obj, gen, object);
1168
41.9k
    (void)pdfi_loop_detector_cleartomark(ctx);
1169
41.9k
    return code;
1170
41.9k
}
1171
1172
static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
1173
1.63k
{
1174
1.63k
    int code = 0;
1175
1.63k
    uint64_t index, arraysize;
1176
1.63k
    pdf_obj *object = NULL;
1177
1.63k
    pdf_array *array = (pdf_array *)obj;
1178
1179
1.63k
    arraysize = pdfi_array_size(array);
1180
7.22k
    for (index = 0; index < arraysize; index++) {
1181
5.60k
        if (ctx->loop_detection != NULL) {
1182
5.60k
            code = pdfi_loop_detector_mark(ctx);
1183
5.60k
            if (code < 0)
1184
0
                return code;
1185
5.60k
        }
1186
1187
5.60k
        code = pdfi_array_get_no_store_R(ctx, array, index, &object);
1188
1189
5.60k
        if (ctx->loop_detection != NULL) {
1190
5.60k
            int code1 = pdfi_loop_detector_cleartomark(ctx);
1191
5.60k
            if (code1 < 0)
1192
0
                return code1;
1193
5.60k
        }
1194
1195
5.60k
        if (code == gs_error_circular_reference) {
1196
            /* Previously we just left as an indirect reference, but now we want
1197
             * to return the error so we don't end up replacing indirect references
1198
             * to objects with circular references.
1199
             */
1200
5.60k
        } else {
1201
5.60k
            if (code < 0) goto exit;
1202
5.59k
            if (recurse) {
1203
699
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse);
1204
699
                if (code < 0) goto exit;
1205
699
            }
1206
            /* don't store the object if it's a stream (leave as a ref) */
1207
5.58k
            if (pdfi_type_of(object) != PDF_STREAM)
1208
5.58k
                code = pdfi_array_put(ctx, array, index, object);
1209
5.58k
        }
1210
5.58k
        if (code < 0) goto exit;
1211
1212
5.58k
        pdfi_countdown(object);
1213
5.58k
        object = NULL;
1214
5.58k
    }
1215
1216
1.63k
 exit:
1217
1.63k
    pdfi_countdown(object);
1218
1.63k
    return code;
1219
1.63k
}
1220
1221
static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
1222
958
{
1223
958
    int code = 0;
1224
958
    pdf_dict *dict = (pdf_dict *)obj;
1225
958
    pdf_name *Key = NULL;
1226
958
    pdf_obj *Value = NULL;
1227
958
    uint64_t index, dictsize;
1228
1229
958
    dictsize = pdfi_dict_entries(dict);
1230
1231
    /* Note: I am not using pdfi_dict_first/next because of needing to handle
1232
     * circular references.
1233
     */
1234
2.49k
    for (index=0; index<dictsize; index ++) {
1235
1.59k
        Key = (pdf_name *)dict->list[index].key;
1236
1.59k
        if (pdfi_name_is(Key, "Parent"))
1237
5
            continue;
1238
1239
1.58k
        if (ctx->loop_detection != NULL) {
1240
1.52k
            code = pdfi_loop_detector_mark(ctx);
1241
1.52k
            if (code < 0)
1242
0
                return code;
1243
1.52k
        }
1244
1245
1.58k
        code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value);
1246
1247
1.58k
        if (ctx->loop_detection != NULL) {
1248
1.52k
            int code1 = pdfi_loop_detector_cleartomark(ctx);
1249
1.52k
            if (code1 < 0)
1250
0
                return code1;
1251
1.52k
        }
1252
1253
1.58k
        if (code == gs_error_circular_reference) {
1254
            /* Just leave as an indirect ref */
1255
7
            code = 0;
1256
1.58k
        } else {
1257
1.58k
            if (code < 0) goto exit;
1258
1.56k
            if (recurse) {
1259
1.31k
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse);
1260
1.31k
                if (code < 0)
1261
43
                    goto exit;
1262
1.31k
            }
1263
            /* don't store the object if it's a stream (leave as a ref) */
1264
1.52k
            if (pdfi_type_of(Value) != PDF_STREAM)
1265
1.50k
                code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true);
1266
1.52k
        }
1267
1.53k
        if (code < 0) goto exit;
1268
1269
1.53k
        pdfi_countdown(Value);
1270
1.53k
        Value = NULL;
1271
1.53k
    }
1272
1273
958
 exit:
1274
958
    pdfi_countdown(Value);
1275
958
    return code;
1276
958
}
1277
1278
/* Resolve all the indirect references for an object
1279
 * Note: This can be recursive
1280
 */
1281
int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
1282
7.40k
{
1283
7.40k
    int code = 0;
1284
1285
7.40k
    switch(pdfi_type_of(value)) {
1286
1.63k
    case PDF_ARRAY:
1287
1.63k
        code = pdfi_resolve_indirect_array(ctx, value, recurse);
1288
1.63k
        break;
1289
958
    case PDF_DICT:
1290
958
        code = pdfi_resolve_indirect_dict(ctx, value, recurse);
1291
958
        break;
1292
4.81k
    default:
1293
4.81k
        break;
1294
7.40k
    }
1295
7.40k
    return code;
1296
7.40k
}
1297
1298
/* Resolve all the indirect references for an object
1299
 * Resolve indirect references, either one level or recursively, with loop detect on
1300
 * the parent (can by NULL) and the value.
1301
 */
1302
int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
1303
7.38k
{
1304
7.38k
    int code = 0;
1305
1306
7.38k
    code = pdfi_loop_detector_mark(ctx);
1307
7.38k
    if (code < 0) goto exit;
1308
7.38k
    if (parent && parent->object_num != 0) {
1309
4.97k
        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
1310
4.97k
        if (code < 0) goto exit;
1311
4.97k
    }
1312
1313
7.38k
    if (pdf_object_num(value) != 0) {
1314
406
        if (pdfi_loop_detector_check_object(ctx, value->object_num)) {
1315
0
            code = gs_note_error(gs_error_circular_reference);
1316
0
            goto exit;
1317
0
        }
1318
406
        code = pdfi_loop_detector_add_object(ctx, value->object_num);
1319
406
        if (code < 0) goto exit;
1320
406
    }
1321
7.38k
    code = pdfi_resolve_indirect(ctx, value, recurse);
1322
1323
7.38k
 exit:
1324
7.38k
    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
1325
7.38k
    return code;
1326
7.38k
}