Coverage Report

Created: 2026-04-01 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/pdf/pdf_deref.c
Line
Count
Source
1
/* Copyright (C) 2020-2026 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* Functions to deal with dereferencing indirect objects
17
 * for the PDF interpreter. In here we also keep the code
18
 * for dealing with the object cache, because the dereferencing
19
 * functions are currently the only place that deals with it.
20
 */
21
22
#include "pdf_int.h"
23
#include "pdf_stack.h"
24
#include "pdf_loop_detect.h"
25
#include "strmio.h"
26
#include "stream.h"
27
#include "pdf_file.h"
28
#include "pdf_misc.h"
29
#include "pdf_dict.h"
30
#include "pdf_array.h"
31
#include "pdf_deref.h"
32
#include "pdf_repair.h"
33
34
/* Start with the object caching functions */
35
/* Disable object caching (for easier debugging with reference counting)
36
 * by uncommenting the following line
37
 */
38
/*#define DISABLE_CACHE*/
39
40
/* given an object, create a cache entry for it. If we have too many entries
41
 * then delete the leat-recently-used cache entry. Make the new entry be the
42
 * most-recently-used entry. The actual entries are attached to the xref table
43
 * (as well as being a double-linked list), because we detect an existing
44
 * cache entry by seeing that the xref table for the object number has a non-NULL
45
 * 'cache' member.
46
 * So we need to update the xref as well if we add or delete cache entries.
47
 */
48
static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
49
2.08M
{
50
2.08M
#ifndef DISABLE_CACHE
51
2.08M
    pdf_obj_cache_entry *entry;
52
53
2.08M
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
54
3.32k
        return 0;
55
56
2.07M
    if (o->object_num >= ctx->xref_table->xref_size)
57
0
        return_error(gs_error_rangecheck);
58
59
2.07M
    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
60
#if DEBUG_CACHE
61
        outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
62
#endif
63
0
        return_error(gs_error_unknownerror);
64
0
    }
65
66
#if DEBUG_CACHE
67
        dbgmprintf1(ctx->memory, "Adding object %d\n", o->object_num);
68
#endif
69
2.07M
    if (ctx->cache_entries == ctx->args.PDFCacheSize)
70
462k
    {
71
#if DEBUG_CACHE
72
        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
73
#endif
74
462k
        if (ctx->cache_LRU) {
75
462k
            entry = ctx->cache_LRU;
76
#if DEBUG_CACHE
77
            dbgmprintf1(ctx->memory, "Evicting %d\n", entry->o->object_num);
78
#endif
79
462k
            ctx->cache_LRU = entry->next;
80
462k
            if (entry->next)
81
462k
                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
82
462k
            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
83
462k
            pdfi_countdown(entry->o);
84
462k
            ctx->cache_entries--;
85
462k
            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
86
462k
        } else
87
0
            return_error(gs_error_unknownerror);
88
462k
    }
89
2.07M
    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
90
2.07M
    if (entry == NULL)
91
0
        return_error(gs_error_VMerror);
92
93
2.07M
    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
94
95
2.07M
    entry->o = o;
96
2.07M
    pdfi_countup(o);
97
2.07M
    if (ctx->cache_MRU) {
98
1.99M
        entry->previous = ctx->cache_MRU;
99
1.99M
        ctx->cache_MRU->next = entry;
100
1.99M
    }
101
2.07M
    ctx->cache_MRU = entry;
102
2.07M
    if (ctx->cache_LRU == NULL)
103
78.5k
        ctx->cache_LRU = entry;
104
105
2.07M
    ctx->cache_entries++;
106
2.07M
    ctx->xref_table->xref[o->object_num].cache = entry;
107
2.07M
#endif
108
2.07M
    return 0;
109
2.07M
}
110
111
/* Given an existing cache entry, promote it to be the most-recently-used
112
 * cache entry.
113
 */
114
static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
115
3.64M
{
116
3.64M
#ifndef DISABLE_CACHE
117
3.64M
    if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
118
2.31M
        if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
119
2.31M
            ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
120
2.31M
        if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
121
2.30M
            ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
122
1.81k
        else {
123
            /* the existing entry is the current least recently used, we need to make the 'next'
124
             * cache entry into the LRU.
125
             */
126
1.81k
            ctx->cache_LRU = cache_entry->next;
127
1.81k
        }
128
2.31M
        cache_entry->next = NULL;
129
2.31M
        cache_entry->previous = ctx->cache_MRU;
130
2.31M
        ctx->cache_MRU->next = cache_entry;
131
2.31M
        ctx->cache_MRU = cache_entry;
132
2.31M
    }
133
3.64M
#endif
134
3.64M
    return;
135
3.64M
}
136
137
/* Make sure an object is in the object cache and is the most-recently-used
 * entry. Objects with object number 0 are not cached. If the object number has
 * no cache entry yet one is created, otherwise the existing entry is promoted.
 *
 * The same sanity checks used by pdfi_add_to_cache() are applied up front, so
 * that an encoded token value is never dereferenced and an out-of-range object
 * number is never used to index the xref table.
 *
 * Returns 0 on success (or when there is nothing to cache),
 * gs_error_rangecheck if the object number is beyond the xref table, or any
 * error returned by pdfi_add_to_cache().
 */
int pdfi_cache_object(pdf_context *ctx, pdf_obj *o)
{
    /* Tokens encoded as pointers are not real objects; nothing to cache */
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
        return 0;

    if (o->object_num == 0)
        return 0;

    if (o->object_num >= ctx->xref_table->xref_size)
        return_error(gs_error_rangecheck);

    if (ctx->xref_table->xref[o->object_num].cache == NULL)
        return pdfi_add_to_cache(ctx, o);
    else
        pdfi_promote_cache_entry(ctx, ctx->xref_table->xref[o->object_num].cache);
    return 0;
}
147
148
/* This one's a bit of an oddity, its used for fonts. When we build a PDF font object
149
 * we want the object cache to reference *that* object, not the dictionary which was
150
 * read out of the PDF file, so this allows us to replace the font dictionary in the
151
 * cache with the actual font object, so that later dereferences will get this font
152
 * object.
153
 */
154
int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
155
141k
{
156
141k
#ifndef DISABLE_CACHE
157
141k
    xref_entry *entry;
158
141k
    pdf_obj_cache_entry *cache_entry;
159
141k
    pdf_obj *old_cached_obj = NULL;
160
161
    /* Limited error checking here, we assume that things like the
162
     * validity of the object (eg not a free oobject) have already been handled.
163
     */
164
165
141k
    entry = &ctx->xref_table->xref[o->object_num];
166
141k
    cache_entry = entry->cache;
167
168
141k
    if (cache_entry == NULL) {
169
3.40k
        return(pdfi_add_to_cache(ctx, o));
170
138k
    } else {
171
        /* NOTE: We grab the object without decrementing, to avoid triggering
172
         * a warning message for freeing an object that's in the cache
173
         */
174
138k
        if (cache_entry->o != NULL)
175
138k
            old_cached_obj = cache_entry->o;
176
177
        /* Put new entry in the cache */
178
138k
        cache_entry->o = o;
179
138k
        pdfi_countup(o);
180
138k
        pdfi_promote_cache_entry(ctx, cache_entry);
181
182
        /* Now decrement the old cache entry, if any */
183
138k
        pdfi_countdown(old_cached_obj);
184
138k
    }
185
138k
#endif
186
138k
    return 0;
187
141k
}
188
189
/* Now the dereferencing functions */
190
191
/*
192
 * Technically we can accept a stream other than the main PDF file stream here. This is
193
 * really for the case of compressed objects where we read tokens from the compressed
194
 * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
195
 * on top of the main file stream, which may be useful. Note that this cannot work with
196
 * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
197
 * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
198
 * the stream we are using. See the comments below when keyword is PDF_STREAM.
199
 */
200
201
/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
202
 * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
203
 * Currently errors are inmpossible. This is only used by the decryption code
204
 * to determine if a string is in a compressed object stream, if it is then
205
 * it can't be used for decryption.
206
 */
207
int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
208
15.9k
{
209
15.9k
    xref_entry *entry;
210
211
    /* Can't possibly be a compressed object before we have finished reading
212
     * the xref.
213
     */
214
15.9k
    if (ctx->xref_table == NULL)
215
0
        return 0;
216
217
15.9k
    entry = &ctx->xref_table->xref[obj];
218
219
15.9k
    if (entry->compressed)
220
0
        return 1;
221
222
15.9k
    return 0;
223
15.9k
}
224
225
/* We should never read a 'stream' keyword from a compressed object stream
226
 * so this case should never end up here.
227
 */
228
static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
229
                                   uint32_t objnum, uint32_t gen)
230
654k
{
231
654k
    int code = 0;
232
654k
    int64_t i;
233
654k
    pdf_dict *dict = NULL;
234
654k
    gs_offset_t offset;
235
654k
    pdf_stream *stream_obj = NULL;
236
237
    /* Strange code time....
238
     * If we are using a stream which is *not* the PDF uncompressed main file stream
239
     * then doing stell on it will only tell us how many bytes have been read from
240
     * that stream, it won't tell us the underlying file position. So we add on the
241
     * 'unread' bytes, *and* we add on the position of the start of the stream in
242
     * the actual main file. This is all done so that we can check the /Length
243
     * of the object. Note that this will *only* work for regular objects it can
244
     * not be used for compressed object streams, but those don't need checking anyway
245
     * they have a different mechanism altogether and should never get here.
246
     */
247
654k
    if (s != ctx->main_stream) {
248
0
        offset = stell(s->s) - s->unread_size + stream_offset;
249
0
        code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
250
0
        if (code < 0)
251
0
            return_error(gs_error_ioerror);
252
654k
    } else {
253
654k
        offset = stell(s->s) - s->unread_size;
254
654k
    }
255
256
654k
    if (pdfi_count_stack(ctx) < 1)
257
0
        return_error(gs_error_stackunderflow);
258
259
654k
    dict = (pdf_dict *)ctx->stack_top[-1];
260
261
654k
    if (pdfi_type_of(dict) != PDF_DICT) {
262
9.05k
        pdfi_pop(ctx, 1);
263
9.05k
        return_error(gs_error_syntaxerror);
264
9.05k
    }
265
266
645k
    dict->indirect_num = dict->object_num = objnum;
267
645k
    dict->indirect_gen = dict->generation_num = gen;
268
269
    /* Convert the dict into a stream */
270
645k
    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
271
645k
    if (code < 0) {
272
0
        pdfi_pop(ctx, 1);
273
0
        return code;
274
0
    }
275
    /* Pop off the dict and push the stream */
276
645k
    pdfi_pop(ctx, 1);
277
645k
    dict = NULL;
278
645k
    pdfi_push(ctx, (pdf_obj *)stream_obj);
279
280
645k
    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
281
645k
    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
282
645k
    stream_obj->stream_offset = offset;
283
284
    /* Exceptional code. Normally we do not need to worry about detecting circular references
285
     * when reading objects, because we do not dereference any indirect objects. However streams
286
     * are a slight exception in that we do get the Length from the stream dictionay and if that
287
     * is an indirect reference, then we dereference it.
288
     * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect
289
     * reference to the same stream object, and leads to infinite recursion. So deal with that
290
     * possibility here.
291
     */
292
645k
    code = pdfi_loop_detector_mark(ctx);
293
645k
    if (code < 0) {
294
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
295
0
        return code;
296
0
    }
297
645k
    if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
298
127
        pdfi_countdown(stream_obj); /* get rid of extra ref */
299
127
        pdfi_loop_detector_cleartomark(ctx);
300
127
        return_error(gs_error_circular_reference);
301
127
    }
302
303
644k
    code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
304
644k
    if (code < 0) {
305
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
306
0
        pdfi_loop_detector_cleartomark(ctx);
307
0
        return code;
308
0
    }
309
310
    /* This code may be a performance overhead, it simply skips over the stream contents
311
     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
312
     * 'go faster' flag for users who are certain their PDF files are well-formed. This
313
     * could also allow us to skip all kinds of other checking.....
314
     */
315
316
644k
    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
317
644k
    if (code < 0) {
318
15.6k
        char extra_info[gp_file_name_sizeof];
319
320
15.6k
        (void)pdfi_loop_detector_cleartomark(ctx);
321
15.6k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
322
15.6k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
323
15.6k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
324
15.6k
        return code;
325
15.6k
    }
326
629k
    code = pdfi_loop_detector_cleartomark(ctx);
327
629k
    if (code < 0) {
328
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
329
0
        return code;
330
0
    }
331
332
629k
    if (i < 0 || (i + offset)> ctx->main_stream_length) {
333
38.8k
        char extra_info[gp_file_name_sizeof];
334
335
38.8k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
336
38.8k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) {
337
0
            pdfi_pop(ctx, 1);
338
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
339
0
            return code;
340
0
        }
341
590k
    } else {
342
590k
        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
343
590k
        if (code < 0) {
344
0
            pdfi_pop(ctx, 1);
345
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
346
0
            return code;
347
0
        }
348
349
590k
        stream_obj->Length = 0;
350
590k
        stream_obj->length_valid = false;
351
352
590k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
353
590k
        if (code == 0) {
354
0
            char extra_info[gp_file_name_sizeof];
355
356
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
357
0
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
358
            /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
359
             * happens give up.
360
             */
361
0
            if (pdfi_count_stack(ctx) == 0) {
362
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
363
0
                return code;
364
0
            }
365
590k
        } else if (code < 0) {
366
0
            char extra_info[gp_file_name_sizeof];
367
368
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
369
0
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) {
370
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
371
0
                return code;
372
0
            }
373
590k
        } else if (code != TOKEN_ENDSTREAM) {
374
70.9k
            char extra_info[gp_file_name_sizeof];
375
376
70.9k
            gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
377
70.9k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) {
378
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
379
0
                return code;
380
0
            }
381
519k
        } else {
382
            /* Cache the Length in the stream object and mark it valid */
383
519k
            stream_obj->Length = i;
384
519k
            stream_obj->length_valid = true;
385
519k
        }
386
590k
    }
387
388
    /* If we failed to find a valid object, or the object wasn't a keyword, or the
389
     * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct
390
     * Length for streams if we have encrypted files, because we must install a
391
     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
392
     * Rather than mess about checking for encryption, we'll choose to just correctly
393
     * calculate the Length of all streams. Although this takes time, it will only
394
     * happen for files which are invalid.
395
     */
396
629k
    if (stream_obj->length_valid != true) {
397
109k
        char Buffer[10];
398
109k
        unsigned int bytes, total = 0;
399
109k
        int c = 0;
400
401
109k
        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
402
109k
        if (code < 0) {
403
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
404
0
            pdfi_pop(ctx, 1);
405
0
            return code;
406
0
        }
407
109k
        memset(Buffer, 0x00, 10);
408
109k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
409
109k
        if (bytes < 9) {
410
660
            pdfi_countdown(stream_obj); /* get rid of extra ref */
411
660
            return_error(gs_error_ioerror);
412
660
        }
413
414
109k
        total = bytes;
415
1.75G
        do {
416
1.75G
            if (memcmp(Buffer, "endstream", 9) == 0) {
417
66.4k
                if (Buffer[9] != 0x00)
418
66.4k
                    total--;
419
66.4k
                stream_obj->Length = total - 9;
420
66.4k
                stream_obj->length_valid = true;
421
66.4k
                break;
422
66.4k
            }
423
1.75G
            if (memcmp(Buffer, "endobj", 6) == 0) {
424
7.36k
                if (Buffer[9] != 0x00)
425
7.27k
                    total--;
426
7.36k
                stream_obj->Length = total - 6;
427
7.36k
                stream_obj->length_valid = true;
428
7.36k
                break;
429
7.36k
            }
430
1.75G
            memmove(Buffer, Buffer+1, 9);
431
1.75G
            c = pdfi_read_byte(ctx, ctx->main_stream);
432
1.75G
            if (c < 0)
433
35.3k
                break;
434
1.75G
            Buffer[9] = (byte)c;
435
1.75G
            total++;
436
1.75G
        } while(1);
437
109k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
438
109k
        if (c < 0)
439
35.3k
            return_error(gs_error_ioerror);
440
73.8k
        return 0;
441
109k
    }
442
443
519k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
444
519k
    if (code < 0) {
445
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
446
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) {
447
0
            return code;
448
0
        }
449
        /* Something went wrong looking for endobj, but we found endstream, so assume
450
         * for now that will suffice.
451
         */
452
0
        return 0;
453
0
    }
454
455
519k
    if (code == 0) {
456
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
457
0
        return_error(gs_error_stackunderflow);
458
0
    }
459
460
519k
    if (code != TOKEN_ENDOBJ) {
461
1.51k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
462
1.51k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
463
        /* Didn't find an endobj, but we have an endstream, so assume
464
         * for now that will suffice
465
         */
466
1.51k
        return code;
467
1.51k
    }
468
517k
    pdfi_countdown(stream_obj); /* get rid of extra ref */
469
470
517k
    return 0;
471
519k
}
472
473
/* This reads an object *after* the x y obj keyword has been found. Its broken out
474
 * separately for the benefit of the repair code when reading the dictionary following
475
 * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
476
 * not have an 'endobj', we rely on the error handling to take care of that for us.
477
 */
478
int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
479
1.83M
{
480
1.83M
    int code = 0, initial_depth = 0;
481
1.83M
    pdf_key keyword;
482
1.83M
    gs_offset_t saved_offset[3];
483
1.83M
    pdf_obj_type type;
484
485
1.83M
    initial_depth = pdfi_count_stack(ctx);
486
1.83M
    saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
487
488
1.83M
    code = pdfi_read_token(ctx, s, objnum, gen);
489
1.83M
    if (code < 0)
490
5.03k
        return code;
491
492
1.83M
    if (code == 0)
493
        /* failed to read a token */
494
74
        return_error(gs_error_syntaxerror);
495
496
1.83M
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
497
23.7k
        keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
498
23.7k
        if (keyword == TOKEN_ENDOBJ) {
499
359
            ctx->stack_top[-1] = PDF_NULL_OBJ;
500
359
            return 0;
501
359
        }
502
23.7k
    }
503
504
60.8M
    do {
505
        /* move all the saved offsets up by one */
506
60.8M
        saved_offset[0] = saved_offset[1];
507
60.8M
        saved_offset[1] = saved_offset[2];
508
60.8M
        saved_offset[2] = pdfi_unread_tell(ctx);
509
510
60.8M
        code = pdfi_read_token(ctx, s, objnum, gen);
511
60.8M
        if (code < 0) {
512
189k
            pdfi_clearstack(ctx);
513
189k
            return code;
514
189k
        }
515
60.6M
        if (s->eof)
516
2.79k
            return_error(gs_error_syntaxerror);
517
60.6M
        code = 0;
518
60.6M
        type = pdfi_type_of(ctx->stack_top[-1]);
519
60.6M
        if (type == PDF_KEYWORD)
520
147k
            goto missing_endobj;
521
60.6M
    } while (type != PDF_FAST_KEYWORD);
522
523
1.49M
    keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
524
1.49M
    if (keyword == TOKEN_ENDOBJ) {
525
787k
        pdf_obj *o;
526
527
787k
        if (pdfi_count_stack(ctx) - initial_depth < 2) {
528
249
            pdfi_clearstack(ctx);
529
249
            return_error(gs_error_stackunderflow);
530
249
        }
531
532
787k
        o = ctx->stack_top[-2];
533
534
787k
        pdfi_pop(ctx, 1);
535
536
787k
        if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
537
786k
            o->indirect_num = o->object_num = objnum;
538
786k
            o->indirect_gen = o->generation_num = gen;
539
786k
        }
540
787k
        return code;
541
787k
    }
542
705k
    if (keyword == TOKEN_STREAM) {
543
654k
        pdfi_pop(ctx, 1);
544
654k
        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
545
654k
    }
546
50.9k
    if (keyword == TOKEN_OBJ) {
547
6.29k
        pdf_obj *o;
548
549
6.29k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) {
550
0
            return code;
551
0
        }
552
553
        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
554
6.29k
        if (pdfi_count_stack(ctx) - initial_depth < 4)
555
1.49k
            return_error(gs_error_stackunderflow);
556
557
        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
558
4.80k
        o = ctx->stack_top[-4];
559
560
4.80k
        pdfi_pop(ctx, 3);
561
562
4.80k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
563
4.77k
            o->indirect_num = o->object_num = objnum;
564
4.77k
            o->indirect_gen = o->generation_num = gen;
565
4.77k
        }
566
4.80k
        if (saved_offset[0] > 0)
567
4.80k
            (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
568
4.80k
        return 0;
569
6.29k
    }
570
571
192k
missing_endobj:
572
    /* Assume that any other keyword means a missing 'endobj' */
573
192k
    if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) {
574
192k
        pdf_obj *o;
575
576
192k
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
577
578
192k
        if (pdfi_count_stack(ctx) - initial_depth < 2)
579
3.33k
            return_error(gs_error_stackunderflow);
580
581
189k
        o = ctx->stack_top[-2];
582
583
189k
        pdfi_pop(ctx, 1);
584
585
189k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
586
186k
            o->indirect_num = o->object_num = objnum;
587
186k
            o->indirect_gen = o->generation_num = gen;
588
186k
        }
589
189k
        return code;
590
192k
    }
591
0
    pdfi_pop(ctx, 2);
592
0
    return_error(gs_error_syntaxerror);
593
192k
}
594
595
/* Read a complete indirect object from 's', starting at the object number.
 *
 * An object consists of 'num gen obj' followed by a token, followed by an endobj.
 * This function reads and checks the 'num gen obj' header, then hands over to
 * pdfi_read_bare_object() for the body (which also deals with a 'stream'
 * keyword appearing in place of the 'endobj').
 *
 * Returns a negative gs_error code on failure, otherwise the result of
 * pdfi_read_bare_object().
 */
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
{
    int object_number = 0, generation = 0;
    int rc;

    /* The object number */
    rc = pdfi_read_bare_int(ctx, s, &object_number);
    if (rc <= 0) {
        if (rc < 0)
            return rc;
        return_error(gs_error_syntaxerror);
    }

    /* The generation number */
    rc = pdfi_read_bare_int(ctx, s, &generation);
    if (rc <= 0) {
        if (rc < 0)
            return rc;
        return_error(gs_error_syntaxerror);
    }

    /* The 'obj' keyword */
    rc = pdfi_read_bare_keyword(ctx, s);
    if (rc < 0)
        return rc;
    if (rc == 0)
        return gs_note_error(gs_error_ioerror);
    if (rc != TOKEN_OBJ) {
        return_error(gs_error_syntaxerror);
    }

    return pdfi_read_bare_object(ctx, s, stream_offset, object_number, generation);
}
628
629
static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
630
                                 const xref_entry *entry, bool cache)
631
958k
{
632
958k
    int code = 0;
633
958k
    xref_entry *compressed_entry;
634
958k
    pdf_c_stream *compressed_stream = NULL;
635
958k
    pdf_c_stream *SubFile_stream = NULL;
636
958k
    pdf_c_stream *Object_stream = NULL;
637
958k
    int i = 0, object_length = 0;
638
958k
    int64_t num_entries;
639
958k
    int found_object;
640
958k
    int64_t Length, First;
641
958k
    gs_offset_t offset = 0;
642
958k
    pdf_stream *compressed_object = NULL;
643
958k
    pdf_dict *compressed_sdict = NULL; /* alias */
644
958k
    pdf_name *Type = NULL;
645
646
958k
    if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1)
647
1.08k
        return_error(gs_error_undefined);
648
649
957k
    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
650
651
957k
    if (ctx->args.pdfdebug) {
652
0
        outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj);
653
0
        outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num);
654
0
    }
655
656
957k
    if (compressed_entry->cache == NULL) {
657
#if CACHE_STATISTICS
658
        ctx->compressed_misses++;
659
#endif
660
61.8k
        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
661
61.8k
        if (code < 0)
662
0
            goto exit;
663
664
61.8k
        code = pdfi_read_object(ctx, ctx->main_stream, 0);
665
61.8k
        if (code < 0)
666
9.20k
            goto exit;
667
668
52.6k
        if (pdfi_count_stack(ctx) < 1) {
669
0
            code = gs_note_error(gs_error_stackunderflow);
670
0
            goto exit;
671
0
        }
672
673
52.6k
        if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) {
674
12.0k
            pdfi_pop(ctx, 1);
675
12.0k
            code = gs_note_error(gs_error_typecheck);
676
12.0k
            goto exit;
677
12.0k
        }
678
40.6k
        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
679
291
            pdfi_pop(ctx, 1);
680
            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
681
291
            code = gs_note_error(gs_error_undefined);
682
291
            goto exit;
683
291
        }
684
40.3k
        compressed_object = (pdf_stream *)ctx->stack_top[-1];
685
40.3k
        pdfi_countup(compressed_object);
686
40.3k
        pdfi_pop(ctx, 1);
687
40.3k
        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
688
40.3k
        if (code < 0)
689
0
            goto exit;
690
895k
    } else {
691
#if CACHE_STATISTICS
692
        ctx->compressed_hits++;
693
#endif
694
895k
        compressed_object = (pdf_stream *)compressed_entry->cache->o;
695
895k
        pdfi_countup(compressed_object);
696
895k
        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
697
895k
    }
698
936k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
699
936k
    if (code < 0)
700
20
        return code;
701
702
936k
    if (ctx->loop_detection != NULL) {
703
930k
        code = pdfi_loop_detector_mark(ctx);
704
930k
        if (code < 0)
705
0
            goto exit;
706
930k
        if (compressed_sdict->object_num != 0) {
707
930k
            if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) {
708
212
                code = gs_note_error(gs_error_circular_reference);
709
929k
            } else {
710
929k
                code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num);
711
929k
            }
712
930k
            if (code < 0) {
713
212
                (void)pdfi_loop_detector_cleartomark(ctx);
714
212
                goto exit;
715
212
            }
716
930k
        }
717
930k
    }
718
    /* Check its an ObjStm ! */
719
935k
    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
720
935k
    if (code < 0) {
721
311
        if (ctx->loop_detection != NULL)
722
311
            (void)pdfi_loop_detector_cleartomark(ctx);
723
311
        goto exit;
724
311
    }
725
726
935k
    if (!pdfi_name_is(Type, "ObjStm")){
727
1.21k
        if (ctx->loop_detection != NULL)
728
1.21k
            (void)pdfi_loop_detector_cleartomark(ctx);
729
1.21k
        code = gs_note_error(gs_error_syntaxerror);
730
1.21k
        goto exit;
731
1.21k
    }
732
733
    /* Need to check the /N entry to see if the object is actually in this stream! */
734
934k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
735
934k
    if (code < 0) {
736
276
        if (ctx->loop_detection != NULL)
737
276
            (void)pdfi_loop_detector_cleartomark(ctx);
738
276
        goto exit;
739
276
    }
740
741
934k
    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
742
81
        if (ctx->loop_detection != NULL)
743
81
            (void)pdfi_loop_detector_cleartomark(ctx);
744
81
        code = gs_note_error(gs_error_rangecheck);
745
81
        goto exit;
746
81
    }
747
748
934k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
749
934k
    if (code < 0) {
750
145k
        if (ctx->loop_detection != NULL)
751
145k
            (void)pdfi_loop_detector_cleartomark(ctx);
752
145k
        goto exit;
753
145k
    }
754
755
788k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First);
756
788k
    if (code < 0) {
757
1.18k
        if (ctx->loop_detection != NULL)
758
1.18k
            (void)pdfi_loop_detector_cleartomark(ctx);
759
1.18k
        goto exit;
760
1.18k
    }
761
762
787k
    if (ctx->loop_detection != NULL)
763
781k
        (void)pdfi_loop_detector_cleartomark(ctx);
764
765
787k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
766
787k
    if (code < 0)
767
0
        goto exit;
768
769
787k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
770
787k
    if (code < 0)
771
0
        goto exit;
772
773
787k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
774
787k
    if (code < 0)
775
1.56k
        goto exit;
776
777
47.0M
    for (i=0;i < num_entries;i++)
778
46.2M
    {
779
46.2M
        int new_offset;
780
46.2M
        code = pdfi_read_bare_int(ctx, compressed_stream, &found_object);
781
46.2M
        if (code < 0)
782
5.65k
            goto exit;
783
46.2M
        if (code == 0) {
784
596
            code = gs_note_error(gs_error_syntaxerror);
785
596
            goto exit;
786
596
        }
787
46.2M
        code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset);
788
46.2M
        if (code < 0)
789
5.28k
            goto exit;
790
46.2M
        if (code == 0) {
791
721
            code = gs_note_error(gs_error_syntaxerror);
792
721
            goto exit;
793
721
        }
794
46.2M
        if (i == entry->u.compressed.object_index) {
795
779k
            if (found_object != obj) {
796
839
                code = gs_note_error(gs_error_undefined);
797
839
                goto exit;
798
839
            }
799
778k
            offset = new_offset;
800
778k
        }
801
46.2M
        if (i == entry->u.compressed.object_index + 1)
802
748k
            object_length = new_offset - offset;
803
46.2M
    }
804
805
    /* Bug #705259 - The first object need not lie immediately after the initial
806
     * table of object numbers and offsets. The start of the first object is given
807
     * by the value of First. We don't know how many bytes we consumed getting to
808
     * the end of the table, unfortunately, so we close the stream, rewind the main
809
     * stream back to the beginning of the ObjStm, and then read and discard 'First'
810
     * bytes in order to get to the start of the first object. Then we read the
811
     * number of bytes required to get from there to the start of the object we
812
     * actually want.
813
     * If this ever looks like it's causing performance problems we could read the
814
     * initial table above manually instead of using the existing code, and track
815
     * how many bytes we'd read, which would avoid us having to tear down and
816
     * rebuild the stream.
817
     */
818
772k
    if (compressed_stream) {
819
772k
        pdfi_close_file(ctx, compressed_stream);
820
772k
        compressed_stream = NULL;
821
772k
    }
822
772k
    if (SubFile_stream) {
823
772k
        pdfi_close_file(ctx, SubFile_stream);
824
772k
        SubFile_stream = NULL;
825
772k
    }
826
827
772k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
828
772k
    if (code < 0)
829
0
        goto exit;
830
831
    /* We already dereferenced this above, so we don't need the loop detection checking here */
832
772k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
833
772k
    if (code < 0)
834
0
        goto exit;
835
836
772k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
837
772k
    if (code < 0)
838
0
        goto exit;
839
840
772k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
841
772k
    if (code < 0)
842
0
        goto exit;
843
844
399M
    for (i=0;i < First;i++)
845
398M
    {
846
398M
        int c = pdfi_read_byte(ctx, compressed_stream);
847
398M
        if (c < 0) {
848
42
            code = gs_note_error(gs_error_ioerror);
849
42
            goto exit;
850
42
        }
851
398M
    }
852
853
    /* Skip to the offset of the object we want to read */
854
2.60G
    for (i=0;i < offset;i++)
855
2.60G
    {
856
2.60G
        int c = pdfi_read_byte(ctx, compressed_stream);
857
2.60G
        if (c < 0) {
858
44.3k
            code = gs_note_error(gs_error_ioerror);
859
44.3k
            goto exit;
860
44.3k
        }
861
2.60G
    }
862
863
    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
864
     * the number of bytes we read to the declared size of the object (difference between
865
     * the offsets of the object we want to read, and the next object). If it is 0 then
866
     * we're reading the last object in the stream, so we just rely on the SubFileDecode
867
     * we set up when we created compressed_stream to limit the bytes to the length of
868
     * that stream.
869
     */
870
728k
    if (object_length > 0) {
871
699k
        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
872
699k
        if (code < 0)
873
0
            goto exit;
874
699k
    } else {
875
28.4k
        Object_stream = compressed_stream;
876
28.4k
    }
877
878
728k
    code = pdfi_read_token(ctx, Object_stream, obj, gen);
879
728k
    if (code < 0)
880
3.81k
        goto exit;
881
724k
    if (code == 0) {
882
85
        code = gs_note_error(gs_error_syntaxerror);
883
85
        goto exit;
884
85
    }
885
724k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) {
886
713k
        int start_depth = pdfi_count_stack(ctx);
887
888
        /* Need to read all the elements from COS objects */
889
25.6M
        do {
890
25.6M
            code = pdfi_read_token(ctx, Object_stream, obj, gen);
891
25.6M
            if (code < 0)
892
22.1k
                goto exit;
893
25.6M
            if (code == 0) {
894
5.28k
                code = gs_note_error(gs_error_syntaxerror);
895
5.28k
                goto exit;
896
5.28k
            }
897
25.6M
            if (compressed_stream->eof == true) {
898
407
                code = gs_note_error(gs_error_ioerror);
899
407
                goto exit;
900
407
            }
901
25.6M
        } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
902
713k
    }
903
904
696k
    *object = ctx->stack_top[-1];
905
    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
906
     * the object number for uncompressed objects. So we need to do that here.
907
     */
908
696k
    if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
909
693k
        (*object)->indirect_num = (*object)->object_num = obj;
910
693k
        (*object)->indirect_gen = (*object)->generation_num = gen;
911
693k
        pdfi_countup(*object);
912
693k
    }
913
696k
    pdfi_pop(ctx, 1);
914
915
696k
    if (cache) {
916
691k
        code = pdfi_add_to_cache(ctx, *object);
917
691k
        if (code < 0) {
918
0
            pdfi_countdown(*object);
919
0
            goto exit;
920
0
        }
921
691k
    }
922
923
957k
 exit:
924
957k
    if (Object_stream)
925
728k
        pdfi_close_file(ctx, Object_stream);
926
957k
    if (Object_stream != compressed_stream)
927
757k
        if (compressed_stream)
928
757k
            pdfi_close_file(ctx, compressed_stream);
929
957k
    if (SubFile_stream)
930
787k
        pdfi_close_file(ctx, SubFile_stream);
931
957k
    pdfi_countdown(compressed_object);
932
957k
    pdfi_countdown(Type);
933
957k
    return code;
934
696k
}
935
936
/* pdf_dereference returns an object with a reference count of at least 1, this represents the
937
 * reference being held by the caller (in **object) when we return from this function.
938
 */
939
static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache)
940
6.33M
{
941
6.33M
    xref_entry *entry;
942
6.33M
    int code, stack_depth = pdfi_count_stack(ctx);
943
6.33M
    gs_offset_t saved_stream_offset;
944
6.33M
    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;
945
946
6.33M
    *object = NULL;
947
948
6.33M
    if (ctx->xref_table == NULL)
949
60
        return_error(gs_error_typecheck);
950
951
6.33M
    if (ctx->main_stream == NULL || ctx->main_stream->s == NULL)
952
0
        return_error(gs_error_ioerror);
953
954
6.33M
    if (obj >= ctx->xref_table->xref_size) {
955
288k
        char extra_info[gp_file_name_sizeof];
956
957
288k
        gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
958
288k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) {
959
0
            return code;
960
0
        }
961
962
288k
        code = pdfi_repair_file(ctx);
963
288k
        if (code < 0) {
964
288k
            *object = NULL;
965
288k
            return code;
966
288k
        }
967
34
        if (obj >= ctx->xref_table->xref_size) {
968
20
            *object = NULL;
969
20
            return_error(gs_error_rangecheck);
970
20
        }
971
34
    }
972
973
6.05M
    entry = &ctx->xref_table->xref[obj];
974
975
6.05M
    if(entry->object_num == 0) {
976
1.50M
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0");
977
1.50M
        return_error(gs_error_undefined);
978
1.50M
    }
979
980
4.55M
    if (entry->free) {
981
6.43k
        char extra_info[gp_file_name_sizeof];
982
983
6.43k
        gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
984
6.43k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
985
6.43k
        *object = PDF_NULL_OBJ;
986
6.43k
        return code;
987
4.54M
    }else {
988
4.54M
        if (!entry->compressed) {
989
3.38M
            if(entry->u.uncompressed.generation_num != gen)
990
4.68k
                pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", "");
991
3.38M
        }
992
4.54M
    }
993
994
4.54M
    if (ctx->loop_detection) {
995
4.23M
        if (pdfi_loop_detector_check_object(ctx, obj) == true)
996
1.66k
            return_error(gs_error_circular_reference);
997
4.23M
        if (entry->free) {
998
0
            code = pdfi_loop_detector_add_object(ctx, obj);
999
0
            if (code < 0)
1000
0
                return code;
1001
0
        }
1002
4.23M
    }
1003
4.54M
    if (entry->cache != NULL){
1004
1.81M
        pdf_obj_cache_entry *cache_entry = entry->cache;
1005
1006
#if CACHE_STATISTICS
1007
        ctx->hits++;
1008
#endif
1009
1.81M
        *object = cache_entry->o;
1010
1.81M
        pdfi_countup(*object);
1011
1012
1.81M
        pdfi_promote_cache_entry(ctx, cache_entry);
1013
2.72M
    } else {
1014
2.72M
        saved_stream_offset = pdfi_unread_tell(ctx);
1015
1016
2.72M
        if (entry->compressed) {
1017
            /* This is an object in a compressed object stream */
1018
958k
            ctx->encryption.decrypt_strings = false;
1019
1020
958k
            code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache);
1021
958k
            if (code < 0 || *object == NULL)
1022
262k
                goto error;
1023
1.76M
        } else {
1024
#if CACHE_STATISTICS
1025
            ctx->misses++;
1026
#endif
1027
1.76M
            ctx->encryption.decrypt_strings = true;
1028
1029
1.76M
            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
1030
1.76M
            if (code < 0)
1031
140
                goto error;
1032
1033
1.76M
            code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset);
1034
1035
            /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it.
1036
             * reload the xref entry to be certain it is valid.
1037
             */
1038
1.76M
            entry = &ctx->xref_table->xref[obj];
1039
1.76M
            if (code < 0) {
1040
281k
                int code1 = 0;
1041
281k
                if (entry->free) {
1042
0
                    char extra_info[gp_file_name_sizeof];
1043
1044
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1045
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1046
0
                    *object = PDF_NULL_OBJ;
1047
0
                    if (code < 0)
1048
0
                        goto error;
1049
0
                    goto free_obj;
1050
0
                }
1051
281k
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
1052
281k
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1053
281k
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1054
1055
281k
                code1 = pdfi_repair_file(ctx);
1056
281k
                if (code1 == 0)
1057
1.51k
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1058
                /* Repair failed, just give up and return an error */
1059
279k
                goto error;
1060
281k
            }
1061
1062
            /* We only expect a single object back when dereferencing an indirect reference
1063
             * The only way (I think) we can end up with more than one is if the object initially
1064
             * appears to be a dictionary or array, but the object terminates (with endobj or
1065
             * simply reaching EOF) without terminating the array or dictionary. That's clearly
1066
             * an error. We might, as a future 'improvement' choose to walk back through
1067
             * the stack looking for unterminated dictionary or array markers, and closing them
1068
             * so that (hopefully!) we end up with a single 'repaired' object on the stack.
1069
             * But for now I'm simply going to treat these as errors. We will try a repair on the
1070
             * file to see if we end up using a different (hopefully intact) object from the file.
1071
             */
1072
1.48M
            if (pdfi_count_stack(ctx) - stack_depth > 1) {
1073
132k
                int code1 = 0;
1074
1075
132k
                code1 = pdfi_repair_file(ctx);
1076
132k
                if (code1 == 0)
1077
560
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1078
                /* Repair failed, just give up and return an error */
1079
131k
                code = gs_note_error(gs_error_syntaxerror);
1080
131k
                goto error;
1081
132k
            }
1082
1083
1.35M
            if (pdfi_count_stack(ctx) > 0 &&
1084
1.35M
                ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) &&
1085
1.35M
                (ctx->stack_top[-1])->object_num == obj)
1086
1.35M
                || ctx->stack_top[-1] == PDF_NULL_OBJ)) {
1087
1.35M
                *object = ctx->stack_top[-1];
1088
1.35M
                pdfi_countup(*object);
1089
1.35M
                pdfi_pop(ctx, 1);
1090
1.35M
                if (pdfi_type_of(*object) == PDF_INDIRECT) {
1091
1
                    pdf_indirect_ref *iref = (pdf_indirect_ref *)*object;
1092
1093
1
                    if (iref->ref_object_num == obj) {
1094
0
                        code = gs_note_error(gs_error_circular_reference);
1095
0
                        pdfi_countdown(*object);
1096
0
                        *object = NULL;
1097
0
                        goto error;
1098
0
                    }
1099
1
                }
1100
                /* There's really no point in caching an indirect reference and
1101
                 * I think it could be potentially confusing to later calls.
1102
                 */
1103
1.35M
                if (cache && pdfi_type_of(*object) != PDF_INDIRECT) {
1104
1.34M
                    code = pdfi_add_to_cache(ctx, *object);
1105
1.34M
                    if (code < 0) {
1106
0
                        pdfi_countdown(*object);
1107
0
                        goto error;
1108
0
                    }
1109
1.34M
                }
1110
1.35M
            } else {
1111
1.77k
                int code1 = 0;
1112
1113
1.77k
                if (pdfi_count_stack(ctx) > 0)
1114
1.65k
                    pdfi_pop(ctx, 1);
1115
1116
1.77k
                if (entry->free) {
1117
0
                    char extra_info[gp_file_name_sizeof];
1118
1119
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1120
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1121
0
                    *object = PDF_NULL_OBJ;
1122
0
                    if (code < 0)
1123
0
                        goto error;
1124
0
                    return code;
1125
0
                }
1126
1.77k
                code1 = pdfi_repair_file(ctx);
1127
1.77k
                if (code1 == 0)
1128
215
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1129
                /* Repair failed, just give up and return an error */
1130
1.55k
                code = gs_note_error(gs_error_undefined);
1131
1.55k
                goto error;
1132
1.77k
            }
1133
1.35M
        }
1134
2.04M
free_obj:
1135
2.04M
        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1136
2.04M
    }
1137
1138
3.86M
    if (ctx->loop_detection && pdf_object_num(*object) != 0) {
1139
3.56M
        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
1140
3.56M
        if (code < 0) {
1141
0
            ctx->encryption.decrypt_strings = saved_decrypt_strings;
1142
0
            return code;
1143
0
        }
1144
3.56M
    }
1145
3.86M
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1146
3.86M
    return 0;
1147
1148
675k
error:
1149
675k
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1150
675k
    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1151
    /* Return the stack to the state at entry */
1152
675k
    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1153
675k
    return code;
1154
3.86M
}
1155
1156
/* Dereference object 'obj' (generation 'gen'), asking pdfi_dereference_main()
 * to add the result to the object cache. The result is returned in *object.
 */
int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = true;

    return pdfi_dereference_main(ctx, obj, gen, object, use_cache);
}
1160
1161
/* Dereference object 'obj' (generation 'gen'), asking pdfi_dereference_main()
 * NOT to add the result to the object cache. The result is returned in *object.
 */
int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = false;

    return pdfi_dereference_main(ctx, obj, gen, object, use_cache);
}
1165
1166
/* do a derefence with loop detection */
1167
int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1168
2.58M
{
1169
2.58M
    int code;
1170
1171
2.58M
    code = pdfi_loop_detector_mark(ctx);
1172
2.58M
    if (code < 0)
1173
0
        return code;
1174
1175
2.58M
    code = pdfi_dereference(ctx, obj, gen, object);
1176
2.58M
    (void)pdfi_loop_detector_cleartomark(ctx);
1177
2.58M
    return code;
1178
2.58M
}
1179
1180
/* As pdfi_deref_loop_detect(), but the dereferenced object is not added to
 * the object cache. If the mark cannot be set its error is returned and no
 * dereference is attempted.
 */
int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    int code = pdfi_loop_detector_mark(ctx);

    if (code >= 0) {
        code = pdfi_dereference_nocache(ctx, obj, gen, object);
        /* The result of the dereference is what matters to the caller. */
        (void)pdfi_loop_detector_cleartomark(ctx);
    }
    return code;
}
1192
1193
/* Replace the indirect references held in an array with the objects they
 * refer to.
 *
 * obj     - the array to process (cast to pdf_array here).
 * recurse - if true, each fetched element is itself resolved via
 *           pdfi_resolve_indirect_loop_detect() before being stored.
 *
 * Elements that turn out to be streams are not stored back (they stay as
 * references). Any error, including a circular reference, stops processing
 * and is returned. Returns 0 on success.
 */
static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    uint64_t index, arraysize;
    pdf_obj *object = NULL;
    pdf_array *array = (pdf_array *)obj;

    arraysize = pdfi_array_size(array);
    for (index = 0; index < arraysize; index++) {
        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                goto exit;
        }

        code = pdfi_array_get_no_store_R(ctx, array, index, &object);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Leave via 'exit' so that any object we were just handed is released. */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Previously we just left as an indirect reference, but now we want
             * to return the error so we don't end up replacing indirect references
             * to objects with circular references.
             */
        } else {
            if (code < 0) goto exit;
            if (recurse) {
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse);
                if (code < 0) goto exit;
            }
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(object) != PDF_STREAM)
                code = pdfi_array_put(ctx, array, index, object);
        }
        if (code < 0) goto exit;

        /* Drop our reference before moving on to the next element. */
        pdfi_countdown(object);
        object = NULL;
    }

 exit:
    pdfi_countdown(object);
    return code;
}
1241
1242
/* Replace the indirect references held as dictionary values with the objects
 * they refer to.
 *
 * obj     - the dictionary to process (cast to pdf_dict here).
 * recurse - if true, each fetched value is itself resolved via
 *           pdfi_resolve_indirect_loop_detect() before being stored.
 *
 * The "Parent" key is skipped. Values that turn out to be streams are not
 * stored back (they stay as references). A circular reference on a value is
 * not an error here: that value is simply left as an indirect reference.
 * Returns 0 on success or the first other error encountered.
 */
static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    pdf_dict *dict = (pdf_dict *)obj;
    pdf_name *Key = NULL;
    pdf_obj *Value = NULL;
    uint64_t index, dictsize;

    dictsize = pdfi_dict_entries(dict);

    /* Note: I am not using pdfi_dict_first/next because of needing to handle
     * circular references.
     */
    for (index=0; index<dictsize; index ++) {
        Key = (pdf_name *)dict->list[index].key;
        if (pdfi_name_is(Key, "Parent"))
            continue;

        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                goto exit;
        }

        code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Leave via 'exit' so that any value we were just handed is released. */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Just leave as an indirect ref */
            code = 0;
        } else {
            if (code < 0) goto exit;
            if (recurse) {
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse);
                if (code < 0)
                    goto exit;
            }
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(Value) != PDF_STREAM)
                code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true);
        }
        if (code < 0) goto exit;

        /* Drop our reference before moving on to the next entry. */
        pdfi_countdown(Value);
        Value = NULL;
    }

 exit:
    pdfi_countdown(Value);
    return code;
}
1298
1299
/* Resolve all the indirect references for an object
1300
 * Note: This can be recursive
1301
 */
1302
int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
1303
305k
{
1304
305k
    int code = 0;
1305
1306
305k
    switch(pdfi_type_of(value)) {
1307
74.0k
    case PDF_ARRAY:
1308
74.0k
        code = pdfi_resolve_indirect_array(ctx, value, recurse);
1309
74.0k
        break;
1310
10.4k
    case PDF_DICT:
1311
10.4k
        code = pdfi_resolve_indirect_dict(ctx, value, recurse);
1312
10.4k
        break;
1313
221k
    default:
1314
221k
        break;
1315
305k
    }
1316
305k
    return code;
1317
305k
}
1318
1319
/* Resolve all the indirect references for an object
1320
 * Resolve indirect references, either one level or recursively, with loop detect on
1321
 * the parent (can by NULL) and the value.
1322
 */
1323
int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
1324
305k
{
1325
305k
    int code = 0;
1326
1327
305k
    code = pdfi_loop_detector_mark(ctx);
1328
305k
    if (code < 0) goto exit;
1329
305k
    if (parent && parent->object_num != 0) {
1330
298k
        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
1331
298k
        if (code < 0) goto exit;
1332
298k
    }
1333
1334
305k
    if (pdf_object_num(value) != 0) {
1335
1.87k
        if (pdfi_loop_detector_check_object(ctx, value->object_num)) {
1336
5
            code = gs_note_error(gs_error_circular_reference);
1337
5
            goto exit;
1338
5
        }
1339
1.87k
        code = pdfi_loop_detector_add_object(ctx, value->object_num);
1340
1.87k
        if (code < 0) goto exit;
1341
1.87k
    }
1342
305k
    code = pdfi_resolve_indirect(ctx, value, recurse);
1343
1344
305k
 exit:
1345
305k
    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
1346
305k
    return code;
1347
305k
}