Coverage Report

Created: 2026-04-09 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/pdf/pdf_deref.c
Line
Count
Source
1
/* Copyright (C) 2020-2026 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* Functions to deal with dereferencing indirect objects
17
 * for the PDF interpreter. In here we also keep the code
18
 * for dealing with the object cache, because the dereferencing
19
 * functions are currently the only place that deals with it.
20
 */
21
22
#include "pdf_int.h"
23
#include "pdf_stack.h"
24
#include "pdf_loop_detect.h"
25
#include "strmio.h"
26
#include "stream.h"
27
#include "pdf_file.h"
28
#include "pdf_misc.h"
29
#include "pdf_dict.h"
30
#include "pdf_array.h"
31
#include "pdf_deref.h"
32
#include "pdf_repair.h"
33
34
/* Start with the object caching functions */
35
/* Disable object caching (for easier debugging with reference counting)
36
 * by uncommenting the following line
37
 */
38
/*#define DISABLE_CACHE*/
39
40
/* given an object, create a cache entry for it. If we have too many entries
41
 * then delete the leat-recently-used cache entry. Make the new entry be the
42
 * most-recently-used entry. The actual entries are attached to the xref table
43
 * (as well as being a double-linked list), because we detect an existing
44
 * cache entry by seeing that the xref table for the object number has a non-NULL
45
 * 'cache' member.
46
 * So we need to update the xref as well if we add or delete cache entries.
47
 */
48
static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
49
2.08M
{
50
2.08M
#ifndef DISABLE_CACHE
51
2.08M
    pdf_obj_cache_entry *entry;
52
53
2.08M
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
54
2.86k
        return 0;
55
56
2.08M
    if (o->object_num >= ctx->xref_table->xref_size)
57
0
        return_error(gs_error_rangecheck);
58
59
2.08M
    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
60
#if DEBUG_CACHE
61
        outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
62
#endif
63
0
        return_error(gs_error_unknownerror);
64
0
    }
65
66
#if DEBUG_CACHE
67
        dbgmprintf1(ctx->memory, "Adding object %d\n", o->object_num);
68
#endif
69
2.08M
    if (ctx->cache_entries == ctx->args.PDFCacheSize)
70
533k
    {
71
#if DEBUG_CACHE
72
        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
73
#endif
74
533k
        if (ctx->cache_LRU) {
75
533k
            entry = ctx->cache_LRU;
76
#if DEBUG_CACHE
77
            dbgmprintf1(ctx->memory, "Evicting %d\n", entry->o->object_num);
78
#endif
79
533k
            ctx->cache_LRU = entry->next;
80
533k
            if (entry->next)
81
533k
                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
82
533k
            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
83
533k
            pdfi_countdown(entry->o);
84
533k
            ctx->cache_entries--;
85
533k
            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
86
533k
        } else
87
0
            return_error(gs_error_unknownerror);
88
533k
    }
89
2.08M
    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
90
2.08M
    if (entry == NULL)
91
0
        return_error(gs_error_VMerror);
92
93
2.08M
    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
94
95
2.08M
    entry->o = o;
96
2.08M
    pdfi_countup(o);
97
2.08M
    if (ctx->cache_MRU) {
98
2.01M
        entry->previous = ctx->cache_MRU;
99
2.01M
        ctx->cache_MRU->next = entry;
100
2.01M
    }
101
2.08M
    ctx->cache_MRU = entry;
102
2.08M
    if (ctx->cache_LRU == NULL)
103
69.4k
        ctx->cache_LRU = entry;
104
105
2.08M
    ctx->cache_entries++;
106
2.08M
    ctx->xref_table->xref[o->object_num].cache = entry;
107
2.08M
#endif
108
2.08M
    return 0;
109
2.08M
}
110
111
/* Given an existing cache entry, promote it to be the most-recently-used
112
 * cache entry.
113
 */
114
static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
115
3.66M
{
116
3.66M
#ifndef DISABLE_CACHE
117
3.66M
    if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
118
2.35M
        if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
119
2.35M
            ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
120
2.35M
        if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
121
2.35M
            ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
122
1.49k
        else {
123
            /* the existing entry is the current least recently used, we need to make the 'next'
124
             * cache entry into the LRU.
125
             */
126
1.49k
            ctx->cache_LRU = cache_entry->next;
127
1.49k
        }
128
2.35M
        cache_entry->next = NULL;
129
2.35M
        cache_entry->previous = ctx->cache_MRU;
130
2.35M
        ctx->cache_MRU->next = cache_entry;
131
2.35M
        ctx->cache_MRU = cache_entry;
132
2.35M
    }
133
3.66M
#endif
134
3.66M
    return;
135
3.66M
}
136
137
/* Make sure an object is in the object cache and is the most-recently-used
 * entry. Objects with object number 0 (direct objects) and token
 * pseudo-objects are ignored. If the object number already has a cache entry
 * that entry is promoted, otherwise a new entry is added.
 *
 * Returns 0 on success or when nothing needs caching, gs_error_rangecheck if
 * the object number is outside the xref table, or whatever error
 * pdfi_add_to_cache() reports.
 */
int pdfi_cache_object(pdf_context *ctx, pdf_obj *o)
{
    pdf_obj_cache_entry *cached;

    /* Token pseudo-objects are not real allocations and must not be
     * dereferenced; pdfi_add_to_cache() ignores them in the same way.
     */
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
        return 0;

    if (o->object_num == 0)
        return 0;

    /* Validate before indexing the xref array; pdfi_add_to_cache() performs
     * the same check but only after we would already have read the entry.
     */
    if (o->object_num >= ctx->xref_table->xref_size)
        return_error(gs_error_rangecheck);

    cached = ctx->xref_table->xref[o->object_num].cache;
    if (cached == NULL)
        return pdfi_add_to_cache(ctx, o);

    pdfi_promote_cache_entry(ctx, cached);
    return 0;
}
147
148
/* This one's a bit of an oddity, its used for fonts. When we build a PDF font object
149
 * we want the object cache to reference *that* object, not the dictionary which was
150
 * read out of the PDF file, so this allows us to replace the font dictionary in the
151
 * cache with the actual font object, so that later dereferences will get this font
152
 * object.
153
 */
154
int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
155
131k
{
156
131k
#ifndef DISABLE_CACHE
157
131k
    xref_entry *entry;
158
131k
    pdf_obj_cache_entry *cache_entry;
159
131k
    pdf_obj *old_cached_obj = NULL;
160
161
    /* Limited error checking here, we assume that things like the
162
     * validity of the object (eg not a free oobject) have already been handled.
163
     */
164
165
131k
    entry = &ctx->xref_table->xref[o->object_num];
166
131k
    cache_entry = entry->cache;
167
168
131k
    if (cache_entry == NULL) {
169
2.98k
        return(pdfi_add_to_cache(ctx, o));
170
128k
    } else {
171
        /* NOTE: We grab the object without decrementing, to avoid triggering
172
         * a warning message for freeing an object that's in the cache
173
         */
174
128k
        if (cache_entry->o != NULL)
175
128k
            old_cached_obj = cache_entry->o;
176
177
        /* Put new entry in the cache */
178
128k
        cache_entry->o = o;
179
128k
        pdfi_countup(o);
180
128k
        pdfi_promote_cache_entry(ctx, cache_entry);
181
182
        /* Now decrement the old cache entry, if any */
183
128k
        pdfi_countdown(old_cached_obj);
184
128k
    }
185
128k
#endif
186
128k
    return 0;
187
131k
}
188
189
/* Now the dereferencing functions */
190
191
/*
192
 * Technically we can accept a stream other than the main PDF file stream here. This is
193
 * really for the case of compressed objects where we read tokens from the compressed
194
 * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
195
 * on top of the main file stream, which may be useful. Note that this cannot work with
196
 * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
197
 * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
198
 * the stream we are using. See the comments below when keyword is PDF_STREAM.
199
 */
200
201
/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
202
 * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
203
 * Currently errors are inmpossible. This is only used by the decryption code
204
 * to determine if a string is in a compressed object stream, if it is then
205
 * it can't be used for decryption.
206
 */
207
int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
208
14.3k
{
209
14.3k
    xref_entry *entry;
210
211
    /* Can't possibly be a compressed object before we have finished reading
212
     * the xref.
213
     */
214
14.3k
    if (ctx->xref_table == NULL)
215
0
        return 0;
216
217
14.3k
    entry = &ctx->xref_table->xref[obj];
218
219
14.3k
    if (entry->compressed)
220
0
        return 1;
221
222
14.3k
    return 0;
223
14.3k
}
224
225
/* We should never read a 'stream' keyword from a compressed object stream
226
 * so this case should never end up here.
227
 */
228
static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
229
                                   uint32_t objnum, uint32_t gen)
230
643k
{
231
643k
    int code = 0;
232
643k
    int64_t i;
233
643k
    pdf_dict *dict = NULL;
234
643k
    gs_offset_t offset;
235
643k
    pdf_stream *stream_obj = NULL;
236
237
    /* Strange code time....
238
     * If we are using a stream which is *not* the PDF uncompressed main file stream
239
     * then doing stell on it will only tell us how many bytes have been read from
240
     * that stream, it won't tell us the underlying file position. So we add on the
241
     * 'unread' bytes, *and* we add on the position of the start of the stream in
242
     * the actual main file. This is all done so that we can check the /Length
243
     * of the object. Note that this will *only* work for regular objects it can
244
     * not be used for compressed object streams, but those don't need checking anyway
245
     * they have a different mechanism altogether and should never get here.
246
     */
247
643k
    if (s != ctx->main_stream) {
248
0
        offset = stell(s->s) - s->unread_size + stream_offset;
249
0
        code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
250
0
        if (code < 0)
251
0
            return_error(gs_error_ioerror);
252
643k
    } else {
253
643k
        offset = stell(s->s) - s->unread_size;
254
643k
    }
255
256
643k
    if (pdfi_count_stack(ctx) < 1)
257
0
        return_error(gs_error_stackunderflow);
258
259
643k
    dict = (pdf_dict *)ctx->stack_top[-1];
260
261
643k
    if (pdfi_type_of(dict) != PDF_DICT) {
262
8.42k
        pdfi_pop(ctx, 1);
263
8.42k
        return_error(gs_error_syntaxerror);
264
8.42k
    }
265
266
635k
    dict->indirect_num = dict->object_num = objnum;
267
635k
    dict->indirect_gen = dict->generation_num = gen;
268
269
    /* Convert the dict into a stream */
270
635k
    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
271
635k
    if (code < 0) {
272
0
        pdfi_pop(ctx, 1);
273
0
        return code;
274
0
    }
275
    /* Pop off the dict and push the stream */
276
635k
    pdfi_pop(ctx, 1);
277
635k
    dict = NULL;
278
635k
    pdfi_push(ctx, (pdf_obj *)stream_obj);
279
280
635k
    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
281
635k
    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
282
635k
    stream_obj->stream_offset = offset;
283
284
    /* Exceptional code. Normally we do not need to worry about detecting circular references
285
     * when reading objects, because we do not dereference any indirect objects. However streams
286
     * are a slight exception in that we do get the Length from the stream dictionay and if that
287
     * is an indirect reference, then we dereference it.
288
     * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect
289
     * reference to the same stream object, and leads to infinite recursion. So deal with that
290
     * possibility here.
291
     */
292
635k
    code = pdfi_loop_detector_mark(ctx);
293
635k
    if (code < 0) {
294
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
295
0
        return code;
296
0
    }
297
635k
    if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
298
107
        pdfi_countdown(stream_obj); /* get rid of extra ref */
299
107
        pdfi_loop_detector_cleartomark(ctx);
300
107
        return_error(gs_error_circular_reference);
301
107
    }
302
303
635k
    code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
304
635k
    if (code < 0) {
305
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
306
0
        pdfi_loop_detector_cleartomark(ctx);
307
0
        return code;
308
0
    }
309
310
    /* This code may be a performance overhead, it simply skips over the stream contents
311
     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
312
     * 'go faster' flag for users who are certain their PDF files are well-formed. This
313
     * could also allow us to skip all kinds of other checking.....
314
     */
315
316
635k
    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
317
635k
    if (code < 0) {
318
14.3k
        char extra_info[gp_file_name_sizeof];
319
320
14.3k
        (void)pdfi_loop_detector_cleartomark(ctx);
321
14.3k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
322
14.3k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
323
14.3k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
324
14.3k
        return code;
325
14.3k
    }
326
621k
    code = pdfi_loop_detector_cleartomark(ctx);
327
621k
    if (code < 0) {
328
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
329
0
        return code;
330
0
    }
331
332
621k
    if (i < 0 || (i + offset)> ctx->main_stream_length) {
333
32.4k
        char extra_info[gp_file_name_sizeof];
334
335
32.4k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
336
32.4k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) {
337
0
            pdfi_pop(ctx, 1);
338
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
339
0
            return code;
340
0
        }
341
588k
    } else {
342
588k
        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
343
588k
        if (code < 0) {
344
0
            pdfi_pop(ctx, 1);
345
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
346
0
            return code;
347
0
        }
348
349
588k
        stream_obj->Length = 0;
350
588k
        stream_obj->length_valid = false;
351
352
588k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
353
588k
        if (code == 0) {
354
0
            char extra_info[gp_file_name_sizeof];
355
356
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
357
0
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
358
            /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
359
             * happens give up.
360
             */
361
0
            if (pdfi_count_stack(ctx) == 0) {
362
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
363
0
                return code;
364
0
            }
365
588k
        } else if (code < 0) {
366
0
            char extra_info[gp_file_name_sizeof];
367
368
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
369
0
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) {
370
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
371
0
                return code;
372
0
            }
373
588k
        } else if (code != TOKEN_ENDSTREAM) {
374
64.5k
            char extra_info[gp_file_name_sizeof];
375
376
64.5k
            gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
377
64.5k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) {
378
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
379
0
                return code;
380
0
            }
381
524k
        } else {
382
            /* Cache the Length in the stream object and mark it valid */
383
524k
            stream_obj->Length = i;
384
524k
            stream_obj->length_valid = true;
385
524k
        }
386
588k
    }
387
388
    /* If we failed to find a valid object, or the object wasn't a keyword, or the
389
     * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct
390
     * Length for streams if we have encrypted files, because we must install a
391
     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
392
     * Rather than mess about checking for encryption, we'll choose to just correctly
393
     * calculate the Length of all streams. Although this takes time, it will only
394
     * happen for files which are invalid.
395
     */
396
621k
    if (stream_obj->length_valid != true) {
397
96.9k
        char Buffer[10];
398
96.9k
        unsigned int bytes, total = 0;
399
96.9k
        int c = 0;
400
401
96.9k
        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
402
96.9k
        if (code < 0) {
403
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
404
0
            pdfi_pop(ctx, 1);
405
0
            return code;
406
0
        }
407
96.9k
        memset(Buffer, 0x00, 10);
408
96.9k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
409
96.9k
        if (bytes < 9) {
410
624
            pdfi_countdown(stream_obj); /* get rid of extra ref */
411
624
            return_error(gs_error_ioerror);
412
624
        }
413
414
96.2k
        total = bytes;
415
1.67G
        do {
416
1.67G
            if (memcmp(Buffer, "endstream", 9) == 0) {
417
60.5k
                if (Buffer[9] != 0x00)
418
60.4k
                    total--;
419
60.5k
                stream_obj->Length = total - 9;
420
60.5k
                stream_obj->length_valid = true;
421
60.5k
                break;
422
60.5k
            }
423
1.67G
            if (memcmp(Buffer, "endobj", 6) == 0) {
424
6.44k
                if (Buffer[9] != 0x00)
425
6.36k
                    total--;
426
6.44k
                stream_obj->Length = total - 6;
427
6.44k
                stream_obj->length_valid = true;
428
6.44k
                break;
429
6.44k
            }
430
1.67G
            memmove(Buffer, Buffer+1, 9);
431
1.67G
            c = pdfi_read_byte(ctx, ctx->main_stream);
432
1.67G
            if (c < 0)
433
29.3k
                break;
434
1.67G
            Buffer[9] = (byte)c;
435
1.67G
            total++;
436
1.67G
        } while(1);
437
96.2k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
438
96.2k
        if (c < 0)
439
29.3k
            return_error(gs_error_ioerror);
440
66.9k
        return 0;
441
96.2k
    }
442
443
524k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
444
524k
    if (code < 0) {
445
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
446
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) {
447
0
            return code;
448
0
        }
449
        /* Something went wrong looking for endobj, but we found endstream, so assume
450
         * for now that will suffice.
451
         */
452
0
        return 0;
453
0
    }
454
455
524k
    if (code == 0) {
456
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
457
0
        return_error(gs_error_stackunderflow);
458
0
    }
459
460
524k
    if (code != TOKEN_ENDOBJ) {
461
1.37k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
462
1.37k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
463
        /* Didn't find an endobj, but we have an endstream, so assume
464
         * for now that will suffice
465
         */
466
1.37k
        return code;
467
1.37k
    }
468
522k
    pdfi_countdown(stream_obj); /* get rid of extra ref */
469
470
522k
    return 0;
471
524k
}
472
473
/* This reads an object *after* the x y obj keyword has been found. Its broken out
474
 * separately for the benefit of the repair code when reading the dictionary following
475
 * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
476
 * not have an 'endobj', we rely on the error handling to take care of that for us.
477
 */
478
int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
479
1.71M
{
480
1.71M
    int code = 0, initial_depth = 0;
481
1.71M
    pdf_key keyword;
482
1.71M
    gs_offset_t saved_offset[3];
483
1.71M
    pdf_obj_type type;
484
485
1.71M
    initial_depth = pdfi_count_stack(ctx);
486
1.71M
    saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
487
488
1.71M
    code = pdfi_read_token(ctx, s, objnum, gen);
489
1.71M
    if (code < 0)
490
4.43k
        return code;
491
492
1.71M
    if (code == 0)
493
        /* failed to read a token */
494
59
        return_error(gs_error_syntaxerror);
495
496
1.71M
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
497
20.8k
        keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
498
20.8k
        if (keyword == TOKEN_ENDOBJ) {
499
331
            ctx->stack_top[-1] = PDF_NULL_OBJ;
500
331
            return 0;
501
331
        }
502
20.8k
    }
503
504
56.5M
    do {
505
        /* move all the saved offsets up by one */
506
56.5M
        saved_offset[0] = saved_offset[1];
507
56.5M
        saved_offset[1] = saved_offset[2];
508
56.5M
        saved_offset[2] = pdfi_unread_tell(ctx);
509
510
56.5M
        code = pdfi_read_token(ctx, s, objnum, gen);
511
56.5M
        if (code < 0) {
512
162k
            pdfi_clearstack(ctx);
513
162k
            return code;
514
162k
        }
515
56.4M
        if (s->eof)
516
2.25k
            return_error(gs_error_syntaxerror);
517
56.4M
        code = 0;
518
56.4M
        type = pdfi_type_of(ctx->stack_top[-1]);
519
56.4M
        if (type == PDF_KEYWORD)
520
129k
            goto missing_endobj;
521
56.4M
    } while (type != PDF_FAST_KEYWORD);
522
523
1.41M
    keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
524
1.41M
    if (keyword == TOKEN_ENDOBJ) {
525
728k
        pdf_obj *o;
526
527
728k
        if (pdfi_count_stack(ctx) - initial_depth < 2) {
528
221
            pdfi_clearstack(ctx);
529
221
            return_error(gs_error_stackunderflow);
530
221
        }
531
532
728k
        o = ctx->stack_top[-2];
533
534
728k
        pdfi_pop(ctx, 1);
535
536
728k
        if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
537
727k
            o->indirect_num = o->object_num = objnum;
538
727k
            o->indirect_gen = o->generation_num = gen;
539
727k
        }
540
728k
        return code;
541
728k
    }
542
689k
    if (keyword == TOKEN_STREAM) {
543
643k
        pdfi_pop(ctx, 1);
544
643k
        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
545
643k
    }
546
45.2k
    if (keyword == TOKEN_OBJ) {
547
5.30k
        pdf_obj *o;
548
549
5.30k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) {
550
0
            return code;
551
0
        }
552
553
        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
554
5.30k
        if (pdfi_count_stack(ctx) - initial_depth < 4)
555
1.35k
            return_error(gs_error_stackunderflow);
556
557
        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
558
3.94k
        o = ctx->stack_top[-4];
559
560
3.94k
        pdfi_pop(ctx, 3);
561
562
3.94k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
563
3.92k
            o->indirect_num = o->object_num = objnum;
564
3.92k
            o->indirect_gen = o->generation_num = gen;
565
3.92k
        }
566
3.94k
        if (saved_offset[0] > 0)
567
3.94k
            (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
568
3.94k
        return 0;
569
5.30k
    }
570
571
169k
missing_endobj:
572
    /* Assume that any other keyword means a missing 'endobj' */
573
169k
    if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) {
574
169k
        pdf_obj *o;
575
576
169k
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
577
578
169k
        if (pdfi_count_stack(ctx) - initial_depth < 2)
579
3.09k
            return_error(gs_error_stackunderflow);
580
581
166k
        o = ctx->stack_top[-2];
582
583
166k
        pdfi_pop(ctx, 1);
584
585
166k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
586
164k
            o->indirect_num = o->object_num = objnum;
587
164k
            o->indirect_gen = o->generation_num = gen;
588
164k
        }
589
166k
        return code;
590
169k
    }
591
0
    pdfi_pop(ctx, 2);
592
0
    return_error(gs_error_syntaxerror);
593
169k
}
594
595
/* Read a complete indirect object from 's' at the current position.
 *
 * An object consists of 'num gen obj' followed by a token, followed by an
 * endobj. A stream dictionary might have a 'stream' instead of an 'endobj';
 * that case is dealt with by pdfi_read_bare_object(), which this function
 * calls once the 'num gen obj' header has been read.
 *
 * Returns a negative error code if the header cannot be read: the error from
 * the reader if it reported one, gs_error_syntaxerror if a number is missing
 * or the keyword is not 'obj', gs_error_ioerror if no keyword was read.
 * Otherwise returns the result of pdfi_read_bare_object().
 */
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
{
    int obj_number = 0;
    int generation = 0;
    int status;

    /* Object number */
    status = pdfi_read_bare_int(ctx, s, &obj_number);
    if (status < 0)
        return status;
    if (status == 0)
        return_error(gs_error_syntaxerror);

    /* Generation number */
    status = pdfi_read_bare_int(ctx, s, &generation);
    if (status < 0)
        return status;
    if (status == 0)
        return_error(gs_error_syntaxerror);

    /* The 'obj' keyword */
    status = pdfi_read_bare_keyword(ctx, s);
    if (status < 0)
        return status;
    if (status == 0)
        return gs_note_error(gs_error_ioerror);
    if (status != TOKEN_OBJ)
        return_error(gs_error_syntaxerror);

    return pdfi_read_bare_object(ctx, s, stream_offset, obj_number, generation);
}
628
629
static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
630
                                 const xref_entry *entry, bool cache)
631
1.02M
{
632
1.02M
    int code = 0;
633
1.02M
    xref_entry *compressed_entry;
634
1.02M
    pdf_c_stream *compressed_stream = NULL;
635
1.02M
    pdf_c_stream *SubFile_stream = NULL;
636
1.02M
    pdf_c_stream *Object_stream = NULL;
637
1.02M
    int i = 0, object_length = 0;
638
1.02M
    int64_t num_entries;
639
1.02M
    int found_object;
640
1.02M
    int64_t Length, First;
641
1.02M
    gs_offset_t offset = 0;
642
1.02M
    pdf_stream *compressed_object = NULL;
643
1.02M
    pdf_dict *compressed_sdict = NULL; /* alias */
644
1.02M
    pdf_name *Type = NULL;
645
646
1.02M
    if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1)
647
953
        return_error(gs_error_undefined);
648
649
1.01M
    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
650
651
1.01M
    if (ctx->args.pdfdebug) {
652
0
        outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj);
653
0
        outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num);
654
0
    }
655
656
1.01M
    if (compressed_entry->cache == NULL) {
657
#if CACHE_STATISTICS
658
        ctx->compressed_misses++;
659
#endif
660
61.8k
        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
661
61.8k
        if (code < 0)
662
0
            goto exit;
663
664
61.8k
        code = pdfi_read_object(ctx, ctx->main_stream, 0);
665
61.8k
        if (code < 0)
666
9.02k
            goto exit;
667
668
52.8k
        if (pdfi_count_stack(ctx) < 1) {
669
1
            code = gs_note_error(gs_error_stackunderflow);
670
1
            goto exit;
671
1
        }
672
673
52.8k
        if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) {
674
11.5k
            pdfi_pop(ctx, 1);
675
11.5k
            code = gs_note_error(gs_error_typecheck);
676
11.5k
            goto exit;
677
11.5k
        }
678
41.2k
        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
679
265
            pdfi_pop(ctx, 1);
680
            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
681
265
            code = gs_note_error(gs_error_undefined);
682
265
            goto exit;
683
265
        }
684
41.0k
        compressed_object = (pdf_stream *)ctx->stack_top[-1];
685
41.0k
        pdfi_countup(compressed_object);
686
41.0k
        pdfi_pop(ctx, 1);
687
41.0k
        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
688
41.0k
        if (code < 0)
689
0
            goto exit;
690
957k
    } else {
691
#if CACHE_STATISTICS
692
        ctx->compressed_hits++;
693
#endif
694
957k
        compressed_object = (pdf_stream *)compressed_entry->cache->o;
695
957k
        pdfi_countup(compressed_object);
696
957k
        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
697
957k
    }
698
998k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
699
998k
    if (code < 0)
700
20
        return code;
701
702
998k
    if (ctx->loop_detection != NULL) {
703
997k
        code = pdfi_loop_detector_mark(ctx);
704
997k
        if (code < 0)
705
0
            goto exit;
706
997k
        if (compressed_sdict->object_num != 0) {
707
997k
            if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) {
708
212
                code = gs_note_error(gs_error_circular_reference);
709
997k
            } else {
710
997k
                code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num);
711
997k
            }
712
997k
            if (code < 0) {
713
212
                (void)pdfi_loop_detector_cleartomark(ctx);
714
212
                goto exit;
715
212
            }
716
997k
        }
717
997k
    }
718
    /* Check its an ObjStm ! */
719
998k
    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
720
998k
    if (code < 0) {
721
264
        if (ctx->loop_detection != NULL)
722
264
            (void)pdfi_loop_detector_cleartomark(ctx);
723
264
        goto exit;
724
264
    }
725
726
998k
    if (!pdfi_name_is(Type, "ObjStm")){
727
1.18k
        if (ctx->loop_detection != NULL)
728
1.18k
            (void)pdfi_loop_detector_cleartomark(ctx);
729
1.18k
        code = gs_note_error(gs_error_syntaxerror);
730
1.18k
        goto exit;
731
1.18k
    }
732
733
    /* Need to check the /N entry to see if the object is actually in this stream! */
734
997k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
735
997k
    if (code < 0) {
736
246
        if (ctx->loop_detection != NULL)
737
246
            (void)pdfi_loop_detector_cleartomark(ctx);
738
246
        goto exit;
739
246
    }
740
741
996k
    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
742
81
        if (ctx->loop_detection != NULL)
743
81
            (void)pdfi_loop_detector_cleartomark(ctx);
744
81
        code = gs_note_error(gs_error_rangecheck);
745
81
        goto exit;
746
81
    }
747
748
996k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
749
996k
    if (code < 0) {
750
130k
        if (ctx->loop_detection != NULL)
751
130k
            (void)pdfi_loop_detector_cleartomark(ctx);
752
130k
        goto exit;
753
130k
    }
754
755
866k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First);
756
866k
    if (code < 0) {
757
1.14k
        if (ctx->loop_detection != NULL)
758
1.14k
            (void)pdfi_loop_detector_cleartomark(ctx);
759
1.14k
        goto exit;
760
1.14k
    }
761
762
865k
    if (ctx->loop_detection != NULL)
763
864k
        (void)pdfi_loop_detector_cleartomark(ctx);
764
765
865k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
766
865k
    if (code < 0)
767
0
        goto exit;
768
769
865k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
770
865k
    if (code < 0)
771
0
        goto exit;
772
773
865k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
774
865k
    if (code < 0)
775
1.42k
        goto exit;
776
777
53.5M
    for (i=0;i < num_entries;i++)
778
52.6M
    {
779
52.6M
        int new_offset;
780
52.6M
        code = pdfi_read_bare_int(ctx, compressed_stream, &found_object);
781
52.6M
        if (code < 0)
782
5.24k
            goto exit;
783
52.6M
        if (code == 0) {
784
590
            code = gs_note_error(gs_error_syntaxerror);
785
590
            goto exit;
786
590
        }
787
52.6M
        code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset);
788
52.6M
        if (code < 0)
789
5.26k
            goto exit;
790
52.6M
        if (code == 0) {
791
477
            code = gs_note_error(gs_error_syntaxerror);
792
477
            goto exit;
793
477
        }
794
52.6M
        if (i == entry->u.compressed.object_index) {
795
858k
            if (found_object != obj) {
796
1.77k
                code = gs_note_error(gs_error_undefined);
797
1.77k
                goto exit;
798
1.77k
            }
799
856k
            offset = new_offset;
800
856k
        }
801
52.6M
        if (i == entry->u.compressed.object_index + 1)
802
826k
            object_length = new_offset - offset;
803
52.6M
    }
804
805
    /* Bug #705259 - The first object need not lie immediately after the initial
806
     * table of object numbers and offsets. The start of the first object is given
807
     * by the value of First. We don't know how many bytes we consumed getting to
808
     * the end of the table, unfortunately, so we close the stream, rewind the main
809
     * stream back to the beginning of the ObjStm, and then read and discard 'First'
810
     * bytes in order to get to the start of the first object. Then we read the
811
     * number of bytes required to get from there to the start of the object we
812
     * actually want.
813
     * If this ever looks like it's causing performance problems we could read the
814
     * initial table above manually instead of using the existing code, and track
815
     * how many bytes we'd read, which would avoid us having to tear down and
816
     * rebuild the stream.
817
     */
818
850k
    if (compressed_stream) {
819
850k
        pdfi_close_file(ctx, compressed_stream);
820
850k
        compressed_stream = NULL;
821
850k
    }
822
850k
    if (SubFile_stream) {
823
850k
        pdfi_close_file(ctx, SubFile_stream);
824
850k
        SubFile_stream = NULL;
825
850k
    }
826
827
850k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
828
850k
    if (code < 0)
829
0
        goto exit;
830
831
    /* We already dereferenced this above, so we don't need the loop detection checking here */
832
850k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
833
850k
    if (code < 0)
834
0
        goto exit;
835
836
850k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
837
850k
    if (code < 0)
838
0
        goto exit;
839
840
850k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
841
850k
    if (code < 0)
842
0
        goto exit;
843
844
455M
    for (i=0;i < First;i++)
845
454M
    {
846
454M
        int c = pdfi_read_byte(ctx, compressed_stream);
847
454M
        if (c < 0) {
848
34
            code = gs_note_error(gs_error_ioerror);
849
34
            goto exit;
850
34
        }
851
454M
    }
852
853
    /* Skip to the offset of the object we want to read */
854
2.86G
    for (i=0;i < offset;i++)
855
2.86G
    {
856
2.86G
        int c = pdfi_read_byte(ctx, compressed_stream);
857
2.86G
        if (c < 0) {
858
43.3k
            code = gs_note_error(gs_error_ioerror);
859
43.3k
            goto exit;
860
43.3k
        }
861
2.86G
    }
862
863
    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
864
     * the number of bytes we read to the declared size of the object (difference between
865
     * the offsets of the object we want to read, and the next object). If it is 0 then
866
     * we're reading the last object in the stream, so we just rely on the SubFileDecode
867
     * we set up when we created compressed_stream to limit the bytes to the length of
868
     * that stream.
869
     */
870
807k
    if (object_length > 0) {
871
778k
        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
872
778k
        if (code < 0)
873
0
            goto exit;
874
778k
    } else {
875
28.4k
        Object_stream = compressed_stream;
876
28.4k
    }
877
878
807k
    code = pdfi_read_token(ctx, Object_stream, obj, gen);
879
807k
    if (code < 0)
880
3.47k
        goto exit;
881
803k
    if (code == 0) {
882
84
        code = gs_note_error(gs_error_syntaxerror);
883
84
        goto exit;
884
84
    }
885
803k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) {
886
793k
        int start_depth = pdfi_count_stack(ctx);
887
888
        /* Need to read all the elements from COS objects */
889
27.9M
        do {
890
27.9M
            code = pdfi_read_token(ctx, Object_stream, obj, gen);
891
27.9M
            if (code < 0)
892
21.9k
                goto exit;
893
27.9M
            if (code == 0) {
894
4.60k
                code = gs_note_error(gs_error_syntaxerror);
895
4.60k
                goto exit;
896
4.60k
            }
897
27.9M
            if (compressed_stream->eof == true) {
898
336
                code = gs_note_error(gs_error_ioerror);
899
336
                goto exit;
900
336
            }
901
27.9M
        } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
902
793k
    }
903
904
776k
    *object = ctx->stack_top[-1];
905
    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
906
     * the object number for uncompressed objects. So we need to do that here.
907
     */
908
776k
    if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
909
774k
        (*object)->indirect_num = (*object)->object_num = obj;
910
774k
        (*object)->indirect_gen = (*object)->generation_num = gen;
911
774k
        pdfi_countup(*object);
912
774k
    }
913
776k
    pdfi_pop(ctx, 1);
914
915
776k
    if (cache) {
916
760k
        code = pdfi_add_to_cache(ctx, *object);
917
760k
        if (code < 0) {
918
0
            pdfi_countdown(*object);
919
0
            goto exit;
920
0
        }
921
760k
    }
922
923
1.01M
 exit:
924
1.01M
    if (Object_stream)
925
807k
        pdfi_close_file(ctx, Object_stream);
926
1.01M
    if (Object_stream != compressed_stream)
927
835k
        if (compressed_stream)
928
835k
            pdfi_close_file(ctx, compressed_stream);
929
1.01M
    if (SubFile_stream)
930
865k
        pdfi_close_file(ctx, SubFile_stream);
931
1.01M
    pdfi_countdown(compressed_object);
932
1.01M
    pdfi_countdown(Type);
933
1.01M
    return code;
934
776k
}
935
936
/* pdf_dereference returns an object with a reference count of at least 1, this represents the
937
 * reference being held by the caller (in **object) when we return from this function.
938
 */
939
/* Resolve the indirect reference 'obj gen R' and return the object in *object.
 *
 * ctx    - interpreter context; supplies the xref table, main stream, operand
 *          stack, loop detector and object cache used here.
 * obj    - object number to look up in the xref table.
 * gen    - expected generation number; a mismatch on an uncompressed entry
 *          only raises a warning.
 * object - receives the result. It is set to NULL on entry. On success the
 *          caller owns one reference to a real object; a free xref entry
 *          yields PDF_NULL_OBJ.
 * cache  - when true, a freshly read object is added to the object cache
 *          (indirect references are never cached).
 *
 * Returns 0 on success or a negative gs_error code. The stream position and
 * ctx->encryption.decrypt_strings are saved before any object is read and are
 * put back on every exit path taken after that point, including the paths
 * that repair the file and retry.
 */
static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache)
{
    xref_entry *entry;
    int code, stack_depth = pdfi_count_stack(ctx);
    gs_offset_t saved_stream_offset;
    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;

    *object = NULL;

    if (ctx->xref_table == NULL)
        return_error(gs_error_typecheck);

    if (ctx->main_stream == NULL || ctx->main_stream->s == NULL)
        return_error(gs_error_ioerror);

    if (obj >= ctx->xref_table->xref_size) {
        char extra_info[gp_file_name_sizeof];

        gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) {
            return code;
        }

        /* The xref may simply be incomplete; rebuild it and look again. */
        code = pdfi_repair_file(ctx);
        if (code < 0) {
            *object = NULL;
            return code;
        }
        if (obj >= ctx->xref_table->xref_size) {
            *object = NULL;
            return_error(gs_error_rangecheck);
        }
    }

    entry = &ctx->xref_table->xref[obj];

    if (entry->object_num == 0) {
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0");
        return_error(gs_error_undefined);
    }

    if (entry->free) {
        char extra_info[gp_file_name_sizeof];

        gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
        *object = PDF_NULL_OBJ;
        return code;
    } else {
        if (!entry->compressed) {
            if (entry->u.uncompressed.generation_num != gen)
                pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", "");
        }
    }

    /* Free entries have already returned above, so only the circular
     * reference check is needed here.
     */
    if (ctx->loop_detection) {
        if (pdfi_loop_detector_check_object(ctx, obj) == true)
            return_error(gs_error_circular_reference);
    }

    if (entry->cache != NULL) {
        pdf_obj_cache_entry *cache_entry = entry->cache;

#if CACHE_STATISTICS
        ctx->hits++;
#endif
        *object = cache_entry->o;
        pdfi_countup(*object);

        pdfi_promote_cache_entry(ctx, cache_entry);
    } else {
        saved_stream_offset = pdfi_unread_tell(ctx);

        if (entry->compressed) {
            /* This is an object in a compressed object stream */
            ctx->encryption.decrypt_strings = false;

            code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache);
            if (code < 0 || *object == NULL)
                goto error;
        } else {
#if CACHE_STATISTICS
            ctx->misses++;
#endif
            ctx->encryption.decrypt_strings = true;

            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
            if (code < 0)
                goto error;

            code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset);

            /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it.
             * Reload the xref entry to be certain it is valid. The rebuilt table is re-checked
             * first (as is done after the repair at the top of this function) so that we never
             * index past its end.
             */
            if (ctx->xref_table == NULL || obj >= ctx->xref_table->xref_size) {
                if (code >= 0)
                    code = gs_note_error(gs_error_rangecheck);
                goto error;
            }
            entry = &ctx->xref_table->xref[obj];
            if (code < 0) {
                int code1 = 0;

                if (entry->free) {
                    char extra_info[gp_file_name_sizeof];

                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
                    *object = PDF_NULL_OBJ;
                    if (code < 0)
                        goto error;
                    goto free_obj;
                }
                /* Put everything back as it was on entry before repairing and retrying. */
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);

                code1 = pdfi_repair_file(ctx);
                if (code1 == 0)
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
                /* Repair failed, just give up and return an error */
                goto error;
            }

            /* We only expect a single object back when dereferencing an indirect reference
             * The only way (I think) we can end up with more than one is if the object initially
             * appears to be a dictionary or array, but the object terminates (with endobj or
             * simply reaching EOF) without terminating the array or dictionary. That's clearly
             * an error. We might, as a future 'improvement' choose to walk back through
             * the stack looking for unterminated dictionary or array markers, and closing them
             * so that (hopefully!) we end up with a single 'repaired' object on the stack.
             * But for now I'm simply going to treat these as errors. We will try a repair on the
             * file to see if we end up using a different (hopefully intact) object from the file.
             */
            if (pdfi_count_stack(ctx) - stack_depth > 1) {
                int code1 = 0;

                /* Restore the entry state first, exactly as the read-failure path does,
                 * so the retry (and our caller) see the original decrypt_strings value,
                 * stream position and stack depth.
                 */
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);

                code1 = pdfi_repair_file(ctx);
                if (code1 == 0)
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
                /* Repair failed, just give up and return an error */
                code = gs_note_error(gs_error_syntaxerror);
                goto error;
            }

            if (pdfi_count_stack(ctx) > 0 &&
                ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) &&
                (ctx->stack_top[-1])->object_num == obj)
                || ctx->stack_top[-1] == PDF_NULL_OBJ)) {
                *object = ctx->stack_top[-1];
                pdfi_countup(*object);
                pdfi_pop(ctx, 1);
                if (pdfi_type_of(*object) == PDF_INDIRECT) {
                    pdf_indirect_ref *iref = (pdf_indirect_ref *)*object;

                    /* An object whose whole content is a reference to itself. */
                    if (iref->ref_object_num == obj) {
                        code = gs_note_error(gs_error_circular_reference);
                        pdfi_countdown(*object);
                        *object = NULL;
                        goto error;
                    }
                }
                /* There's really no point in caching an indirect reference and
                 * I think it could be potentially confusing to later calls.
                 */
                if (cache && pdfi_type_of(*object) != PDF_INDIRECT) {
                    code = pdfi_add_to_cache(ctx, *object);
                    if (code < 0) {
                        pdfi_countdown(*object);
                        goto error;
                    }
                }
            } else {
                int code1 = 0;

                /* Whatever we read is not the object we asked for; discard it. */
                if (pdfi_count_stack(ctx) > 0)
                    pdfi_pop(ctx, 1);

                if (entry->free) {
                    char extra_info[gp_file_name_sizeof];

                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
                    *object = PDF_NULL_OBJ;
                    if (code < 0)
                        goto error;
                    /* Leave through the common path so the stream position and
                     * decrypt_strings are restored, as in the read-failure case.
                     */
                    goto free_obj;
                }

                /* Restore the entry state before repairing and retrying. */
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);

                code1 = pdfi_repair_file(ctx);
                if (code1 == 0)
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
                /* Repair failed, just give up and return an error */
                code = gs_note_error(gs_error_undefined);
                goto error;
            }
        }
free_obj:
        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
    }

    /* Record the object we resolved so that a later attempt to dereference it
     * inside the same loop-detection scope is reported as circular.
     */
    if (ctx->loop_detection && pdf_object_num(*object) != 0) {
        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
        if (code < 0) {
            ctx->encryption.decrypt_strings = saved_decrypt_strings;
            return code;
        }
    }
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
    return 0;

error:
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
    /* Return the stack to the state at entry */
    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
    return code;
}
1155
1156
/* Dereference 'obj gen R' into *object, allowing the result to be added to
 * the object cache. Returns whatever pdfi_dereference_main() returns.
 */
int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = true;

    return pdfi_dereference_main(ctx, obj, gen, object, use_cache);
}
1160
1161
/* Dereference 'obj gen R' into *object, asking pdfi_dereference_main() not to
 * add the result to the object cache. Returns whatever that call returns.
 */
int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = false;

    return pdfi_dereference_main(ctx, obj, gen, object, use_cache);
}
1165
1166
/* Do a dereference with loop detection */
1167
/* Dereference 'obj gen R' inside its own loop-detection scope: place a mark,
 * dereference (with caching), then clear back to the mark whatever the result.
 * If the mark cannot be placed its error is returned and nothing else is done.
 */
int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    int code = pdfi_loop_detector_mark(ctx);

    if (code >= 0) {
        code = pdfi_dereference(ctx, obj, gen, object);
        (void)pdfi_loop_detector_cleartomark(ctx);
    }
    return code;
}
1179
1180
/* As pdfi_deref_loop_detect(), but uses pdfi_dereference_nocache() so the
 * dereferenced object is not added to the object cache.
 */
int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    int code = pdfi_loop_detector_mark(ctx);

    if (code >= 0) {
        code = pdfi_dereference_nocache(ctx, obj, gen, object);
        (void)pdfi_loop_detector_cleartomark(ctx);
    }
    return code;
}
1192
1193
/* Replace the indirect references held in an array with the objects they
 * refer to.
 *
 * ctx     - interpreter context.
 * obj     - the array to process (cast to pdf_array here; the caller is
 *           expected to have checked the type).
 * recurse - when true, each fetched element is itself resolved via
 *           pdfi_resolve_indirect_loop_detect().
 *
 * Each element is fetched inside its own loop-detection mark when a loop
 * detector is active. Streams are not stored back into the array (they stay
 * as references). Any error, including gs_error_circular_reference, stops the
 * walk and is returned. Returns 0 on success.
 */
static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    uint64_t index, arraysize;
    pdf_obj *object = NULL;
    pdf_array *array = (pdf_array *)obj;

    arraysize = pdfi_array_size(array);
    for (index = 0; index < arraysize; index++) {
        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;    /* 'object' is NULL here, nothing to release */
        }

        code = pdfi_array_get_no_store_R(ctx, array, index, &object);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);

            if (code1 < 0) {
                /* Leave via 'exit' so the reference we may now hold in
                 * 'object' is released rather than leaked.
                 */
                code = code1;
                goto exit;
            }
        }

        /* Previously a circular reference was just left as an indirect
         * reference, but now we want to return the error so we don't end up
         * replacing indirect references to objects with circular references.
         * So every failure, circular or not, ends the walk here.
         */
        if (code < 0)
            goto exit;

        if (recurse) {
            code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse);
            if (code < 0)
                goto exit;
        }

        /* don't store the object if it's a stream (leave as a ref) */
        if (pdfi_type_of(object) != PDF_STREAM) {
            code = pdfi_array_put(ctx, array, index, object);
            if (code < 0)
                goto exit;
        }

        /* Drop our reference before moving to the next element. */
        pdfi_countdown(object);
        object = NULL;
    }

 exit:
    pdfi_countdown(object);
    return code;
}
1241
1242
/* Replace the indirect references held as dictionary values with the objects
 * they refer to.
 *
 * ctx     - interpreter context.
 * obj     - the dictionary to process (cast to pdf_dict here; the caller is
 *           expected to have checked the type).
 * recurse - when true, each fetched value is itself resolved via
 *           pdfi_resolve_indirect_loop_detect().
 *
 * The /Parent key is skipped. Each value is fetched inside its own
 * loop-detection mark when a loop detector is active. A value whose fetch
 * reports gs_error_circular_reference is left as an indirect reference and the
 * walk continues; any other error stops the walk and is returned. Streams are
 * not stored back into the dictionary. Returns 0 on success.
 */
static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    pdf_dict *dict = (pdf_dict *)obj;
    pdf_name *Key = NULL;
    pdf_obj *Value = NULL;
    uint64_t index, dictsize;

    dictsize = pdfi_dict_entries(dict);

    /* Note: I am not using pdfi_dict_first/next because of needing to handle
     * circular references.
     */
    for (index = 0; index < dictsize; index++) {
        Key = (pdf_name *)dict->list[index].key;
        if (pdfi_name_is(Key, "Parent"))
            continue;

        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;    /* 'Value' is NULL here, nothing to release */
        }

        code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);

            if (code1 < 0) {
                /* Leave via 'exit' so the reference we may now hold in
                 * 'Value' is released rather than leaked.
                 */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Just leave as an indirect ref */
            code = 0;
        } else {
            if (code < 0)
                goto exit;
            if (recurse) {
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse);
                if (code < 0)
                    goto exit;
            }
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(Value) != PDF_STREAM)
                code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true);
        }
        if (code < 0)
            goto exit;

        /* Drop our reference before moving to the next entry. */
        pdfi_countdown(Value);
        Value = NULL;
    }

 exit:
    pdfi_countdown(Value);
    return code;
}
1298
1299
/* Resolve all the indirect references for an object
1300
 * Note: This can be recursive
1301
 */
1302
/* Resolve the indirect references directly contained in 'value'.
 * Arrays and dictionaries are handed to their specific resolvers; every other
 * object type has nothing to resolve and yields 0. 'recurse' is passed
 * through unchanged.
 */
int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
{
    if (pdfi_type_of(value) == PDF_ARRAY)
        return pdfi_resolve_indirect_array(ctx, value, recurse);

    if (pdfi_type_of(value) == PDF_DICT)
        return pdfi_resolve_indirect_dict(ctx, value, recurse);

    return 0;
}
1318
1319
/* Resolve all the indirect references for an object
1320
 * Resolve indirect references, either one level or recursively, with loop detect on
1321
 * the parent (can be NULL) and the value.
1322
 */
1323
/* Resolve the indirect references in 'value' inside a fresh loop-detection
 * scope.
 *
 * ctx     - interpreter context.
 * parent  - optional (may be NULL) object containing 'value'; if it has a
 *           non-zero object number that number is added to the loop detector.
 * value   - the object to resolve; if it has a non-zero object number it is
 *           checked against, then added to, the loop detector.
 * recurse - passed through to pdfi_resolve_indirect().
 *
 * Returns 0 on success, gs_error_circular_reference if 'value' is already
 * being processed, or another negative error code. The mark placed here is
 * always cleared before returning.
 */
int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
{
    int code = 0;

    code = pdfi_loop_detector_mark(ctx);
    if (code < 0)
        return code;    /* no mark was placed, so there is nothing of ours to clear */

    if (parent && parent->object_num != 0) {
        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
        if (code < 0)
            goto exit;
    }

    if (pdf_object_num(value) != 0) {
        if (pdfi_loop_detector_check_object(ctx, value->object_num)) {
            code = gs_note_error(gs_error_circular_reference);
            goto exit;
        }
        code = pdfi_loop_detector_add_object(ctx, value->object_num);
        if (code < 0)
            goto exit;
    }
    code = pdfi_resolve_indirect(ctx, value, recurse);

 exit:
    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
    return code;
}