Coverage Report

Created: 2025-08-28 07:06

/src/ghostpdl/pdf/pdf_deref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* Functions to deal with dereferencing indirect objects
17
 * for the PDF interpreter. In here we also keep the code
18
 * for dealing with the object cache, because the dereferencing
19
 * functions are currently the only place that deals with it.
20
 */
21
22
#include "pdf_int.h"
23
#include "pdf_stack.h"
24
#include "pdf_loop_detect.h"
25
#include "strmio.h"
26
#include "stream.h"
27
#include "pdf_file.h"
28
#include "pdf_misc.h"
29
#include "pdf_dict.h"
30
#include "pdf_array.h"
31
#include "pdf_deref.h"
32
#include "pdf_repair.h"
33
34
/* Start with the object caching functions */
35
/* Disable object caching (for easier debugging with reference counting)
36
 * by uncommenting the following line
37
 */
38
/*#define DISABLE_CACHE*/
39
40
/* given an object, create a cache entry for it. If we have too many entries
41
 * then delete the leat-recently-used cache entry. Make the new entry be the
42
 * most-recently-used entry. The actual entries are attached to the xref table
43
 * (as well as being a double-linked list), because we detect an existing
44
 * cache entry by seeing that the xref table for the object number has a non-NULL
45
 * 'cache' member.
46
 * So we need to update the xref as well if we add or delete cache entries.
47
 */
48
static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
49
2.89M
{
50
2.89M
#ifndef DISABLE_CACHE
51
2.89M
    pdf_obj_cache_entry *entry;
52
53
2.89M
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
54
5.25k
        return 0;
55
56
2.89M
    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
57
#if DEBUG_CACHE
58
        outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
59
#endif
60
0
        return_error(gs_error_unknownerror);
61
0
    }
62
63
2.89M
    if (o->object_num > ctx->xref_table->xref_size)
64
0
        return_error(gs_error_rangecheck);
65
66
#if DEBUG_CACHE
67
        dbgmprintf1(ctx->memory, "Adding object %d\n", o->object_num);
68
#endif
69
2.89M
    if (ctx->cache_entries == ctx->args.PDFCacheSize)
70
721k
    {
71
#if DEBUG_CACHE
72
        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
73
#endif
74
721k
        if (ctx->cache_LRU) {
75
721k
            entry = ctx->cache_LRU;
76
#if DEBUG_CACHE
77
            dbgmprintf1(ctx->memory, "Evicting %d\n", entry->o->object_num);
78
#endif
79
721k
            ctx->cache_LRU = entry->next;
80
721k
            if (entry->next)
81
721k
                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
82
721k
            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
83
721k
            pdfi_countdown(entry->o);
84
721k
            ctx->cache_entries--;
85
721k
            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
86
721k
        } else
87
0
            return_error(gs_error_unknownerror);
88
721k
    }
89
2.89M
    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
90
2.89M
    if (entry == NULL)
91
0
        return_error(gs_error_VMerror);
92
93
2.89M
    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
94
95
2.89M
    entry->o = o;
96
2.89M
    pdfi_countup(o);
97
2.89M
    if (ctx->cache_MRU) {
98
2.80M
        entry->previous = ctx->cache_MRU;
99
2.80M
        ctx->cache_MRU->next = entry;
100
2.80M
    }
101
2.89M
    ctx->cache_MRU = entry;
102
2.89M
    if (ctx->cache_LRU == NULL)
103
87.1k
        ctx->cache_LRU = entry;
104
105
2.89M
    ctx->cache_entries++;
106
2.89M
    ctx->xref_table->xref[o->object_num].cache = entry;
107
2.89M
#endif
108
2.89M
    return 0;
109
2.89M
}
110
111
/* Given an existing cache entry, promote it to be the most-recently-used
112
 * cache entry.
113
 */
114
static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
115
5.60M
{
116
5.60M
#ifndef DISABLE_CACHE
117
5.60M
    if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
118
3.47M
        if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
119
3.47M
            ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
120
3.47M
        if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
121
3.46M
            ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
122
1.94k
        else {
123
            /* the existing entry is the current least recently used, we need to make the 'next'
124
             * cache entry into the LRU.
125
             */
126
1.94k
            ctx->cache_LRU = cache_entry->next;
127
1.94k
        }
128
3.47M
        cache_entry->next = NULL;
129
3.47M
        cache_entry->previous = ctx->cache_MRU;
130
3.47M
        ctx->cache_MRU->next = cache_entry;
131
3.47M
        ctx->cache_MRU = cache_entry;
132
3.47M
    }
133
5.60M
#endif
134
5.60M
    return;
135
5.60M
}
136
137
/* Make sure an object is in the object cache: add it if the xref entry for
 * its object number has no cache entry yet, otherwise promote the existing
 * entry to most-recently-used.
 *
 * Objects with an object number of 0 are never cached. Values below
 * PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) are skipped without being dereferenced,
 * matching the check in pdfi_add_to_cache().
 *
 * Returns 0 on success (or when there is nothing to do), gs_error_rangecheck
 * if the object number lies outside the xref table, or whatever error
 * pdfi_add_to_cache() returns.
 */
int pdfi_cache_object(pdf_context *ctx, pdf_obj *o)
{
    /* Must test this before touching o->object_num */
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
        return 0;

    if (o->object_num == 0)
        return 0;

    /* Don't index past the end of the xref table; valid indices are
     * 0 .. xref_size - 1.
     */
    if (o->object_num >= ctx->xref_table->xref_size)
        return_error(gs_error_rangecheck);

    if (ctx->xref_table->xref[o->object_num].cache == NULL)
        return pdfi_add_to_cache(ctx, o);

    pdfi_promote_cache_entry(ctx, ctx->xref_table->xref[o->object_num].cache);
    return 0;
}
147
148
/* This one's a bit of an oddity, its used for fonts. When we build a PDF font object
149
 * we want the object cache to reference *that* object, not the dictionary which was
150
 * read out of the PDF file, so this allows us to replace the font dictionary in the
151
 * cache with the actual font object, so that later dereferences will get this font
152
 * object.
153
 */
154
int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
155
183k
{
156
183k
#ifndef DISABLE_CACHE
157
183k
    xref_entry *entry;
158
183k
    pdf_obj_cache_entry *cache_entry;
159
183k
    pdf_obj *old_cached_obj = NULL;
160
161
    /* Limited error checking here, we assume that things like the
162
     * validity of the object (eg not a free oobject) have already been handled.
163
     */
164
165
183k
    entry = &ctx->xref_table->xref[o->object_num];
166
183k
    cache_entry = entry->cache;
167
168
183k
    if (cache_entry == NULL) {
169
5.20k
        return(pdfi_add_to_cache(ctx, o));
170
178k
    } else {
171
        /* NOTE: We grab the object without decrementing, to avoid triggering
172
         * a warning message for freeing an object that's in the cache
173
         */
174
178k
        if (cache_entry->o != NULL)
175
178k
            old_cached_obj = cache_entry->o;
176
177
        /* Put new entry in the cache */
178
178k
        cache_entry->o = o;
179
178k
        pdfi_countup(o);
180
178k
        pdfi_promote_cache_entry(ctx, cache_entry);
181
182
        /* Now decrement the old cache entry, if any */
183
178k
        pdfi_countdown(old_cached_obj);
184
178k
    }
185
178k
#endif
186
178k
    return 0;
187
183k
}
188
189
/* Now the dereferencing functions */
190
191
/*
192
 * Technically we can accept a stream other than the main PDF file stream here. This is
193
 * really for the case of compressed objects where we read tokens from the compressed
194
 * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
195
 * on top of the main file stream, which may be useful. Note that this cannot work with
196
 * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
197
 * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
198
 * the stream we are using. See the comments below when keyword is PDF_STREAM.
199
 */
200
201
/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
202
 * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
203
 * Currently errors are inmpossible. This is only used by the decryption code
204
 * to determine if a string is in a compressed object stream, if it is then
205
 * it can't be used for decryption.
206
 */
207
int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
208
17.8k
{
209
17.8k
    xref_entry *entry;
210
211
    /* Can't possibly be a compressed object before we have finished reading
212
     * the xref.
213
     */
214
17.8k
    if (ctx->xref_table == NULL)
215
0
        return 0;
216
217
17.8k
    entry = &ctx->xref_table->xref[obj];
218
219
17.8k
    if (entry->compressed)
220
0
        return 1;
221
222
17.8k
    return 0;
223
17.8k
}
224
225
/* We should never read a 'stream' keyword from a compressed object stream
226
 * so this case should never end up here.
227
 */
228
static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
229
                                   uint32_t objnum, uint32_t gen)
230
869k
{
231
869k
    int code = 0;
232
869k
    int64_t i;
233
869k
    pdf_dict *dict = NULL;
234
869k
    gs_offset_t offset;
235
869k
    pdf_stream *stream_obj = NULL;
236
237
    /* Strange code time....
238
     * If we are using a stream which is *not* the PDF uncompressed main file stream
239
     * then doing stell on it will only tell us how many bytes have been read from
240
     * that stream, it won't tell us the underlying file position. So we add on the
241
     * 'unread' bytes, *and* we add on the position of the start of the stream in
242
     * the actual main file. This is all done so that we can check the /Length
243
     * of the object. Note that this will *only* work for regular objects it can
244
     * not be used for compressed object streams, but those don't need checking anyway
245
     * they have a different mechanism altogether and should never get here.
246
     */
247
869k
    if (s != ctx->main_stream) {
248
0
        offset = stell(s->s) - s->unread_size + stream_offset;
249
0
        code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
250
0
        if (code < 0)
251
0
            return_error(gs_error_ioerror);
252
869k
    } else {
253
869k
        offset = stell(s->s) - s->unread_size;
254
869k
    }
255
256
869k
    if (pdfi_count_stack(ctx) < 1)
257
0
        return_error(gs_error_stackunderflow);
258
259
869k
    dict = (pdf_dict *)ctx->stack_top[-1];
260
261
869k
    if (pdfi_type_of(dict) != PDF_DICT) {
262
8.24k
        pdfi_pop(ctx, 1);
263
8.24k
        return_error(gs_error_syntaxerror);
264
8.24k
    }
265
266
861k
    dict->indirect_num = dict->object_num = objnum;
267
861k
    dict->indirect_gen = dict->generation_num = gen;
268
269
    /* Convert the dict into a stream */
270
861k
    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
271
861k
    if (code < 0) {
272
0
        pdfi_pop(ctx, 1);
273
0
        return code;
274
0
    }
275
    /* Pop off the dict and push the stream */
276
861k
    pdfi_pop(ctx, 1);
277
861k
    dict = NULL;
278
861k
    pdfi_push(ctx, (pdf_obj *)stream_obj);
279
280
861k
    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
281
861k
    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
282
861k
    stream_obj->stream_offset = offset;
283
284
    /* Exceptional code. Normally we do not need to worry about detecting circular references
285
     * when reading objects, because we do not dereference any indirect objects. However streams
286
     * are a slight exception in that we do get the Length from the stream dictionay and if that
287
     * is an indirect reference, then we dereference it.
288
     * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect
289
     * reference to the same stream object, and leads to infinite recursion. So deal with that
290
     * possibility here.
291
     */
292
861k
    code = pdfi_loop_detector_mark(ctx);
293
861k
    if (code < 0) {
294
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
295
0
        return code;
296
0
    }
297
861k
    if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
298
160
        pdfi_countdown(stream_obj); /* get rid of extra ref */
299
160
        pdfi_loop_detector_cleartomark(ctx);
300
160
        return_error(gs_error_circular_reference);
301
160
    }
302
303
861k
    code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
304
861k
    if (code < 0) {
305
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
306
0
        pdfi_loop_detector_cleartomark(ctx);
307
0
        return code;
308
0
    }
309
310
    /* This code may be a performance overhead, it simply skips over the stream contents
311
     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
312
     * 'go faster' flag for users who are certain their PDF files are well-formed. This
313
     * could also allow us to skip all kinds of other checking.....
314
     */
315
316
861k
    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
317
861k
    if (code < 0) {
318
19.3k
        char extra_info[gp_file_name_sizeof];
319
320
19.3k
        (void)pdfi_loop_detector_cleartomark(ctx);
321
19.3k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
322
19.3k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
323
19.3k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
324
19.3k
        return code;
325
19.3k
    }
326
841k
    code = pdfi_loop_detector_cleartomark(ctx);
327
841k
    if (code < 0) {
328
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
329
0
        return code;
330
0
    }
331
332
841k
    if (i < 0 || (i + offset)> ctx->main_stream_length) {
333
45.4k
        char extra_info[gp_file_name_sizeof];
334
335
45.4k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
336
45.4k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) {
337
0
            pdfi_pop(ctx, 1);
338
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
339
0
            return code;
340
0
        }
341
796k
    } else {
342
796k
        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
343
796k
        if (code < 0) {
344
0
            pdfi_pop(ctx, 1);
345
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
346
0
            return code;
347
0
        }
348
349
796k
        stream_obj->Length = 0;
350
796k
        stream_obj->length_valid = false;
351
352
796k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
353
796k
        if (code == 0) {
354
0
            char extra_info[gp_file_name_sizeof];
355
356
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
357
0
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
358
            /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
359
             * happens give up.
360
             */
361
0
            if (pdfi_count_stack(ctx) == 0) {
362
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
363
0
                return code;
364
0
            }
365
796k
        } else if (code < 0) {
366
0
            char extra_info[gp_file_name_sizeof];
367
368
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
369
0
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) {
370
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
371
0
                return code;
372
0
            }
373
796k
        } else if (code != TOKEN_ENDSTREAM) {
374
85.7k
            char extra_info[gp_file_name_sizeof];
375
376
85.7k
            gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
377
85.7k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) {
378
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
379
0
                return code;
380
0
            }
381
710k
        } else {
382
            /* Cache the Length in the stream object and mark it valid */
383
710k
            stream_obj->Length = i;
384
710k
            stream_obj->length_valid = true;
385
710k
        }
386
796k
    }
387
388
    /* If we failed to find a valid object, or the object wasn't a keyword, or the
389
     * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct
390
     * Length for streams if we have encrypted files, because we must install a
391
     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
392
     * Rather than mess about checking for encryption, we'll choose to just correctly
393
     * calculate the Length of all streams. Although this takes time, it will only
394
     * happen for files which are invalid.
395
     */
396
841k
    if (stream_obj->length_valid != true) {
397
131k
        char Buffer[10];
398
131k
        unsigned int bytes, total = 0;
399
131k
        int c = 0;
400
401
131k
        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
402
131k
        if (code < 0) {
403
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
404
0
            pdfi_pop(ctx, 1);
405
0
            return code;
406
0
        }
407
131k
        memset(Buffer, 0x00, 10);
408
131k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
409
131k
        if (bytes < 9) {
410
393
            pdfi_countdown(stream_obj); /* get rid of extra ref */
411
393
            return_error(gs_error_ioerror);
412
393
        }
413
414
130k
        total = bytes;
415
2.23G
        do {
416
2.23G
            if (memcmp(Buffer, "endstream", 9) == 0) {
417
80.0k
                if (Buffer[9] != 0x00)
418
80.0k
                    total--;
419
80.0k
                stream_obj->Length = total - 9;
420
80.0k
                stream_obj->length_valid = true;
421
80.0k
                break;
422
80.0k
            }
423
2.23G
            if (memcmp(Buffer, "endobj", 6) == 0) {
424
8.18k
                if (Buffer[9] != 0x00)
425
8.09k
                    total--;
426
8.18k
                stream_obj->Length = total - 6;
427
8.18k
                stream_obj->length_valid = true;
428
8.18k
                break;
429
8.18k
            }
430
2.23G
            memmove(Buffer, Buffer+1, 9);
431
2.23G
            c = pdfi_read_byte(ctx, ctx->main_stream);
432
2.23G
            if (c < 0)
433
42.5k
                break;
434
2.23G
            Buffer[9] = (byte)c;
435
2.23G
            total++;
436
2.23G
        } while(1);
437
130k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
438
130k
        if (c < 0)
439
42.5k
            return_error(gs_error_ioerror);
440
88.2k
        return 0;
441
130k
    }
442
443
710k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
444
710k
    if (code < 0) {
445
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
446
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) {
447
0
            return code;
448
0
        }
449
        /* Something went wrong looking for endobj, but we found endstream, so assume
450
         * for now that will suffice.
451
         */
452
0
        return 0;
453
0
    }
454
455
710k
    if (code == 0) {
456
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
457
0
        return_error(gs_error_stackunderflow);
458
0
    }
459
460
710k
    if (code != TOKEN_ENDOBJ) {
461
1.94k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
462
1.94k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
463
        /* Didn't find an endobj, but we have an endstream, so assume
464
         * for now that will suffice
465
         */
466
1.94k
        return code;
467
1.94k
    }
468
708k
    pdfi_countdown(stream_obj); /* get rid of extra ref */
469
470
708k
    return 0;
471
710k
}
472
473
/* This reads an object *after* the x y obj keyword has been found. Its broken out
474
 * separately for the benefit of the repair code when reading the dictionary following
475
 * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
476
 * not have an 'endobj', we rely on the error handling to take care of that for us.
477
 */
478
int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
479
2.50M
{
480
2.50M
    int code = 0, initial_depth = 0;
481
2.50M
    pdf_key keyword;
482
2.50M
    gs_offset_t saved_offset[3];
483
2.50M
    pdf_obj_type type;
484
485
2.50M
    initial_depth = pdfi_count_stack(ctx);
486
2.50M
    saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
487
488
2.50M
    code = pdfi_read_token(ctx, s, objnum, gen);
489
2.50M
    if (code < 0)
490
5.20k
        return code;
491
492
2.49M
    if (code == 0)
493
        /* failed to read a token */
494
75
        return_error(gs_error_syntaxerror);
495
496
2.49M
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
497
27.6k
        keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
498
27.6k
        if (keyword == TOKEN_ENDOBJ) {
499
372
            ctx->stack_top[-1] = PDF_NULL_OBJ;
500
372
            return 0;
501
372
        }
502
27.6k
    }
503
504
81.1M
    do {
505
        /* move all the saved offsets up by one */
506
81.1M
        saved_offset[0] = saved_offset[1];
507
81.1M
        saved_offset[1] = saved_offset[2];
508
81.1M
        saved_offset[2] = pdfi_unread_tell(ctx);
509
510
81.1M
        code = pdfi_read_token(ctx, s, objnum, gen);
511
81.1M
        if (code < 0) {
512
225k
            pdfi_clearstack(ctx);
513
225k
            return code;
514
225k
        }
515
80.9M
        if (s->eof)
516
2.84k
            return_error(gs_error_syntaxerror);
517
80.9M
        code = 0;
518
80.9M
        type = pdfi_type_of(ctx->stack_top[-1]);
519
80.9M
        if (type == PDF_KEYWORD)
520
170k
            goto missing_endobj;
521
80.9M
    } while (type != PDF_FAST_KEYWORD);
522
523
2.09M
    keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
524
2.09M
    if (keyword == TOKEN_ENDOBJ) {
525
1.16M
        pdf_obj *o;
526
527
1.16M
        if (pdfi_count_stack(ctx) - initial_depth < 2) {
528
234
            pdfi_clearstack(ctx);
529
234
            return_error(gs_error_stackunderflow);
530
234
        }
531
532
1.16M
        o = ctx->stack_top[-2];
533
534
1.16M
        pdfi_pop(ctx, 1);
535
536
1.16M
        if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
537
1.16M
            o->indirect_num = o->object_num = objnum;
538
1.16M
            o->indirect_gen = o->generation_num = gen;
539
1.16M
        }
540
1.16M
        return code;
541
1.16M
    }
542
928k
    if (keyword == TOKEN_STREAM) {
543
869k
        pdfi_pop(ctx, 1);
544
869k
        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
545
869k
    }
546
58.5k
    if (keyword == TOKEN_OBJ) {
547
6.50k
        pdf_obj *o;
548
549
6.50k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) {
550
0
            return code;
551
0
        }
552
553
        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
554
6.50k
        if (pdfi_count_stack(ctx) - initial_depth < 4)
555
1.48k
            return_error(gs_error_stackunderflow);
556
557
        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
558
5.01k
        o = ctx->stack_top[-4];
559
560
5.01k
        pdfi_pop(ctx, 3);
561
562
5.01k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
563
4.98k
            o->indirect_num = o->object_num = objnum;
564
4.98k
            o->indirect_gen = o->generation_num = gen;
565
4.98k
        }
566
5.01k
        if (saved_offset[0] > 0)
567
5.01k
            (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
568
5.01k
        return 0;
569
6.50k
    }
570
571
222k
missing_endobj:
572
    /* Assume that any other keyword means a missing 'endobj' */
573
222k
    if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) {
574
222k
        pdf_obj *o;
575
576
222k
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
577
578
222k
        if (pdfi_count_stack(ctx) - initial_depth < 2)
579
4.70k
            return_error(gs_error_stackunderflow);
580
581
218k
        o = ctx->stack_top[-2];
582
583
218k
        pdfi_pop(ctx, 1);
584
585
218k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
586
216k
            o->indirect_num = o->object_num = objnum;
587
216k
            o->indirect_gen = o->generation_num = gen;
588
216k
        }
589
218k
        return code;
590
222k
    }
591
0
    pdfi_pop(ctx, 2);
592
0
    return_error(gs_error_syntaxerror);
593
222k
}
594
595
/* Read a complete indirect object ('num gen obj' ... ) from the stream 's'.
 * The header is read here and the rest is handed to pdfi_read_bare_object(),
 * along with stream_offset.
 *
 * Returns the result of pdfi_read_bare_object() on success, or a negative
 * gs_error code if the 'num gen obj' header cannot be read.
 */
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
{
    int objnum = 0, gen = 0;
    int status;

    /* An object consists of 'num gen obj' followed by a token, followed by an endobj
     * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we
     * want to deal with it specially by getting the Length, jumping to the end and checking
     * for an endobj. Or not, possibly, because it would be slow.
     */

    /* Object number: a zero result is treated as a syntax error */
    status = pdfi_read_bare_int(ctx, s, &objnum);
    if (status < 0)
        return status;
    if (status == 0)
        return_error(gs_error_syntaxerror);

    /* Generation number, same rules as the object number */
    status = pdfi_read_bare_int(ctx, s, &gen);
    if (status < 0)
        return status;
    if (status == 0)
        return_error(gs_error_syntaxerror);

    /* The keyword following the two integers must be 'obj' */
    status = pdfi_read_bare_keyword(ctx, s);
    if (status < 0)
        return status;
    if (status == 0)
        return gs_note_error(gs_error_ioerror);
    if (status != TOKEN_OBJ)
        return_error(gs_error_syntaxerror);

    return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
}
628
629
static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
630
                                 const xref_entry *entry, bool cache)
631
1.43M
{
632
1.43M
    int code = 0;
633
1.43M
    xref_entry *compressed_entry;
634
1.43M
    pdf_c_stream *compressed_stream = NULL;
635
1.43M
    pdf_c_stream *SubFile_stream = NULL;
636
1.43M
    pdf_c_stream *Object_stream = NULL;
637
1.43M
    int i = 0, object_length = 0;
638
1.43M
    int64_t num_entries;
639
1.43M
    int found_object;
640
1.43M
    int64_t Length, First;
641
1.43M
    gs_offset_t offset = 0;
642
1.43M
    pdf_stream *compressed_object = NULL;
643
1.43M
    pdf_dict *compressed_sdict = NULL; /* alias */
644
1.43M
    pdf_name *Type = NULL;
645
646
1.43M
    if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1)
647
2.06k
        return_error(gs_error_undefined);
648
649
1.42M
    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
650
651
1.42M
    if (ctx->args.pdfdebug) {
652
0
        outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj);
653
0
        outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num);
654
0
    }
655
656
1.42M
    if (compressed_entry->cache == NULL) {
657
#if CACHE_STATISTICS
658
        ctx->compressed_misses++;
659
#endif
660
81.1k
        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
661
81.1k
        if (code < 0)
662
0
            goto exit;
663
664
81.1k
        code = pdfi_read_object(ctx, ctx->main_stream, 0);
665
81.1k
        if (code < 0)
666
17.4k
            goto exit;
667
668
63.7k
        if (pdfi_count_stack(ctx) < 1) {
669
2
            code = gs_note_error(gs_error_stackunderflow);
670
2
            goto exit;
671
2
        }
672
673
63.7k
        if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) {
674
13.2k
            pdfi_pop(ctx, 1);
675
13.2k
            code = gs_note_error(gs_error_typecheck);
676
13.2k
            goto exit;
677
13.2k
        }
678
50.5k
        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
679
298
            pdfi_pop(ctx, 1);
680
            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
681
298
            code = gs_note_error(gs_error_undefined);
682
298
            goto exit;
683
298
        }
684
50.2k
        compressed_object = (pdf_stream *)ctx->stack_top[-1];
685
50.2k
        pdfi_countup(compressed_object);
686
50.2k
        pdfi_pop(ctx, 1);
687
50.2k
        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
688
50.2k
        if (code < 0)
689
0
            goto exit;
690
1.34M
    } else {
691
#if CACHE_STATISTICS
692
        ctx->compressed_hits++;
693
#endif
694
1.34M
        compressed_object = (pdf_stream *)compressed_entry->cache->o;
695
1.34M
        pdfi_countup(compressed_object);
696
1.34M
        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
697
1.34M
    }
698
1.39M
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
699
1.39M
    if (code < 0)
700
11
        return code;
701
702
1.39M
    if (ctx->loop_detection != NULL) {
703
1.39M
        code = pdfi_loop_detector_mark(ctx);
704
1.39M
        if (code < 0)
705
0
            goto exit;
706
1.39M
        if (compressed_sdict->object_num != 0) {
707
1.39M
            if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) {
708
212
                code = gs_note_error(gs_error_circular_reference);
709
1.39M
            } else {
710
1.39M
                code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num);
711
1.39M
            }
712
1.39M
            if (code < 0) {
713
212
                (void)pdfi_loop_detector_cleartomark(ctx);
714
212
                goto exit;
715
212
            }
716
1.39M
        }
717
1.39M
    }
718
    /* Check its an ObjStm ! */
719
1.39M
    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
720
1.39M
    if (code < 0) {
721
277
        if (ctx->loop_detection != NULL)
722
277
            (void)pdfi_loop_detector_cleartomark(ctx);
723
277
        goto exit;
724
277
    }
725
726
1.39M
    if (!pdfi_name_is(Type, "ObjStm")){
727
1.21k
        if (ctx->loop_detection != NULL)
728
1.21k
            (void)pdfi_loop_detector_cleartomark(ctx);
729
1.21k
        code = gs_note_error(gs_error_syntaxerror);
730
1.21k
        goto exit;
731
1.21k
    }
732
733
    /* Need to check the /N entry to see if the object is actually in this stream! */
734
1.39M
    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
735
1.39M
    if (code < 0) {
736
270
        if (ctx->loop_detection != NULL)
737
270
            (void)pdfi_loop_detector_cleartomark(ctx);
738
270
        goto exit;
739
270
    }
740
741
1.39M
    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
742
84
        if (ctx->loop_detection != NULL)
743
84
            (void)pdfi_loop_detector_cleartomark(ctx);
744
84
        code = gs_note_error(gs_error_rangecheck);
745
84
        goto exit;
746
84
    }
747
748
1.39M
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
749
1.39M
    if (code < 0) {
750
250k
        if (ctx->loop_detection != NULL)
751
250k
            (void)pdfi_loop_detector_cleartomark(ctx);
752
250k
        goto exit;
753
250k
    }
754
755
1.14M
    code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First);
756
1.14M
    if (code < 0) {
757
4.60k
        if (ctx->loop_detection != NULL)
758
4.60k
            (void)pdfi_loop_detector_cleartomark(ctx);
759
4.60k
        goto exit;
760
4.60k
    }
761
762
1.14M
    if (ctx->loop_detection != NULL)
763
1.13M
        (void)pdfi_loop_detector_cleartomark(ctx);
764
765
1.14M
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
766
1.14M
    if (code < 0)
767
0
        goto exit;
768
769
1.14M
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
770
1.14M
    if (code < 0)
771
0
        goto exit;
772
773
1.14M
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
774
1.14M
    if (code < 0)
775
1.64k
        goto exit;
776
777
68.4M
    for (i=0;i < num_entries;i++)
778
67.3M
    {
779
67.3M
        int new_offset;
780
67.3M
        code = pdfi_read_bare_int(ctx, compressed_stream, &found_object);
781
67.3M
        if (code < 0)
782
17.2k
            goto exit;
783
67.3M
        if (code == 0) {
784
2.55k
            code = gs_note_error(gs_error_syntaxerror);
785
2.55k
            goto exit;
786
2.55k
        }
787
67.3M
        code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset);
788
67.3M
        if (code < 0)
789
25.0k
            goto exit;
790
67.2M
        if (code == 0) {
791
1.13k
            code = gs_note_error(gs_error_syntaxerror);
792
1.13k
            goto exit;
793
1.13k
        }
794
67.2M
        if (i == entry->u.compressed.object_index) {
795
1.11M
            if (found_object != obj) {
796
2.97k
                code = gs_note_error(gs_error_undefined);
797
2.97k
                goto exit;
798
2.97k
            }
799
1.10M
            offset = new_offset;
800
1.10M
        }
801
67.2M
        if (i == entry->u.compressed.object_index + 1)
802
1.06M
            object_length = new_offset - offset;
803
67.2M
    }
804
805
    /* Bug #705259 - The first object need not lie immediately after the initial
806
     * table of object numbers and offsets. The start of the first object is given
807
     * by the value of First. We don't know how many bytes we consumed getting to
808
     * the end of the table, unfortunately, so we close the stream, rewind the main
809
     * stream back to the beginning of the ObjStm, and then read and discard 'First'
810
     * bytes in order to get to the start of the first object. Then we read the
811
     * number of bytes required to get from there to the start of the object we
812
     * actually want.
813
     * If this ever looks like it's causing performance problems we could read the
814
     * initial table above manually instead of using the existing code, and track
815
     * how many bytes we'd read, which would avoid us having to tear down and
816
     * rebuild the stream.
817
     */
818
1.08M
    if (compressed_stream)
819
1.08M
        pdfi_close_file(ctx, compressed_stream);
820
1.08M
    if (SubFile_stream)
821
1.08M
        pdfi_close_file(ctx, SubFile_stream);
822
823
1.08M
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
824
1.08M
    if (code < 0)
825
0
        goto exit;
826
827
    /* We already dereferenced this above, so we don't need the loop detection checking here */
828
1.08M
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
829
1.08M
    if (code < 0)
830
0
        goto exit;
831
832
1.08M
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
833
1.08M
    if (code < 0)
834
0
        goto exit;
835
836
1.08M
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
837
1.08M
    if (code < 0)
838
0
        goto exit;
839
840
575M
    for (i=0;i < First;i++)
841
574M
    {
842
574M
        int c = pdfi_read_byte(ctx, compressed_stream);
843
574M
        if (c < 0) {
844
37
            code = gs_note_error(gs_error_ioerror);
845
37
            goto exit;
846
37
        }
847
574M
    }
848
849
    /* Skip to the offset of the object we want to read */
850
3.76G
    for (i=0;i < offset;i++)
851
3.76G
    {
852
3.76G
        int c = pdfi_read_byte(ctx, compressed_stream);
853
3.76G
        if (c < 0) {
854
84.3k
            code = gs_note_error(gs_error_ioerror);
855
84.3k
            goto exit;
856
84.3k
        }
857
3.76G
    }
858
859
    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
860
     * the number of bytes we read to the declared size of the object (difference between
861
     * the offsets of the object we want to read, and the next object). If it is 0 then
862
     * we're reading the last object in the stream, so we just rely on the SubFileDecode
863
     * we set up when we created compressed_stream to limit the bytes to the length of
864
     * that stream.
865
     */
866
1.00M
    if (object_length > 0) {
867
970k
        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
868
970k
        if (code < 0)
869
0
            goto exit;
870
970k
    } else {
871
34.7k
        Object_stream = compressed_stream;
872
34.7k
    }
873
874
1.00M
    code = pdfi_read_token(ctx, Object_stream, obj, gen);
875
1.00M
    if (code < 0)
876
6.63k
        goto exit;
877
998k
    if (code == 0) {
878
137
        code = gs_note_error(gs_error_syntaxerror);
879
137
        goto exit;
880
137
    }
881
998k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) {
882
983k
        int start_depth = pdfi_count_stack(ctx);
883
884
        /* Need to read all the elements from COS objects */
885
34.6M
        do {
886
34.6M
            code = pdfi_read_token(ctx, Object_stream, obj, gen);
887
34.6M
            if (code < 0)
888
34.4k
                goto exit;
889
34.6M
            if (code == 0) {
890
6.73k
                code = gs_note_error(gs_error_syntaxerror);
891
6.73k
                goto exit;
892
6.73k
            }
893
34.6M
            if (compressed_stream->eof == true) {
894
478
                code = gs_note_error(gs_error_ioerror);
895
478
                goto exit;
896
478
            }
897
34.6M
        } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
898
983k
    }
899
900
957k
    *object = ctx->stack_top[-1];
901
    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
902
     * the object number for uncompressed objects. So we need to do that here.
903
     */
904
957k
    if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
905
952k
        (*object)->indirect_num = (*object)->object_num = obj;
906
952k
        (*object)->indirect_gen = (*object)->generation_num = gen;
907
952k
        pdfi_countup(*object);
908
952k
    }
909
957k
    pdfi_pop(ctx, 1);
910
911
957k
    if (cache) {
912
922k
        code = pdfi_add_to_cache(ctx, *object);
913
922k
        if (code < 0) {
914
0
            pdfi_countdown(*object);
915
0
            goto exit;
916
0
        }
917
922k
    }
918
919
1.42M
 exit:
920
1.42M
    if (Object_stream)
921
1.00M
        pdfi_close_file(ctx, Object_stream);
922
1.42M
    if (Object_stream != compressed_stream)
923
1.10M
        if (compressed_stream)
924
1.10M
            pdfi_close_file(ctx, compressed_stream);
925
1.42M
    if (SubFile_stream)
926
1.14M
        pdfi_close_file(ctx, SubFile_stream);
927
1.42M
    pdfi_countdown(compressed_object);
928
1.42M
    pdfi_countdown(Type);
929
1.42M
    return code;
930
957k
}
931
932
/* pdf_dereference returns an object with a reference count of at least 1, this represents the
933
 * reference being held by the caller (in **object) when we return from this function.
934
 */
935
static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache)
936
9.22M
{
937
9.22M
    xref_entry *entry;
938
9.22M
    int code, stack_depth = pdfi_count_stack(ctx);
939
9.22M
    gs_offset_t saved_stream_offset;
940
9.22M
    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;
941
942
9.22M
    *object = NULL;
943
944
9.22M
    if (ctx->xref_table == NULL)
945
62
        return_error(gs_error_typecheck);
946
947
9.22M
    if (ctx->main_stream == NULL || ctx->main_stream->s == NULL)
948
0
        return_error(gs_error_ioerror);
949
950
9.22M
    if (obj >= ctx->xref_table->xref_size) {
951
305k
        char extra_info[gp_file_name_sizeof];
952
953
305k
        gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
954
305k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) {
955
0
            return code;
956
0
        }
957
958
305k
        code = pdfi_repair_file(ctx);
959
305k
        if (code < 0) {
960
305k
            *object = NULL;
961
305k
            return code;
962
305k
        }
963
37
        if (obj >= ctx->xref_table->xref_size) {
964
25
            *object = NULL;
965
25
            return_error(gs_error_rangecheck);
966
25
        }
967
37
    }
968
969
8.91M
    entry = &ctx->xref_table->xref[obj];
970
971
8.91M
    if(entry->object_num == 0) {
972
2.12M
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0");
973
2.12M
        return_error(gs_error_undefined);
974
2.12M
    }
975
976
6.78M
    if (entry->free) {
977
7.78k
        char extra_info[gp_file_name_sizeof];
978
979
7.78k
        gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
980
7.78k
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
981
7.78k
        *object = PDF_NULL_OBJ;
982
7.78k
        return code;
983
6.78M
    }else {
984
6.78M
        if (!entry->compressed) {
985
4.92M
            if(entry->u.uncompressed.generation_num != gen)
986
5.22k
                pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", "");
987
4.92M
        }
988
6.78M
    }
989
990
6.78M
    if (ctx->loop_detection) {
991
6.35M
        if (pdfi_loop_detector_check_object(ctx, obj) == true)
992
1.77k
            return_error(gs_error_circular_reference);
993
6.35M
        if (entry->free) {
994
0
            code = pdfi_loop_detector_add_object(ctx, obj);
995
0
            if (code < 0)
996
0
                return code;
997
0
        }
998
6.35M
    }
999
6.77M
    if (entry->cache != NULL){
1000
2.90M
        pdf_obj_cache_entry *cache_entry = entry->cache;
1001
1002
#if CACHE_STATISTICS
1003
        ctx->hits++;
1004
#endif
1005
2.90M
        *object = cache_entry->o;
1006
2.90M
        pdfi_countup(*object);
1007
1008
2.90M
        pdfi_promote_cache_entry(ctx, cache_entry);
1009
3.86M
    } else {
1010
3.86M
        saved_stream_offset = pdfi_unread_tell(ctx);
1011
1012
3.86M
        if (entry->compressed) {
1013
            /* This is an object in a compressed object stream */
1014
1.43M
            ctx->encryption.decrypt_strings = false;
1015
1016
1.43M
            code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache);
1017
1.43M
            if (code < 0 || *object == NULL)
1018
473k
                goto error;
1019
2.43M
        } else {
1020
#if CACHE_STATISTICS
1021
            ctx->misses++;
1022
#endif
1023
2.43M
            ctx->encryption.decrypt_strings = true;
1024
1025
2.43M
            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
1026
2.43M
            if (code < 0)
1027
129
                goto error;
1028
1029
2.43M
            code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset);
1030
1031
            /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it.
1032
             * reload the xref entry to be certain it is valid.
1033
             */
1034
2.43M
            entry = &ctx->xref_table->xref[obj];
1035
2.43M
            if (code < 0) {
1036
352k
                int code1 = 0;
1037
352k
                if (entry->free) {
1038
0
                    char extra_info[gp_file_name_sizeof];
1039
1040
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1041
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1042
0
                    *object = PDF_NULL_OBJ;
1043
0
                    if (code < 0)
1044
0
                        goto error;
1045
0
                    goto free_obj;
1046
0
                }
1047
352k
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
1048
352k
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1049
352k
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1050
1051
352k
                code1 = pdfi_repair_file(ctx);
1052
352k
                if (code1 == 0)
1053
1.96k
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1054
                /* Repair failed, just give up and return an error */
1055
350k
                goto error;
1056
352k
            }
1057
1058
            /* We only expect a single object back when dereferencing an indirect reference
1059
             * The only way (I think) we can end up with more than one is if the object initially
1060
             * appears to be a dictionary or array, but the object terminates (with endobj or
1061
             * simply reaching EOF) without terminating the array or dictionary. That's clearly
1062
             * an error. We might, as a future 'improvement' choose to walk back through
1063
             * the stack looking for unterminated dictionary or array markers, and closing them
1064
             * so that (hopefully!) we end up with a single 'repaired' object on the stack.
1065
             * But for now I'm simply going to treat these as errors. We will try a repair on the
1066
             * file to see if we end up using a different (hopefully intact) object from the file.
1067
             */
1068
2.08M
            if (pdfi_count_stack(ctx) - stack_depth > 1) {
1069
155k
                int code1 = 0;
1070
1071
155k
                code1 = pdfi_repair_file(ctx);
1072
155k
                if (code1 == 0)
1073
613
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1074
                /* Repair failed, just give up and return an error */
1075
154k
                code = gs_note_error(gs_error_syntaxerror);
1076
154k
                goto error;
1077
155k
            }
1078
1079
1.93M
            if (pdfi_count_stack(ctx) > 0 &&
1080
1.93M
                ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) &&
1081
1.93M
                (ctx->stack_top[-1])->object_num == obj)
1082
1.93M
                || ctx->stack_top[-1] == PDF_NULL_OBJ)) {
1083
1.92M
                *object = ctx->stack_top[-1];
1084
1.92M
                pdfi_countup(*object);
1085
1.92M
                pdfi_pop(ctx, 1);
1086
1.92M
                if (pdfi_type_of(*object) == PDF_INDIRECT) {
1087
0
                    pdf_indirect_ref *iref = (pdf_indirect_ref *)*object;
1088
1089
0
                    if (iref->ref_object_num == obj) {
1090
0
                        code = gs_note_error(gs_error_circular_reference);
1091
0
                        pdfi_countdown(*object);
1092
0
                        *object = NULL;
1093
0
                        goto error;
1094
0
                    }
1095
0
                }
1096
                /* There's really no point in caching an indirect reference and
1097
                 * I think it could be potentially confusing to later calls.
1098
                 */
1099
1.92M
                if (cache && pdfi_type_of(*object) != PDF_INDIRECT) {
1100
1.92M
                    code = pdfi_add_to_cache(ctx, *object);
1101
1.92M
                    if (code < 0) {
1102
0
                        pdfi_countdown(*object);
1103
0
                        goto error;
1104
0
                    }
1105
1.92M
                }
1106
1.92M
            } else {
1107
1.88k
                int code1 = 0;
1108
1109
1.88k
                if (pdfi_count_stack(ctx) > 0)
1110
1.75k
                    pdfi_pop(ctx, 1);
1111
1112
1.88k
                if (entry->free) {
1113
0
                    char extra_info[gp_file_name_sizeof];
1114
1115
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1116
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1117
0
                    *object = PDF_NULL_OBJ;
1118
0
                    if (code < 0)
1119
0
                        goto error;
1120
0
                    return code;
1121
0
                }
1122
1.88k
                code1 = pdfi_repair_file(ctx);
1123
1.88k
                if (code1 == 0)
1124
236
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1125
                /* Repair failed, just give up and return an error */
1126
1.64k
                code = gs_note_error(gs_error_undefined);
1127
1.64k
                goto error;
1128
1.88k
            }
1129
1.93M
        }
1130
2.88M
free_obj:
1131
2.88M
        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1132
2.88M
    }
1133
1134
5.79M
    if (ctx->loop_detection && pdf_object_num(*object) != 0) {
1135
5.37M
        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
1136
5.37M
        if (code < 0) {
1137
0
            ctx->encryption.decrypt_strings = saved_decrypt_strings;
1138
0
            return code;
1139
0
        }
1140
5.37M
    }
1141
5.79M
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1142
5.79M
    return 0;
1143
1144
980k
error:
1145
980k
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1146
980k
    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1147
    /* Return the stack to the state at entry */
1148
980k
    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1149
980k
    return code;
1150
5.79M
}
1151
1152
/* Dereference object 'obj' (generation 'gen') and return it in *object.
 * This variant asks pdfi_dereference_main() to add a newly read object to the cache.
 */
int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool add_to_cache = true;

    return pdfi_dereference_main(ctx, obj, gen, object, add_to_cache);
}
1156
1157
/* Dereference object 'obj' (generation 'gen') and return it in *object.
 * This variant asks pdfi_dereference_main() not to add a newly read object to the cache.
 */
int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool add_to_cache = false;

    return pdfi_dereference_main(ctx, obj, gen, object, add_to_cache);
}
1161
1162
/* do a derefence with loop detection */
1163
int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1164
3.43M
{
1165
3.43M
    int code;
1166
1167
3.43M
    code = pdfi_loop_detector_mark(ctx);
1168
3.43M
    if (code < 0)
1169
0
        return code;
1170
1171
3.43M
    code = pdfi_dereference(ctx, obj, gen, object);
1172
3.43M
    (void)pdfi_loop_detector_cleartomark(ctx);
1173
3.43M
    return code;
1174
3.43M
}
1175
1176
/* As pdfi_deref_loop_detect(), but uses pdfi_dereference_nocache() for the
 * dereference. If the mark cannot be set the error is returned without
 * attempting the dereference.
 */
int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    int result;
    int mark_code = pdfi_loop_detector_mark(ctx);

    if (mark_code < 0)
        return mark_code;

    result = pdfi_dereference_nocache(ctx, obj, gen, object);
    /* The result of clearing the mark is deliberately ignored */
    (void)pdfi_loop_detector_cleartomark(ctx);

    return result;
}
1188
1189
/* Replace the elements of an array with the objects fetched for them by
 * pdfi_array_get_no_store_R(). Stream objects are not stored back.
 *
 * ctx     - interpreter context.
 * obj     - the object to process; it is used as a pdf_array.
 * recurse - if true, each fetched element is itself resolved (recursively)
 *           before being stored.
 *
 * Returns the first negative error code encountered (including
 * gs_error_circular_reference), otherwise the non-negative result of the
 * last operation.
 */
static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    uint64_t index, arraysize;
    pdf_obj *object = NULL;
    pdf_array *array = (pdf_array *)obj;

    arraysize = pdfi_array_size(array);
    for (index = 0; index < arraysize; index++) {
        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;    /* 'object' is always NULL here, nothing to release */
        }

        code = pdfi_array_get_no_store_R(ctx, array, index, &object);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Leave via 'exit' so the reference we may hold on 'object' is released */
                code = code1;
                goto exit;
            }
        }

        /* This includes gs_error_circular_reference. Previously we just left
         * the element as an indirect reference, but now we want to return the
         * error so we don't end up replacing indirect references to objects
         * with circular references.
         */
        if (code < 0)
            goto exit;

        if (recurse) {
            code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse);
            if (code < 0)
                goto exit;
        }
        /* don't store the object if it's a stream (leave as a ref) */
        if (pdfi_type_of(object) != PDF_STREAM)
            code = pdfi_array_put(ctx, array, index, object);
        if (code < 0)
            goto exit;

        pdfi_countdown(object);
        object = NULL;
    }

 exit:
    pdfi_countdown(object);
    return code;
}
1237
1238
/* Replace the values of a dictionary with the objects fetched for them by
 * pdfi_dict_get_no_store_R_key(). The "Parent" key is skipped, stream objects
 * are not stored back, and a value which produces a circular reference error
 * is left alone.
 *
 * ctx     - interpreter context.
 * obj     - the object to process; it is used as a pdf_dict.
 * recurse - if true, each fetched value is itself resolved (recursively)
 *           before being stored.
 *
 * Returns the first negative error code encountered (other than a circular
 * reference while fetching a value), otherwise a non-negative value.
 */
static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    pdf_dict *dict = (pdf_dict *)obj;
    pdf_name *Key = NULL;
    pdf_obj *Value = NULL;
    uint64_t index, dictsize;

    dictsize = pdfi_dict_entries(dict);

    /* Note: I am not using pdfi_dict_first/next because of needing to handle
     * circular references.
     */
    for (index=0; index<dictsize; index ++) {
        Key = (pdf_name *)dict->list[index].key;
        if (pdfi_name_is(Key, "Parent"))
            continue;

        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;    /* 'Value' is always NULL here, nothing to release */
        }

        code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Leave via 'exit' so the reference we may hold on 'Value' is released */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Just leave as an indirect ref */
            code = 0;
        } else {
            if (code < 0) goto exit;
            if (recurse) {
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse);
                if (code < 0)
                    goto exit;
            }
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(Value) != PDF_STREAM)
                code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true);
        }
        if (code < 0) goto exit;

        pdfi_countdown(Value);
        Value = NULL;
    }

 exit:
    pdfi_countdown(Value);
    return code;
}
1294
1295
/* Resolve all the indirect references for an object
1296
 * Note: This can be recursive
1297
 */
1298
int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
1299
373k
{
1300
373k
    int code = 0;
1301
1302
373k
    switch(pdfi_type_of(value)) {
1303
90.3k
    case PDF_ARRAY:
1304
90.3k
        code = pdfi_resolve_indirect_array(ctx, value, recurse);
1305
90.3k
        break;
1306
12.7k
    case PDF_DICT:
1307
12.7k
        code = pdfi_resolve_indirect_dict(ctx, value, recurse);
1308
12.7k
        break;
1309
270k
    default:
1310
270k
        break;
1311
373k
    }
1312
373k
    return code;
1313
373k
}
1314
1315
/* Resolve all the indirect references for an object
1316
 * Resolve indirect references, either one level or recursively, with loop detect on
1317
 * the parent (can by NULL) and the value.
1318
 */
1319
int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
1320
373k
{
1321
373k
    int code = 0;
1322
1323
373k
    code = pdfi_loop_detector_mark(ctx);
1324
373k
    if (code < 0) goto exit;
1325
373k
    if (parent && parent->object_num != 0) {
1326
364k
        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
1327
364k
        if (code < 0) goto exit;
1328
364k
    }
1329
1330
373k
    if (pdf_object_num(value) != 0) {
1331
2.14k
        if (pdfi_loop_detector_check_object(ctx, value->object_num)) {
1332
6
            code = gs_note_error(gs_error_circular_reference);
1333
6
            goto exit;
1334
6
        }
1335
2.13k
        code = pdfi_loop_detector_add_object(ctx, value->object_num);
1336
2.13k
        if (code < 0) goto exit;
1337
2.13k
    }
1338
373k
    code = pdfi_resolve_indirect(ctx, value, recurse);
1339
1340
373k
 exit:
1341
373k
    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
1342
373k
    return code;
1343
373k
}