Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/pdf/pdf_deref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* Functions to deal with dereferencing indirect objects
17
 * for the PDF interpreter. In here we also keep the code
18
 * for dealing with the object cache, because the dereferencing
19
 * functions are currently the only place that deals with it.
20
 */
21
22
#include "pdf_int.h"
23
#include "pdf_stack.h"
24
#include "pdf_loop_detect.h"
25
#include "strmio.h"
26
#include "stream.h"
27
#include "pdf_file.h"
28
#include "pdf_misc.h"
29
#include "pdf_dict.h"
30
#include "pdf_array.h"
31
#include "pdf_deref.h"
32
#include "pdf_repair.h"
33
34
/* Start with the object caching functions */
35
/* Disable object caching (for easier debugging with reference counting)
36
 * by uncommenting the following line
37
 */
38
/*#define DISABLE_CACHE*/
39
40
/* given an object, create a cache entry for it. If we have too many entries
41
 * then delete the least-recently-used cache entry. Make the new entry be the
42
 * most-recently-used entry. The actual entries are attached to the xref table
43
 * (as well as being a double-linked list), because we detect an existing
44
 * cache entry by seeing that the xref table for the object number has a non-NULL
45
 * 'cache' member.
46
 * So we need to update the xref as well if we add or delete cache entries.
47
 */
48
static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
49
116k
{
50
116k
#ifndef DISABLE_CACHE
51
116k
    pdf_obj_cache_entry *entry;
52
53
116k
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
54
325
        return 0;
55
56
116k
    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
57
#if DEBUG_CACHE
58
        outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
59
#endif
60
0
        return_error(gs_error_unknownerror);
61
0
    }
62
63
116k
    if (o->object_num > ctx->xref_table->xref_size)
64
0
        return_error(gs_error_rangecheck);
65
66
116k
    if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE)
67
16.2k
    {
68
#if DEBUG_CACHE
69
        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
70
#endif
71
16.2k
        if (ctx->cache_LRU) {
72
16.2k
            entry = ctx->cache_LRU;
73
16.2k
            ctx->cache_LRU = entry->next;
74
16.2k
            if (entry->next)
75
16.2k
                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
76
16.2k
            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
77
16.2k
            pdfi_countdown(entry->o);
78
16.2k
            ctx->cache_entries--;
79
16.2k
            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
80
16.2k
        } else
81
0
            return_error(gs_error_unknownerror);
82
16.2k
    }
83
116k
    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
84
116k
    if (entry == NULL)
85
0
        return_error(gs_error_VMerror);
86
87
116k
    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
88
89
116k
    entry->o = o;
90
116k
    pdfi_countup(o);
91
116k
    if (ctx->cache_MRU) {
92
111k
        entry->previous = ctx->cache_MRU;
93
111k
        ctx->cache_MRU->next = entry;
94
111k
    }
95
116k
    ctx->cache_MRU = entry;
96
116k
    if (ctx->cache_LRU == NULL)
97
4.54k
        ctx->cache_LRU = entry;
98
99
116k
    ctx->cache_entries++;
100
116k
    ctx->xref_table->xref[o->object_num].cache = entry;
101
116k
#endif
102
116k
    return 0;
103
116k
}
104
105
/* Given an existing cache entry, promote it to be the most-recently-used
106
 * cache entry.
107
 */
108
static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
109
233k
{
110
233k
#ifndef DISABLE_CACHE
111
233k
    if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
112
154k
        if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
113
154k
            ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
114
154k
        if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
115
154k
            ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
116
137
        else {
117
            /* the existing entry is the current least recently used, we need to make the 'next'
118
             * cache entry into the LRU.
119
             */
120
137
            ctx->cache_LRU = cache_entry->next;
121
137
        }
122
154k
        cache_entry->next = NULL;
123
154k
        cache_entry->previous = ctx->cache_MRU;
124
154k
        ctx->cache_MRU->next = cache_entry;
125
154k
        ctx->cache_MRU = cache_entry;
126
154k
    }
127
233k
#endif
128
233k
    return;
129
233k
}
130
131
/* This one's a bit of an oddity, it's used for fonts. When we build a PDF font object
132
 * we want the object cache to reference *that* object, not the dictionary which was
133
 * read out of the PDF file, so this allows us to replace the font dictionary in the
134
 * cache with the actual font object, so that later dereferences will get this font
135
 * object.
136
 */
137
int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
138
10.2k
{
139
10.2k
#ifndef DISABLE_CACHE
140
10.2k
    xref_entry *entry;
141
10.2k
    pdf_obj_cache_entry *cache_entry;
142
10.2k
    pdf_obj *old_cached_obj = NULL;
143
144
    /* Limited error checking here, we assume that things like the
145
     * validity of the object (eg not a free object) have already been handled.
146
     */
147
148
10.2k
    entry = &ctx->xref_table->xref[o->object_num];
149
10.2k
    cache_entry = entry->cache;
150
151
10.2k
    if (cache_entry == NULL) {
152
225
        return(pdfi_add_to_cache(ctx, o));
153
10.0k
    } else {
154
        /* NOTE: We grab the object without decrementing, to avoid triggering
155
         * a warning message for freeing an object that's in the cache
156
         */
157
10.0k
        if (cache_entry->o != NULL)
158
10.0k
            old_cached_obj = cache_entry->o;
159
160
        /* Put new entry in the cache */
161
10.0k
        cache_entry->o = o;
162
10.0k
        pdfi_countup(o);
163
10.0k
        pdfi_promote_cache_entry(ctx, cache_entry);
164
165
        /* Now decrement the old cache entry, if any */
166
10.0k
        pdfi_countdown(old_cached_obj);
167
10.0k
    }
168
10.0k
#endif
169
10.0k
    return 0;
170
10.2k
}
171
172
/* Now the dereferencing functions */
173
174
/*
175
 * Technically we can accept a stream other than the main PDF file stream here. This is
176
 * really for the case of compressed objects where we read tokens from the compressed
177
 * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
178
 * on top of the main file stream, which may be useful. Note that this cannot work with
179
 * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
180
 * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
181
 * the stream we are using. See the comments below when keyword is PDF_STREAM.
182
 */
183
184
/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
185
 * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
186
 * Currently errors are impossible. This is only used by the decryption code
187
 * to determine if a string is in a compressed object stream, if it is then
188
 * it can't be used for decryption.
189
 */
190
int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
191
1.44k
{
192
1.44k
    xref_entry *entry;
193
194
    /* Can't possibly be a compressed object before we have finished reading
195
     * the xref.
196
     */
197
1.44k
    if (ctx->xref_table == NULL)
198
0
        return 0;
199
200
1.44k
    entry = &ctx->xref_table->xref[obj];
201
202
1.44k
    if (entry->compressed)
203
0
        return 1;
204
205
1.44k
    return 0;
206
1.44k
}
207
208
/* We should never read a 'stream' keyword from a compressed object stream
209
 * so this case should never end up here.
210
 */
211
static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
212
                                   uint32_t objnum, uint32_t gen)
213
38.6k
{
214
38.6k
    int code = 0;
215
38.6k
    int64_t i;
216
38.6k
    pdf_dict *dict = NULL;
217
38.6k
    gs_offset_t offset;
218
38.6k
    pdf_stream *stream_obj = NULL;
219
220
    /* Strange code time....
221
     * If we are using a stream which is *not* the PDF uncompressed main file stream
222
     * then doing stell on it will only tell us how many bytes have been read from
223
     * that stream, it won't tell us the underlying file position. So we add on the
224
     * 'unread' bytes, *and* we add on the position of the start of the stream in
225
     * the actual main file. This is all done so that we can check the /Length
226
     * of the object. Note that this will *only* work for regular objects it can
227
     * not be used for compressed object streams, but those don't need checking anyway
228
     * they have a different mechanism altogether and should never get here.
229
     */
230
38.6k
    if (s != ctx->main_stream) {
231
0
        offset = stell(s->s) - s->unread_size + stream_offset;
232
0
        code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
233
0
        if (code < 0)
234
0
            return_error(gs_error_ioerror);
235
38.6k
    } else {
236
38.6k
        offset = stell(s->s) - s->unread_size;
237
38.6k
    }
238
239
38.6k
    if (pdfi_count_stack(ctx) < 1)
240
0
        return_error(gs_error_stackunderflow);
241
242
38.6k
    dict = (pdf_dict *)ctx->stack_top[-1];
243
244
38.6k
    if (pdfi_type_of(dict) != PDF_DICT) {
245
90
        pdfi_pop(ctx, 1);
246
90
        return_error(gs_error_syntaxerror);
247
90
    }
248
249
38.5k
    dict->indirect_num = dict->object_num = objnum;
250
38.5k
    dict->indirect_gen = dict->generation_num = gen;
251
252
    /* Convert the dict into a stream */
253
38.5k
    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
254
38.5k
    if (code < 0) {
255
0
        pdfi_pop(ctx, 1);
256
0
        return code;
257
0
    }
258
    /* Pop off the dict and push the stream */
259
38.5k
    pdfi_pop(ctx, 1);
260
38.5k
    dict = NULL;
261
38.5k
    pdfi_push(ctx, (pdf_obj *)stream_obj);
262
263
38.5k
    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
264
38.5k
    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
265
38.5k
    stream_obj->stream_offset = offset;
266
267
    /* Exceptional code. Normally we do not need to worry about detecting circular references
268
     * when reading objects, because we do not dereference any indirect objects. However streams
269
     * are a slight exception in that we do get the Length from the stream dictionary and if that
270
     * is an indirect reference, then we dereference it.
271
     * OSS-fuzz bug 43247 has a stream where the value associated with the /Length is an indirect
272
     * reference to the same stream object, and leads to infinite recursion. So deal with that
273
     * possibility here.
274
     */
275
38.5k
    code = pdfi_loop_detector_mark(ctx);
276
38.5k
    if (code < 0) {
277
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
278
0
        return code;
279
0
    }
280
38.5k
    if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
281
9
        pdfi_countdown(stream_obj); /* get rid of extra ref */
282
9
        pdfi_loop_detector_cleartomark(ctx);
283
9
        return_error(gs_error_circular_reference);
284
9
    }
285
286
38.5k
    code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
287
38.5k
    if (code < 0) {
288
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
289
0
        pdfi_loop_detector_cleartomark(ctx);
290
0
        return code;
291
0
    }
292
293
    /* This code may be a performance overhead, it simply skips over the stream contents
294
     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
295
     * 'go faster' flag for users who are certain their PDF files are well-formed. This
296
     * could also allow us to skip all kinds of other checking.....
297
     */
298
299
38.5k
    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
300
38.5k
    if (code < 0) {
301
890
        char extra_info[gp_file_name_sizeof];
302
303
890
        (void)pdfi_loop_detector_cleartomark(ctx);
304
890
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
305
890
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
306
890
        pdfi_countdown(stream_obj); /* get rid of extra ref */
307
890
        return code;
308
890
    }
309
37.6k
    code = pdfi_loop_detector_cleartomark(ctx);
310
37.6k
    if (code < 0) {
311
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
312
0
        return code;
313
0
    }
314
315
37.6k
    if (i < 0 || (i + offset)> ctx->main_stream_length) {
316
2.34k
        char extra_info[gp_file_name_sizeof];
317
318
2.34k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
319
2.34k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) {
320
0
            pdfi_pop(ctx, 1);
321
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
322
0
            return code;
323
0
        }
324
35.3k
    } else {
325
35.3k
        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
326
35.3k
        if (code < 0) {
327
0
            pdfi_pop(ctx, 1);
328
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
329
0
            return code;
330
0
        }
331
332
35.3k
        stream_obj->Length = 0;
333
35.3k
        stream_obj->length_valid = false;
334
335
35.3k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
336
35.3k
        if (code == 0) {
337
0
            char extra_info[gp_file_name_sizeof];
338
339
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
340
0
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
341
            /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
342
             * happens give up.
343
             */
344
0
            if (pdfi_count_stack(ctx) == 0) {
345
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
346
0
                return code;
347
0
            }
348
35.3k
        } else if (code < 0) {
349
0
            char extra_info[gp_file_name_sizeof];
350
351
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
352
0
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) {
353
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
354
0
                return code;
355
0
            }
356
35.3k
        } else if (code != TOKEN_ENDSTREAM) {
357
4.38k
            char extra_info[gp_file_name_sizeof];
358
359
4.38k
            gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
360
4.38k
            if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) {
361
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
362
0
                return code;
363
0
            }
364
30.9k
        } else {
365
            /* Cache the Length in the stream object and mark it valid */
366
30.9k
            stream_obj->Length = i;
367
30.9k
            stream_obj->length_valid = true;
368
30.9k
        }
369
35.3k
    }
370
371
    /* If we failed to find a valid object, or the object wasn't a keyword, or the
372
     * keyword wasn't 'endstream' then the Length is wrong. We need to have the correct
373
     * Length for streams if we have encrypted files, because we must install a
374
     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
375
     * Rather than mess about checking for encryption, we'll choose to just correctly
376
     * calculate the Length of all streams. Although this takes time, it will only
377
     * happen for files which are invalid.
378
     */
379
37.6k
    if (stream_obj->length_valid != true) {
380
6.73k
        char Buffer[10];
381
6.73k
        unsigned int bytes, total = 0;
382
6.73k
        int c = 0;
383
384
6.73k
        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
385
6.73k
        if (code < 0) {
386
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
387
0
            pdfi_pop(ctx, 1);
388
0
            return code;
389
0
        }
390
6.73k
        memset(Buffer, 0x00, 10);
391
6.73k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
392
6.73k
        if (bytes < 9) {
393
9
            pdfi_countdown(stream_obj); /* get rid of extra ref */
394
9
            return_error(gs_error_ioerror);
395
9
        }
396
397
6.72k
        total = bytes;
398
132M
        do {
399
132M
            if (memcmp(Buffer, "endstream", 9) == 0) {
400
4.50k
                if (Buffer[9] != 0x00)
401
4.50k
                    total--;
402
4.50k
                stream_obj->Length = total - 9;
403
4.50k
                stream_obj->length_valid = true;
404
4.50k
                break;
405
4.50k
            }
406
132M
            if (memcmp(Buffer, "endobj", 6) == 0) {
407
323
                if (Buffer[9] != 0x00)
408
316
                    total--;
409
323
                stream_obj->Length = total - 6;
410
323
                stream_obj->length_valid = true;
411
323
                break;
412
323
            }
413
132M
            memmove(Buffer, Buffer+1, 9);
414
132M
            c = pdfi_read_byte(ctx, ctx->main_stream);
415
132M
            if (c < 0)
416
1.89k
                break;
417
132M
            Buffer[9] = (byte)c;
418
132M
            total++;
419
132M
        } while(1);
420
6.72k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
421
6.72k
        if (c < 0)
422
1.89k
            return_error(gs_error_ioerror);
423
4.82k
        return 0;
424
6.72k
    }
425
426
30.9k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
427
30.9k
    if (code < 0) {
428
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
429
0
        if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) {
430
0
            return code;
431
0
        }
432
        /* Something went wrong looking for endobj, but we found endstream, so assume
433
         * for now that will suffice.
434
         */
435
0
        return 0;
436
0
    }
437
438
30.9k
    if (code == 0) {
439
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
440
0
        return_error(gs_error_stackunderflow);
441
0
    }
442
443
30.9k
    if (code != TOKEN_ENDOBJ) {
444
124
        pdfi_countdown(stream_obj); /* get rid of extra ref */
445
124
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
446
        /* Didn't find an endobj, but we have an endstream, so assume
447
         * for now that will suffice
448
         */
449
124
        return code;
450
124
    }
451
30.7k
    pdfi_countdown(stream_obj); /* get rid of extra ref */
452
453
30.7k
    return 0;
454
30.9k
}
455
456
/* This reads an object *after* the x y obj keyword has been found. It's broken out
457
 * separately for the benefit of the repair code when reading the dictionary following
458
 * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
459
 * not have an 'endobj', we rely on the error handling to take care of that for us.
460
 */
461
int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
462
131k
{
463
131k
    int code = 0, initial_depth = 0;
464
131k
    pdf_key keyword;
465
131k
    gs_offset_t saved_offset[3];
466
131k
    pdf_obj_type type;
467
468
131k
    initial_depth = pdfi_count_stack(ctx);
469
131k
    saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
470
471
131k
    code = pdfi_read_token(ctx, s, objnum, gen);
472
131k
    if (code < 0)
473
260
        return code;
474
475
131k
    if (code == 0)
476
        /* failed to read a token */
477
3
        return_error(gs_error_syntaxerror);
478
479
131k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
480
1.02k
        keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
481
1.02k
        if (keyword == TOKEN_ENDOBJ) {
482
74
            ctx->stack_top[-1] = PDF_NULL_OBJ;
483
74
            return 0;
484
74
        }
485
1.02k
    }
486
487
4.61M
    do {
488
        /* move all the saved offsets up by one */
489
4.61M
        saved_offset[0] = saved_offset[1];
490
4.61M
        saved_offset[1] = saved_offset[2];
491
4.61M
        saved_offset[2] = pdfi_unread_tell(ctx);
492
493
4.61M
        code = pdfi_read_token(ctx, s, objnum, gen);
494
4.61M
        if (code < 0) {
495
12.8k
            pdfi_clearstack(ctx);
496
12.8k
            return code;
497
12.8k
        }
498
4.60M
        if (s->eof)
499
180
            return_error(gs_error_syntaxerror);
500
4.60M
        code = 0;
501
4.60M
        type = pdfi_type_of(ctx->stack_top[-1]);
502
4.60M
        if (type == PDF_KEYWORD)
503
6.19k
            goto missing_endobj;
504
4.60M
    } while (type != PDF_FAST_KEYWORD);
505
506
112k
    keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
507
112k
    if (keyword == TOKEN_ENDOBJ) {
508
69.9k
        pdf_obj *o;
509
510
69.9k
        if (pdfi_count_stack(ctx) - initial_depth < 2) {
511
78
            pdfi_clearstack(ctx);
512
78
            return_error(gs_error_stackunderflow);
513
78
        }
514
515
69.8k
        o = ctx->stack_top[-2];
516
517
69.8k
        pdfi_pop(ctx, 1);
518
519
69.8k
        if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
520
69.8k
            o->indirect_num = o->object_num = objnum;
521
69.8k
            o->indirect_gen = o->generation_num = gen;
522
69.8k
        }
523
69.8k
        return code;
524
69.9k
    }
525
42.1k
    if (keyword == TOKEN_STREAM) {
526
38.6k
        pdfi_pop(ctx, 1);
527
38.6k
        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
528
38.6k
    }
529
3.51k
    if (keyword == TOKEN_OBJ) {
530
310
        pdf_obj *o;
531
532
310
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) {
533
0
            return code;
534
0
        }
535
536
        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
537
310
        if (pdfi_count_stack(ctx) - initial_depth < 4)
538
76
            return_error(gs_error_stackunderflow);
539
540
        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
541
234
        o = ctx->stack_top[-4];
542
543
234
        pdfi_pop(ctx, 3);
544
545
234
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
546
233
            o->indirect_num = o->object_num = objnum;
547
233
            o->indirect_gen = o->generation_num = gen;
548
233
        }
549
234
        if (saved_offset[0] > 0)
550
234
            (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
551
234
        return 0;
552
310
    }
553
554
9.39k
missing_endobj:
555
    /* Assume that any other keyword means a missing 'endobj' */
556
9.39k
    if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) {
557
9.39k
        pdf_obj *o;
558
559
9.39k
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
560
561
9.39k
        if (pdfi_count_stack(ctx) - initial_depth < 2)
562
90
            return_error(gs_error_stackunderflow);
563
564
9.30k
        o = ctx->stack_top[-2];
565
566
9.30k
        pdfi_pop(ctx, 1);
567
568
9.30k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
569
9.20k
            o->indirect_num = o->object_num = objnum;
570
9.20k
            o->indirect_gen = o->generation_num = gen;
571
9.20k
        }
572
9.30k
        return code;
573
9.39k
    }
574
0
    pdfi_pop(ctx, 2);
575
0
    return_error(gs_error_syntaxerror);
576
9.39k
}
577
578
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
579
129k
{
580
129k
    int code = 0;
581
129k
    int objnum = 0, gen = 0;
582
583
    /* An object consists of 'num gen obj' followed by a token, followed by an endobj
584
     * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we
585
     * want to deal with it specially by getting the Length, jumping to the end and checking
586
     * for an endobj. Or not, possibly, because it would be slow.
587
     */
588
129k
    code = pdfi_read_bare_int(ctx, s, &objnum);
589
129k
    if (code < 0)
590
808
        return code;
591
128k
    if (code == 0)
592
690
        return_error(gs_error_syntaxerror);
593
594
127k
    code = pdfi_read_bare_int(ctx, s, &gen);
595
127k
    if (code < 0)
596
43
        return code;
597
127k
    if (code == 0)
598
14
        return_error(gs_error_syntaxerror);
599
600
127k
    code = pdfi_read_bare_keyword(ctx, s);
601
127k
    if (code < 0)
602
0
        return code;
603
127k
    if (code == 0)
604
0
        return gs_note_error(gs_error_ioerror);
605
127k
    if (code != TOKEN_OBJ) {
606
187
        return_error(gs_error_syntaxerror);
607
187
    }
608
609
127k
    return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
610
127k
}
611
612
static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
613
                                 const xref_entry *entry, bool cache)
614
40.3k
{
615
40.3k
    int code = 0;
616
40.3k
    xref_entry *compressed_entry;
617
40.3k
    pdf_c_stream *compressed_stream = NULL;
618
40.3k
    pdf_c_stream *SubFile_stream = NULL;
619
40.3k
    pdf_c_stream *Object_stream = NULL;
620
40.3k
    int i = 0, object_length = 0;
621
40.3k
    int64_t num_entries;
622
40.3k
    int found_object;
623
40.3k
    int64_t Length, First;
624
40.3k
    gs_offset_t offset = 0;
625
40.3k
    pdf_stream *compressed_object = NULL;
626
40.3k
    pdf_dict *compressed_sdict = NULL; /* alias */
627
40.3k
    pdf_name *Type = NULL;
628
629
40.3k
    if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1)
630
13
        return_error(gs_error_undefined);
631
632
40.3k
    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
633
634
40.3k
    if (ctx->args.pdfdebug) {
635
0
        outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj);
636
0
        outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num);
637
0
    }
638
639
40.3k
    if (compressed_entry->cache == NULL) {
640
#if CACHE_STATISTICS
641
        ctx->compressed_misses++;
642
#endif
643
2.86k
        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
644
2.86k
        if (code < 0)
645
0
            goto exit;
646
647
2.86k
        code = pdfi_read_object(ctx, ctx->main_stream, 0);
648
2.86k
        if (code < 0)
649
801
            goto exit;
650
651
2.06k
        if (pdfi_count_stack(ctx) < 1) {
652
0
            code = gs_note_error(gs_error_stackunderflow);
653
0
            goto exit;
654
0
        }
655
656
2.06k
        if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) {
657
275
            pdfi_pop(ctx, 1);
658
275
            code = gs_note_error(gs_error_typecheck);
659
275
            goto exit;
660
275
        }
661
1.79k
        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
662
21
            pdfi_pop(ctx, 1);
663
            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
664
21
            code = gs_note_error(gs_error_undefined);
665
21
            goto exit;
666
21
        }
667
1.77k
        compressed_object = (pdf_stream *)ctx->stack_top[-1];
668
1.77k
        pdfi_countup(compressed_object);
669
1.77k
        pdfi_pop(ctx, 1);
670
1.77k
        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
671
1.77k
        if (code < 0)
672
0
            goto exit;
673
37.4k
    } else {
674
#if CACHE_STATISTICS
675
        ctx->compressed_hits++;
676
#endif
677
37.4k
        compressed_object = (pdf_stream *)compressed_entry->cache->o;
678
37.4k
        pdfi_countup(compressed_object);
679
37.4k
        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
680
37.4k
    }
681
39.2k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
682
39.2k
    if (code < 0)
683
0
        return code;
684
685
39.2k
    if (ctx->loop_detection != NULL) {
686
39.2k
        code = pdfi_loop_detector_mark(ctx);
687
39.2k
        if (code < 0)
688
0
            goto exit;
689
39.2k
        if (compressed_sdict->object_num != 0) {
690
39.2k
            if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) {
691
15
                code = gs_note_error(gs_error_circular_reference);
692
39.2k
            } else {
693
39.2k
                code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num);
694
39.2k
            }
695
39.2k
            if (code < 0) {
696
15
                (void)pdfi_loop_detector_cleartomark(ctx);
697
15
                goto exit;
698
15
            }
699
39.2k
        }
700
39.2k
    }
701
    /* Check its an ObjStm ! */
702
39.2k
    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
703
39.2k
    if (code < 0) {
704
27
        if (ctx->loop_detection != NULL)
705
27
            (void)pdfi_loop_detector_cleartomark(ctx);
706
27
        goto exit;
707
27
    }
708
709
39.2k
    if (!pdfi_name_is(Type, "ObjStm")){
710
11
        if (ctx->loop_detection != NULL)
711
11
            (void)pdfi_loop_detector_cleartomark(ctx);
712
11
        code = gs_note_error(gs_error_syntaxerror);
713
11
        goto exit;
714
11
    }
715
716
    /* Need to check the /N entry to see if the object is actually in this stream! */
717
39.1k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
718
39.1k
    if (code < 0) {
719
30
        if (ctx->loop_detection != NULL)
720
30
            (void)pdfi_loop_detector_cleartomark(ctx);
721
30
        goto exit;
722
30
    }
723
724
39.1k
    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
725
0
        if (ctx->loop_detection != NULL)
726
0
            (void)pdfi_loop_detector_cleartomark(ctx);
727
0
        code = gs_note_error(gs_error_rangecheck);
728
0
        goto exit;
729
0
    }
730
731
39.1k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
732
39.1k
    if (code < 0) {
733
21.6k
        if (ctx->loop_detection != NULL)
734
21.6k
            (void)pdfi_loop_detector_cleartomark(ctx);
735
21.6k
        goto exit;
736
21.6k
    }
737
738
17.4k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First);
739
17.4k
    if (code < 0) {
740
68
        if (ctx->loop_detection != NULL)
741
68
            (void)pdfi_loop_detector_cleartomark(ctx);
742
68
        goto exit;
743
68
    }
744
745
17.4k
    if (ctx->loop_detection != NULL)
746
17.4k
        (void)pdfi_loop_detector_cleartomark(ctx);
747
748
17.4k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
749
17.4k
    if (code < 0)
750
0
        goto exit;
751
752
17.4k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
753
17.4k
    if (code < 0)
754
0
        goto exit;
755
756
17.4k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
757
17.4k
    if (code < 0)
758
134
        goto exit;
759
760
832k
    for (i=0;i < num_entries;i++)
761
816k
    {
762
816k
        int new_offset;
763
816k
        code = pdfi_read_bare_int(ctx, compressed_stream, &found_object);
764
816k
        if (code < 0)
765
682
            goto exit;
766
816k
        if (code == 0) {
767
9
            code = gs_note_error(gs_error_syntaxerror);
768
9
            goto exit;
769
9
        }
770
816k
        code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset);
771
816k
        if (code < 0)
772
331
            goto exit;
773
815k
        if (code == 0) {
774
5
            code = gs_note_error(gs_error_syntaxerror);
775
5
            goto exit;
776
5
        }
777
815k
        if (i == entry->u.compressed.object_index) {
778
16.3k
            if (found_object != obj) {
779
831
                code = gs_note_error(gs_error_undefined);
780
831
                goto exit;
781
831
            }
782
15.5k
            offset = new_offset;
783
15.5k
        }
784
814k
        if (i == entry->u.compressed.object_index + 1)
785
14.0k
            object_length = new_offset - offset;
786
814k
    }
787
788
    /* Bug #705259 - The first object need not lie immediately after the initial
789
     * table of object numbers and offsets. The start of the first object is given
790
     * by the value of First. We don't know how many bytes we consumed getting to
791
     * the end of the table, unfortunately, so we close the stream, rewind the main
792
     * stream back to the beginning of the ObjStm, and then read and discard 'First'
793
     * bytes in order to get to the start of the first object. Then we read the
794
     * number of bytes required to get from there to the start of the object we
795
     * actually want.
796
     * If this ever looks like it's causing performance problems we could read the
797
     * initial table above manually instead of using the existing code, and track
798
     * how many bytes we'd read, which would avoid us having to tear down and
799
     * rebuild the stream.
800
     */
801
15.4k
    if (compressed_stream)
802
15.4k
        pdfi_close_file(ctx, compressed_stream);
803
15.4k
    if (SubFile_stream)
804
15.4k
        pdfi_close_file(ctx, SubFile_stream);
805
806
15.4k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
807
15.4k
    if (code < 0)
808
0
        goto exit;
809
810
    /* We already dereferenced this above, so we don't need the loop detection checking here */
811
15.4k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
812
15.4k
    if (code < 0)
813
0
        goto exit;
814
815
15.4k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
816
15.4k
    if (code < 0)
817
0
        goto exit;
818
819
15.4k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
820
15.4k
    if (code < 0)
821
0
        goto exit;
822
823
6.69M
    for (i=0;i < First;i++)
824
6.67M
    {
825
6.67M
        int c = pdfi_read_byte(ctx, compressed_stream);
826
6.67M
        if (c < 0) {
827
4
            code = gs_note_error(gs_error_ioerror);
828
4
            goto exit;
829
4
        }
830
6.67M
    }
831
832
    /* Skip to the offset of the object we want to read */
833
47.5M
    for (i=0;i < offset;i++)
834
47.5M
    {
835
47.5M
        int c = pdfi_read_byte(ctx, compressed_stream);
836
47.5M
        if (c < 0) {
837
2.00k
            code = gs_note_error(gs_error_ioerror);
838
2.00k
            goto exit;
839
2.00k
        }
840
47.5M
    }
841
842
    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
843
     * the number of bytes we read to the declared size of the object (difference between
844
     * the offsets of the object we want to read, and the next object). If it is 0 then
845
     * we're reading the last object in the stream, so we just rely on the SubFileDecode
846
     * we set up when we created compressed_stream to limit the bytes to the length of
847
     * that stream.
848
     */
849
13.4k
    if (object_length > 0) {
850
12.0k
        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
851
12.0k
        if (code < 0)
852
0
            goto exit;
853
12.0k
    } else {
854
1.41k
        Object_stream = compressed_stream;
855
1.41k
    }
856
857
13.4k
    code = pdfi_read_token(ctx, Object_stream, obj, gen);
858
13.4k
    if (code < 0)
859
340
        goto exit;
860
13.0k
    if (code == 0) {
861
13
        code = gs_note_error(gs_error_syntaxerror);
862
13
        goto exit;
863
13
    }
864
13.0k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) {
865
12.5k
        int start_depth = pdfi_count_stack(ctx);
866
867
        /* Need to read all the elements from COS objects */
868
493k
        do {
869
493k
            code = pdfi_read_token(ctx, Object_stream, obj, gen);
870
493k
            if (code < 0)
871
706
                goto exit;
872
492k
            if (code == 0) {
873
73
                code = gs_note_error(gs_error_syntaxerror);
874
73
                goto exit;
875
73
            }
876
492k
            if (compressed_stream->eof == true) {
877
14
                code = gs_note_error(gs_error_ioerror);
878
14
                goto exit;
879
14
            }
880
492k
        } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
881
12.5k
    }
882
883
12.2k
    *object = ctx->stack_top[-1];
884
    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
885
     * the object number for uncompressed objects. So we need to do that here.
886
     */
887
12.2k
    if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
888
12.0k
        (*object)->indirect_num = (*object)->object_num = obj;
889
12.0k
        (*object)->indirect_gen = (*object)->generation_num = gen;
890
12.0k
        pdfi_countup(*object);
891
12.0k
    }
892
12.2k
    pdfi_pop(ctx, 1);
893
894
12.2k
    if (cache) {
895
10.3k
        code = pdfi_add_to_cache(ctx, *object);
896
10.3k
        if (code < 0) {
897
0
            pdfi_countdown(*object);
898
0
            goto exit;
899
0
        }
900
10.3k
    }
901
902
40.3k
 exit:
903
40.3k
    if (Object_stream)
904
13.4k
        pdfi_close_file(ctx, Object_stream);
905
40.3k
    if (Object_stream != compressed_stream)
906
15.8k
        if (compressed_stream)
907
15.8k
            pdfi_close_file(ctx, compressed_stream);
908
40.3k
    if (SubFile_stream)
909
17.4k
        pdfi_close_file(ctx, SubFile_stream);
910
40.3k
    pdfi_countdown(compressed_object);
911
40.3k
    pdfi_countdown(Type);
912
40.3k
    return code;
913
12.2k
}
914
915
/* pdf_dereference returns an object with a reference count of at least 1, this represents the
916
 * reference being held by the caller (in **object) when we return from this function.
917
 */
918
static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache)
919
556k
{
920
556k
    xref_entry *entry;
921
556k
    int code, stack_depth = pdfi_count_stack(ctx);
922
556k
    gs_offset_t saved_stream_offset;
923
556k
    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;
924
925
556k
    *object = NULL;
926
927
556k
    if (ctx->xref_table == NULL)
928
4
        return_error(gs_error_typecheck);
929
930
556k
    if (ctx->main_stream == NULL || ctx->main_stream->s == NULL)
931
0
        return_error(gs_error_ioerror);
932
933
556k
    if (obj >= ctx->xref_table->xref_size) {
934
11.6k
        char extra_info[gp_file_name_sizeof];
935
936
11.6k
        gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
937
11.6k
        if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) {
938
0
            return code;
939
0
        }
940
941
11.6k
        code = pdfi_repair_file(ctx);
942
11.6k
        if (code < 0) {
943
11.6k
            *object = NULL;
944
11.6k
            return code;
945
11.6k
        }
946
2
        if (obj >= ctx->xref_table->xref_size) {
947
1
            *object = NULL;
948
1
            return_error(gs_error_rangecheck);
949
1
        }
950
2
    }
951
952
545k
    entry = &ctx->xref_table->xref[obj];
953
954
545k
    if(entry->object_num == 0) {
955
192k
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0");
956
192k
        return_error(gs_error_undefined);
957
192k
    }
958
959
353k
    if (entry->free) {
960
282
        char extra_info[gp_file_name_sizeof];
961
962
282
        gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
963
282
        code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
964
282
        *object = PDF_NULL_OBJ;
965
282
        return code;
966
352k
    }else {
967
352k
        if (!entry->compressed) {
968
297k
            if(entry->u.uncompressed.generation_num != gen)
969
932
                pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", "");
970
297k
        }
971
352k
    }
972
973
352k
    if (ctx->loop_detection) {
974
343k
        if (pdfi_loop_detector_check_object(ctx, obj) == true)
975
502
            return_error(gs_error_circular_reference);
976
343k
        if (entry->free) {
977
0
            code = pdfi_loop_detector_add_object(ctx, obj);
978
0
            if (code < 0)
979
0
                return code;
980
0
        }
981
343k
    }
982
352k
    if (entry->cache != NULL){
983
185k
        pdf_obj_cache_entry *cache_entry = entry->cache;
984
985
#if CACHE_STATISTICS
986
        ctx->hits++;
987
#endif
988
185k
        *object = cache_entry->o;
989
185k
        pdfi_countup(*object);
990
991
185k
        pdfi_promote_cache_entry(ctx, cache_entry);
992
185k
    } else {
993
166k
        saved_stream_offset = pdfi_unread_tell(ctx);
994
995
166k
        if (entry->compressed) {
996
            /* This is an object in a compressed object stream */
997
40.3k
            ctx->encryption.decrypt_strings = false;
998
999
40.3k
            code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache);
1000
40.3k
            if (code < 0 || *object == NULL)
1001
28.0k
                goto error;
1002
126k
        } else {
1003
#if CACHE_STATISTICS
1004
            ctx->misses++;
1005
#endif
1006
126k
            ctx->encryption.decrypt_strings = true;
1007
1008
126k
            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
1009
126k
            if (code < 0)
1010
3
                goto error;
1011
1012
126k
            code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset);
1013
1014
            /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it.
1015
             * reload the xref entry to be certain it is valid.
1016
             */
1017
126k
            entry = &ctx->xref_table->xref[obj];
1018
126k
            if (code < 0) {
1019
15.9k
                int code1 = 0;
1020
15.9k
                if (entry->free) {
1021
0
                    char extra_info[gp_file_name_sizeof];
1022
1023
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1024
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1025
0
                    *object = PDF_NULL_OBJ;
1026
0
                    if (code < 0)
1027
0
                        goto error;
1028
0
                    goto free_obj;
1029
0
                }
1030
15.9k
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
1031
15.9k
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1032
15.9k
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1033
1034
15.9k
                code1 = pdfi_repair_file(ctx);
1035
15.9k
                if (code1 == 0)
1036
94
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1037
                /* Repair failed, just give up and return an error */
1038
15.8k
                goto error;
1039
15.9k
            }
1040
1041
            /* We only expect a single object back when dereferencing an indirect reference
1042
             * The only way (I think) we can end up with more than one is if the object initially
1043
             * appears to be a dictionary or array, but the object terminates (with endobj or
1044
             * simply reaching EOF) without terminating the array or dictionary. That's clearly
1045
             * an error. We might, as a future 'improvement' choose to walk back through
1046
             * the stack looking for unterminated dictionary or array markers, and closing them
1047
             * so that (hopefully!) we end up with a single 'repaired' object on the stack.
1048
             * But for now I'm simply going to treat these as errors. We will try a repair on the
1049
             * file to see if we end up using a different (hopefully intact) object from the file.
1050
             */
1051
110k
            if (pdfi_count_stack(ctx) - stack_depth > 1) {
1052
5.61k
                int code1 = 0;
1053
1054
5.61k
                code1 = pdfi_repair_file(ctx);
1055
5.61k
                if (code1 == 0)
1056
37
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1057
                /* Repair failed, just give up and return an error */
1058
5.58k
                code = gs_note_error(gs_error_syntaxerror);
1059
5.58k
                goto error;
1060
5.61k
            }
1061
1062
104k
            if (pdfi_count_stack(ctx) > 0 &&
1063
104k
                ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) &&
1064
104k
                (ctx->stack_top[-1])->object_num == obj)
1065
104k
                || ctx->stack_top[-1] == PDF_NULL_OBJ)) {
1066
104k
                *object = ctx->stack_top[-1];
1067
104k
                pdfi_countup(*object);
1068
104k
                pdfi_pop(ctx, 1);
1069
104k
                if (pdfi_type_of(*object) == PDF_INDIRECT) {
1070
0
                    pdf_indirect_ref *iref = (pdf_indirect_ref *)*object;
1071
1072
0
                    if (iref->ref_object_num == obj) {
1073
0
                        code = gs_note_error(gs_error_circular_reference);
1074
0
                        pdfi_countdown(*object);
1075
0
                        *object = NULL;
1076
0
                        goto error;
1077
0
                    }
1078
0
                }
1079
                /* There's really no point in caching an indirect reference and
1080
                 * I think it could be potentially confusing to later calls.
1081
                 */
1082
104k
                if (cache && pdfi_type_of(*object) != PDF_INDIRECT) {
1083
104k
                    code = pdfi_add_to_cache(ctx, *object);
1084
104k
                    if (code < 0) {
1085
0
                        pdfi_countdown(*object);
1086
0
                        goto error;
1087
0
                    }
1088
104k
                }
1089
104k
            } else {
1090
122
                int code1 = 0;
1091
1092
122
                if (pdfi_count_stack(ctx) > 0)
1093
102
                    pdfi_pop(ctx, 1);
1094
1095
122
                if (entry->free) {
1096
0
                    char extra_info[gp_file_name_sizeof];
1097
1098
0
                    gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num);
1099
0
                    code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
1100
0
                    *object = PDF_NULL_OBJ;
1101
0
                    if (code < 0)
1102
0
                        goto error;
1103
0
                    return code;
1104
0
                }
1105
122
                code1 = pdfi_repair_file(ctx);
1106
122
                if (code1 == 0)
1107
17
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
1108
                /* Repair failed, just give up and return an error */
1109
105
                code = gs_note_error(gs_error_undefined);
1110
105
                goto error;
1111
122
            }
1112
104k
        }
1113
116k
free_obj:
1114
116k
        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1115
116k
    }
1116
1117
302k
    if (ctx->loop_detection && pdf_object_num(*object) != 0) {
1118
293k
        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
1119
293k
        if (code < 0) {
1120
0
            ctx->encryption.decrypt_strings = saved_decrypt_strings;
1121
0
            return code;
1122
0
        }
1123
293k
    }
1124
302k
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1125
302k
    return 0;
1126
1127
49.5k
error:
1128
49.5k
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1129
49.5k
    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1130
    /* Return the stack to the state at entry */
1131
49.5k
    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1132
49.5k
    return code;
1133
302k
}
1134
1135
/* Dereference object 'obj' (generation 'gen'), returning the result in *object.
 * This is pdfi_dereference_main() with caching of the result requested.
 */
int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = true;
    int result = pdfi_dereference_main(ctx, obj, gen, object, use_cache);

    return result;
}
1139
1140
/* Dereference object 'obj' (generation 'gen'), returning the result in *object.
 * This is pdfi_dereference_main() with caching of the result not requested.
 */
int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = false;
    int result = pdfi_dereference_main(ctx, obj, gen, object, use_cache);

    return result;
}
1144
1145
/* do a derefence with loop detection */
1146
int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1147
228k
{
1148
228k
    int code;
1149
1150
228k
    code = pdfi_loop_detector_mark(ctx);
1151
228k
    if (code < 0)
1152
0
        return code;
1153
1154
228k
    code = pdfi_dereference(ctx, obj, gen, object);
1155
228k
    (void)pdfi_loop_detector_cleartomark(ctx);
1156
228k
    return code;
1157
228k
}
1158
1159
/* As pdfi_deref_loop_detect(), but the dereference is done through
 * pdfi_dereference_nocache() so the result is not added to the object cache.
 */
int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    int result = pdfi_loop_detector_mark(ctx);

    if (result >= 0) {
        result = pdfi_dereference_nocache(ctx, obj, gen, object);
        (void)pdfi_loop_detector_cleartomark(ctx);
    }
    return result;
}
1171
1172
/* Replace the indirect references held in an array with the objects they refer to.
 *
 * ctx     - the interpreter context.
 * obj     - the array to process (cast to pdf_array without checking; the caller,
 *           pdfi_resolve_indirect(), only passes PDF_ARRAY objects).
 * recurse - if true, each fetched element is itself resolved via
 *           pdfi_resolve_indirect_loop_detect().
 *
 * Each element is fetched with pdfi_array_get_no_store_R() inside its own loop
 * detection mark (when loop detection is active). Streams are not stored back
 * into the array, they are left as references. Any error, including a circular
 * reference, stops processing and is returned.
 */
static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    uint64_t index, arraysize;
    pdf_obj *object = NULL;
    pdf_array *array = (pdf_array *)obj;

    arraysize = pdfi_array_size(array);
    for (index = 0; index < arraysize; index++) {
        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;    /* 'object' is always NULL at the top of the loop */
        }

        code = pdfi_array_get_no_store_R(ctx, array, index, &object);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Leave via 'exit' rather than returning directly, so that the
                 * reference we may now hold in 'object' is released.
                 */
                code = code1;
                goto exit;
            }
        }

        if (code != gs_error_circular_reference) {
            if (code < 0) goto exit;
            if (recurse) {
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse);
                if (code < 0) goto exit;
            }
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(object) != PDF_STREAM)
                code = pdfi_array_put(ctx, array, index, object);
        }
        /* Previously a circular reference was just left as an indirect reference,
         * but now we want to return the error so we don't end up replacing indirect
         * references to objects with circular references. The error code is
         * therefore left in 'code' and caught here.
         */
        if (code < 0) goto exit;

        pdfi_countdown(object);
        object = NULL;
    }

 exit:
    pdfi_countdown(object);
    return code;
}
1220
1221
/* Replace the indirect references held as values in a dictionary with the
 * objects they refer to.
 *
 * ctx     - the interpreter context.
 * obj     - the dictionary to process (cast to pdf_dict without checking; the
 *           caller, pdfi_resolve_indirect(), only passes PDF_DICT objects).
 * recurse - if true, each fetched value is itself resolved via
 *           pdfi_resolve_indirect_loop_detect().
 *
 * The /Parent key is always skipped. Each value is fetched with
 * pdfi_dict_get_no_store_R_key() inside its own loop detection mark (when loop
 * detection is active). A circular reference is not an error here: the value is
 * simply left as an indirect reference. Streams are likewise left as references.
 * Any other error stops processing and is returned.
 */
static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    pdf_dict *dict = (pdf_dict *)obj;
    pdf_name *Key = NULL;
    pdf_obj *Value = NULL;
    uint64_t index, dictsize;

    dictsize = pdfi_dict_entries(dict);

    /* Note: I am not using pdfi_dict_first/next because of needing to handle
     * circular references.
     */
    for (index=0; index<dictsize; index ++) {
        Key = (pdf_name *)dict->list[index].key;
        if (pdfi_name_is(Key, "Parent"))
            continue;

        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;    /* 'Value' is always NULL at the top of the loop */
        }

        code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Leave via 'exit' rather than returning directly, so that the
                 * reference we may now hold in 'Value' is released.
                 */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Just leave as an indirect ref */
            code = 0;
        } else {
            if (code < 0) goto exit;
            if (recurse) {
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse);
                if (code < 0)
                    goto exit;
            }
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(Value) != PDF_STREAM)
                code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true);
        }
        if (code < 0) goto exit;

        pdfi_countdown(Value);
        Value = NULL;
    }

 exit:
    pdfi_countdown(Value);
    return code;
}
1277
1278
/* Resolve all the indirect references for an object
1279
 * Note: This can be recursive
1280
 */
1281
int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
1282
0
{
1283
0
    int code = 0;
1284
1285
0
    switch(pdfi_type_of(value)) {
1286
0
    case PDF_ARRAY:
1287
0
        code = pdfi_resolve_indirect_array(ctx, value, recurse);
1288
0
        break;
1289
0
    case PDF_DICT:
1290
0
        code = pdfi_resolve_indirect_dict(ctx, value, recurse);
1291
0
        break;
1292
0
    default:
1293
0
        break;
1294
0
    }
1295
0
    return code;
1296
0
}
1297
1298
/* Resolve all the indirect references for an object
1299
 * Resolve indirect references, either one level or recursively, with loop detect on
1300
 * the parent (can by NULL) and the value.
1301
 */
1302
int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
1303
0
{
1304
0
    int code = 0;
1305
1306
0
    code = pdfi_loop_detector_mark(ctx);
1307
0
    if (code < 0) goto exit;
1308
0
    if (parent && parent->object_num != 0) {
1309
0
        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
1310
0
        if (code < 0) goto exit;
1311
0
    }
1312
1313
0
    if (pdf_object_num(value) != 0) {
1314
0
        if (pdfi_loop_detector_check_object(ctx, value->object_num)) {
1315
0
            code = gs_note_error(gs_error_circular_reference);
1316
0
            goto exit;
1317
0
        }
1318
0
        code = pdfi_loop_detector_add_object(ctx, value->object_num);
1319
0
        if (code < 0) goto exit;
1320
0
    }
1321
0
    code = pdfi_resolve_indirect(ctx, value, recurse);
1322
1323
0
 exit:
1324
0
    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
1325
0
    return code;
1326
0
}