Coverage Report

Created: 2022-10-31 07:00

/src/ghostpdl/pdf/pdf_deref.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2022 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
/* Functions to deal with dereferencing indirect objects
17
 * for the PDF interpreter. In here we also keep the code
18
 * for dealing with the object cache, because the dereferencing
19
 * functions are currently the only place that deals with it.
20
 */
21
22
#include "pdf_int.h"
23
#include "pdf_stack.h"
24
#include "pdf_loop_detect.h"
25
#include "strmio.h"
26
#include "stream.h"
27
#include "pdf_file.h"
28
#include "pdf_misc.h"
29
#include "pdf_dict.h"
30
#include "pdf_array.h"
31
#include "pdf_deref.h"
32
#include "pdf_repair.h"
33
34
/* Start with the object caching functions */
35
36
/* given an object, create a cache entry for it. If we have too many entries
37
 * then delete the leat-recently-used cache entry. Make the new entry be the
38
 * most-recently-used entry. The actual entries are attached to the xref table
39
 * (as well as being a double-linked list), because we detect an existing
40
 * cache entry by seeing that the xref table for the object number has a non-NULL
41
 * 'cache' member.
42
 * So we need to update the xref as well if we add or delete cache entries.
43
 */
44
static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
45
1.54M
{
46
1.54M
    pdf_obj_cache_entry *entry;
47
48
1.54M
    if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
49
1.29k
        return 0;
50
51
1.54M
    if (ctx->xref_table->xref[o->object_num].cache != NULL) {
52
#if DEBUG_CACHE
53
        dmprintf1(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
54
#endif
55
0
        return_error(gs_error_unknownerror);
56
0
    }
57
58
1.54M
    if (o->object_num > ctx->xref_table->xref_size)
59
0
        return_error(gs_error_rangecheck);
60
61
1.54M
    if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE)
62
596k
    {
63
#if DEBUG_CACHE
64
        dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
65
#endif
66
596k
        if (ctx->cache_LRU) {
67
596k
            entry = ctx->cache_LRU;
68
596k
            ctx->cache_LRU = entry->next;
69
596k
            if (entry->next)
70
596k
                ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
71
596k
            ctx->xref_table->xref[entry->o->object_num].cache = NULL;
72
596k
            pdfi_countdown(entry->o);
73
596k
            ctx->cache_entries--;
74
596k
            gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
75
596k
        } else
76
0
            return_error(gs_error_unknownerror);
77
596k
    }
78
1.54M
    entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
79
1.54M
    if (entry == NULL)
80
0
        return_error(gs_error_VMerror);
81
82
1.54M
    memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
83
84
1.54M
    entry->o = o;
85
1.54M
    pdfi_countup(o);
86
1.54M
    if (ctx->cache_MRU) {
87
1.51M
        entry->previous = ctx->cache_MRU;
88
1.51M
        ctx->cache_MRU->next = entry;
89
1.51M
    }
90
1.54M
    ctx->cache_MRU = entry;
91
1.54M
    if (ctx->cache_LRU == NULL)
92
26.2k
        ctx->cache_LRU = entry;
93
94
1.54M
    ctx->cache_entries++;
95
1.54M
    ctx->xref_table->xref[o->object_num].cache = entry;
96
1.54M
    return 0;
97
1.54M
}
98
99
/* Given an existing cache entry, promote it to be the most-recently-used
100
 * cache entry.
101
 */
102
static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
103
2.31M
{
104
2.31M
    if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
105
2.01M
        if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
106
2.01M
            ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
107
2.01M
        if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
108
2.01M
            ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
109
277
        else {
110
            /* the existing entry is the current least recently used, we need to make the 'next'
111
             * cache entry into the LRU.
112
             */
113
277
            ctx->cache_LRU = cache_entry->next;
114
277
        }
115
2.01M
        cache_entry->next = NULL;
116
2.01M
        cache_entry->previous = ctx->cache_MRU;
117
2.01M
        ctx->cache_MRU->next = cache_entry;
118
2.01M
        ctx->cache_MRU = cache_entry;
119
2.01M
    }
120
2.31M
    return;
121
2.31M
}
122
123
/* This one's a bit of an oddity, its used for fonts. When we build a PDF font object
124
 * we want the object cache to reference *that* object, not the dictionary which was
125
 * read out of the PDF file, so this allows us to replace the font dictionary in the
126
 * cache with the actual font object, so that later dereferences will get this font
127
 * object.
128
 */
129
int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
130
57.0k
{
131
57.0k
    xref_entry *entry;
132
57.0k
    pdf_obj_cache_entry *cache_entry;
133
57.0k
    pdf_obj *old_cached_obj = NULL;
134
135
    /* Limited error checking here, we assume that things like the
136
     * validity of the object (eg not a free oobject) have already been handled.
137
     */
138
139
57.0k
    entry = &ctx->xref_table->xref[o->object_num];
140
57.0k
    cache_entry = entry->cache;
141
142
57.0k
    if (cache_entry == NULL) {
143
0
        return(pdfi_add_to_cache(ctx, o));
144
57.0k
    } else {
145
        /* NOTE: We grab the object without decrementing, to avoid triggering
146
         * a warning message for freeing an object that's in the cache
147
         */
148
57.0k
        if (cache_entry->o != NULL)
149
57.0k
            old_cached_obj = cache_entry->o;
150
151
        /* Put new entry in the cache */
152
57.0k
        cache_entry->o = o;
153
57.0k
        pdfi_countup(o);
154
57.0k
        pdfi_promote_cache_entry(ctx, cache_entry);
155
156
        /* Now decrement the old cache entry, if any */
157
57.0k
        pdfi_countdown(old_cached_obj);
158
57.0k
    }
159
57.0k
    return 0;
160
57.0k
}
161
162
/* Now the dereferencing functions */
163
164
/*
165
 * Technically we can accept a stream other than the main PDF file stream here. This is
166
 * really for the case of compressed objects where we read tokens from the compressed
167
 * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode
168
 * on top of the main file stream, which may be useful. Note that this cannot work with
169
 * objects in compressed object streams! They should always pass a value of 0 for the stream_offset.
170
 * The stream_offset is the offset from the start of the underlying uncompressed PDF file of
171
 * the stream we are using. See the comments below when keyword is PDF_STREAM.
172
 */
173
174
/* Determine if a PDF object is in a compressed ObjStm. Returns < 0
175
 * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
176
 * Currently errors are inmpossible. This is only used by the decryption code
177
 * to determine if a string is in a compressed object stream, if it is then
178
 * it can't be used for decryption.
179
 */
180
int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
181
3.42k
{
182
3.42k
    xref_entry *entry;
183
184
    /* Can't possibly be a compressed object before we have finished reading
185
     * the xref.
186
     */
187
3.42k
    if (ctx->xref_table == NULL)
188
0
        return 0;
189
190
3.42k
    entry = &ctx->xref_table->xref[obj];
191
192
3.42k
    if (entry->compressed)
193
0
        return 1;
194
195
3.42k
    return 0;
196
3.42k
}
197
198
/* We should never read a 'stream' keyword from a compressed object stream
199
 * so this case should never end up here.
200
 */
201
static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
202
                                   uint32_t objnum, uint32_t gen)
203
383k
{
204
383k
    int code = 0;
205
383k
    int64_t i;
206
383k
    pdf_dict *dict = NULL;
207
383k
    gs_offset_t offset;
208
383k
    pdf_stream *stream_obj = NULL;
209
210
    /* Strange code time....
211
     * If we are using a stream which is *not* the PDF uncompressed main file stream
212
     * then doing stell on it will only tell us how many bytes have been read from
213
     * that stream, it won't tell us the underlying file position. So we add on the
214
     * 'unread' bytes, *and* we add on the position of the start of the stream in
215
     * the actual main file. This is all done so that we can check the /Length
216
     * of the object. Note that this will *only* work for regular objects it can
217
     * not be used for compressed object streams, but those don't need checking anyway
218
     * they have a different mechanism altogether and should never get here.
219
     */
220
383k
    offset = stell(s->s) - s->unread_size + stream_offset;
221
383k
    code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
222
223
383k
    if (pdfi_count_stack(ctx) < 1)
224
0
        return_error(gs_error_stackunderflow);
225
226
383k
    dict = (pdf_dict *)ctx->stack_top[-1];
227
228
383k
    if (pdfi_type_of(dict) != PDF_DICT) {
229
474
        pdfi_pop(ctx, 1);
230
474
        return_error(gs_error_syntaxerror);
231
474
    }
232
233
383k
    dict->indirect_num = dict->object_num = objnum;
234
383k
    dict->indirect_gen = dict->generation_num = gen;
235
236
    /* Convert the dict into a stream */
237
383k
    code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
238
383k
    if (code < 0) {
239
0
        pdfi_pop(ctx, 1);
240
0
        return code;
241
0
    }
242
    /* Pop off the dict and push the stream */
243
383k
    pdfi_pop(ctx, 1);
244
383k
    dict = NULL;
245
383k
    pdfi_push(ctx, (pdf_obj *)stream_obj);
246
247
383k
    stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
248
383k
    stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
249
383k
    stream_obj->stream_offset = offset;
250
251
    /* Exceptional code. Normally we do not need to worry about detecting circular references
252
     * when reading objects, because we do not dereference any indirect objects. However streams
253
     * are a slight exception in that we do get the Length from the stream dictionay and if that
254
     * is an indirect reference, then we dereference it.
255
     * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect
256
     * reference to the same stream object, and leads to infinite recursion. So deal with that
257
     * possibility here.
258
     */
259
383k
    code = pdfi_loop_detector_mark(ctx);
260
383k
    if (code < 0) {
261
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
262
0
        return code;
263
0
    }
264
383k
    if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
265
30
        pdfi_countdown(stream_obj); /* get rid of extra ref */
266
30
        pdfi_loop_detector_cleartomark(ctx);
267
30
        return_error(gs_error_circular_reference);
268
30
    }
269
270
383k
    code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
271
383k
    if (code < 0) {
272
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
273
0
        pdfi_loop_detector_cleartomark(ctx);
274
0
        return code;
275
0
    }
276
277
    /* This code may be a performance overhead, it simply skips over the stream contents
278
     * and checks that the stream ends with a 'endstream endobj' pair. We could add a
279
     * 'go faster' flag for users who are certain their PDF files are well-formed. This
280
     * could also allow us to skip all kinds of other checking.....
281
     */
282
283
383k
    code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
284
383k
    if (code < 0) {
285
4.49k
        char extra_info[gp_file_name_sizeof];
286
287
4.49k
        (void)pdfi_loop_detector_cleartomark(ctx);
288
4.49k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
289
4.49k
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
290
4.49k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
291
4.49k
        return 0;
292
4.49k
    }
293
378k
    code = pdfi_loop_detector_cleartomark(ctx);
294
378k
    if (code < 0) {
295
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
296
0
        return code;
297
0
    }
298
299
378k
    if (i < 0 || (i + offset)> ctx->main_stream_length) {
300
4.41k
        char extra_info[gp_file_name_sizeof];
301
302
4.41k
        gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
303
4.41k
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
304
374k
    } else {
305
374k
        code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
306
374k
        if (code < 0) {
307
0
            pdfi_pop(ctx, 1);
308
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
309
0
            return code;
310
0
        }
311
312
374k
        stream_obj->Length = 0;
313
374k
        stream_obj->length_valid = false;
314
315
374k
        code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
316
374k
        if (code == 0) {
317
0
            char extra_info[gp_file_name_sizeof];
318
319
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
320
0
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
321
            /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
322
             * happens give up.
323
             */
324
0
            if (pdfi_count_stack(ctx) == 0) {
325
0
                pdfi_countdown(stream_obj); /* get rid of extra ref */
326
0
                return code;
327
0
            }
328
374k
        } else if (code < 0) {
329
0
            char extra_info[gp_file_name_sizeof];
330
331
0
            gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
332
0
            pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info);
333
374k
        } else if (code != TOKEN_ENDSTREAM) {
334
26.0k
            char extra_info[gp_file_name_sizeof];
335
336
26.0k
            gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
337
26.0k
            pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
338
348k
        } else {
339
            /* Cache the Length in the stream object and mark it valid */
340
348k
            stream_obj->Length = i;
341
348k
            stream_obj->length_valid = true;
342
348k
        }
343
374k
    }
344
345
    /* If we failed to find a valid object, or the object wasn't a keyword, or the
346
     * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct
347
     * Length for streams if we have encrypted files, because we must install a
348
     * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
349
     * Rather than mess about checking for encryption, we'll choose to just correctly
350
     * calculate the Length of all streams. Although this takes time, it will only
351
     * happen for files which are invalid.
352
     */
353
378k
    if (stream_obj->length_valid != true) {
354
30.4k
        char Buffer[10];
355
30.4k
        unsigned int bytes, total = 0;
356
30.4k
        int c = 0;
357
358
30.4k
        code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
359
30.4k
        if (code < 0) {
360
0
            pdfi_countdown(stream_obj); /* get rid of extra ref */
361
0
            pdfi_pop(ctx, 1);
362
0
            return code;
363
0
        }
364
30.4k
        memset(Buffer, 0x00, 10);
365
30.4k
        bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
366
30.4k
        if (bytes < 9) {
367
3.50k
            pdfi_countdown(stream_obj); /* get rid of extra ref */
368
3.50k
            return_error(gs_error_ioerror);
369
3.50k
        }
370
371
26.9k
        total = bytes;
372
680M
        do {
373
680M
            if (memcmp(Buffer, "endstream", 9) == 0) {
374
23.5k
                stream_obj->Length = total - 9;
375
23.5k
                stream_obj->length_valid = true;
376
23.5k
                break;
377
23.5k
            }
378
680M
            if (memcmp(Buffer, "endobj", 6) == 0) {
379
1.86k
                stream_obj->Length = total - 6;
380
1.86k
                stream_obj->length_valid = true;
381
1.86k
                break;
382
1.86k
            }
383
680M
            memmove(Buffer, Buffer+1, 9);
384
680M
            c = pdfi_read_byte(ctx, ctx->main_stream);
385
680M
            if (c < 0)
386
1.52k
                break;
387
680M
            Buffer[9] = (byte)c;
388
680M
            total++;
389
680M
        } while(1);
390
26.9k
        pdfi_countdown(stream_obj); /* get rid of extra ref */
391
26.9k
        if (c < 0)
392
1.52k
            return_error(gs_error_ioerror);
393
25.4k
        return 0;
394
26.9k
    }
395
396
348k
    code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
397
348k
    if (code < 0) {
398
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
399
0
        if (ctx->args.pdfstoponerror)
400
0
            return code;
401
0
        else
402
            /* Something went wrong looking for endobj, but we found endstream, so assume
403
             * for now that will suffice.
404
             */
405
0
            pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
406
0
        return 0;
407
0
    }
408
409
348k
    if (code == 0) {
410
0
        pdfi_countdown(stream_obj); /* get rid of extra ref */
411
0
        return_error(gs_error_stackunderflow);
412
0
    }
413
414
348k
    if (code != TOKEN_ENDOBJ) {
415
589
        pdfi_countdown(stream_obj); /* get rid of extra ref */
416
589
        if (ctx->args.pdfstoponerror)
417
0
            return_error(gs_error_typecheck);
418
589
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
419
        /* Didn't find an endobj, but we have an endstream, so assume
420
         * for now that will suffice
421
         */
422
589
        return 0;
423
589
    }
424
347k
    pdfi_countdown(stream_obj); /* get rid of extra ref */
425
426
347k
    return 0;
427
348k
}
428
429
/* This reads an object *after* the x y obj keyword has been found. Its broken out
430
 * separately for the benefit of the repair code when reading the dictionary following
431
 * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
432
 * not have an 'endobj', we rely on the error handling to take care of that for us.
433
 */
434
int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
435
797k
{
436
797k
    int code = 0, initial_depth = 0;
437
797k
    pdf_key keyword;
438
797k
    gs_offset_t saved_offset[3];
439
797k
    pdf_obj_type type;
440
441
797k
    initial_depth = pdfi_count_stack(ctx);
442
797k
    saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
443
444
797k
    code = pdfi_read_token(ctx, s, objnum, gen);
445
797k
    if (code < 0)
446
1.44k
        return code;
447
448
795k
    if (code == 0)
449
        /* failed to read a token */
450
24
        return_error(gs_error_syntaxerror);
451
452
21.9M
    do {
453
        /* move all the saved offsets up by one */
454
21.9M
        saved_offset[0] = saved_offset[1];
455
21.9M
        saved_offset[1] = saved_offset[2];
456
21.9M
        saved_offset[2] = pdfi_unread_tell(ctx);
457
458
21.9M
        code = pdfi_read_token(ctx, s, objnum, gen);
459
21.9M
        if (code < 0) {
460
32.9k
            pdfi_clearstack(ctx);
461
32.9k
            return code;
462
32.9k
        }
463
21.8M
        if (s->eof)
464
442
            return_error(gs_error_syntaxerror);
465
21.8M
        code = 0;
466
21.8M
        type = pdfi_type_of(ctx->stack_top[-1]);
467
21.8M
        if (type == PDF_KEYWORD)
468
14.6k
            goto missing_endobj;
469
21.8M
    } while (type != PDF_FAST_KEYWORD);
470
471
747k
    keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
472
747k
    if (keyword == TOKEN_ENDOBJ) {
473
351k
        pdf_obj *o;
474
475
351k
        if (pdfi_count_stack(ctx) - initial_depth < 2) {
476
29
            pdfi_clearstack(ctx);
477
29
            return_error(gs_error_stackunderflow);
478
29
        }
479
480
351k
        o = ctx->stack_top[-2];
481
482
351k
        pdfi_pop(ctx, 1);
483
484
351k
        if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
485
351k
            o->indirect_num = o->object_num = objnum;
486
351k
            o->indirect_gen = o->generation_num = gen;
487
351k
        }
488
351k
        return code;
489
351k
    }
490
396k
    if (keyword == TOKEN_STREAM) {
491
383k
        pdfi_pop(ctx, 1);
492
383k
        return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
493
383k
    }
494
12.9k
    if (keyword == TOKEN_OBJ) {
495
802
        pdf_obj *o;
496
497
802
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
498
499
        /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
500
802
        if (pdfi_count_stack(ctx) - initial_depth < 4)
501
320
            return_error(gs_error_stackunderflow);
502
503
        /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
504
482
        o = ctx->stack_top[-4];
505
506
482
        pdfi_pop(ctx, 3);
507
508
482
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
509
438
            o->indirect_num = o->object_num = objnum;
510
438
            o->indirect_gen = o->generation_num = gen;
511
438
        }
512
482
        if (saved_offset[0] > 0)
513
482
            (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
514
482
        return 0;
515
802
    }
516
517
26.7k
missing_endobj:
518
    /* Assume that any other keyword means a missing 'endobj' */
519
26.7k
    if (!ctx->args.pdfstoponerror) {
520
26.7k
        pdf_obj *o;
521
522
26.7k
        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
523
524
26.7k
        if (pdfi_count_stack(ctx) - initial_depth < 2)
525
456
            return_error(gs_error_stackunderflow);
526
527
26.3k
        o = ctx->stack_top[-2];
528
529
26.3k
        pdfi_pop(ctx, 1);
530
531
26.3k
        if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
532
26.0k
            o->indirect_num = o->object_num = objnum;
533
26.0k
            o->indirect_gen = o->generation_num = gen;
534
26.0k
        }
535
26.3k
        return code;
536
26.7k
    }
537
0
    pdfi_pop(ctx, 2);
538
0
    return_error(gs_error_syntaxerror);
539
26.7k
}
540
541
/* Read a complete indirect object from 's'.
 *
 * An object consists of 'num gen obj' followed by a token, followed by an
 * 'endobj'. A stream dictionary may be followed by 'stream' instead, which
 * pdfi_read_bare_object() handles. This function consumes the 'num gen obj'
 * prefix and delegates the rest.
 *
 * Returns the result of pdfi_read_bare_object(), or a negative gs_error
 * code if the prefix could not be read: gs_error_syntaxerror when either
 * number is missing or the keyword is not 'obj', gs_error_ioerror when no
 * keyword could be read at all.
 */
static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
{
    int objnum = 0, gen = 0;
    int code;

    /* Object number */
    code = pdfi_read_bare_int(ctx, s, &objnum);
    if (code <= 0)
        return (code < 0) ? code : gs_note_error(gs_error_syntaxerror);

    /* Generation number */
    code = pdfi_read_bare_int(ctx, s, &gen);
    if (code <= 0)
        return (code < 0) ? code : gs_note_error(gs_error_syntaxerror);

    /* The 'obj' keyword */
    code = pdfi_read_bare_keyword(ctx, s);
    if (code <= 0)
        return (code < 0) ? code : gs_note_error(gs_error_ioerror);
    if (code != TOKEN_OBJ)
        return_error(gs_error_syntaxerror);

    return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
}
574
575
static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object,
576
                                 const xref_entry *entry, bool cache)
577
897k
{
578
897k
    int code = 0;
579
897k
    xref_entry *compressed_entry;
580
897k
    pdf_c_stream *compressed_stream = NULL;
581
897k
    pdf_c_stream *SubFile_stream = NULL;
582
897k
    pdf_c_stream *Object_stream = NULL;
583
897k
    int i = 0, object_length = 0;
584
897k
    int64_t num_entries;
585
897k
    int found_object;
586
897k
    int64_t Length, First;
587
897k
    gs_offset_t offset = 0;
588
897k
    pdf_stream *compressed_object = NULL;
589
897k
    pdf_dict *compressed_sdict = NULL; /* alias */
590
897k
    pdf_name *Type = NULL;
591
592
897k
    if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1)
593
49
        return_error(gs_error_undefined);
594
595
897k
    compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num];
596
597
897k
    if (ctx->args.pdfdebug) {
598
0
        dmprintf1(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj);
599
0
        dmprintf1(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num);
600
0
    }
601
602
897k
    if (compressed_entry->cache == NULL) {
603
#if CACHE_STATISTICS
604
        ctx->compressed_misses++;
605
#endif
606
40.0k
        code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET);
607
40.0k
        if (code < 0)
608
0
            goto exit;
609
610
40.0k
        code = pdfi_read_object(ctx, ctx->main_stream, 0);
611
40.0k
        if (code < 0)
612
5.23k
            goto exit;
613
614
34.7k
        if (pdfi_count_stack(ctx) < 1) {
615
0
            code = gs_note_error(gs_error_stackunderflow);
616
0
            goto exit;
617
0
        }
618
619
34.7k
        if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) {
620
981
            pdfi_pop(ctx, 1);
621
981
            code = gs_note_error(gs_error_typecheck);
622
981
            goto exit;
623
981
        }
624
33.8k
        if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) {
625
25
            pdfi_pop(ctx, 1);
626
            /* Same error (undefined) as when we read an uncompressed object with the wrong number */
627
25
            code = gs_note_error(gs_error_undefined);
628
25
            goto exit;
629
25
        }
630
33.7k
        compressed_object = (pdf_stream *)ctx->stack_top[-1];
631
33.7k
        pdfi_countup(compressed_object);
632
33.7k
        pdfi_pop(ctx, 1);
633
33.7k
        code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object);
634
33.7k
        if (code < 0)
635
0
            goto exit;
636
857k
    } else {
637
#if CACHE_STATISTICS
638
        ctx->compressed_hits++;
639
#endif
640
857k
        compressed_object = (pdf_stream *)compressed_entry->cache->o;
641
857k
        pdfi_countup(compressed_object);
642
857k
        pdfi_promote_cache_entry(ctx, compressed_entry->cache);
643
857k
    }
644
891k
    code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict);
645
891k
    if (code < 0)
646
0
        return code;
647
648
891k
    if (ctx->loop_detection != NULL) {
649
882k
        code = pdfi_loop_detector_mark(ctx);
650
882k
        if (code < 0)
651
0
            goto exit;
652
882k
        if (compressed_sdict->object_num != 0) {
653
882k
            if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) {
654
45
                code = gs_note_error(gs_error_circular_reference);
655
882k
            } else {
656
882k
                code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num);
657
882k
            }
658
882k
            if (code < 0) {
659
45
                (void)pdfi_loop_detector_cleartomark(ctx);
660
45
                goto exit;
661
45
            }
662
882k
        }
663
882k
    }
664
    /* Check its an ObjStm ! */
665
891k
    code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type);
666
891k
    if (code < 0) {
667
227
        if (ctx->loop_detection != NULL)
668
227
            (void)pdfi_loop_detector_cleartomark(ctx);
669
227
        goto exit;
670
227
    }
671
672
891k
    if (!pdfi_name_is(Type, "ObjStm")){
673
22
        if (ctx->loop_detection != NULL)
674
22
            (void)pdfi_loop_detector_cleartomark(ctx);
675
22
        code = gs_note_error(gs_error_syntaxerror);
676
22
        goto exit;
677
22
    }
678
679
    /* Need to check the /N entry to see if the object is actually in this stream! */
680
891k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries);
681
891k
    if (code < 0) {
682
10
        if (ctx->loop_detection != NULL)
683
10
            (void)pdfi_loop_detector_cleartomark(ctx);
684
10
        goto exit;
685
10
    }
686
687
891k
    if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) {
688
6
        if (ctx->loop_detection != NULL)
689
6
            (void)pdfi_loop_detector_cleartomark(ctx);
690
6
        code = gs_note_error(gs_error_rangecheck);
691
6
        goto exit;
692
6
    }
693
694
891k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
695
891k
    if (code < 0) {
696
6.97k
        if (ctx->loop_detection != NULL)
697
6.97k
            (void)pdfi_loop_detector_cleartomark(ctx);
698
6.97k
        goto exit;
699
6.97k
    }
700
701
884k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First);
702
884k
    if (code < 0) {
703
221
        if (ctx->loop_detection != NULL)
704
221
            (void)pdfi_loop_detector_cleartomark(ctx);
705
221
        goto exit;
706
221
    }
707
708
883k
    if (ctx->loop_detection != NULL)
709
875k
        (void)pdfi_loop_detector_cleartomark(ctx);
710
711
883k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
712
883k
    if (code < 0)
713
0
        goto exit;
714
715
883k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
716
883k
    if (code < 0)
717
0
        goto exit;
718
719
883k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
720
883k
    if (code < 0)
721
126
        goto exit;
722
723
55.7M
    for (i=0;i < num_entries;i++)
724
54.8M
    {
725
54.8M
        int new_offset;
726
54.8M
        code = pdfi_read_bare_int(ctx, compressed_stream, &found_object);
727
54.8M
        if (code < 0)
728
2.69k
            goto exit;
729
54.8M
        if (code == 0) {
730
21
            code = gs_note_error(gs_error_syntaxerror);
731
21
            goto exit;
732
21
        }
733
54.8M
        code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset);
734
54.8M
        if (code < 0)
735
2.03k
            goto exit;
736
54.8M
        if (code == 0) {
737
303
            code = gs_note_error(gs_error_syntaxerror);
738
303
            goto exit;
739
303
        }
740
54.8M
        if (i == entry->u.compressed.object_index) {
741
881k
            if (found_object != obj) {
742
325
                code = gs_note_error(gs_error_undefined);
743
325
                goto exit;
744
325
            }
745
880k
            offset = new_offset;
746
880k
        }
747
54.8M
        if (i == entry->u.compressed.object_index + 1)
748
858k
            object_length = new_offset - offset;
749
54.8M
    }
750
751
    /* Bug #705259 - The first object need not lie immediately after the initial
752
     * table of object numbers and offsets. The start of the first object is given
753
     * by the value of First. We don't know how many bytes we consumed getting to
754
     * the end of the table, unfortunately, so we close the stream, rewind the main
755
     * stream back to the beginning of the ObjStm, and then read and discard 'First'
756
     * bytes in order to get to the start of the first object. Then we read the
757
     * number of bytes required to get from there to the start of the object we
758
     * actually want.
759
     * If this ever looks like it's causing performance problems we could read the
760
     * initial table above manually instead of using the existing code, and track
761
     * how many bytes we'd read, which would avoid us having to tear down and
762
     * rebuild the stream.
763
     */
764
878k
    if (compressed_stream)
765
878k
        pdfi_close_file(ctx, compressed_stream);
766
878k
    if (SubFile_stream)
767
878k
        pdfi_close_file(ctx, SubFile_stream);
768
769
878k
    code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET);
770
878k
    if (code < 0)
771
0
        goto exit;
772
773
    /* We already dereferenced this above, so we don't need the loop detection checking here */
774
878k
    code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length);
775
878k
    if (code < 0)
776
0
        goto exit;
777
778
878k
    code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false);
779
878k
    if (code < 0)
780
0
        goto exit;
781
782
878k
    code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false);
783
878k
    if (code < 0)
784
0
        goto exit;
785
786
478M
    for (i=0;i < First;i++)
787
477M
    {
788
477M
        int c = pdfi_read_byte(ctx, compressed_stream);
789
477M
        if (c < 0) {
790
7
            code = gs_note_error(gs_error_ioerror);
791
7
            goto exit;
792
7
        }
793
477M
    }
794
795
    /* Skip to the offset of the object we want to read */
796
2.96G
    for (i=0;i < offset;i++)
797
2.96G
    {
798
2.96G
        int c = pdfi_read_byte(ctx, compressed_stream);
799
2.96G
        if (c < 0) {
800
49.6k
            code = gs_note_error(gs_error_ioerror);
801
49.6k
            goto exit;
802
49.6k
        }
803
2.96G
    }
804
805
    /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit
806
     * the number of bytes we read to the declared size of the object (difference between
807
     * the offsets of the object we want to read, and the next object). If it is 0 then
808
     * we're reading the last object in the stream, so we just rely on the SubFileDecode
809
     * we set up when we created compressed_stream to limit the bytes to the length of
810
     * that stream.
811
     */
812
828k
    if (object_length > 0) {
813
807k
        code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false);
814
807k
        if (code < 0)
815
0
            goto exit;
816
807k
    } else {
817
21.4k
        Object_stream = compressed_stream;
818
21.4k
    }
819
820
828k
    code = pdfi_read_token(ctx, Object_stream, obj, gen);
821
828k
    if (code < 0)
822
1.77k
        goto exit;
823
826k
    if (code == 0) {
824
7
        code = gs_note_error(gs_error_syntaxerror);
825
7
        goto exit;
826
7
    }
827
826k
    if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) {
828
820k
        int start_depth = pdfi_count_stack(ctx);
829
830
        /* Need to read all the elements from COS objects */
831
28.6M
        do {
832
28.6M
            code = pdfi_read_token(ctx, Object_stream, obj, gen);
833
28.6M
            if (code < 0)
834
9.96k
                goto exit;
835
28.6M
            if (code == 0) {
836
4.02k
                code = gs_note_error(gs_error_syntaxerror);
837
4.02k
                goto exit;
838
4.02k
            }
839
28.6M
            if (compressed_stream->eof == true) {
840
334
                code = gs_note_error(gs_error_ioerror);
841
334
                goto exit;
842
334
            }
843
28.6M
        } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth);
844
820k
    }
845
846
812k
    *object = ctx->stack_top[-1];
847
    /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets
848
     * the object number for uncompressed objects. So we need to do that here.
849
     */
850
812k
    if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
851
811k
        (*object)->indirect_num = (*object)->object_num = obj;
852
811k
        (*object)->indirect_gen = (*object)->generation_num = gen;
853
811k
        pdfi_countup(*object);
854
811k
    }
855
812k
    pdfi_pop(ctx, 1);
856
857
812k
    if (cache) {
858
809k
        code = pdfi_add_to_cache(ctx, *object);
859
809k
        if (code < 0) {
860
0
            pdfi_countdown(*object);
861
0
            goto exit;
862
0
        }
863
809k
    }
864
865
897k
 exit:
866
897k
    if (Object_stream)
867
828k
        pdfi_close_file(ctx, Object_stream);
868
897k
    if (Object_stream != compressed_stream)
869
862k
        if (compressed_stream)
870
862k
            pdfi_close_file(ctx, compressed_stream);
871
897k
    if (SubFile_stream)
872
883k
        pdfi_close_file(ctx, SubFile_stream);
873
897k
    pdfi_countdown(compressed_object);
874
897k
    pdfi_countdown(Type);
875
897k
    return code;
876
812k
}
877
878
/* pdf_dereference returns an object with a reference count of at least 1, this represents the
879
 * reference being held by the caller (in **object) when we return from this function.
880
 */
881
static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache)
882
3.64M
{
883
3.64M
    xref_entry *entry;
884
3.64M
    int code, stack_depth = pdfi_count_stack(ctx);
885
3.64M
    gs_offset_t saved_stream_offset;
886
3.64M
    bool saved_decrypt_strings = ctx->encryption.decrypt_strings;
887
888
3.64M
    *object = NULL;
889
890
3.64M
    if (ctx->xref_table == NULL)
891
13
        return_error(gs_error_typecheck);
892
893
3.64M
    if (obj >= ctx->xref_table->xref_size) {
894
28.1k
        char extra_info[gp_file_name_sizeof];
895
896
28.1k
        gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj);
897
28.1k
        pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info);
898
899
28.1k
        if(ctx->args.pdfstoponerror)
900
0
            return_error(gs_error_rangecheck);
901
902
28.1k
        code = pdfi_repair_file(ctx);
903
28.1k
        if (code < 0) {
904
28.1k
            *object = NULL;
905
28.1k
            return code;
906
28.1k
        }
907
13
        if (obj >= ctx->xref_table->xref_size) {
908
7
            *object = NULL;
909
7
            return_error(gs_error_rangecheck);
910
7
        }
911
13
    }
912
913
3.61M
    entry = &ctx->xref_table->xref[obj];
914
915
3.61M
    if(entry->object_num == 0)
916
559k
        return_error(gs_error_undefined);
917
918
3.05M
    if (entry->free) {
919
86
        char extra_info[gp_file_name_sizeof];
920
921
86
        gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", trying next object number as offset.\n", entry->object_num);
922
86
        pdfi_set_error(ctx, 0, NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info);
923
86
    }
924
925
3.05M
    if (ctx->loop_detection) {
926
2.73M
        if (pdfi_loop_detector_check_object(ctx, obj) == true)
927
179
            return_error(gs_error_circular_reference);
928
2.73M
        if (entry->free) {
929
86
            code = pdfi_loop_detector_add_object(ctx, obj);
930
86
            if (code < 0)
931
0
                return code;
932
86
        }
933
2.73M
    }
934
3.05M
    if (entry->cache != NULL){
935
1.39M
        pdf_obj_cache_entry *cache_entry = entry->cache;
936
937
#if CACHE_STATISTICS
938
        ctx->hits++;
939
#endif
940
1.39M
        *object = cache_entry->o;
941
1.39M
        pdfi_countup(*object);
942
943
1.39M
        pdfi_promote_cache_entry(ctx, cache_entry);
944
1.65M
    } else {
945
1.65M
        saved_stream_offset = pdfi_unread_tell(ctx);
946
947
1.65M
        if (entry->compressed) {
948
            /* This is an object in a compressed object stream */
949
897k
            ctx->encryption.decrypt_strings = false;
950
951
897k
            code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache);
952
897k
            if (code < 0 || *object == NULL)
953
85.0k
                goto error;
954
897k
        } else {
955
761k
            pdf_c_stream *SubFile_stream = NULL;
956
#if CACHE_STATISTICS
957
            ctx->misses++;
958
#endif
959
761k
            ctx->encryption.decrypt_strings = true;
960
961
761k
            code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET);
962
761k
            if (code < 0)
963
9
                goto error;
964
965
761k
            code = pdfi_apply_SubFileDecode_filter(ctx, 0, "trailer", ctx->main_stream, &SubFile_stream, false);
966
761k
            if (code < 0)
967
0
                goto error;
968
969
761k
            code = pdfi_read_object(ctx, SubFile_stream, entry->u.uncompressed.offset);
970
971
            /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it.
972
             * reload the xref entry to be certain it is valid.
973
             */
974
761k
            entry = &ctx->xref_table->xref[obj];
975
976
761k
            pdfi_close_file(ctx, SubFile_stream);
977
761k
            if (code < 0) {
978
58.9k
                int code1 = 0;
979
58.9k
                if (entry->free) {
980
5
                    dmprintf2(ctx->memory, "Dereference of free object %"PRIu64", next object number as offset failed (code = %d), returning NULL object.\n", entry->object_num, code);
981
5
                    *object = PDF_NULL_OBJ;
982
5
                    goto free_obj;
983
5
                }
984
58.9k
                ctx->encryption.decrypt_strings = saved_decrypt_strings;
985
58.9k
                (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
986
58.9k
                pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
987
988
58.9k
                code1 = pdfi_repair_file(ctx);
989
58.9k
                if (code1 == 0)
990
773
                    return pdfi_dereference_main(ctx, obj, gen, object, cache);
991
                /* Repair failed, just give up and return an error */
992
58.1k
                return code;
993
58.9k
            }
994
995
702k
            if (pdfi_count_stack(ctx) > 0 &&
996
702k
                (ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) &&
997
702k
                (ctx->stack_top[-1])->object_num == obj)) {
998
701k
                *object = ctx->stack_top[-1];
999
701k
                pdfi_countup(*object);
1000
701k
                pdfi_pop(ctx, 1);
1001
701k
                if (pdfi_type_of(*object) == PDF_INDIRECT) {
1002
207
                    pdf_indirect_ref *iref = (pdf_indirect_ref *)*object;
1003
1004
207
                    if (iref->ref_object_num == obj) {
1005
0
                        code = gs_note_error(gs_error_circular_reference);
1006
0
                        pdfi_countdown(*object);
1007
0
                        *object = NULL;
1008
0
                        goto error;
1009
0
                    }
1010
207
                }
1011
701k
                if (cache) {
1012
701k
                    code = pdfi_add_to_cache(ctx, *object);
1013
701k
                    if (code < 0) {
1014
0
                        pdfi_countdown(*object);
1015
0
                        goto error;
1016
0
                    }
1017
701k
                }
1018
701k
            } else {
1019
675
                pdfi_pop(ctx, 1);
1020
675
                if (entry->free) {
1021
78
                    dmprintf1(ctx->memory, "Dereference of free object %"PRIu64", next object number as offset failed, returning NULL object.\n", entry->object_num);
1022
78
                    *object = PDF_NULL_OBJ;
1023
78
                    return 0;
1024
78
                }
1025
597
                code = gs_note_error(gs_error_undefined);
1026
597
                goto error;
1027
675
            }
1028
702k
        }
1029
1.51M
free_obj:
1030
1.51M
        (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1031
1.51M
    }
1032
1033
2.91M
    if (ctx->loop_detection && pdf_object_num(*object) != 0) {
1034
2.59M
        code = pdfi_loop_detector_add_object(ctx, (*object)->object_num);
1035
2.59M
        if (code < 0) {
1036
0
            ctx->encryption.decrypt_strings = saved_decrypt_strings;
1037
0
            return code;
1038
0
        }
1039
2.59M
    }
1040
2.91M
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1041
2.91M
    return 0;
1042
1043
85.6k
error:
1044
85.6k
    ctx->encryption.decrypt_strings = saved_decrypt_strings;
1045
85.6k
    (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET);
1046
    /* Return the stack to the state at entry */
1047
85.6k
    pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth);
1048
85.6k
    return code;
1049
2.91M
}
1050
1051
/* Dereference object 'obj', generation 'gen', returning it in *object.
 * This variant asks pdfi_dereference_main() to cache the object it reads.
 */
int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = true;

    return pdfi_dereference_main(ctx, obj, gen, object, use_cache);
}
1055
1056
/* Dereference object 'obj', generation 'gen', returning it in *object.
 * This variant asks pdfi_dereference_main() not to cache the object it reads.
 */
int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    const bool use_cache = false;

    return pdfi_dereference_main(ctx, obj, gen, object, use_cache);
}
1060
1061
/* do a derefence with loop detection */
1062
int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
1063
1.63M
{
1064
1.63M
    int code;
1065
1066
1.63M
    code = pdfi_loop_detector_mark(ctx);
1067
1.63M
    if (code < 0)
1068
0
        return code;
1069
1070
1.63M
    code = pdfi_dereference(ctx, obj, gen, object);
1071
1.63M
    (void)pdfi_loop_detector_cleartomark(ctx);
1072
1.63M
    return code;
1073
1.63M
}
1074
1075
/* Do a dereference with loop detection, using pdfi_dereference_nocache().
 * The call is bracketed by a loop detector mark and cleartomark. If the
 * mark cannot be set then no dereference is attempted and the error is returned.
 */
int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object)
{
    int result = pdfi_loop_detector_mark(ctx);

    if (result >= 0) {
        result = pdfi_dereference_nocache(ctx, obj, gen, object);
        /* Any error from clearing the mark is deliberately ignored */
        (void)pdfi_loop_detector_cleartomark(ctx);
    }
    return result;
}
1087
1088
/* Walk every element of an array, fetching each one with
 * pdfi_array_get_no_store_R() and writing the fetched object back into the
 * array with pdfi_array_put().
 *
 * Parameters:
 *   ctx     - the interpreter context.
 *   obj     - the array to process; the caller must have checked it is a PDF_ARRAY.
 *   recurse - if true, each fetched element is itself passed to
 *             pdfi_resolve_indirect_loop_detect() before being stored.
 *
 * Elements which give a circular reference error, and elements which are
 * streams, are not written back.
 *
 * Returns 0 on success or a negative error code.
 */
static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    uint64_t index, arraysize;
    pdf_obj *object = NULL;
    pdf_array *array = (pdf_array *)obj;

    arraysize = pdfi_array_size(array);
    for (index = 0; index < arraysize; index++) {
        /* Each element gets its own loop detection scope */
        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;
        }

        code = pdfi_array_get_no_store_R(ctx, array, index, &object);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Go via exit so that we don't leak the reference to
                 * 'object' which we may have just been given.
                 */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Just leave as an indirect ref */
            code = 0;
        } else {
            if (code < 0) goto exit;
            if (recurse)
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse);
            if (code < 0) goto exit;
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(object) != PDF_STREAM)
                code = pdfi_array_put(ctx, array, index, object);
        }
        if (code < 0) goto exit;

        /* Finished with this element, drop our reference before the next one */
        pdfi_countdown(object);
        object = NULL;
    }

 exit:
    pdfi_countdown(object);
    return code;
}
1133
1134
/* Walk every key/value pair of a dictionary, fetching each value with
 * pdfi_dict_get_no_store_R_key() and writing the fetched object back into
 * the dictionary with pdfi_dict_put_obj().
 *
 * Parameters:
 *   ctx     - the interpreter context.
 *   obj     - the dictionary to process; the caller must have checked it is a PDF_DICT.
 *   recurse - if true, each fetched value is itself passed to
 *             pdfi_resolve_indirect_loop_detect() after being stored.
 *
 * The /Parent key is skipped. Values which give a circular reference error,
 * and values which are streams, are not written back.
 *
 * Returns 0 on success or a negative error code.
 */
static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse)
{
    int code = 0;
    pdf_dict *dict = (pdf_dict *)obj;
    pdf_name *Key = NULL;
    pdf_obj *Value = NULL;
    uint64_t index, dictsize;

    dictsize = pdfi_dict_entries(dict);

    /* Note: I am not using pdfi_dict_first/next because of needing to handle
     * circular references.
     */
    for (index=0; index<dictsize; index ++) {
        /* Key is borrowed from the dictionary, we do not hold a reference to it */
        Key = (pdf_name *)dict->list[index].key;
        if (pdfi_name_is(Key, "Parent"))
            continue;

        /* Each value gets its own loop detection scope */
        if (ctx->loop_detection != NULL) {
            code = pdfi_loop_detector_mark(ctx);
            if (code < 0)
                return code;
        }

        code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value);

        if (ctx->loop_detection != NULL) {
            int code1 = pdfi_loop_detector_cleartomark(ctx);
            if (code1 < 0) {
                /* Go via exit so that we don't leak the reference to
                 * 'Value' which we may have just been given.
                 */
                code = code1;
                goto exit;
            }
        }

        if (code == gs_error_circular_reference) {
            /* Just leave as an indirect ref */
            code = 0;
        } else {
            if (code < 0) goto exit;
            /* don't store the object if it's a stream (leave as a ref) */
            if (pdfi_type_of(Value) != PDF_STREAM) {
                /* Check the result, the same way the array version checks
                 * pdfi_array_put(), rather than silently ignoring a failure.
                 */
                code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true);
                if (code < 0) goto exit;
            }
            if (recurse)
                code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse);
        }
        if (code < 0) goto exit;

        /* Finished with this value, drop our reference before the next one */
        pdfi_countdown(Value);
        Value = NULL;
    }

 exit:
    pdfi_countdown(Value);
    return code;
}
1187
1188
/* Resolve all the indirect references for an object
1189
 * Note: This can be recursive
1190
 */
1191
int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse)
1192
401k
{
1193
401k
    int code = 0;
1194
1195
401k
    switch(pdfi_type_of(value)) {
1196
98.0k
    case PDF_ARRAY:
1197
98.0k
        code = pdfi_resolve_indirect_array(ctx, value, recurse);
1198
98.0k
        break;
1199
11.4k
    case PDF_DICT:
1200
11.4k
        code = pdfi_resolve_indirect_dict(ctx, value, recurse);
1201
11.4k
        break;
1202
291k
    default:
1203
291k
        break;
1204
401k
    }
1205
401k
    return code;
1206
401k
}
1207
1208
/* Resolve all the indirect references for an object
1209
 * Resolve indirect references, either one level or recursively, with loop detect on
1210
 * the parent (can by NULL) and the value.
1211
 */
1212
int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse)
1213
401k
{
1214
401k
    int code = 0;
1215
1216
401k
    code = pdfi_loop_detector_mark(ctx);
1217
401k
    if (code < 0) goto exit;
1218
401k
    if (parent && parent->object_num != 0) {
1219
400k
        code = pdfi_loop_detector_add_object(ctx, parent->object_num);
1220
400k
        if (code < 0) goto exit;
1221
400k
    }
1222
1223
401k
    if (pdf_object_num(value) != 0) {
1224
269
        if (pdfi_loop_detector_check_object(ctx, value->object_num)) {
1225
2
            code = gs_note_error(gs_error_circular_reference);
1226
2
            goto exit;
1227
2
        }
1228
267
        code = pdfi_loop_detector_add_object(ctx, value->object_num);
1229
267
        if (code < 0) goto exit;
1230
267
    }
1231
401k
    code = pdfi_resolve_indirect(ctx, value, recurse);
1232
1233
401k
 exit:
1234
401k
    (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */
1235
401k
    return code;
1236
401k
}