/src/ghostpdl/pdf/pdf_deref.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2020-2025 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* Functions to deal with dereferencing indirect objects |
17 | | * for the PDF interpreter. In here we also keep the code |
18 | | * for dealing with the object cache, because the dereferencing |
19 | | * functions are currently the only place that deals with it. |
20 | | */ |
21 | | |
22 | | #include "pdf_int.h" |
23 | | #include "pdf_stack.h" |
24 | | #include "pdf_loop_detect.h" |
25 | | #include "strmio.h" |
26 | | #include "stream.h" |
27 | | #include "pdf_file.h" |
28 | | #include "pdf_misc.h" |
29 | | #include "pdf_dict.h" |
30 | | #include "pdf_array.h" |
31 | | #include "pdf_deref.h" |
32 | | #include "pdf_repair.h" |
33 | | |
34 | | /* Start with the object caching functions */ |
35 | | /* Disable object caching (for easier debugging with reference counting) |
36 | | * by uncommenting the following line |
37 | | */ |
38 | | /*#define DISABLE_CACHE*/ |
39 | | |
40 | | /* Given an object, create a cache entry for it. If we have too many entries |
41 | | * then delete the least-recently-used cache entry. Make the new entry be the |
42 | | * most-recently-used entry. The actual entries are attached to the xref table |
43 | | * (as well as being a double-linked list), because we detect an existing |
44 | | * cache entry by seeing that the xref table for the object number has a non-NULL |
45 | | * 'cache' member. |
46 | | * So we need to update the xref as well if we add or delete cache entries. |
 | | * |
 | | * The cache takes its own reference to 'o' (pdfi_countup) and drops the reference |
 | | * held on an evicted object (pdfi_countdown). |
 | | * |
 | | * Returns 0 on success (including the case where 'o' is not cached at all), |
 | | * gs_error_unknownerror if the object already has a cache entry or the cache is |
 | | * full but has no LRU entry to evict, gs_error_rangecheck if the object number |
 | | * exceeds the xref size, and gs_error_VMerror if the entry cannot be allocated. |
 | | * When DISABLE_CACHE is defined the function does nothing and returns 0. |
 | | * |
 | | * NOTE(review): eviction updates cache_LRU but never cache_MRU; if the evicted |
 | | * entry could also be the MRU (e.g. a cache limited to one entry) cache_MRU would |
 | | * be left pointing at freed memory - confirm PDFCacheSize is always > 1. |
47 | | */ |
48 | | static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o)
49 | 2.89M | {
50 | 2.89M | #ifndef DISABLE_CACHE
51 | 2.89M | pdf_obj_cache_entry *entry;
52 | | /* Pointer values below PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) are not cached (presumably token constants rather than allocated objects - TODO confirm) */
53 | 2.89M | if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY))
54 | 5.25k | return 0;
55 | | /* An object may only have one cache entry; a second add is treated as an error */
56 | 2.89M | if (ctx->xref_table->xref[o->object_num].cache != NULL) {
57 | | #if DEBUG_CACHE
58 | | outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num);
59 | | #endif
60 | 0 | return_error(gs_error_unknownerror);
61 | 0 | }
62 | | /* NOTE(review): this bounds check runs after xref[] has already been indexed above, and it uses '>' so object_num == xref_size is accepted - confirm whether that is a valid index */
63 | 2.89M | if (o->object_num > ctx->xref_table->xref_size)
64 | 0 | return_error(gs_error_rangecheck);
65 | |
66 | | #if DEBUG_CACHE
67 | | dbgmprintf1(ctx->memory, "Adding object %d\n", o->object_num);
68 | | #endif
69 | 2.89M | if (ctx->cache_entries == ctx->args.PDFCacheSize)
70 | 721k | {
71 | | #if DEBUG_CACHE
72 | | dbgmprintf(ctx->memory, "Cache full, evicting LRU\n");
73 | | #endif
74 | 721k | if (ctx->cache_LRU) {
75 | 721k | entry = ctx->cache_LRU;
76 | | #if DEBUG_CACHE
77 | | dbgmprintf1(ctx->memory, "Evicting %d\n", entry->o->object_num);
78 | | #endif
79 | 721k | ctx->cache_LRU = entry->next;
80 | 721k | if (entry->next)
81 | 721k | ((pdf_obj_cache_entry *)entry->next)->previous = NULL;
82 | 721k | ctx->xref_table->xref[entry->o->object_num].cache = NULL;
83 | 721k | pdfi_countdown(entry->o);
84 | 721k | ctx->cache_entries--;
85 | 721k | gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU");
86 | 721k | } else
87 | 0 | return_error(gs_error_unknownerror);
88 | 721k | }
89 | 2.89M | entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache");
90 | 2.89M | if (entry == NULL)
91 | 0 | return_error(gs_error_VMerror);
92 | |
93 | 2.89M | memset(entry, 0x00, sizeof(pdf_obj_cache_entry));
94 | | /* The entry holds its own reference to the object and is linked in at the MRU end of the list */
95 | 2.89M | entry->o = o;
96 | 2.89M | pdfi_countup(o);
97 | 2.89M | if (ctx->cache_MRU) {
98 | 2.80M | entry->previous = ctx->cache_MRU;
99 | 2.80M | ctx->cache_MRU->next = entry;
100 | 2.80M | }
101 | 2.89M | ctx->cache_MRU = entry;
102 | 2.89M | if (ctx->cache_LRU == NULL)
103 | 87.1k | ctx->cache_LRU = entry;
104 | | /* Record the entry in the xref table so later lookups can find it by object number */
105 | 2.89M | ctx->cache_entries++;
106 | 2.89M | ctx->xref_table->xref[o->object_num].cache = entry;
107 | 2.89M | #endif
108 | 2.89M | return 0;
109 | 2.89M | }
110 | | |
111 | | /* Given an existing cache entry, promote it to be the most-recently-used
112 | | * cache entry.
 | | *
 | | * Nothing happens if the cache has no MRU entry or if cache_entry is already the
 | | * MRU. Otherwise the entry is unlinked from its current position in the
 | | * double-linked list (updating cache_LRU if it was the first entry) and appended
 | | * after the current MRU. Reference counts and the xref table are not touched.
 | | * When DISABLE_CACHE is defined the function does nothing.
113 | | */
114 | | static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry)
115 | 5.60M | {
116 | 5.60M | #ifndef DISABLE_CACHE
117 | 5.60M | if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) {
118 | 3.47M | if ((pdf_obj_cache_entry *)cache_entry->next != NULL)
119 | 3.47M | ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous;
120 | 3.47M | if ((pdf_obj_cache_entry *)cache_entry->previous != NULL)
121 | 3.46M | ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next;
122 | 1.94k | else {
123 | | /* The existing entry has no predecessor, so it is the current least recently
124 | | * used entry; we need to make the 'next' cache entry into the LRU.
125 | | */
126 | 1.94k | ctx->cache_LRU = cache_entry->next;
127 | 1.94k | }
128 | 3.47M | cache_entry->next = NULL;
129 | 3.47M | cache_entry->previous = ctx->cache_MRU;
130 | 3.47M | ctx->cache_MRU->next = cache_entry;
131 | 3.47M | ctx->cache_MRU = cache_entry;
132 | 3.47M | }
133 | 5.60M | #endif
134 | 5.60M | return;
135 | 5.60M | }
136 | | /* Make sure the object 'o' is represented in the object cache.
 | | * Objects whose object_num is 0 are ignored. If the xref entry for the object has
 | | * no cache entry one is created with pdfi_add_to_cache() and its result returned;
 | | * otherwise the existing entry is promoted to most-recently-used (the object held
 | | * by that entry is not replaced) and 0 is returned.
 | | *
 | | * NOTE(review): o->object_num is used to index the xref table without a bounds
 | | * check here - presumably callers only pass objects read via the xref; confirm.
 | | */
137 | | int pdfi_cache_object(pdf_context *ctx, pdf_obj *o)
138 | 3.47M | {
139 | 3.47M | if (o->object_num == 0)
140 | 2.31M | return 0;
141 | 1.16M | if (ctx->xref_table->xref[o->object_num].cache == NULL)
142 | 36 | return pdfi_add_to_cache(ctx, o);
143 | 1.16M | else
144 | 1.16M | pdfi_promote_cache_entry(ctx, ctx->xref_table->xref[o->object_num].cache);
145 | 1.16M | return 0;
146 | 1.16M | }
147 | | |
148 | | /* This one's a bit of an oddity, it's used for fonts. When we build a PDF font object
149 | | * we want the object cache to reference *that* object, not the dictionary which was
150 | | * read out of the PDF file, so this allows us to replace the font dictionary in the
151 | | * cache with the actual font object, so that later dereferences will get this font
152 | | * object.
 | | *
 | | * If the object number of 'o' has no cache entry yet, a new one is created with
 | | * pdfi_add_to_cache() and its result is returned. Otherwise the existing entry is
 | | * re-pointed at 'o' (taking a reference), promoted to most-recently-used, and the
 | | * reference on the previously cached object is released; 0 is returned.
 | | * When DISABLE_CACHE is defined the function does nothing and returns 0.
153 | | */
154 | | int replace_cache_entry(pdf_context *ctx, pdf_obj *o)
155 | 183k | {
156 | 183k | #ifndef DISABLE_CACHE
157 | 183k | xref_entry *entry;
158 | 183k | pdf_obj_cache_entry *cache_entry;
159 | 183k | pdf_obj *old_cached_obj = NULL;
160 | |
161 | | /* Limited error checking here, we assume that things like the
162 | | * validity of the object (e.g. not a free object) have already been handled.
 | | * In particular o->object_num is not range checked before indexing the xref.
163 | | */
164 | |
165 | 183k | entry = &ctx->xref_table->xref[o->object_num];
166 | 183k | cache_entry = entry->cache;
167 | |
168 | 183k | if (cache_entry == NULL) {
169 | 5.20k | return(pdfi_add_to_cache(ctx, o));
170 | 178k | } else {
171 | | /* NOTE: We grab the object without decrementing, to avoid triggering
172 | | * a warning message for freeing an object that's in the cache. The
 | | * countdown is deferred until the entry points at the new object.
173 | | */
174 | 178k | if (cache_entry->o != NULL)
175 | 178k | old_cached_obj = cache_entry->o;
176 | |
177 | | /* Put new entry in the cache */
178 | 178k | cache_entry->o = o;
179 | 178k | pdfi_countup(o);
180 | 178k | pdfi_promote_cache_entry(ctx, cache_entry);
181 | |
182 | | /* Now decrement the old cache entry, if any */
183 | 178k | pdfi_countdown(old_cached_obj);
184 | 178k | }
185 | 178k | #endif
186 | 178k | return 0;
187 | 183k | }
188 | | |
189 | | /* Now the dereferencing functions */ |
190 | | |
191 | | /* |
192 | | * Technically we can accept a stream other than the main PDF file stream here. This is |
193 | | * really for the case of compressed objects where we read tokens from the compressed |
194 | | * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode |
195 | | * on top of the main file stream, which may be useful. Note that this cannot work with |
196 | | * objects in compressed object streams! They should always pass a value of 0 for the stream_offset. |
197 | | * The stream_offset is the offset from the start of the underlying uncompressed PDF file of |
198 | | * the stream we are using. See the comments below when keyword is PDF_STREAM. |
199 | | */ |
200 | | |
201 | | /* Determine if a PDF object is in a compressed ObjStm. Returns < 0
202 | | * for an error, 0 if it is not in a compressed ObjStm and 1 if it is.
203 | | * Currently errors are impossible. This is only used by the decryption code
204 | | * to determine if a string is in a compressed object stream, if it is then
205 | | * it can't be used for decryption.
 | | *
 | | * The decision is taken purely from the 'compressed' flag of the xref entry for
 | | * 'obj'; the 'gen' argument is not used.
 | | *
 | | * NOTE(review): 'obj' indexes the xref table without being compared against
 | | * xref_size - confirm that callers only pass object numbers known to be in range.
206 | | */
207 | | int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen)
208 | 17.8k | {
209 | 17.8k | xref_entry *entry;
210 | |
211 | | /* Can't possibly be a compressed object before we have finished reading
212 | | * the xref.
213 | | */
214 | 17.8k | if (ctx->xref_table == NULL)
215 | 0 | return 0;
216 | |
217 | 17.8k | entry = &ctx->xref_table->xref[obj];
218 | |
219 | 17.8k | if (entry->compressed)
220 | 0 | return 1;
221 | |
222 | 17.8k | return 0;
223 | 17.8k | }
224 | | |
225 | | /* We should never read a 'stream' keyword from a compressed object stream
226 | | * so this case should never end up here.
 | | *
 | | * On entry the stream dictionary is expected on top of the stack. The dictionary
 | | * is converted to a stream object numbered objnum/gen, which replaces it on the
 | | * stack, and the current file position (taken to be the start of the stream data)
 | | * is stored in the stream object's stream_offset.
 | | *
 | | * The /Length is then verified by skipping that many bytes and looking for
 | | * 'endstream' followed by 'endobj'. If /Length is out of range, or 'endstream' is
 | | * not found where expected, the data is scanned byte by byte for "endstream" or
 | | * "endobj" and the Length is calculated from that instead.
 | | *
 | | * Returns 0 on success or a negative error code. Where a problem is reported
 | | * through pdfi_set_error_stop() the value that function returns decides whether
 | | * processing carries on.
227 | | */
228 | | static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset,
229 | | uint32_t objnum, uint32_t gen)
230 | 869k | {
231 | 869k | int code = 0;
232 | 869k | int64_t i;
233 | 869k | pdf_dict *dict = NULL;
234 | 869k | gs_offset_t offset;
235 | 869k | pdf_stream *stream_obj = NULL;
236 | |
237 | | /* Strange code time....
238 | | * If we are using a stream which is *not* the PDF uncompressed main file stream
239 | | * then doing stell on it will only tell us how many bytes have been read from
240 | | * that stream, it won't tell us the underlying file position. So we add on the
241 | | * 'unread' bytes, *and* we add on the position of the start of the stream in
242 | | * the actual main file. This is all done so that we can check the /Length
243 | | * of the object. Note that this will *only* work for regular objects it can
244 | | * not be used for compressed object streams, but those don't need checking anyway
245 | | * they have a different mechanism altogether and should never get here.
246 | | */
247 | 869k | if (s != ctx->main_stream) {
248 | 0 | offset = stell(s->s) - s->unread_size + stream_offset;
249 | 0 | code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET);
250 | 0 | if (code < 0)
251 | 0 | return_error(gs_error_ioerror);
252 | 869k | } else {
253 | 869k | offset = stell(s->s) - s->unread_size;
254 | 869k | }
255 | |
256 | 869k | if (pdfi_count_stack(ctx) < 1)
257 | 0 | return_error(gs_error_stackunderflow);
258 | |
259 | 869k | dict = (pdf_dict *)ctx->stack_top[-1];
260 | |
261 | 869k | if (pdfi_type_of(dict) != PDF_DICT) {
262 | 8.24k | pdfi_pop(ctx, 1);
263 | 8.24k | return_error(gs_error_syntaxerror);
264 | 8.24k | }
265 | |
266 | 861k | dict->indirect_num = dict->object_num = objnum;
267 | 861k | dict->indirect_gen = dict->generation_num = gen;
268 | |
269 | | /* Convert the dict into a stream */
270 | 861k | code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true);
271 | 861k | if (code < 0) {
272 | 0 | pdfi_pop(ctx, 1);
273 | 0 | return code;
274 | 0 | }
275 | | /* Pop off the dict and push the stream */
276 | 861k | pdfi_pop(ctx, 1);
277 | 861k | dict = NULL;
278 | 861k | pdfi_push(ctx, (pdf_obj *)stream_obj);
279 | |
280 | 861k | stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum;
281 | 861k | stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen;
282 | 861k | stream_obj->stream_offset = offset;
283 | |
284 | | /* Exceptional code. Normally we do not need to worry about detecting circular references
285 | | * when reading objects, because we do not dereference any indirect objects. However streams
286 | | * are a slight exception in that we do get the Length from the stream dictionary and if that
287 | | * is an indirect reference, then we dereference it.
288 | | * OSS-fuzz bug 43247 has a stream where the value associated with the /Length is an indirect
289 | | * reference to the same stream object, and leads to infinite recursion. So deal with that
290 | | * possibility here.
291 | | */
292 | 861k | code = pdfi_loop_detector_mark(ctx);
293 | 861k | if (code < 0) {
294 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
295 | 0 | return code;
296 | 0 | }
297 | 861k | if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) {
298 | 160 | pdfi_countdown(stream_obj); /* get rid of extra ref */
299 | 160 | pdfi_loop_detector_cleartomark(ctx);
300 | 160 | return_error(gs_error_circular_reference);
301 | 160 | }
302 | |
303 | 861k | code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num);
304 | 861k | if (code < 0) {
305 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
306 | 0 | pdfi_loop_detector_cleartomark(ctx);
307 | 0 | return code;
308 | 0 | }
309 | |
310 | | /* This code may be a performance overhead, it simply skips over the stream contents
311 | | * and checks that the stream ends with a 'endstream endobj' pair. We could add a
312 | | * 'go faster' flag for users who are certain their PDF files are well-formed. This
313 | | * could also allow us to skip all kinds of other checking.....
314 | | */
315 | |
316 | 861k | code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i);
317 | 861k | if (code < 0) {
318 | 19.3k | char extra_info[gp_file_name_sizeof];
319 | |
320 | 19.3k | (void)pdfi_loop_detector_cleartomark(ctx);
321 | 19.3k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum);
322 | 19.3k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info);
323 | 19.3k | pdfi_countdown(stream_obj); /* get rid of extra ref */
324 | 19.3k | return code;
325 | 19.3k | }
326 | 841k | code = pdfi_loop_detector_cleartomark(ctx);
327 | 841k | if (code < 0) {
328 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
329 | 0 | return code;
330 | 0 | }
331 | | /* NOTE(review): 'i' comes from the file and 'i + offset' is a signed addition; confirm a huge /Length cannot overflow before the comparison */
332 | 841k | if (i < 0 || (i + offset)> ctx->main_stream_length) {
333 | 45.4k | char extra_info[gp_file_name_sizeof];
334 | |
335 | 45.4k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum);
336 | 45.4k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) {
337 | 0 | pdfi_pop(ctx, 1);
338 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
339 | 0 | return code;
340 | 0 | }
341 | 796k | } else {
342 | 796k | code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR);
343 | 796k | if (code < 0) {
344 | 0 | pdfi_pop(ctx, 1);
345 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
346 | 0 | return code;
347 | 0 | }
348 | | /* Treat the Length as unverified until 'endstream' has been seen where it predicts */
349 | 796k | stream_obj->Length = 0;
350 | 796k | stream_obj->length_valid = false;
351 | |
352 | 796k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
353 | 796k | if (code == 0) {
354 | 0 | char extra_info[gp_file_name_sizeof];
355 | |

356 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum);
357 | 0 | pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info);
358 | | /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that
359 | | * happens give up.
360 | | */
361 | 0 | if (pdfi_count_stack(ctx) == 0) {
362 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
363 | 0 | return code;
364 | 0 | }
365 | 796k | } else if (code < 0) {
366 | 0 | char extra_info[gp_file_name_sizeof];
367 | |

368 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum);
369 | 0 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) {
370 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
371 | 0 | return code;
372 | 0 | }
373 | 796k | } else if (code != TOKEN_ENDSTREAM) {
374 | 85.7k | char extra_info[gp_file_name_sizeof];
375 | | /* NOTE(review): 'i' is declared int64_t but is formatted with PRIu64 below; it is known to be non-negative on this path */
376 | 85.7k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i);
377 | 85.7k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) {
378 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
379 | 0 | return code;
380 | 0 | }
381 | 710k | } else {
382 | | /* Cache the Length in the stream object and mark it valid */
383 | 710k | stream_obj->Length = i;
384 | 710k | stream_obj->length_valid = true;
385 | 710k | }
386 | 796k | }
387 | |
388 | | /* If we failed to find a valid object, or the object wasn't a keyword, or the
389 | | * keyword wasn't 'endstream' then the Length is wrong. We need to have the correct
390 | | * Length for streams if we have encrypted files, because we must install a
391 | | * SubFileDecode filter with a Length (EODString is incompatible with AES encryption)
392 | | * Rather than mess about checking for encryption, we'll choose to just correctly
393 | | * calculate the Length of all streams. Although this takes time, it will only
394 | | * happen for files which are invalid.
395 | | */
396 | 841k | if (stream_obj->length_valid != true) {
397 | 131k | char Buffer[10];
398 | 131k | unsigned int bytes, total = 0;
399 | 131k | int c = 0;
400 | | /* Go back to the start of the stream data and scan forwards from there */
401 | 131k | code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET);
402 | 131k | if (code < 0) {
403 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
404 | 0 | pdfi_pop(ctx, 1);
405 | 0 | return code;
406 | 0 | }
407 | 131k | memset(Buffer, 0x00, 10);
408 | 131k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream);
409 | 131k | if (bytes < 9) {
410 | 393 | pdfi_countdown(stream_obj); /* get rid of extra ref */
411 | 393 | return_error(gs_error_ioerror);
412 | 393 | }
413 | | /* Buffer is a 10 byte sliding window: the keywords are compared against its first
 | | * 9 (or 6) bytes, then the window is shifted down one byte and the next byte read
 | | * is stored in Buffer[9], so reading runs one byte ahead of the compared bytes.
 | | * 'total' counts the bytes read so far; a non-zero Buffer[9] is taken to mean that
 | | * the look-ahead byte has been read and must not be counted in the Length.
 | | * NOTE(review): a genuine 0x00 byte in Buffer[9] looks indistinguishable from the
 | | * initial empty state, which would make the Length one too large - confirm.
 | | * The loop ends on a keyword match or when pdfi_read_byte() returns < 0.
 | | */
414 | 130k | total = bytes;
415 | 2.23G | do {
416 | 2.23G | if (memcmp(Buffer, "endstream", 9) == 0) {
417 | 80.0k | if (Buffer[9] != 0x00)
418 | 80.0k | total--;
419 | 80.0k | stream_obj->Length = total - 9;
420 | 80.0k | stream_obj->length_valid = true;
421 | 80.0k | break;
422 | 80.0k | }
423 | 2.23G | if (memcmp(Buffer, "endobj", 6) == 0) {
424 | 8.18k | if (Buffer[9] != 0x00)
425 | 8.09k | total--;
426 | 8.18k | stream_obj->Length = total - 6;
427 | 8.18k | stream_obj->length_valid = true;
428 | 8.18k | break;
429 | 8.18k | }
430 | 2.23G | memmove(Buffer, Buffer+1, 9);
431 | 2.23G | c = pdfi_read_byte(ctx, ctx->main_stream);
432 | 2.23G | if (c < 0)
433 | 42.5k | break;
434 | 2.23G | Buffer[9] = (byte)c;
435 | 2.23G | total++;
436 | 2.23G | } while(1);
437 | 130k | pdfi_countdown(stream_obj); /* get rid of extra ref */
438 | 130k | if (c < 0)
439 | 42.5k | return_error(gs_error_ioerror);
440 | 88.2k | return 0;
441 | 130k | }
442 | | /* The Length was correct and 'endstream' has been consumed; now expect 'endobj' */
443 | 710k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream);
444 | 710k | if (code < 0) {
445 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
446 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) {
447 | 0 | return code;
448 | 0 | }
449 | | /* Something went wrong looking for endobj, but we found endstream, so assume
450 | | * for now that will suffice.
451 | | */
452 | 0 | return 0;
453 | 0 | }
454 | |
455 | 710k | if (code == 0) {
456 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */
457 | 0 | return_error(gs_error_stackunderflow);
458 | 0 | }
459 | |
460 | 710k | if (code != TOKEN_ENDOBJ) {
461 | 1.94k | pdfi_countdown(stream_obj); /* get rid of extra ref */
462 | 1.94k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL);
463 | | /* Didn't find an endobj, but we have an endstream, so assume
464 | | * for now that will suffice
465 | | */
466 | 1.94k | return code;
467 | 1.94k | }
468 | 708k | pdfi_countdown(stream_obj); /* get rid of extra ref */
469 | |
470 | 708k | return 0;
471 | 710k | }
472 | | |
473 | | /* This reads an object *after* the x y obj keyword has been found. It's broken out
474 | | * separately for the benefit of the repair code when reading the dictionary following
475 | | * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does
476 | | * not have an 'endobj', we rely on the error handling to take care of that for us.
 | | *
 | | * Tokens are read from 's' onto the stack until a keyword turns up:
 | | *   endobj - the object beneath the keyword is stamped with objnum/gen and left
 | | *            on the stack (an 'endobj' as the very first token yields PDF_NULL_OBJ);
 | | *   stream - handed over to pdfi_read_stream_object();
 | | *   obj    - taken to be the start of the next object, i.e. a missing 'endobj';
 | | *            the extra 'x y obj' is discarded and the stream is repositioned;
 | | *   others - also treated as a missing 'endobj'.
 | | * Returns 0 (or the value chosen by pdfi_set_error_stop) on success, otherwise a
 | | * negative error code.
477 | | */
478 | | int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen)
479 | 2.50M | {
480 | 2.50M | int code = 0, initial_depth = 0;
481 | 2.50M | pdf_key keyword;
482 | 2.50M | gs_offset_t saved_offset[3];
483 | 2.50M | pdf_obj_type type;
484 | | /* Remember the stack depth so we can tell how many items this object contributed */
485 | 2.50M | initial_depth = pdfi_count_stack(ctx);
486 | 2.50M | saved_offset[0] = saved_offset[1] = saved_offset[2] = 0;
487 | |
488 | 2.50M | code = pdfi_read_token(ctx, s, objnum, gen);
489 | 2.50M | if (code < 0)
490 | 5.20k | return code;
491 | |
492 | 2.49M | if (code == 0)
493 | | /* failed to read a token */
494 | 75 | return_error(gs_error_syntaxerror);
495 | | /* An immediate 'endobj' means the object has no content; substitute a null object */
496 | 2.49M | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) {
497 | 27.6k | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
498 | 27.6k | if (keyword == TOKEN_ENDOBJ) {
499 | 372 | ctx->stack_top[-1] = PDF_NULL_OBJ;
500 | 372 | return 0;
501 | 372 | }
502 | 27.6k | }
503 | |
504 | 81.1M | do {
505 | | /* move all the saved offsets up by one, so that saved_offset[0] holds the
 | | * position recorded before the third most recent token was read */
506 | 81.1M | saved_offset[0] = saved_offset[1];
507 | 81.1M | saved_offset[1] = saved_offset[2];
508 | 81.1M | saved_offset[2] = pdfi_unread_tell(ctx);
509 | |
510 | 81.1M | code = pdfi_read_token(ctx, s, objnum, gen);
511 | 81.1M | if (code < 0) {
512 | 225k | pdfi_clearstack(ctx);
513 | 225k | return code;
514 | 225k | }
515 | 80.9M | if (s->eof)
516 | 2.84k | return_error(gs_error_syntaxerror);
517 | 80.9M | code = 0;
518 | 80.9M | type = pdfi_type_of(ctx->stack_top[-1]);
519 | 80.9M | if (type == PDF_KEYWORD)
520 | 170k | goto missing_endobj;
521 | 80.9M | } while (type != PDF_FAST_KEYWORD);
522 | |
523 | 2.09M | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]);
524 | 2.09M | if (keyword == TOKEN_ENDOBJ) {
525 | 1.16M | pdf_obj *o;
526 | | /* Need the object itself as well as the 'endobj' keyword */
527 | 1.16M | if (pdfi_count_stack(ctx) - initial_depth < 2) {
528 | 234 | pdfi_clearstack(ctx);
529 | 234 | return_error(gs_error_stackunderflow);
530 | 234 | }
531 | |
532 | 1.16M | o = ctx->stack_top[-2];
533 | |
534 | 1.16M | pdfi_pop(ctx, 1);
535 | | /* Only values at or above PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) are written through */
536 | 1.16M | if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) {
537 | 1.16M | o->indirect_num = o->object_num = objnum;
538 | 1.16M | o->indirect_gen = o->generation_num = gen;
539 | 1.16M | }
540 | 1.16M | return code;
541 | 1.16M | }
542 | 928k | if (keyword == TOKEN_STREAM) {
543 | 869k | pdfi_pop(ctx, 1);
544 | 869k | return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen);
545 | 869k | }
546 | 58.5k | if (keyword == TOKEN_OBJ) {
547 | 6.50k | pdf_obj *o;
548 | |
549 | 6.50k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) {
550 | 0 | return code;
551 | 0 | }
552 | |
553 | | /* 4 for; the object we want, the object number, generation number and 'obj' keyword */
554 | 6.50k | if (pdfi_count_stack(ctx) - initial_depth < 4)
555 | 1.48k | return_error(gs_error_stackunderflow);
556 | |
557 | | /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */
558 | 5.01k | o = ctx->stack_top[-4];
559 | |
560 | 5.01k | pdfi_pop(ctx, 3);
561 | |
562 | 5.01k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
563 | 4.98k | o->indirect_num = o->object_num = objnum;
564 | 4.98k | o->indirect_gen = o->generation_num = gen;
565 | 4.98k | }
 | | /* Seek back to the position saved three tokens ago - presumably the start of the discarded 'x y obj' so that it can be read again; TODO confirm */
566 | 5.01k | if (saved_offset[0] > 0)
567 | 5.01k | (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET);
568 | 5.01k | return 0;
569 | 6.50k | }
570 | |
571 | 222k | missing_endobj:
572 | | /* Assume that any other keyword means a missing 'endobj'.
 | | * NOTE(review): the function name passed to pdfi_set_error_stop() below is
 | | * "pdfi_read_xref_stream_dict", unlike every other report in this function;
 | | * it looks like a copy-paste leftover - confirm before changing the string.
 | | */
573 | 222k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) {
574 | 222k | pdf_obj *o;
575 | |
576 | 222k | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL);
577 | |
578 | 222k | if (pdfi_count_stack(ctx) - initial_depth < 2)
579 | 4.70k | return_error(gs_error_stackunderflow);
580 | |
581 | 218k | o = ctx->stack_top[-2];
582 | |
583 | 218k | pdfi_pop(ctx, 1);
584 | |
585 | 218k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) {
586 | 216k | o->indirect_num = o->object_num = objnum;
587 | 216k | o->indirect_gen = o->generation_num = gen;
588 | 216k | }
589 | 218k | return code;
590 | 222k | }
591 | 0 | pdfi_pop(ctx, 2);
592 | 0 | return_error(gs_error_syntaxerror);
593 | 222k | }
594 | | /* Read a complete indirect object from 's', starting at the 'num gen obj' header.
 | | * The two integers and the 'obj' keyword are read here; the body of the object is
 | | * then read by pdfi_read_bare_object(), whose result is returned.
 | | * Returns gs_error_syntaxerror if either integer cannot be read or the keyword is
 | | * not 'obj', gs_error_ioerror if no keyword could be read at all, and otherwise
 | | * passes on any negative code from the helper functions.
 | | *
 | | * NOTE(review): objnum and gen are plain ints but are passed on as uint32_t; whether
 | | * pdfi_read_bare_int() can produce negative values is not visible here - confirm.
 | | */
595 | | static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset)
596 | 2.52M | {
597 | 2.52M | int code = 0;
598 | 2.52M | int objnum = 0, gen = 0;
599 | |
600 | | /* An object consists of 'num gen obj' followed by a token, followed by an endobj
601 | | * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we
602 | | * want to deal with it specially by getting the Length, jumping to the end and checking
603 | | * for an endobj. Or not, possibly, because it would be slow.
604 | | */
605 | 2.52M | code = pdfi_read_bare_int(ctx, s, &objnum);
606 | 2.52M | if (code < 0)
607 | 58.9k | return code;
608 | 2.46M | if (code == 0)
609 | 16.6k | return_error(gs_error_syntaxerror);
610 | |
611 | 2.44M | code = pdfi_read_bare_int(ctx, s, &gen);
612 | 2.44M | if (code < 0)
613 | 3.96k | return code;
614 | 2.44M | if (code == 0)
615 | 3.48k | return_error(gs_error_syntaxerror);
616 | |
617 | 2.43M | code = pdfi_read_bare_keyword(ctx, s);
618 | 2.43M | if (code < 0)
619 | 0 | return code;
620 | 2.43M | if (code == 0)
621 | 0 | return gs_note_error(gs_error_ioerror);
622 | 2.43M | if (code != TOKEN_OBJ) {
623 | 7.65k | return_error(gs_error_syntaxerror);
624 | 7.65k | }
625 | |
626 | 2.42M | return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen);
627 | 2.43M | }
628 | | |
629 | | static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, |
630 | | const xref_entry *entry, bool cache) |
631 | 1.43M | { |
632 | 1.43M | int code = 0; |
633 | 1.43M | xref_entry *compressed_entry; |
634 | 1.43M | pdf_c_stream *compressed_stream = NULL; |
635 | 1.43M | pdf_c_stream *SubFile_stream = NULL; |
636 | 1.43M | pdf_c_stream *Object_stream = NULL; |
637 | 1.43M | int i = 0, object_length = 0; |
638 | 1.43M | int64_t num_entries; |
639 | 1.43M | int found_object; |
640 | 1.43M | int64_t Length, First; |
641 | 1.43M | gs_offset_t offset = 0; |
642 | 1.43M | pdf_stream *compressed_object = NULL; |
643 | 1.43M | pdf_dict *compressed_sdict = NULL; /* alias */ |
644 | 1.43M | pdf_name *Type = NULL; |
645 | | |
646 | 1.43M | if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1) |
647 | 2.06k | return_error(gs_error_undefined); |
648 | | |
649 | 1.42M | compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num]; |
650 | | |
651 | 1.42M | if (ctx->args.pdfdebug) { |
652 | 0 | outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj); |
653 | 0 | outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num); |
654 | 0 | } |
655 | | |
656 | 1.42M | if (compressed_entry->cache == NULL) { |
657 | | #if CACHE_STATISTICS |
658 | | ctx->compressed_misses++; |
659 | | #endif |
660 | 81.1k | code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET); |
661 | 81.1k | if (code < 0) |
662 | 0 | goto exit; |
663 | | |
664 | 81.1k | code = pdfi_read_object(ctx, ctx->main_stream, 0); |
665 | 81.1k | if (code < 0) |
666 | 17.4k | goto exit; |
667 | | |
668 | 63.7k | if (pdfi_count_stack(ctx) < 1) { |
669 | 2 | code = gs_note_error(gs_error_stackunderflow); |
670 | 2 | goto exit; |
671 | 2 | } |
672 | | |
673 | 63.7k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) { |
674 | 13.2k | pdfi_pop(ctx, 1); |
675 | 13.2k | code = gs_note_error(gs_error_typecheck); |
676 | 13.2k | goto exit; |
677 | 13.2k | } |
678 | 50.5k | if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) { |
679 | 298 | pdfi_pop(ctx, 1); |
680 | | /* Same error (undefined) as when we read an uncompressed object with the wrong number */ |
681 | 298 | code = gs_note_error(gs_error_undefined); |
682 | 298 | goto exit; |
683 | 298 | } |
684 | 50.2k | compressed_object = (pdf_stream *)ctx->stack_top[-1]; |
685 | 50.2k | pdfi_countup(compressed_object); |
686 | 50.2k | pdfi_pop(ctx, 1); |
687 | 50.2k | code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object); |
688 | 50.2k | if (code < 0) |
689 | 0 | goto exit; |
690 | 1.34M | } else { |
691 | | #if CACHE_STATISTICS |
692 | | ctx->compressed_hits++; |
693 | | #endif |
694 | 1.34M | compressed_object = (pdf_stream *)compressed_entry->cache->o; |
695 | 1.34M | pdfi_countup(compressed_object); |
696 | 1.34M | pdfi_promote_cache_entry(ctx, compressed_entry->cache); |
697 | 1.34M | } |
698 | 1.39M | code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict); |
699 | 1.39M | if (code < 0) |
700 | 11 | return code; |
701 | | |
702 | 1.39M | if (ctx->loop_detection != NULL) { |
703 | 1.39M | code = pdfi_loop_detector_mark(ctx); |
704 | 1.39M | if (code < 0) |
705 | 0 | goto exit; |
706 | 1.39M | if (compressed_sdict->object_num != 0) { |
707 | 1.39M | if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) { |
708 | 212 | code = gs_note_error(gs_error_circular_reference); |
709 | 1.39M | } else { |
710 | 1.39M | code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num); |
711 | 1.39M | } |
712 | 1.39M | if (code < 0) { |
713 | 212 | (void)pdfi_loop_detector_cleartomark(ctx); |
714 | 212 | goto exit; |
715 | 212 | } |
716 | 1.39M | } |
717 | 1.39M | } |
718 | | /* Check its an ObjStm ! */ |
719 | 1.39M | code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type); |
720 | 1.39M | if (code < 0) { |
721 | 277 | if (ctx->loop_detection != NULL) |
722 | 277 | (void)pdfi_loop_detector_cleartomark(ctx); |
723 | 277 | goto exit; |
724 | 277 | } |
725 | | |
726 | 1.39M | if (!pdfi_name_is(Type, "ObjStm")){ |
727 | 1.21k | if (ctx->loop_detection != NULL) |
728 | 1.21k | (void)pdfi_loop_detector_cleartomark(ctx); |
729 | 1.21k | code = gs_note_error(gs_error_syntaxerror); |
730 | 1.21k | goto exit; |
731 | 1.21k | } |
732 | | |
733 | | /* Need to check the /N entry to see if the object is actually in this stream! */ |
734 | 1.39M | code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries); |
735 | 1.39M | if (code < 0) { |
736 | 270 | if (ctx->loop_detection != NULL) |
737 | 270 | (void)pdfi_loop_detector_cleartomark(ctx); |
738 | 270 | goto exit; |
739 | 270 | } |
740 | | |
741 | 1.39M | if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) { |
742 | 84 | if (ctx->loop_detection != NULL) |
743 | 84 | (void)pdfi_loop_detector_cleartomark(ctx); |
744 | 84 | code = gs_note_error(gs_error_rangecheck); |
745 | 84 | goto exit; |
746 | 84 | } |
747 | | |
748 | 1.39M | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
749 | 1.39M | if (code < 0) { |
750 | 250k | if (ctx->loop_detection != NULL) |
751 | 250k | (void)pdfi_loop_detector_cleartomark(ctx); |
752 | 250k | goto exit; |
753 | 250k | } |
754 | | |
755 | 1.14M | code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First); |
756 | 1.14M | if (code < 0) { |
757 | 4.60k | if (ctx->loop_detection != NULL) |
758 | 4.60k | (void)pdfi_loop_detector_cleartomark(ctx); |
759 | 4.60k | goto exit; |
760 | 4.60k | } |
761 | | |
762 | 1.14M | if (ctx->loop_detection != NULL) |
763 | 1.13M | (void)pdfi_loop_detector_cleartomark(ctx); |
764 | | |
765 | 1.14M | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
766 | 1.14M | if (code < 0) |
767 | 0 | goto exit; |
768 | | |
769 | 1.14M | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
770 | 1.14M | if (code < 0) |
771 | 0 | goto exit; |
772 | | |
773 | 1.14M | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
774 | 1.14M | if (code < 0) |
775 | 1.64k | goto exit; |
776 | | |
777 | 68.4M | for (i=0;i < num_entries;i++) |
778 | 67.3M | { |
779 | 67.3M | int new_offset; |
780 | 67.3M | code = pdfi_read_bare_int(ctx, compressed_stream, &found_object); |
781 | 67.3M | if (code < 0) |
782 | 17.2k | goto exit; |
783 | 67.3M | if (code == 0) { |
784 | 2.55k | code = gs_note_error(gs_error_syntaxerror); |
785 | 2.55k | goto exit; |
786 | 2.55k | } |
787 | 67.3M | code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset); |
788 | 67.3M | if (code < 0) |
789 | 25.0k | goto exit; |
790 | 67.2M | if (code == 0) { |
791 | 1.13k | code = gs_note_error(gs_error_syntaxerror); |
792 | 1.13k | goto exit; |
793 | 1.13k | } |
794 | 67.2M | if (i == entry->u.compressed.object_index) { |
795 | 1.11M | if (found_object != obj) { |
796 | 2.97k | code = gs_note_error(gs_error_undefined); |
797 | 2.97k | goto exit; |
798 | 2.97k | } |
799 | 1.10M | offset = new_offset; |
800 | 1.10M | } |
801 | 67.2M | if (i == entry->u.compressed.object_index + 1) |
802 | 1.06M | object_length = new_offset - offset; |
803 | 67.2M | } |
804 | | |
805 | | /* Bug #705259 - The first object need not lie immediately after the initial |
806 | | * table of object numbers and offsets. The start of the first object is given |
807 | | * by the value of First. We don't know how many bytes we consumed getting to |
808 | | * the end of the table, unfortunately, so we close the stream, rewind the main |
809 | | * stream back to the beginning of the ObjStm, and then read and discard 'First' |
810 | | * bytes in order to get to the start of the first object. Then we read the |
811 | | * number of bytes required to get from there to the start of the object we |
812 | | * actually want. |
813 | | * If this ever looks like it's causing performance problems we could read the |
814 | | * initial table above manually instead of using the existing code, and track |
815 | | * how many bytes we'd read, which would avoid us having to tear down and |
816 | | * rebuild the stream. |
817 | | */ |
818 | 1.08M | if (compressed_stream) |
819 | 1.08M | pdfi_close_file(ctx, compressed_stream); |
820 | 1.08M | if (SubFile_stream) |
821 | 1.08M | pdfi_close_file(ctx, SubFile_stream); |
822 | | |
823 | 1.08M | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
824 | 1.08M | if (code < 0) |
825 | 0 | goto exit; |
826 | | |
827 | | /* We already dereferenced this above, so we don't need the loop detection checking here */ |
828 | 1.08M | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
829 | 1.08M | if (code < 0) |
830 | 0 | goto exit; |
831 | | |
832 | 1.08M | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
833 | 1.08M | if (code < 0) |
834 | 0 | goto exit; |
835 | | |
836 | 1.08M | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
837 | 1.08M | if (code < 0) |
838 | 0 | goto exit; |
839 | | |
840 | 575M | for (i=0;i < First;i++) |
841 | 574M | { |
842 | 574M | int c = pdfi_read_byte(ctx, compressed_stream); |
843 | 574M | if (c < 0) { |
844 | 37 | code = gs_note_error(gs_error_ioerror); |
845 | 37 | goto exit; |
846 | 37 | } |
847 | 574M | } |
848 | | |
849 | | /* Skip to the offset of the object we want to read */ |
850 | 3.76G | for (i=0;i < offset;i++) |
851 | 3.76G | { |
852 | 3.76G | int c = pdfi_read_byte(ctx, compressed_stream); |
853 | 3.76G | if (c < 0) { |
854 | 84.3k | code = gs_note_error(gs_error_ioerror); |
855 | 84.3k | goto exit; |
856 | 84.3k | } |
857 | 3.76G | } |
858 | | |
859 | | /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit |
860 | | * the number of bytes we read to the declared size of the object (difference between |
861 | | * the offsets of the object we want to read, and the next object). If it is 0 then |
862 | | * we're reading the last object in the stream, so we just rely on the SubFileDecode |
863 | | * we set up when we created compressed_stream to limit the bytes to the length of |
864 | | * that stream. |
865 | | */ |
866 | 1.00M | if (object_length > 0) { |
867 | 970k | code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false); |
868 | 970k | if (code < 0) |
869 | 0 | goto exit; |
870 | 970k | } else { |
871 | 34.7k | Object_stream = compressed_stream; |
872 | 34.7k | } |
873 | | |
874 | 1.00M | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
875 | 1.00M | if (code < 0) |
876 | 6.63k | goto exit; |
877 | 998k | if (code == 0) { |
878 | 137 | code = gs_note_error(gs_error_syntaxerror); |
879 | 137 | goto exit; |
880 | 137 | } |
881 | 998k | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) { |
882 | 983k | int start_depth = pdfi_count_stack(ctx); |
883 | | |
884 | | /* Need to read all the elements from COS objects */ |
885 | 34.6M | do { |
886 | 34.6M | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
887 | 34.6M | if (code < 0) |
888 | 34.4k | goto exit; |
889 | 34.6M | if (code == 0) { |
890 | 6.73k | code = gs_note_error(gs_error_syntaxerror); |
891 | 6.73k | goto exit; |
892 | 6.73k | } |
893 | 34.6M | if (compressed_stream->eof == true) { |
894 | 478 | code = gs_note_error(gs_error_ioerror); |
895 | 478 | goto exit; |
896 | 478 | } |
897 | 34.6M | } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth); |
898 | 983k | } |
899 | | |
900 | 957k | *object = ctx->stack_top[-1]; |
901 | | /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets |
902 | | * the object number for uncompressed objects. So we need to do that here. |
903 | | */ |
904 | 957k | if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
905 | 952k | (*object)->indirect_num = (*object)->object_num = obj; |
906 | 952k | (*object)->indirect_gen = (*object)->generation_num = gen; |
907 | 952k | pdfi_countup(*object); |
908 | 952k | } |
909 | 957k | pdfi_pop(ctx, 1); |
910 | | |
911 | 957k | if (cache) { |
912 | 922k | code = pdfi_add_to_cache(ctx, *object); |
913 | 922k | if (code < 0) { |
914 | 0 | pdfi_countdown(*object); |
915 | 0 | goto exit; |
916 | 0 | } |
917 | 922k | } |
918 | | |
919 | 1.42M | exit: |
920 | 1.42M | if (Object_stream) |
921 | 1.00M | pdfi_close_file(ctx, Object_stream); |
922 | 1.42M | if (Object_stream != compressed_stream) |
923 | 1.10M | if (compressed_stream) |
924 | 1.10M | pdfi_close_file(ctx, compressed_stream); |
925 | 1.42M | if (SubFile_stream) |
926 | 1.14M | pdfi_close_file(ctx, SubFile_stream); |
927 | 1.42M | pdfi_countdown(compressed_object); |
928 | 1.42M | pdfi_countdown(Type); |
929 | 1.42M | return code; |
930 | 957k | } |
931 | | |
932 | | /* pdf_dereference returns an object with a reference count of at least 1, this represents the |
933 | | * reference being held by the caller (in **object) when we return from this function. |
934 | | */ |
935 | | static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache) |
936 | 9.22M | { |
937 | 9.22M | xref_entry *entry; |
938 | 9.22M | int code, stack_depth = pdfi_count_stack(ctx); |
939 | 9.22M | gs_offset_t saved_stream_offset; |
940 | 9.22M | bool saved_decrypt_strings = ctx->encryption.decrypt_strings; |
941 | | |
942 | 9.22M | *object = NULL; |
943 | | |
944 | 9.22M | if (ctx->xref_table == NULL) |
945 | 62 | return_error(gs_error_typecheck); |
946 | | |
947 | 9.22M | if (ctx->main_stream == NULL || ctx->main_stream->s == NULL) |
948 | 0 | return_error(gs_error_ioerror); |
949 | | |
950 | 9.22M | if (obj >= ctx->xref_table->xref_size) { |
951 | 305k | char extra_info[gp_file_name_sizeof]; |
952 | | |
953 | 305k | gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj); |
954 | 305k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) { |
955 | 0 | return code; |
956 | 0 | } |
957 | | |
958 | 305k | code = pdfi_repair_file(ctx); |
959 | 305k | if (code < 0) { |
960 | 305k | *object = NULL; |
961 | 305k | return code; |
962 | 305k | } |
963 | 37 | if (obj >= ctx->xref_table->xref_size) { |
964 | 25 | *object = NULL; |
965 | 25 | return_error(gs_error_rangecheck); |
966 | 25 | } |
967 | 37 | } |
968 | | |
969 | 8.91M | entry = &ctx->xref_table->xref[obj]; |
970 | | |
971 | 8.91M | if(entry->object_num == 0) { |
972 | 2.12M | pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0"); |
973 | 2.12M | return_error(gs_error_undefined); |
974 | 2.12M | } |
975 | | |
976 | 6.78M | if (entry->free) { |
977 | 7.78k | char extra_info[gp_file_name_sizeof]; |
978 | | |
979 | 7.78k | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
980 | 7.78k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
981 | 7.78k | *object = PDF_NULL_OBJ; |
982 | 7.78k | return code; |
983 | 6.78M | }else { |
984 | 6.78M | if (!entry->compressed) { |
985 | 4.92M | if(entry->u.uncompressed.generation_num != gen) |
986 | 5.22k | pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", ""); |
987 | 4.92M | } |
988 | 6.78M | } |
989 | | |
990 | 6.78M | if (ctx->loop_detection) { |
991 | 6.35M | if (pdfi_loop_detector_check_object(ctx, obj) == true) |
992 | 1.77k | return_error(gs_error_circular_reference); |
993 | 6.35M | if (entry->free) { |
994 | 0 | code = pdfi_loop_detector_add_object(ctx, obj); |
995 | 0 | if (code < 0) |
996 | 0 | return code; |
997 | 0 | } |
998 | 6.35M | } |
999 | 6.77M | if (entry->cache != NULL){ |
1000 | 2.90M | pdf_obj_cache_entry *cache_entry = entry->cache; |
1001 | | |
1002 | | #if CACHE_STATISTICS |
1003 | | ctx->hits++; |
1004 | | #endif |
1005 | 2.90M | *object = cache_entry->o; |
1006 | 2.90M | pdfi_countup(*object); |
1007 | | |
1008 | 2.90M | pdfi_promote_cache_entry(ctx, cache_entry); |
1009 | 3.86M | } else { |
1010 | 3.86M | saved_stream_offset = pdfi_unread_tell(ctx); |
1011 | | |
1012 | 3.86M | if (entry->compressed) { |
1013 | | /* This is an object in a compressed object stream */ |
1014 | 1.43M | ctx->encryption.decrypt_strings = false; |
1015 | | |
1016 | 1.43M | code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache); |
1017 | 1.43M | if (code < 0 || *object == NULL) |
1018 | 473k | goto error; |
1019 | 2.43M | } else { |
1020 | | #if CACHE_STATISTICS |
1021 | | ctx->misses++; |
1022 | | #endif |
1023 | 2.43M | ctx->encryption.decrypt_strings = true; |
1024 | | |
1025 | 2.43M | code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET); |
1026 | 2.43M | if (code < 0) |
1027 | 129 | goto error; |
1028 | | |
1029 | 2.43M | code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset); |
1030 | | |
1031 | | /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it. |
1032 | | * reload the xref entry to be certain it is valid. |
1033 | | */ |
1034 | 2.43M | entry = &ctx->xref_table->xref[obj]; |
1035 | 2.43M | if (code < 0) { |
1036 | 352k | int code1 = 0; |
1037 | 352k | if (entry->free) { |
1038 | 0 | char extra_info[gp_file_name_sizeof]; |
1039 | |
|
1040 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1041 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1042 | 0 | *object = PDF_NULL_OBJ; |
1043 | 0 | if (code < 0) |
1044 | 0 | goto error; |
1045 | 0 | goto free_obj; |
1046 | 0 | } |
1047 | 352k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1048 | 352k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1049 | 352k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1050 | | |
1051 | 352k | code1 = pdfi_repair_file(ctx); |
1052 | 352k | if (code1 == 0) |
1053 | 1.96k | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1054 | | /* Repair failed, just give up and return an error */ |
1055 | 350k | goto error; |
1056 | 352k | } |
1057 | | |
1058 | | /* We only expect a single object back when dereferencing an indirect reference |
1059 | | * The only way (I think) we can end up with more than one is if the object initially |
1060 | | * appears to be a dictionary or array, but the object terminates (with endobj or |
1061 | | * simply reaching EOF) without terminating the array or dictionary. That's clearly |
1062 | | * an error. We might, as a future 'improvement' choose to walk back through |
1063 | | * the stack looking for unterminated dictionary or array markers, and closing them |
1064 | | * so that (hopefully!) we end up with a single 'repaired' object on the stack. |
1065 | | * But for now I'm simply going to treat these as errors. We will try a repair on the |
1066 | | * file to see if we end up using a different (hopefully intact) object from the file. |
1067 | | */ |
1068 | 2.08M | if (pdfi_count_stack(ctx) - stack_depth > 1) { |
1069 | 155k | int code1 = 0; |
1070 | | |
1071 | 155k | code1 = pdfi_repair_file(ctx); |
1072 | 155k | if (code1 == 0) |
1073 | 613 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1074 | | /* Repair failed, just give up and return an error */ |
1075 | 154k | code = gs_note_error(gs_error_syntaxerror); |
1076 | 154k | goto error; |
1077 | 155k | } |
1078 | | |
1079 | 1.93M | if (pdfi_count_stack(ctx) > 0 && |
1080 | 1.93M | ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) && |
1081 | 1.93M | (ctx->stack_top[-1])->object_num == obj) |
1082 | 1.93M | || ctx->stack_top[-1] == PDF_NULL_OBJ)) { |
1083 | 1.92M | *object = ctx->stack_top[-1]; |
1084 | 1.92M | pdfi_countup(*object); |
1085 | 1.92M | pdfi_pop(ctx, 1); |
1086 | 1.92M | if (pdfi_type_of(*object) == PDF_INDIRECT) { |
1087 | 0 | pdf_indirect_ref *iref = (pdf_indirect_ref *)*object; |
1088 | |
|
1089 | 0 | if (iref->ref_object_num == obj) { |
1090 | 0 | code = gs_note_error(gs_error_circular_reference); |
1091 | 0 | pdfi_countdown(*object); |
1092 | 0 | *object = NULL; |
1093 | 0 | goto error; |
1094 | 0 | } |
1095 | 0 | } |
1096 | | /* There's really no point in caching an indirect reference and |
1097 | | * I think it could be potentially confusing to later calls. |
1098 | | */ |
1099 | 1.92M | if (cache && pdfi_type_of(*object) != PDF_INDIRECT) { |
1100 | 1.92M | code = pdfi_add_to_cache(ctx, *object); |
1101 | 1.92M | if (code < 0) { |
1102 | 0 | pdfi_countdown(*object); |
1103 | 0 | goto error; |
1104 | 0 | } |
1105 | 1.92M | } |
1106 | 1.92M | } else { |
1107 | 1.88k | int code1 = 0; |
1108 | | |
1109 | 1.88k | if (pdfi_count_stack(ctx) > 0) |
1110 | 1.75k | pdfi_pop(ctx, 1); |
1111 | | |
1112 | 1.88k | if (entry->free) { |
1113 | 0 | char extra_info[gp_file_name_sizeof]; |
1114 | |
|
1115 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1116 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1117 | 0 | *object = PDF_NULL_OBJ; |
1118 | 0 | if (code < 0) |
1119 | 0 | goto error; |
1120 | 0 | return code; |
1121 | 0 | } |
1122 | 1.88k | code1 = pdfi_repair_file(ctx); |
1123 | 1.88k | if (code1 == 0) |
1124 | 236 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1125 | | /* Repair failed, just give up and return an error */ |
1126 | 1.64k | code = gs_note_error(gs_error_undefined); |
1127 | 1.64k | goto error; |
1128 | 1.88k | } |
1129 | 1.93M | } |
1130 | 2.88M | free_obj: |
1131 | 2.88M | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1132 | 2.88M | } |
1133 | | |
1134 | 5.79M | if (ctx->loop_detection && pdf_object_num(*object) != 0) { |
1135 | 5.37M | code = pdfi_loop_detector_add_object(ctx, (*object)->object_num); |
1136 | 5.37M | if (code < 0) { |
1137 | 0 | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1138 | 0 | return code; |
1139 | 0 | } |
1140 | 5.37M | } |
1141 | 5.79M | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1142 | 5.79M | return 0; |
1143 | | |
1144 | 980k | error: |
1145 | 980k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1146 | 980k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1147 | | /* Return the stack to the state at entry */ |
1148 | 980k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1149 | 980k | return code; |
1150 | 5.79M | } |
1151 | | |
1152 | | int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1153 | 9.16M | { |
1154 | 9.16M | return pdfi_dereference_main(ctx, obj, gen, object, true); |
1155 | 9.16M | } |
1156 | | |
1157 | | int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1158 | 54.9k | { |
1159 | 54.9k | return pdfi_dereference_main(ctx, obj, gen, object, false); |
1160 | 54.9k | } |
1161 | | |
1162 | | /* do a derefence with loop detection */ |
1163 | | int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1164 | 3.43M | { |
1165 | 3.43M | int code; |
1166 | | |
1167 | 3.43M | code = pdfi_loop_detector_mark(ctx); |
1168 | 3.43M | if (code < 0) |
1169 | 0 | return code; |
1170 | | |
1171 | 3.43M | code = pdfi_dereference(ctx, obj, gen, object); |
1172 | 3.43M | (void)pdfi_loop_detector_cleartomark(ctx); |
1173 | 3.43M | return code; |
1174 | 3.43M | } |
1175 | | |
1176 | | int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1177 | 54.9k | { |
1178 | 54.9k | int code; |
1179 | | |
1180 | 54.9k | code = pdfi_loop_detector_mark(ctx); |
1181 | 54.9k | if (code < 0) |
1182 | 0 | return code; |
1183 | | |
1184 | 54.9k | code = pdfi_dereference_nocache(ctx, obj, gen, object); |
1185 | 54.9k | (void)pdfi_loop_detector_cleartomark(ctx); |
1186 | 54.9k | return code; |
1187 | 54.9k | } |
1188 | | |
1189 | | static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1190 | 90.3k | { |
1191 | 90.3k | int code = 0; |
1192 | 90.3k | uint64_t index, arraysize; |
1193 | 90.3k | pdf_obj *object = NULL; |
1194 | 90.3k | pdf_array *array = (pdf_array *)obj; |
1195 | | |
1196 | 90.3k | arraysize = pdfi_array_size(array); |
1197 | 413k | for (index = 0; index < arraysize; index++) { |
1198 | 323k | if (ctx->loop_detection != NULL) { |
1199 | 323k | code = pdfi_loop_detector_mark(ctx); |
1200 | 323k | if (code < 0) |
1201 | 0 | return code; |
1202 | 323k | } |
1203 | | |
1204 | 323k | code = pdfi_array_get_no_store_R(ctx, array, index, &object); |
1205 | | |
1206 | 323k | if (ctx->loop_detection != NULL) { |
1207 | 323k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1208 | 323k | if (code1 < 0) |
1209 | 0 | return code1; |
1210 | 323k | } |
1211 | | |
1212 | 323k | if (code == gs_error_circular_reference) { |
1213 | | /* Previously we just left as an indirect reference, but now we want |
1214 | | * to return the error so we don't end up replacing indirect references |
1215 | | * to objects with circular references. |
1216 | | */ |
1217 | 323k | } else { |
1218 | 323k | if (code < 0) goto exit; |
1219 | 323k | if (recurse) { |
1220 | 2.46k | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse); |
1221 | 2.46k | if (code < 0) goto exit; |
1222 | 2.46k | } |
1223 | | /* don't store the object if it's a stream (leave as a ref) */ |
1224 | 323k | if (pdfi_type_of(object) != PDF_STREAM) |
1225 | 323k | code = pdfi_array_put(ctx, array, index, object); |
1226 | 323k | } |
1227 | 323k | if (code < 0) goto exit; |
1228 | | |
1229 | 323k | pdfi_countdown(object); |
1230 | 323k | object = NULL; |
1231 | 323k | } |
1232 | | |
1233 | 90.3k | exit: |
1234 | 90.3k | pdfi_countdown(object); |
1235 | 90.3k | return code; |
1236 | 90.3k | } |
1237 | | |
1238 | | static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1239 | 12.7k | { |
1240 | 12.7k | int code = 0; |
1241 | 12.7k | pdf_dict *dict = (pdf_dict *)obj; |
1242 | 12.7k | pdf_name *Key = NULL; |
1243 | 12.7k | pdf_obj *Value = NULL; |
1244 | 12.7k | uint64_t index, dictsize; |
1245 | | |
1246 | 12.7k | dictsize = pdfi_dict_entries(dict); |
1247 | | |
1248 | | /* Note: I am not using pdfi_dict_first/next because of needing to handle |
1249 | | * circular references. |
1250 | | */ |
1251 | 29.0k | for (index=0; index<dictsize; index ++) { |
1252 | 16.4k | Key = (pdf_name *)dict->list[index].key; |
1253 | 16.4k | if (pdfi_name_is(Key, "Parent")) |
1254 | 9 | continue; |
1255 | | |
1256 | 16.4k | if (ctx->loop_detection != NULL) { |
1257 | 16.3k | code = pdfi_loop_detector_mark(ctx); |
1258 | 16.3k | if (code < 0) |
1259 | 0 | return code; |
1260 | 16.3k | } |
1261 | | |
1262 | 16.4k | code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value); |
1263 | | |
1264 | 16.4k | if (ctx->loop_detection != NULL) { |
1265 | 16.3k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1266 | 16.3k | if (code1 < 0) |
1267 | 0 | return code1; |
1268 | 16.3k | } |
1269 | | |
1270 | 16.4k | if (code == gs_error_circular_reference) { |
1271 | | /* Just leave as an indirect ref */ |
1272 | 9 | code = 0; |
1273 | 16.4k | } else { |
1274 | 16.4k | if (code < 0) goto exit; |
1275 | 16.3k | if (recurse) { |
1276 | 5.57k | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse); |
1277 | 5.57k | if (code < 0) |
1278 | 78 | goto exit; |
1279 | 5.57k | } |
1280 | | /* don't store the object if it's a stream (leave as a ref) */ |
1281 | 16.3k | if (pdfi_type_of(Value) != PDF_STREAM) |
1282 | 16.2k | code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true); |
1283 | 16.3k | } |
1284 | 16.3k | if (code < 0) goto exit; |
1285 | | |
1286 | 16.3k | pdfi_countdown(Value); |
1287 | 16.3k | Value = NULL; |
1288 | 16.3k | } |
1289 | | |
1290 | 12.7k | exit: |
1291 | 12.7k | pdfi_countdown(Value); |
1292 | 12.7k | return code; |
1293 | 12.7k | } |
1294 | | |
1295 | | /* Resolve all the indirect references for an object |
1296 | | * Note: This can be recursive |
1297 | | */ |
1298 | | int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse) |
1299 | 373k | { |
1300 | 373k | int code = 0; |
1301 | | |
1302 | 373k | switch(pdfi_type_of(value)) { |
1303 | 90.3k | case PDF_ARRAY: |
1304 | 90.3k | code = pdfi_resolve_indirect_array(ctx, value, recurse); |
1305 | 90.3k | break; |
1306 | 12.7k | case PDF_DICT: |
1307 | 12.7k | code = pdfi_resolve_indirect_dict(ctx, value, recurse); |
1308 | 12.7k | break; |
1309 | 270k | default: |
1310 | 270k | break; |
1311 | 373k | } |
1312 | 373k | return code; |
1313 | 373k | } |
1314 | | |
1315 | | /* Resolve all the indirect references for an object |
1316 | | * Resolve indirect references, either one level or recursively, with loop detect on |
1317 | | * the parent (can by NULL) and the value. |
1318 | | */ |
1319 | | int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse) |
1320 | 373k | { |
1321 | 373k | int code = 0; |
1322 | | |
1323 | 373k | code = pdfi_loop_detector_mark(ctx); |
1324 | 373k | if (code < 0) goto exit; |
1325 | 373k | if (parent && parent->object_num != 0) { |
1326 | 364k | code = pdfi_loop_detector_add_object(ctx, parent->object_num); |
1327 | 364k | if (code < 0) goto exit; |
1328 | 364k | } |
1329 | | |
1330 | 373k | if (pdf_object_num(value) != 0) { |
1331 | 2.14k | if (pdfi_loop_detector_check_object(ctx, value->object_num)) { |
1332 | 6 | code = gs_note_error(gs_error_circular_reference); |
1333 | 6 | goto exit; |
1334 | 6 | } |
1335 | 2.13k | code = pdfi_loop_detector_add_object(ctx, value->object_num); |
1336 | 2.13k | if (code < 0) goto exit; |
1337 | 2.13k | } |
1338 | 373k | code = pdfi_resolve_indirect(ctx, value, recurse); |
1339 | | |
1340 | 373k | exit: |
1341 | 373k | (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */ |
1342 | 373k | return code; |
1343 | 373k | } |