/src/ghostpdl/pdf/pdf_deref.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2020-2025 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* Functions to deal with dereferencing indirect objects |
17 | | * for the PDF interpreter. In here we also keep the code |
18 | | * for dealing with the object cache, because the dereferencing |
19 | | * functions are currently the only place that deals with it. |
20 | | */ |
21 | | |
22 | | #include "pdf_int.h" |
23 | | #include "pdf_stack.h" |
24 | | #include "pdf_loop_detect.h" |
25 | | #include "strmio.h" |
26 | | #include "stream.h" |
27 | | #include "pdf_file.h" |
28 | | #include "pdf_misc.h" |
29 | | #include "pdf_dict.h" |
30 | | #include "pdf_array.h" |
31 | | #include "pdf_deref.h" |
32 | | #include "pdf_repair.h" |
33 | | |
34 | | /* Start with the object caching functions */ |
35 | | /* Disable object caching (for easier debugging with reference counting) |
36 | | * by uncommenting the following line |
37 | | */ |
38 | | /*#define DISABLE_CACHE*/ |
39 | | |
40 | | /* given an object, create a cache entry for it. If we have too many entries |
41 | | * then delete the leat-recently-used cache entry. Make the new entry be the |
42 | | * most-recently-used entry. The actual entries are attached to the xref table |
43 | | * (as well as being a double-linked list), because we detect an existing |
44 | | * cache entry by seeing that the xref table for the object number has a non-NULL |
45 | | * 'cache' member. |
46 | | * So we need to update the xref as well if we add or delete cache entries. |
47 | | */ |
48 | | static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o) |
49 | 2.24M | { |
50 | 2.24M | #ifndef DISABLE_CACHE |
51 | 2.24M | pdf_obj_cache_entry *entry; |
52 | | |
53 | 2.24M | if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) |
54 | 4.39k | return 0; |
55 | | |
56 | 2.23M | if (ctx->xref_table->xref[o->object_num].cache != NULL) { |
57 | | #if DEBUG_CACHE |
58 | | outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num); |
59 | | #endif |
60 | 0 | return_error(gs_error_unknownerror); |
61 | 0 | } |
62 | | |
63 | 2.23M | if (o->object_num > ctx->xref_table->xref_size) |
64 | 0 | return_error(gs_error_rangecheck); |
65 | | |
66 | 2.23M | if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE) |
67 | 488k | { |
68 | | #if DEBUG_CACHE |
69 | | dbgmprintf(ctx->memory, "Cache full, evicting LRU\n"); |
70 | | #endif |
71 | 488k | if (ctx->cache_LRU) { |
72 | 488k | entry = ctx->cache_LRU; |
73 | 488k | ctx->cache_LRU = entry->next; |
74 | 488k | if (entry->next) |
75 | 488k | ((pdf_obj_cache_entry *)entry->next)->previous = NULL; |
76 | 488k | ctx->xref_table->xref[entry->o->object_num].cache = NULL; |
77 | 488k | pdfi_countdown(entry->o); |
78 | 488k | ctx->cache_entries--; |
79 | 488k | gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU"); |
80 | 488k | } else |
81 | 0 | return_error(gs_error_unknownerror); |
82 | 488k | } |
83 | 2.23M | entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache"); |
84 | 2.23M | if (entry == NULL) |
85 | 0 | return_error(gs_error_VMerror); |
86 | | |
87 | 2.23M | memset(entry, 0x00, sizeof(pdf_obj_cache_entry)); |
88 | | |
89 | 2.23M | entry->o = o; |
90 | 2.23M | pdfi_countup(o); |
91 | 2.23M | if (ctx->cache_MRU) { |
92 | 2.16M | entry->previous = ctx->cache_MRU; |
93 | 2.16M | ctx->cache_MRU->next = entry; |
94 | 2.16M | } |
95 | 2.23M | ctx->cache_MRU = entry; |
96 | 2.23M | if (ctx->cache_LRU == NULL) |
97 | 74.1k | ctx->cache_LRU = entry; |
98 | | |
99 | 2.23M | ctx->cache_entries++; |
100 | 2.23M | ctx->xref_table->xref[o->object_num].cache = entry; |
101 | 2.23M | #endif |
102 | 2.23M | return 0; |
103 | 2.23M | } |
104 | | |
105 | | /* Given an existing cache entry, promote it to be the most-recently-used |
106 | | * cache entry. |
107 | | */ |
108 | | static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry) |
109 | 4.33M | { |
110 | 4.33M | #ifndef DISABLE_CACHE |
111 | 4.33M | if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) { |
112 | 3.18M | if ((pdf_obj_cache_entry *)cache_entry->next != NULL) |
113 | 3.18M | ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous; |
114 | 3.18M | if ((pdf_obj_cache_entry *)cache_entry->previous != NULL) |
115 | 3.18M | ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next; |
116 | 1.90k | else { |
117 | | /* the existing entry is the current least recently used, we need to make the 'next' |
118 | | * cache entry into the LRU. |
119 | | */ |
120 | 1.90k | ctx->cache_LRU = cache_entry->next; |
121 | 1.90k | } |
122 | 3.18M | cache_entry->next = NULL; |
123 | 3.18M | cache_entry->previous = ctx->cache_MRU; |
124 | 3.18M | ctx->cache_MRU->next = cache_entry; |
125 | 3.18M | ctx->cache_MRU = cache_entry; |
126 | 3.18M | } |
127 | 4.33M | #endif |
128 | 4.33M | return; |
129 | 4.33M | } |
130 | | |
131 | | /* This one's a bit of an oddity, its used for fonts. When we build a PDF font object |
132 | | * we want the object cache to reference *that* object, not the dictionary which was |
133 | | * read out of the PDF file, so this allows us to replace the font dictionary in the |
134 | | * cache with the actual font object, so that later dereferences will get this font |
135 | | * object. |
136 | | */ |
137 | | int replace_cache_entry(pdf_context *ctx, pdf_obj *o) |
138 | 157k | { |
139 | 157k | #ifndef DISABLE_CACHE |
140 | 157k | xref_entry *entry; |
141 | 157k | pdf_obj_cache_entry *cache_entry; |
142 | 157k | pdf_obj *old_cached_obj = NULL; |
143 | | |
144 | | /* Limited error checking here, we assume that things like the |
145 | | * validity of the object (eg not a free oobject) have already been handled. |
146 | | */ |
147 | | |
148 | 157k | entry = &ctx->xref_table->xref[o->object_num]; |
149 | 157k | cache_entry = entry->cache; |
150 | | |
151 | 157k | if (cache_entry == NULL) { |
152 | 4.69k | return(pdfi_add_to_cache(ctx, o)); |
153 | 153k | } else { |
154 | | /* NOTE: We grab the object without decrementing, to avoid triggering |
155 | | * a warning message for freeing an object that's in the cache |
156 | | */ |
157 | 153k | if (cache_entry->o != NULL) |
158 | 153k | old_cached_obj = cache_entry->o; |
159 | | |
160 | | /* Put new entry in the cache */ |
161 | 153k | cache_entry->o = o; |
162 | 153k | pdfi_countup(o); |
163 | 153k | pdfi_promote_cache_entry(ctx, cache_entry); |
164 | | |
165 | | /* Now decrement the old cache entry, if any */ |
166 | 153k | pdfi_countdown(old_cached_obj); |
167 | 153k | } |
168 | 153k | #endif |
169 | 153k | return 0; |
170 | 157k | } |
171 | | |
172 | | /* Now the dereferencing functions */ |
173 | | |
174 | | /* |
175 | | * Technically we can accept a stream other than the main PDF file stream here. This is |
176 | | * really for the case of compressed objects where we read tokens from the compressed |
177 | | * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode |
178 | | * on top of the main file stream, which may be useful. Note that this cannot work with |
179 | | * objects in compressed object streams! They should always pass a value of 0 for the stream_offset. |
180 | | * The stream_offset is the offset from the start of the underlying uncompressed PDF file of |
181 | | * the stream we are using. See the comments below when keyword is PDF_STREAM. |
182 | | */ |
183 | | |
184 | | /* Determine if a PDF object is in a compressed ObjStm. Returns < 0 |
185 | | * for an error, 0 if it is not in a compressed ObjStm and 1 if it is. |
186 | | * Currently errors are inmpossible. This is only used by the decryption code |
187 | | * to determine if a string is in a compressed object stream, if it is then |
188 | | * it can't be used for decryption. |
189 | | */ |
190 | | int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen) |
191 | 15.6k | { |
192 | 15.6k | xref_entry *entry; |
193 | | |
194 | | /* Can't possibly be a compressed object before we have finished reading |
195 | | * the xref. |
196 | | */ |
197 | 15.6k | if (ctx->xref_table == NULL) |
198 | 0 | return 0; |
199 | | |
200 | 15.6k | entry = &ctx->xref_table->xref[obj]; |
201 | | |
202 | 15.6k | if (entry->compressed) |
203 | 0 | return 1; |
204 | | |
205 | 15.6k | return 0; |
206 | 15.6k | } |
207 | | |
208 | | /* We should never read a 'stream' keyword from a compressed object stream |
209 | | * so this case should never end up here. |
210 | | */ |
211 | | static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, |
212 | | uint32_t objnum, uint32_t gen) |
213 | 685k | { |
214 | 685k | int code = 0; |
215 | 685k | int64_t i; |
216 | 685k | pdf_dict *dict = NULL; |
217 | 685k | gs_offset_t offset; |
218 | 685k | pdf_stream *stream_obj = NULL; |
219 | | |
220 | | /* Strange code time.... |
221 | | * If we are using a stream which is *not* the PDF uncompressed main file stream |
222 | | * then doing stell on it will only tell us how many bytes have been read from |
223 | | * that stream, it won't tell us the underlying file position. So we add on the |
224 | | * 'unread' bytes, *and* we add on the position of the start of the stream in |
225 | | * the actual main file. This is all done so that we can check the /Length |
226 | | * of the object. Note that this will *only* work for regular objects it can |
227 | | * not be used for compressed object streams, but those don't need checking anyway |
228 | | * they have a different mechanism altogether and should never get here. |
229 | | */ |
230 | 685k | if (s != ctx->main_stream) { |
231 | 0 | offset = stell(s->s) - s->unread_size + stream_offset; |
232 | 0 | code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET); |
233 | 0 | if (code < 0) |
234 | 0 | return_error(gs_error_ioerror); |
235 | 685k | } else { |
236 | 685k | offset = stell(s->s) - s->unread_size; |
237 | 685k | } |
238 | | |
239 | 685k | if (pdfi_count_stack(ctx) < 1) |
240 | 0 | return_error(gs_error_stackunderflow); |
241 | | |
242 | 685k | dict = (pdf_dict *)ctx->stack_top[-1]; |
243 | | |
244 | 685k | if (pdfi_type_of(dict) != PDF_DICT) { |
245 | 5.91k | pdfi_pop(ctx, 1); |
246 | 5.91k | return_error(gs_error_syntaxerror); |
247 | 5.91k | } |
248 | | |
249 | 679k | dict->indirect_num = dict->object_num = objnum; |
250 | 679k | dict->indirect_gen = dict->generation_num = gen; |
251 | | |
252 | | /* Convert the dict into a stream */ |
253 | 679k | code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true); |
254 | 679k | if (code < 0) { |
255 | 0 | pdfi_pop(ctx, 1); |
256 | 0 | return code; |
257 | 0 | } |
258 | | /* Pop off the dict and push the stream */ |
259 | 679k | pdfi_pop(ctx, 1); |
260 | 679k | dict = NULL; |
261 | 679k | pdfi_push(ctx, (pdf_obj *)stream_obj); |
262 | | |
263 | 679k | stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum; |
264 | 679k | stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen; |
265 | 679k | stream_obj->stream_offset = offset; |
266 | | |
267 | | /* Exceptional code. Normally we do not need to worry about detecting circular references |
268 | | * when reading objects, because we do not dereference any indirect objects. However streams |
269 | | * are a slight exception in that we do get the Length from the stream dictionay and if that |
270 | | * is an indirect reference, then we dereference it. |
271 | | * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect |
272 | | * reference to the same stream object, and leads to infinite recursion. So deal with that |
273 | | * possibility here. |
274 | | */ |
275 | 679k | code = pdfi_loop_detector_mark(ctx); |
276 | 679k | if (code < 0) { |
277 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
278 | 0 | return code; |
279 | 0 | } |
280 | 679k | if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) { |
281 | 109 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
282 | 109 | pdfi_loop_detector_cleartomark(ctx); |
283 | 109 | return_error(gs_error_circular_reference); |
284 | 109 | } |
285 | | |
286 | 679k | code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num); |
287 | 679k | if (code < 0) { |
288 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
289 | 0 | pdfi_loop_detector_cleartomark(ctx); |
290 | 0 | return code; |
291 | 0 | } |
292 | | |
293 | | /* This code may be a performance overhead, it simply skips over the stream contents |
294 | | * and checks that the stream ends with a 'endstream endobj' pair. We could add a |
295 | | * 'go faster' flag for users who are certain their PDF files are well-formed. This |
296 | | * could also allow us to skip all kinds of other checking..... |
297 | | */ |
298 | | |
299 | 679k | code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i); |
300 | 679k | if (code < 0) { |
301 | 17.6k | char extra_info[gp_file_name_sizeof]; |
302 | | |
303 | 17.6k | (void)pdfi_loop_detector_cleartomark(ctx); |
304 | 17.6k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum); |
305 | 17.6k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info); |
306 | 17.6k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
307 | 17.6k | return code; |
308 | 17.6k | } |
309 | 662k | code = pdfi_loop_detector_cleartomark(ctx); |
310 | 662k | if (code < 0) { |
311 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
312 | 0 | return code; |
313 | 0 | } |
314 | | |
315 | 662k | if (i < 0 || (i + offset)> ctx->main_stream_length) { |
316 | 39.3k | char extra_info[gp_file_name_sizeof]; |
317 | | |
318 | 39.3k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum); |
319 | 39.3k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) { |
320 | 0 | pdfi_pop(ctx, 1); |
321 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
322 | 0 | return code; |
323 | 0 | } |
324 | 622k | } else { |
325 | 622k | code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR); |
326 | 622k | if (code < 0) { |
327 | 0 | pdfi_pop(ctx, 1); |
328 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
329 | 0 | return code; |
330 | 0 | } |
331 | | |
332 | 622k | stream_obj->Length = 0; |
333 | 622k | stream_obj->length_valid = false; |
334 | | |
335 | 622k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
336 | 622k | if (code == 0) { |
337 | 0 | char extra_info[gp_file_name_sizeof]; |
338 | |
|
339 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum); |
340 | 0 | pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); |
341 | | /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that |
342 | | * happens give up. |
343 | | */ |
344 | 0 | if (pdfi_count_stack(ctx) == 0) { |
345 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
346 | 0 | return code; |
347 | 0 | } |
348 | 622k | } else if (code < 0) { |
349 | 0 | char extra_info[gp_file_name_sizeof]; |
350 | |
|
351 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum); |
352 | 0 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) { |
353 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
354 | 0 | return code; |
355 | 0 | } |
356 | 622k | } else if (code != TOKEN_ENDSTREAM) { |
357 | 70.8k | char extra_info[gp_file_name_sizeof]; |
358 | | |
359 | 70.8k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i); |
360 | 70.8k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) { |
361 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
362 | 0 | return code; |
363 | 0 | } |
364 | 551k | } else { |
365 | | /* Cache the Length in the stream object and mark it valid */ |
366 | 551k | stream_obj->Length = i; |
367 | 551k | stream_obj->length_valid = true; |
368 | 551k | } |
369 | 622k | } |
370 | | |
371 | | /* If we failed to find a valid object, or the object wasn't a keyword, or the |
372 | | * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct |
373 | | * Length for streams if we have encrypted files, because we must install a |
374 | | * SubFileDecode filter with a Length (EODString is incompatible with AES encryption) |
375 | | * Rather than mess about checking for encryption, we'll choose to just correctly |
376 | | * calculate the Length of all streams. Although this takes time, it will only |
377 | | * happen for files which are invalid. |
378 | | */ |
379 | 662k | if (stream_obj->length_valid != true) { |
380 | 110k | char Buffer[10]; |
381 | 110k | unsigned int bytes, total = 0; |
382 | 110k | int c = 0; |
383 | | |
384 | 110k | code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET); |
385 | 110k | if (code < 0) { |
386 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
387 | 0 | pdfi_pop(ctx, 1); |
388 | 0 | return code; |
389 | 0 | } |
390 | 110k | memset(Buffer, 0x00, 10); |
391 | 110k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream); |
392 | 110k | if (bytes < 9) { |
393 | 364 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
394 | 364 | return_error(gs_error_ioerror); |
395 | 364 | } |
396 | | |
397 | 109k | total = bytes; |
398 | 1.91G | do { |
399 | 1.91G | if (memcmp(Buffer, "endstream", 9) == 0) { |
400 | 66.5k | if (Buffer[9] != 0x00) |
401 | 66.5k | total--; |
402 | 66.5k | stream_obj->Length = total - 9; |
403 | 66.5k | stream_obj->length_valid = true; |
404 | 66.5k | break; |
405 | 66.5k | } |
406 | 1.91G | if (memcmp(Buffer, "endobj", 6) == 0) { |
407 | 7.01k | if (Buffer[9] != 0x00) |
408 | 6.93k | total--; |
409 | 7.01k | stream_obj->Length = total - 6; |
410 | 7.01k | stream_obj->length_valid = true; |
411 | 7.01k | break; |
412 | 7.01k | } |
413 | 1.91G | memmove(Buffer, Buffer+1, 9); |
414 | 1.91G | c = pdfi_read_byte(ctx, ctx->main_stream); |
415 | 1.91G | if (c < 0) |
416 | 36.3k | break; |
417 | 1.91G | Buffer[9] = (byte)c; |
418 | 1.91G | total++; |
419 | 1.91G | } while(1); |
420 | 109k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
421 | 109k | if (c < 0) |
422 | 36.3k | return_error(gs_error_ioerror); |
423 | 73.5k | return 0; |
424 | 109k | } |
425 | | |
426 | 551k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
427 | 551k | if (code < 0) { |
428 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
429 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) { |
430 | 0 | return code; |
431 | 0 | } |
432 | | /* Something went wrong looking for endobj, but we found endstream, so assume |
433 | | * for now that will suffice. |
434 | | */ |
435 | 0 | return 0; |
436 | 0 | } |
437 | | |
438 | 551k | if (code == 0) { |
439 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
440 | 0 | return_error(gs_error_stackunderflow); |
441 | 0 | } |
442 | | |
443 | 551k | if (code != TOKEN_ENDOBJ) { |
444 | 1.69k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
445 | 1.69k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL); |
446 | | /* Didn't find an endobj, but we have an endstream, so assume |
447 | | * for now that will suffice |
448 | | */ |
449 | 1.69k | return code; |
450 | 1.69k | } |
451 | 550k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
452 | | |
453 | 550k | return 0; |
454 | 551k | } |
455 | | |
456 | | /* This reads an object *after* the x y obj keyword has been found. Its broken out |
457 | | * separately for the benefit of the repair code when reading the dictionary following |
458 | | * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does |
459 | | * not have an 'endobj', we rely on the error handling to take care of that for us. |
460 | | */ |
461 | | int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen) |
462 | 2.16M | { |
463 | 2.16M | int code = 0, initial_depth = 0; |
464 | 2.16M | pdf_key keyword; |
465 | 2.16M | gs_offset_t saved_offset[3]; |
466 | 2.16M | pdf_obj_type type; |
467 | | |
468 | 2.16M | initial_depth = pdfi_count_stack(ctx); |
469 | 2.16M | saved_offset[0] = saved_offset[1] = saved_offset[2] = 0; |
470 | | |
471 | 2.16M | code = pdfi_read_token(ctx, s, objnum, gen); |
472 | 2.16M | if (code < 0) |
473 | 4.50k | return code; |
474 | | |
475 | 2.15M | if (code == 0) |
476 | | /* failed to read a token */ |
477 | 72 | return_error(gs_error_syntaxerror); |
478 | | |
479 | 2.15M | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) { |
480 | 24.5k | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
481 | 24.5k | if (keyword == TOKEN_ENDOBJ) { |
482 | 486 | ctx->stack_top[-1] = PDF_NULL_OBJ; |
483 | 486 | return 0; |
484 | 486 | } |
485 | 24.5k | } |
486 | | |
487 | 71.2M | do { |
488 | | /* move all the saved offsets up by one */ |
489 | 71.2M | saved_offset[0] = saved_offset[1]; |
490 | 71.2M | saved_offset[1] = saved_offset[2]; |
491 | 71.2M | saved_offset[2] = pdfi_unread_tell(ctx); |
492 | | |
493 | 71.2M | code = pdfi_read_token(ctx, s, objnum, gen); |
494 | 71.2M | if (code < 0) { |
495 | 213k | pdfi_clearstack(ctx); |
496 | 213k | return code; |
497 | 213k | } |
498 | 71.0M | if (s->eof) |
499 | 2.51k | return_error(gs_error_syntaxerror); |
500 | 71.0M | code = 0; |
501 | 71.0M | type = pdfi_type_of(ctx->stack_top[-1]); |
502 | 71.0M | if (type == PDF_KEYWORD) |
503 | 149k | goto missing_endobj; |
504 | 71.0M | } while (type != PDF_FAST_KEYWORD); |
505 | | |
506 | 1.79M | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
507 | 1.79M | if (keyword == TOKEN_ENDOBJ) { |
508 | 1.05M | pdf_obj *o; |
509 | | |
510 | 1.05M | if (pdfi_count_stack(ctx) - initial_depth < 2) { |
511 | 270 | pdfi_clearstack(ctx); |
512 | 270 | return_error(gs_error_stackunderflow); |
513 | 270 | } |
514 | | |
515 | 1.05M | o = ctx->stack_top[-2]; |
516 | | |
517 | 1.05M | pdfi_pop(ctx, 1); |
518 | | |
519 | 1.05M | if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
520 | 1.05M | o->indirect_num = o->object_num = objnum; |
521 | 1.05M | o->indirect_gen = o->generation_num = gen; |
522 | 1.05M | } |
523 | 1.05M | return code; |
524 | 1.05M | } |
525 | 735k | if (keyword == TOKEN_STREAM) { |
526 | 685k | pdfi_pop(ctx, 1); |
527 | 685k | return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen); |
528 | 685k | } |
529 | 49.4k | if (keyword == TOKEN_OBJ) { |
530 | 5.62k | pdf_obj *o; |
531 | | |
532 | 5.62k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) { |
533 | 0 | return code; |
534 | 0 | } |
535 | | |
536 | | /* 4 for; the object we want, the object number, generation number and 'obj' keyword */ |
537 | 5.62k | if (pdfi_count_stack(ctx) - initial_depth < 4) |
538 | 1.32k | return_error(gs_error_stackunderflow); |
539 | | |
540 | | /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */ |
541 | 4.30k | o = ctx->stack_top[-4]; |
542 | | |
543 | 4.30k | pdfi_pop(ctx, 3); |
544 | | |
545 | 4.30k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
546 | 4.28k | o->indirect_num = o->object_num = objnum; |
547 | 4.28k | o->indirect_gen = o->generation_num = gen; |
548 | 4.28k | } |
549 | 4.30k | if (saved_offset[0] > 0) |
550 | 4.30k | (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET); |
551 | 4.30k | return 0; |
552 | 5.62k | } |
553 | | |
554 | 193k | missing_endobj: |
555 | | /* Assume that any other keyword means a missing 'endobj' */ |
556 | 193k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) { |
557 | 193k | pdf_obj *o; |
558 | | |
559 | 193k | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL); |
560 | | |
561 | 193k | if (pdfi_count_stack(ctx) - initial_depth < 2) |
562 | 3.53k | return_error(gs_error_stackunderflow); |
563 | | |
564 | 190k | o = ctx->stack_top[-2]; |
565 | | |
566 | 190k | pdfi_pop(ctx, 1); |
567 | | |
568 | 190k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
569 | 188k | o->indirect_num = o->object_num = objnum; |
570 | 188k | o->indirect_gen = o->generation_num = gen; |
571 | 188k | } |
572 | 190k | return code; |
573 | 193k | } |
574 | 0 | pdfi_pop(ctx, 2); |
575 | 0 | return_error(gs_error_syntaxerror); |
576 | 193k | } |
577 | | |
578 | | static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset) |
579 | 2.17M | { |
580 | 2.17M | int code = 0; |
581 | 2.17M | int objnum = 0, gen = 0; |
582 | | |
583 | | /* An object consists of 'num gen obj' followed by a token, follwed by an endobj |
584 | | * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we |
585 | | * want to deal with it specially by getting the Length, jumping to the end and checking |
586 | | * for an endobj. Or not, possibly, because it would be slow. |
587 | | */ |
588 | 2.17M | code = pdfi_read_bare_int(ctx, s, &objnum); |
589 | 2.17M | if (code < 0) |
590 | 44.6k | return code; |
591 | 2.12M | if (code == 0) |
592 | 13.4k | return_error(gs_error_syntaxerror); |
593 | | |
594 | 2.11M | code = pdfi_read_bare_int(ctx, s, &gen); |
595 | 2.11M | if (code < 0) |
596 | 2.37k | return code; |
597 | 2.11M | if (code == 0) |
598 | 2.88k | return_error(gs_error_syntaxerror); |
599 | | |
600 | 2.10M | code = pdfi_read_bare_keyword(ctx, s); |
601 | 2.10M | if (code < 0) |
602 | 0 | return code; |
603 | 2.10M | if (code == 0) |
604 | 0 | return gs_note_error(gs_error_ioerror); |
605 | 2.10M | if (code != TOKEN_OBJ) { |
606 | 5.98k | return_error(gs_error_syntaxerror); |
607 | 5.98k | } |
608 | | |
609 | 2.10M | return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen); |
610 | 2.10M | } |
611 | | |
612 | | static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, |
613 | | const xref_entry *entry, bool cache) |
614 | 956k | { |
615 | 956k | int code = 0; |
616 | 956k | xref_entry *compressed_entry; |
617 | 956k | pdf_c_stream *compressed_stream = NULL; |
618 | 956k | pdf_c_stream *SubFile_stream = NULL; |
619 | 956k | pdf_c_stream *Object_stream = NULL; |
620 | 956k | int i = 0, object_length = 0; |
621 | 956k | int64_t num_entries; |
622 | 956k | int found_object; |
623 | 956k | int64_t Length, First; |
624 | 956k | gs_offset_t offset = 0; |
625 | 956k | pdf_stream *compressed_object = NULL; |
626 | 956k | pdf_dict *compressed_sdict = NULL; /* alias */ |
627 | 956k | pdf_name *Type = NULL; |
628 | | |
629 | 956k | if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1) |
630 | 1.80k | return_error(gs_error_undefined); |
631 | | |
632 | 955k | compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num]; |
633 | | |
634 | 955k | if (ctx->args.pdfdebug) { |
635 | 0 | outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj); |
636 | 0 | outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num); |
637 | 0 | } |
638 | | |
639 | 955k | if (compressed_entry->cache == NULL) { |
640 | | #if CACHE_STATISTICS |
641 | | ctx->compressed_misses++; |
642 | | #endif |
643 | 57.8k | code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET); |
644 | 57.8k | if (code < 0) |
645 | 0 | goto exit; |
646 | | |
647 | 57.8k | code = pdfi_read_object(ctx, ctx->main_stream, 0); |
648 | 57.8k | if (code < 0) |
649 | 12.9k | goto exit; |
650 | | |
651 | 44.9k | if (pdfi_count_stack(ctx) < 1) { |
652 | 0 | code = gs_note_error(gs_error_stackunderflow); |
653 | 0 | goto exit; |
654 | 0 | } |
655 | | |
656 | 44.9k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) { |
657 | 8.83k | pdfi_pop(ctx, 1); |
658 | 8.83k | code = gs_note_error(gs_error_typecheck); |
659 | 8.83k | goto exit; |
660 | 8.83k | } |
661 | 36.1k | if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) { |
662 | 282 | pdfi_pop(ctx, 1); |
663 | | /* Same error (undefined) as when we read an uncompressed object with the wrong number */ |
664 | 282 | code = gs_note_error(gs_error_undefined); |
665 | 282 | goto exit; |
666 | 282 | } |
667 | 35.8k | compressed_object = (pdf_stream *)ctx->stack_top[-1]; |
668 | 35.8k | pdfi_countup(compressed_object); |
669 | 35.8k | pdfi_pop(ctx, 1); |
670 | 35.8k | code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object); |
671 | 35.8k | if (code < 0) |
672 | 0 | goto exit; |
673 | 897k | } else { |
674 | | #if CACHE_STATISTICS |
675 | | ctx->compressed_hits++; |
676 | | #endif |
677 | 897k | compressed_object = (pdf_stream *)compressed_entry->cache->o; |
678 | 897k | pdfi_countup(compressed_object); |
679 | 897k | pdfi_promote_cache_entry(ctx, compressed_entry->cache); |
680 | 897k | } |
681 | 933k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict); |
682 | 933k | if (code < 0) |
683 | 11 | return code; |
684 | | |
685 | 933k | if (ctx->loop_detection != NULL) { |
686 | 927k | code = pdfi_loop_detector_mark(ctx); |
687 | 927k | if (code < 0) |
688 | 0 | goto exit; |
689 | 927k | if (compressed_sdict->object_num != 0) { |
690 | 927k | if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) { |
691 | 211 | code = gs_note_error(gs_error_circular_reference); |
692 | 927k | } else { |
693 | 927k | code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num); |
694 | 927k | } |
695 | 927k | if (code < 0) { |
696 | 211 | (void)pdfi_loop_detector_cleartomark(ctx); |
697 | 211 | goto exit; |
698 | 211 | } |
699 | 927k | } |
700 | 927k | } |
701 | | /* Check its an ObjStm ! */ |
702 | 932k | code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type); |
703 | 932k | if (code < 0) { |
704 | 215 | if (ctx->loop_detection != NULL) |
705 | 215 | (void)pdfi_loop_detector_cleartomark(ctx); |
706 | 215 | goto exit; |
707 | 215 | } |
708 | | |
709 | 932k | if (!pdfi_name_is(Type, "ObjStm")){ |
710 | 669 | if (ctx->loop_detection != NULL) |
711 | 668 | (void)pdfi_loop_detector_cleartomark(ctx); |
712 | 669 | code = gs_note_error(gs_error_syntaxerror); |
713 | 669 | goto exit; |
714 | 669 | } |
715 | | |
716 | | /* Need to check the /N entry to see if the object is actually in this stream! */ |
717 | 931k | code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries); |
718 | 931k | if (code < 0) { |
719 | 195 | if (ctx->loop_detection != NULL) |
720 | 195 | (void)pdfi_loop_detector_cleartomark(ctx); |
721 | 195 | goto exit; |
722 | 195 | } |
723 | | |
724 | 931k | if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) { |
725 | 42 | if (ctx->loop_detection != NULL) |
726 | 42 | (void)pdfi_loop_detector_cleartomark(ctx); |
727 | 42 | code = gs_note_error(gs_error_rangecheck); |
728 | 42 | goto exit; |
729 | 42 | } |
730 | | |
731 | 931k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
732 | 931k | if (code < 0) { |
733 | 206k | if (ctx->loop_detection != NULL) |
734 | 206k | (void)pdfi_loop_detector_cleartomark(ctx); |
735 | 206k | goto exit; |
736 | 206k | } |
737 | | |
738 | 725k | code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First); |
739 | 725k | if (code < 0) { |
740 | 3.01k | if (ctx->loop_detection != NULL) |
741 | 3.01k | (void)pdfi_loop_detector_cleartomark(ctx); |
742 | 3.01k | goto exit; |
743 | 3.01k | } |
744 | | |
745 | 722k | if (ctx->loop_detection != NULL) |
746 | 716k | (void)pdfi_loop_detector_cleartomark(ctx); |
747 | | |
748 | 722k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
749 | 722k | if (code < 0) |
750 | 0 | goto exit; |
751 | | |
752 | 722k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
753 | 722k | if (code < 0) |
754 | 0 | goto exit; |
755 | | |
756 | 722k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
757 | 722k | if (code < 0) |
758 | 1.34k | goto exit; |
759 | | |
760 | 42.4M | for (i=0;i < num_entries;i++) |
761 | 41.7M | { |
762 | 41.7M | int new_offset; |
763 | 41.7M | code = pdfi_read_bare_int(ctx, compressed_stream, &found_object); |
764 | 41.7M | if (code < 0) |
765 | 14.3k | goto exit; |
766 | 41.7M | if (code == 0) { |
767 | 1.50k | code = gs_note_error(gs_error_syntaxerror); |
768 | 1.50k | goto exit; |
769 | 1.50k | } |
770 | 41.7M | code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset); |
771 | 41.7M | if (code < 0) |
772 | 23.1k | goto exit; |
773 | 41.6M | if (code == 0) { |
774 | 1.05k | code = gs_note_error(gs_error_syntaxerror); |
775 | 1.05k | goto exit; |
776 | 1.05k | } |
777 | 41.6M | if (i == entry->u.compressed.object_index) { |
778 | 696k | if (found_object != obj) { |
779 | 2.05k | code = gs_note_error(gs_error_undefined); |
780 | 2.05k | goto exit; |
781 | 2.05k | } |
782 | 694k | offset = new_offset; |
783 | 694k | } |
784 | 41.6M | if (i == entry->u.compressed.object_index + 1) |
785 | 665k | object_length = new_offset - offset; |
786 | 41.6M | } |
787 | | |
788 | | /* Bug #705259 - The first object need not lie immediately after the initial |
789 | | * table of object numbers and offsets. The start of the first object is given |
790 | | * by the value of First. We don't know how many bytes we consumed getting to |
791 | | * the end of the table, unfortunately, so we close the stream, rewind the main |
792 | | * stream back to the beginning of the ObjStm, and then read and discard 'First' |
793 | | * bytes in order to get to the start of the first object. Then we read the |
794 | | * number of bytes required to get from there to the start of the object we |
795 | | * actually want. |
796 | | * If this ever looks like it's causing performance problems we could read the |
797 | | * initial table above manually instead of using the existing code, and track |
798 | | * how many bytes we'd read, which would avoid us having to tear down and |
799 | | * rebuild the stream. |
800 | | */ |
801 | 678k | if (compressed_stream) |
802 | 678k | pdfi_close_file(ctx, compressed_stream); |
803 | 678k | if (SubFile_stream) |
804 | 678k | pdfi_close_file(ctx, SubFile_stream); |
805 | | |
806 | 678k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
807 | 678k | if (code < 0) |
808 | 0 | goto exit; |
809 | | |
810 | | /* We already dereferenced this above, so we don't need the loop detection checking here */ |
811 | 678k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
812 | 678k | if (code < 0) |
813 | 0 | goto exit; |
814 | | |
815 | 678k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
816 | 678k | if (code < 0) |
817 | 0 | goto exit; |
818 | | |
819 | 678k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
820 | 678k | if (code < 0) |
821 | 0 | goto exit; |
822 | | |
823 | 353M | for (i=0;i < First;i++) |
824 | 352M | { |
825 | 352M | int c = pdfi_read_byte(ctx, compressed_stream); |
826 | 352M | if (c < 0) { |
827 | 25 | code = gs_note_error(gs_error_ioerror); |
828 | 25 | goto exit; |
829 | 25 | } |
830 | 352M | } |
831 | | |
832 | | /* Skip to the offset of the object we want to read */ |
833 | 2.39G | for (i=0;i < offset;i++) |
834 | 2.39G | { |
835 | 2.39G | int c = pdfi_read_byte(ctx, compressed_stream); |
836 | 2.39G | if (c < 0) { |
837 | 62.5k | code = gs_note_error(gs_error_ioerror); |
838 | 62.5k | goto exit; |
839 | 62.5k | } |
840 | 2.39G | } |
841 | | |
842 | | /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit |
843 | | * the number of bytes we read to the declared size of the object (difference between |
844 | | * the offsets of the object we want to read, and the next object). If it is 0 then |
845 | | * we're reading the last object in the stream, so we just rely on the SubFileDecode |
846 | | * we set up when we created compressed_stream to limit the bytes to the length of |
847 | | * that stream. |
848 | | */ |
849 | 615k | if (object_length > 0) { |
850 | 590k | code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false); |
851 | 590k | if (code < 0) |
852 | 0 | goto exit; |
853 | 590k | } else { |
854 | 25.3k | Object_stream = compressed_stream; |
855 | 25.3k | } |
856 | | |
857 | 615k | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
858 | 615k | if (code < 0) |
859 | 5.57k | goto exit; |
860 | 610k | if (code == 0) { |
861 | 95 | code = gs_note_error(gs_error_syntaxerror); |
862 | 95 | goto exit; |
863 | 95 | } |
864 | 610k | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) { |
865 | 597k | int start_depth = pdfi_count_stack(ctx); |
866 | | |
867 | | /* Need to read all the elements from COS objects */ |
868 | 21.7M | do { |
869 | 21.7M | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
870 | 21.7M | if (code < 0) |
871 | 26.0k | goto exit; |
872 | 21.6M | if (code == 0) { |
873 | 4.50k | code = gs_note_error(gs_error_syntaxerror); |
874 | 4.50k | goto exit; |
875 | 4.50k | } |
876 | 21.6M | if (compressed_stream->eof == true) { |
877 | 253 | code = gs_note_error(gs_error_ioerror); |
878 | 253 | goto exit; |
879 | 253 | } |
880 | 21.6M | } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth); |
881 | 597k | } |
882 | | |
883 | 579k | *object = ctx->stack_top[-1]; |
884 | | /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets |
885 | | * the object number for uncompressed objects. So we need to do that here. |
886 | | */ |
887 | 579k | if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
888 | 575k | (*object)->indirect_num = (*object)->object_num = obj; |
889 | 575k | (*object)->indirect_gen = (*object)->generation_num = gen; |
890 | 575k | pdfi_countup(*object); |
891 | 575k | } |
892 | 579k | pdfi_pop(ctx, 1); |
893 | | |
894 | 579k | if (cache) { |
895 | 550k | code = pdfi_add_to_cache(ctx, *object); |
896 | 550k | if (code < 0) { |
897 | 0 | pdfi_countdown(*object); |
898 | 0 | goto exit; |
899 | 0 | } |
900 | 550k | } |
901 | | |
902 | 955k | exit: |
903 | 955k | if (Object_stream) |
904 | 615k | pdfi_close_file(ctx, Object_stream); |
905 | 955k | if (Object_stream != compressed_stream) |
906 | 695k | if (compressed_stream) |
907 | 695k | pdfi_close_file(ctx, compressed_stream); |
908 | 955k | if (SubFile_stream) |
909 | 722k | pdfi_close_file(ctx, SubFile_stream); |
910 | 955k | pdfi_countdown(compressed_object); |
911 | 955k | pdfi_countdown(Type); |
912 | 955k | return code; |
913 | 579k | } |
914 | | |
915 | | /* pdf_dereference returns an object with a reference count of at least 1, this represents the |
916 | | * reference being held by the caller (in **object) when we return from this function. |
917 | | */ |
918 | | static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache) |
919 | 8.42M | { |
920 | 8.42M | xref_entry *entry; |
921 | 8.42M | int code, stack_depth = pdfi_count_stack(ctx); |
922 | 8.42M | gs_offset_t saved_stream_offset; |
923 | 8.42M | bool saved_decrypt_strings = ctx->encryption.decrypt_strings; |
924 | | |
925 | 8.42M | *object = NULL; |
926 | | |
927 | 8.42M | if (ctx->xref_table == NULL) |
928 | 55 | return_error(gs_error_typecheck); |
929 | | |
930 | 8.42M | if (ctx->main_stream == NULL || ctx->main_stream->s == NULL) |
931 | 0 | return_error(gs_error_ioerror); |
932 | | |
933 | 8.42M | if (obj >= ctx->xref_table->xref_size) { |
934 | 265k | char extra_info[gp_file_name_sizeof]; |
935 | | |
936 | 265k | gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj); |
937 | 265k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) { |
938 | 0 | return code; |
939 | 0 | } |
940 | | |
941 | 265k | code = pdfi_repair_file(ctx); |
942 | 265k | if (code < 0) { |
943 | 265k | *object = NULL; |
944 | 265k | return code; |
945 | 265k | } |
946 | 29 | if (obj >= ctx->xref_table->xref_size) { |
947 | 19 | *object = NULL; |
948 | 19 | return_error(gs_error_rangecheck); |
949 | 19 | } |
950 | 29 | } |
951 | | |
952 | 8.16M | entry = &ctx->xref_table->xref[obj]; |
953 | | |
954 | 8.16M | if(entry->object_num == 0) { |
955 | 1.79M | pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0"); |
956 | 1.79M | return_error(gs_error_undefined); |
957 | 1.79M | } |
958 | | |
959 | 6.36M | if (entry->free) { |
960 | 10.5k | char extra_info[gp_file_name_sizeof]; |
961 | | |
962 | 10.5k | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
963 | 10.5k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
964 | 10.5k | *object = PDF_NULL_OBJ; |
965 | 10.5k | return code; |
966 | 6.35M | }else { |
967 | 6.35M | if (!entry->compressed) { |
968 | 4.94M | if(entry->u.uncompressed.generation_num != gen) |
969 | 4.47k | pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", ""); |
970 | 4.94M | } |
971 | 6.35M | } |
972 | | |
973 | 6.35M | if (ctx->loop_detection) { |
974 | 6.08M | if (pdfi_loop_detector_check_object(ctx, obj) == true) |
975 | 2.08k | return_error(gs_error_circular_reference); |
976 | 6.08M | if (entry->free) { |
977 | 0 | code = pdfi_loop_detector_add_object(ctx, obj); |
978 | 0 | if (code < 0) |
979 | 0 | return code; |
980 | 0 | } |
981 | 6.08M | } |
982 | 6.35M | if (entry->cache != NULL){ |
983 | 3.28M | pdf_obj_cache_entry *cache_entry = entry->cache; |
984 | | |
985 | | #if CACHE_STATISTICS |
986 | | ctx->hits++; |
987 | | #endif |
988 | 3.28M | *object = cache_entry->o; |
989 | 3.28M | pdfi_countup(*object); |
990 | | |
991 | 3.28M | pdfi_promote_cache_entry(ctx, cache_entry); |
992 | 3.28M | } else { |
993 | 3.07M | saved_stream_offset = pdfi_unread_tell(ctx); |
994 | | |
995 | 3.07M | if (entry->compressed) { |
996 | | /* This is an object in a compressed object stream */ |
997 | 956k | ctx->encryption.decrypt_strings = false; |
998 | | |
999 | 956k | code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache); |
1000 | 956k | if (code < 0 || *object == NULL) |
1001 | 377k | goto error; |
1002 | 2.11M | } else { |
1003 | | #if CACHE_STATISTICS |
1004 | | ctx->misses++; |
1005 | | #endif |
1006 | 2.11M | ctx->encryption.decrypt_strings = true; |
1007 | | |
1008 | 2.11M | code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET); |
1009 | 2.11M | if (code < 0) |
1010 | 96 | goto error; |
1011 | | |
1012 | 2.11M | code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset); |
1013 | | |
1014 | | /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it. |
1015 | | * reload the xref entry to be certain it is valid. |
1016 | | */ |
1017 | 2.11M | entry = &ctx->xref_table->xref[obj]; |
1018 | 2.11M | if (code < 0) { |
1019 | 315k | int code1 = 0; |
1020 | 315k | if (entry->free) { |
1021 | 0 | char extra_info[gp_file_name_sizeof]; |
1022 | |
|
1023 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1024 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1025 | 0 | *object = PDF_NULL_OBJ; |
1026 | 0 | if (code < 0) |
1027 | 0 | goto error; |
1028 | 0 | goto free_obj; |
1029 | 0 | } |
1030 | 315k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1031 | 315k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1032 | 315k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1033 | | |
1034 | 315k | code1 = pdfi_repair_file(ctx); |
1035 | 315k | if (code1 == 0) |
1036 | 1.45k | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1037 | | /* Repair failed, just give up and return an error */ |
1038 | 313k | goto error; |
1039 | 315k | } |
1040 | | |
1041 | | /* We only expect a single object back when dereferencing an indirect reference |
1042 | | * The only way (I think) we can end up with more than one is if the object initially |
1043 | | * appears to be a dictionary or array, but the object terminates (with endobj or |
1044 | | * simply reaching EOF) without terminating the array or dictionary. That's clearly |
1045 | | * an error. We might, as a future 'improvement' choose to walk back through |
1046 | | * the stack looking for unterminated dictionary or array markers, and closing them |
1047 | | * so that (hopefully!) we end up with a single 'repaired' object on the stack. |
1048 | | * But for now I'm simply going to treat these as errors. We will try a repair on the |
1049 | | * file to see if we end up using a different (hopefully intact) object from the file. |
1050 | | */ |
1051 | 1.79M | if (pdfi_count_stack(ctx) - stack_depth > 1) { |
1052 | 139k | int code1 = 0; |
1053 | | |
1054 | 139k | code1 = pdfi_repair_file(ctx); |
1055 | 139k | if (code1 == 0) |
1056 | 521 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1057 | | /* Repair failed, just give up and return an error */ |
1058 | 138k | code = gs_note_error(gs_error_syntaxerror); |
1059 | 138k | goto error; |
1060 | 139k | } |
1061 | | |
1062 | 1.66M | if (pdfi_count_stack(ctx) > 0 && |
1063 | 1.66M | ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) && |
1064 | 1.65M | (ctx->stack_top[-1])->object_num == obj) |
1065 | 1.65M | || ctx->stack_top[-1] == PDF_NULL_OBJ)) { |
1066 | 1.65M | *object = ctx->stack_top[-1]; |
1067 | 1.65M | pdfi_countup(*object); |
1068 | 1.65M | pdfi_pop(ctx, 1); |
1069 | 1.65M | if (pdfi_type_of(*object) == PDF_INDIRECT) { |
1070 | 0 | pdf_indirect_ref *iref = (pdf_indirect_ref *)*object; |
1071 | |
|
1072 | 0 | if (iref->ref_object_num == obj) { |
1073 | 0 | code = gs_note_error(gs_error_circular_reference); |
1074 | 0 | pdfi_countdown(*object); |
1075 | 0 | *object = NULL; |
1076 | 0 | goto error; |
1077 | 0 | } |
1078 | 0 | } |
1079 | | /* There's really no point in caching an indirect reference and |
1080 | | * I think it could be potentially confusing to later calls. |
1081 | | */ |
1082 | 1.65M | if (cache && pdfi_type_of(*object) != PDF_INDIRECT) { |
1083 | 1.65M | code = pdfi_add_to_cache(ctx, *object); |
1084 | 1.65M | if (code < 0) { |
1085 | 0 | pdfi_countdown(*object); |
1086 | 0 | goto error; |
1087 | 0 | } |
1088 | 1.65M | } |
1089 | 1.65M | } else { |
1090 | 1.75k | int code1 = 0; |
1091 | | |
1092 | 1.75k | if (pdfi_count_stack(ctx) > 0) |
1093 | 1.66k | pdfi_pop(ctx, 1); |
1094 | | |
1095 | 1.75k | if (entry->free) { |
1096 | 0 | char extra_info[gp_file_name_sizeof]; |
1097 | |
|
1098 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1099 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1100 | 0 | *object = PDF_NULL_OBJ; |
1101 | 0 | if (code < 0) |
1102 | 0 | goto error; |
1103 | 0 | return code; |
1104 | 0 | } |
1105 | 1.75k | code1 = pdfi_repair_file(ctx); |
1106 | 1.75k | if (code1 == 0) |
1107 | 177 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1108 | | /* Repair failed, just give up and return an error */ |
1109 | 1.57k | code = gs_note_error(gs_error_undefined); |
1110 | 1.57k | goto error; |
1111 | 1.75k | } |
1112 | 1.66M | } |
1113 | 2.23M | free_obj: |
1114 | 2.23M | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1115 | 2.23M | } |
1116 | | |
1117 | 5.52M | if (ctx->loop_detection && pdf_object_num(*object) != 0) { |
1118 | 5.24M | code = pdfi_loop_detector_add_object(ctx, (*object)->object_num); |
1119 | 5.24M | if (code < 0) { |
1120 | 0 | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1121 | 0 | return code; |
1122 | 0 | } |
1123 | 5.24M | } |
1124 | 5.52M | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1125 | 5.52M | return 0; |
1126 | | |
1127 | 831k | error: |
1128 | 831k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1129 | 831k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1130 | | /* Return the stack to the state at entry */ |
1131 | 831k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1132 | 831k | return code; |
1133 | 5.52M | } |
1134 | | |
1135 | | int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1136 | 8.37M | { |
1137 | 8.37M | return pdfi_dereference_main(ctx, obj, gen, object, true); |
1138 | 8.37M | } |
1139 | | |
1140 | | int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1141 | 46.3k | { |
1142 | 46.3k | return pdfi_dereference_main(ctx, obj, gen, object, false); |
1143 | 46.3k | } |
1144 | | |
1145 | | /* do a derefence with loop detection */ |
1146 | | int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1147 | 2.62M | { |
1148 | 2.62M | int code; |
1149 | | |
1150 | 2.62M | code = pdfi_loop_detector_mark(ctx); |
1151 | 2.62M | if (code < 0) |
1152 | 0 | return code; |
1153 | | |
1154 | 2.62M | code = pdfi_dereference(ctx, obj, gen, object); |
1155 | 2.62M | (void)pdfi_loop_detector_cleartomark(ctx); |
1156 | 2.62M | return code; |
1157 | 2.62M | } |
1158 | | |
1159 | | int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1160 | 46.3k | { |
1161 | 46.3k | int code; |
1162 | | |
1163 | 46.3k | code = pdfi_loop_detector_mark(ctx); |
1164 | 46.3k | if (code < 0) |
1165 | 0 | return code; |
1166 | | |
1167 | 46.3k | code = pdfi_dereference_nocache(ctx, obj, gen, object); |
1168 | 46.3k | (void)pdfi_loop_detector_cleartomark(ctx); |
1169 | 46.3k | return code; |
1170 | 46.3k | } |
1171 | | |
1172 | | static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1173 | 63.9k | { |
1174 | 63.9k | int code = 0; |
1175 | 63.9k | uint64_t index, arraysize; |
1176 | 63.9k | pdf_obj *object = NULL; |
1177 | 63.9k | pdf_array *array = (pdf_array *)obj; |
1178 | | |
1179 | 63.9k | arraysize = pdfi_array_size(array); |
1180 | 292k | for (index = 0; index < arraysize; index++) { |
1181 | 228k | if (ctx->loop_detection != NULL) { |
1182 | 228k | code = pdfi_loop_detector_mark(ctx); |
1183 | 228k | if (code < 0) |
1184 | 0 | return code; |
1185 | 228k | } |
1186 | | |
1187 | 228k | code = pdfi_array_get_no_store_R(ctx, array, index, &object); |
1188 | | |
1189 | 228k | if (ctx->loop_detection != NULL) { |
1190 | 228k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1191 | 228k | if (code1 < 0) |
1192 | 0 | return code1; |
1193 | 228k | } |
1194 | | |
1195 | 228k | if (code == gs_error_circular_reference) { |
1196 | | /* Previously we just left as an indirect reference, but now we want |
1197 | | * to return the error so we don't end up replacing indirect references |
1198 | | * to objects with circular references. |
1199 | | */ |
1200 | 228k | } else { |
1201 | 228k | if (code < 0) goto exit; |
1202 | 228k | if (recurse) { |
1203 | 1.57k | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse); |
1204 | 1.57k | if (code < 0) goto exit; |
1205 | 1.57k | } |
1206 | | /* don't store the object if it's a stream (leave as a ref) */ |
1207 | 228k | if (pdfi_type_of(object) != PDF_STREAM) |
1208 | 228k | code = pdfi_array_put(ctx, array, index, object); |
1209 | 228k | } |
1210 | 228k | if (code < 0) goto exit; |
1211 | | |
1212 | 228k | pdfi_countdown(object); |
1213 | 228k | object = NULL; |
1214 | 228k | } |
1215 | | |
1216 | 63.9k | exit: |
1217 | 63.9k | pdfi_countdown(object); |
1218 | 63.9k | return code; |
1219 | 63.9k | } |
1220 | | |
1221 | | static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1222 | 9.25k | { |
1223 | 9.25k | int code = 0; |
1224 | 9.25k | pdf_dict *dict = (pdf_dict *)obj; |
1225 | 9.25k | pdf_name *Key = NULL; |
1226 | 9.25k | pdf_obj *Value = NULL; |
1227 | 9.25k | uint64_t index, dictsize; |
1228 | | |
1229 | 9.25k | dictsize = pdfi_dict_entries(dict); |
1230 | | |
1231 | | /* Note: I am not using pdfi_dict_first/next because of needing to handle |
1232 | | * circular references. |
1233 | | */ |
1234 | 21.4k | for (index=0; index<dictsize; index ++) { |
1235 | 12.2k | Key = (pdf_name *)dict->list[index].key; |
1236 | 12.2k | if (pdfi_name_is(Key, "Parent")) |
1237 | 8 | continue; |
1238 | | |
1239 | 12.2k | if (ctx->loop_detection != NULL) { |
1240 | 12.1k | code = pdfi_loop_detector_mark(ctx); |
1241 | 12.1k | if (code < 0) |
1242 | 0 | return code; |
1243 | 12.1k | } |
1244 | | |
1245 | 12.2k | code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value); |
1246 | | |
1247 | 12.2k | if (ctx->loop_detection != NULL) { |
1248 | 12.1k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1249 | 12.1k | if (code1 < 0) |
1250 | 0 | return code1; |
1251 | 12.1k | } |
1252 | | |
1253 | 12.2k | if (code == gs_error_circular_reference) { |
1254 | | /* Just leave as an indirect ref */ |
1255 | 7 | code = 0; |
1256 | 12.2k | } else { |
1257 | 12.2k | if (code < 0) goto exit; |
1258 | 12.2k | if (recurse) { |
1259 | 4.58k | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse); |
1260 | 4.58k | if (code < 0) |
1261 | 53 | goto exit; |
1262 | 4.58k | } |
1263 | | /* don't store the object if it's a stream (leave as a ref) */ |
1264 | 12.1k | if (pdfi_type_of(Value) != PDF_STREAM) |
1265 | 12.1k | code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true); |
1266 | 12.1k | } |
1267 | 12.1k | if (code < 0) goto exit; |
1268 | | |
1269 | 12.1k | pdfi_countdown(Value); |
1270 | 12.1k | Value = NULL; |
1271 | 12.1k | } |
1272 | | |
1273 | 9.25k | exit: |
1274 | 9.25k | pdfi_countdown(Value); |
1275 | 9.25k | return code; |
1276 | 9.25k | } |
1277 | | |
1278 | | /* Resolve all the indirect references for an object |
1279 | | * Note: This can be recursive |
1280 | | */ |
1281 | | int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse) |
1282 | 263k | { |
1283 | 263k | int code = 0; |
1284 | | |
1285 | 263k | switch(pdfi_type_of(value)) { |
1286 | 63.9k | case PDF_ARRAY: |
1287 | 63.9k | code = pdfi_resolve_indirect_array(ctx, value, recurse); |
1288 | 63.9k | break; |
1289 | 9.25k | case PDF_DICT: |
1290 | 9.25k | code = pdfi_resolve_indirect_dict(ctx, value, recurse); |
1291 | 9.25k | break; |
1292 | 189k | default: |
1293 | 189k | break; |
1294 | 263k | } |
1295 | 263k | return code; |
1296 | 263k | } |
1297 | | |
1298 | | /* Resolve all the indirect references for an object |
1299 | | * Resolve indirect references, either one level or recursively, with loop detect on |
1300 | | * the parent (can by NULL) and the value. |
1301 | | */ |
1302 | | int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse) |
1303 | 263k | { |
1304 | 263k | int code = 0; |
1305 | | |
1306 | 263k | code = pdfi_loop_detector_mark(ctx); |
1307 | 263k | if (code < 0) goto exit; |
1308 | 263k | if (parent && parent->object_num != 0) { |
1309 | 255k | code = pdfi_loop_detector_add_object(ctx, parent->object_num); |
1310 | 255k | if (code < 0) goto exit; |
1311 | 255k | } |
1312 | | |
1313 | 263k | if (pdf_object_num(value) != 0) { |
1314 | 1.98k | if (pdfi_loop_detector_check_object(ctx, value->object_num)) { |
1315 | 5 | code = gs_note_error(gs_error_circular_reference); |
1316 | 5 | goto exit; |
1317 | 5 | } |
1318 | 1.97k | code = pdfi_loop_detector_add_object(ctx, value->object_num); |
1319 | 1.97k | if (code < 0) goto exit; |
1320 | 1.97k | } |
1321 | 262k | code = pdfi_resolve_indirect(ctx, value, recurse); |
1322 | | |
1323 | 263k | exit: |
1324 | 263k | (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */ |
1325 | 263k | return code; |
1326 | 262k | } |