/src/ghostpdl/pdf/pdf_deref.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2020-2022 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | /* Functions to deal with dereferencing indirect objects |
17 | | * for the PDF interpreter. In here we also keep the code |
18 | | * for dealing with the object cache, because the dereferencing |
19 | | * functions are currently the only place that deals with it. |
20 | | */ |
21 | | |
22 | | #include "pdf_int.h" |
23 | | #include "pdf_stack.h" |
24 | | #include "pdf_loop_detect.h" |
25 | | #include "strmio.h" |
26 | | #include "stream.h" |
27 | | #include "pdf_file.h" |
28 | | #include "pdf_misc.h" |
29 | | #include "pdf_dict.h" |
30 | | #include "pdf_array.h" |
31 | | #include "pdf_deref.h" |
32 | | #include "pdf_repair.h" |
33 | | |
34 | | /* Start with the object caching functions */ |
35 | | |
36 | | /* given an object, create a cache entry for it. If we have too many entries |
37 | | * then delete the leat-recently-used cache entry. Make the new entry be the |
38 | | * most-recently-used entry. The actual entries are attached to the xref table |
39 | | * (as well as being a double-linked list), because we detect an existing |
40 | | * cache entry by seeing that the xref table for the object number has a non-NULL |
41 | | * 'cache' member. |
42 | | * So we need to update the xref as well if we add or delete cache entries. |
43 | | */ |
44 | | static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o) |
45 | 1.54M | { |
46 | 1.54M | pdf_obj_cache_entry *entry; |
47 | | |
48 | 1.54M | if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) |
49 | 1.29k | return 0; |
50 | | |
51 | 1.54M | if (ctx->xref_table->xref[o->object_num].cache != NULL) { |
52 | | #if DEBUG_CACHE |
53 | | dmprintf1(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num); |
54 | | #endif |
55 | 0 | return_error(gs_error_unknownerror); |
56 | 0 | } |
57 | | |
58 | 1.54M | if (o->object_num > ctx->xref_table->xref_size) |
59 | 0 | return_error(gs_error_rangecheck); |
60 | | |
61 | 1.54M | if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE) |
62 | 596k | { |
63 | | #if DEBUG_CACHE |
64 | | dbgmprintf(ctx->memory, "Cache full, evicting LRU\n"); |
65 | | #endif |
66 | 596k | if (ctx->cache_LRU) { |
67 | 596k | entry = ctx->cache_LRU; |
68 | 596k | ctx->cache_LRU = entry->next; |
69 | 596k | if (entry->next) |
70 | 596k | ((pdf_obj_cache_entry *)entry->next)->previous = NULL; |
71 | 596k | ctx->xref_table->xref[entry->o->object_num].cache = NULL; |
72 | 596k | pdfi_countdown(entry->o); |
73 | 596k | ctx->cache_entries--; |
74 | 596k | gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU"); |
75 | 596k | } else |
76 | 0 | return_error(gs_error_unknownerror); |
77 | 596k | } |
78 | 1.54M | entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache"); |
79 | 1.54M | if (entry == NULL) |
80 | 0 | return_error(gs_error_VMerror); |
81 | | |
82 | 1.54M | memset(entry, 0x00, sizeof(pdf_obj_cache_entry)); |
83 | | |
84 | 1.54M | entry->o = o; |
85 | 1.54M | pdfi_countup(o); |
86 | 1.54M | if (ctx->cache_MRU) { |
87 | 1.51M | entry->previous = ctx->cache_MRU; |
88 | 1.51M | ctx->cache_MRU->next = entry; |
89 | 1.51M | } |
90 | 1.54M | ctx->cache_MRU = entry; |
91 | 1.54M | if (ctx->cache_LRU == NULL) |
92 | 26.2k | ctx->cache_LRU = entry; |
93 | | |
94 | 1.54M | ctx->cache_entries++; |
95 | 1.54M | ctx->xref_table->xref[o->object_num].cache = entry; |
96 | 1.54M | return 0; |
97 | 1.54M | } |
98 | | |
99 | | /* Given an existing cache entry, promote it to be the most-recently-used |
100 | | * cache entry. |
101 | | */ |
102 | | static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry) |
103 | 2.31M | { |
104 | 2.31M | if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) { |
105 | 2.01M | if ((pdf_obj_cache_entry *)cache_entry->next != NULL) |
106 | 2.01M | ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous; |
107 | 2.01M | if ((pdf_obj_cache_entry *)cache_entry->previous != NULL) |
108 | 2.01M | ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next; |
109 | 277 | else { |
110 | | /* the existing entry is the current least recently used, we need to make the 'next' |
111 | | * cache entry into the LRU. |
112 | | */ |
113 | 277 | ctx->cache_LRU = cache_entry->next; |
114 | 277 | } |
115 | 2.01M | cache_entry->next = NULL; |
116 | 2.01M | cache_entry->previous = ctx->cache_MRU; |
117 | 2.01M | ctx->cache_MRU->next = cache_entry; |
118 | 2.01M | ctx->cache_MRU = cache_entry; |
119 | 2.01M | } |
120 | 2.31M | return; |
121 | 2.31M | } |
122 | | |
123 | | /* This one's a bit of an oddity, its used for fonts. When we build a PDF font object |
124 | | * we want the object cache to reference *that* object, not the dictionary which was |
125 | | * read out of the PDF file, so this allows us to replace the font dictionary in the |
126 | | * cache with the actual font object, so that later dereferences will get this font |
127 | | * object. |
128 | | */ |
129 | | int replace_cache_entry(pdf_context *ctx, pdf_obj *o) |
130 | 57.0k | { |
131 | 57.0k | xref_entry *entry; |
132 | 57.0k | pdf_obj_cache_entry *cache_entry; |
133 | 57.0k | pdf_obj *old_cached_obj = NULL; |
134 | | |
135 | | /* Limited error checking here, we assume that things like the |
136 | | * validity of the object (eg not a free oobject) have already been handled. |
137 | | */ |
138 | | |
139 | 57.0k | entry = &ctx->xref_table->xref[o->object_num]; |
140 | 57.0k | cache_entry = entry->cache; |
141 | | |
142 | 57.0k | if (cache_entry == NULL) { |
143 | 0 | return(pdfi_add_to_cache(ctx, o)); |
144 | 57.0k | } else { |
145 | | /* NOTE: We grab the object without decrementing, to avoid triggering |
146 | | * a warning message for freeing an object that's in the cache |
147 | | */ |
148 | 57.0k | if (cache_entry->o != NULL) |
149 | 57.0k | old_cached_obj = cache_entry->o; |
150 | | |
151 | | /* Put new entry in the cache */ |
152 | 57.0k | cache_entry->o = o; |
153 | 57.0k | pdfi_countup(o); |
154 | 57.0k | pdfi_promote_cache_entry(ctx, cache_entry); |
155 | | |
156 | | /* Now decrement the old cache entry, if any */ |
157 | 57.0k | pdfi_countdown(old_cached_obj); |
158 | 57.0k | } |
159 | 57.0k | return 0; |
160 | 57.0k | } |
161 | | |
162 | | /* Now the dereferencing functions */ |
163 | | |
164 | | /* |
165 | | * Technically we can accept a stream other than the main PDF file stream here. This is |
166 | | * really for the case of compressed objects where we read tokens from the compressed |
167 | | * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode |
168 | | * on top of the main file stream, which may be useful. Note that this cannot work with |
169 | | * objects in compressed object streams! They should always pass a value of 0 for the stream_offset. |
170 | | * The stream_offset is the offset from the start of the underlying uncompressed PDF file of |
171 | | * the stream we are using. See the comments below when keyword is PDF_STREAM. |
172 | | */ |
173 | | |
174 | | /* Determine if a PDF object is in a compressed ObjStm. Returns < 0 |
175 | | * for an error, 0 if it is not in a compressed ObjStm and 1 if it is. |
176 | | * Currently errors are inmpossible. This is only used by the decryption code |
177 | | * to determine if a string is in a compressed object stream, if it is then |
178 | | * it can't be used for decryption. |
179 | | */ |
180 | | int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen) |
181 | 3.42k | { |
182 | 3.42k | xref_entry *entry; |
183 | | |
184 | | /* Can't possibly be a compressed object before we have finished reading |
185 | | * the xref. |
186 | | */ |
187 | 3.42k | if (ctx->xref_table == NULL) |
188 | 0 | return 0; |
189 | | |
190 | 3.42k | entry = &ctx->xref_table->xref[obj]; |
191 | | |
192 | 3.42k | if (entry->compressed) |
193 | 0 | return 1; |
194 | | |
195 | 3.42k | return 0; |
196 | 3.42k | } |
197 | | |
198 | | /* We should never read a 'stream' keyword from a compressed object stream |
199 | | * so this case should never end up here. |
200 | | */ |
201 | | static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, |
202 | | uint32_t objnum, uint32_t gen) |
203 | 383k | { |
204 | 383k | int code = 0; |
205 | 383k | int64_t i; |
206 | 383k | pdf_dict *dict = NULL; |
207 | 383k | gs_offset_t offset; |
208 | 383k | pdf_stream *stream_obj = NULL; |
209 | | |
210 | | /* Strange code time.... |
211 | | * If we are using a stream which is *not* the PDF uncompressed main file stream |
212 | | * then doing stell on it will only tell us how many bytes have been read from |
213 | | * that stream, it won't tell us the underlying file position. So we add on the |
214 | | * 'unread' bytes, *and* we add on the position of the start of the stream in |
215 | | * the actual main file. This is all done so that we can check the /Length |
216 | | * of the object. Note that this will *only* work for regular objects it can |
217 | | * not be used for compressed object streams, but those don't need checking anyway |
218 | | * they have a different mechanism altogether and should never get here. |
219 | | */ |
220 | 383k | offset = stell(s->s) - s->unread_size + stream_offset; |
221 | 383k | code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET); |
222 | | |
223 | 383k | if (pdfi_count_stack(ctx) < 1) |
224 | 0 | return_error(gs_error_stackunderflow); |
225 | | |
226 | 383k | dict = (pdf_dict *)ctx->stack_top[-1]; |
227 | | |
228 | 383k | if (pdfi_type_of(dict) != PDF_DICT) { |
229 | 474 | pdfi_pop(ctx, 1); |
230 | 474 | return_error(gs_error_syntaxerror); |
231 | 474 | } |
232 | | |
233 | 383k | dict->indirect_num = dict->object_num = objnum; |
234 | 383k | dict->indirect_gen = dict->generation_num = gen; |
235 | | |
236 | | /* Convert the dict into a stream */ |
237 | 383k | code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true); |
238 | 383k | if (code < 0) { |
239 | 0 | pdfi_pop(ctx, 1); |
240 | 0 | return code; |
241 | 0 | } |
242 | | /* Pop off the dict and push the stream */ |
243 | 383k | pdfi_pop(ctx, 1); |
244 | 383k | dict = NULL; |
245 | 383k | pdfi_push(ctx, (pdf_obj *)stream_obj); |
246 | | |
247 | 383k | stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum; |
248 | 383k | stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen; |
249 | 383k | stream_obj->stream_offset = offset; |
250 | | |
251 | | /* Exceptional code. Normally we do not need to worry about detecting circular references |
252 | | * when reading objects, because we do not dereference any indirect objects. However streams |
253 | | * are a slight exception in that we do get the Length from the stream dictionay and if that |
254 | | * is an indirect reference, then we dereference it. |
255 | | * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect |
256 | | * reference to the same stream object, and leads to infinite recursion. So deal with that |
257 | | * possibility here. |
258 | | */ |
259 | 383k | code = pdfi_loop_detector_mark(ctx); |
260 | 383k | if (code < 0) { |
261 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
262 | 0 | return code; |
263 | 0 | } |
264 | 383k | if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) { |
265 | 30 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
266 | 30 | pdfi_loop_detector_cleartomark(ctx); |
267 | 30 | return_error(gs_error_circular_reference); |
268 | 30 | } |
269 | | |
270 | 383k | code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num); |
271 | 383k | if (code < 0) { |
272 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
273 | 0 | pdfi_loop_detector_cleartomark(ctx); |
274 | 0 | return code; |
275 | 0 | } |
276 | | |
277 | | /* This code may be a performance overhead, it simply skips over the stream contents |
278 | | * and checks that the stream ends with a 'endstream endobj' pair. We could add a |
279 | | * 'go faster' flag for users who are certain their PDF files are well-formed. This |
280 | | * could also allow us to skip all kinds of other checking..... |
281 | | */ |
282 | | |
283 | 383k | code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i); |
284 | 383k | if (code < 0) { |
285 | 4.49k | char extra_info[gp_file_name_sizeof]; |
286 | | |
287 | 4.49k | (void)pdfi_loop_detector_cleartomark(ctx); |
288 | 4.49k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum); |
289 | 4.49k | pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info); |
290 | 4.49k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
291 | 4.49k | return 0; |
292 | 4.49k | } |
293 | 378k | code = pdfi_loop_detector_cleartomark(ctx); |
294 | 378k | if (code < 0) { |
295 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
296 | 0 | return code; |
297 | 0 | } |
298 | | |
299 | 378k | if (i < 0 || (i + offset)> ctx->main_stream_length) { |
300 | 4.41k | char extra_info[gp_file_name_sizeof]; |
301 | | |
302 | 4.41k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum); |
303 | 4.41k | pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info); |
304 | 374k | } else { |
305 | 374k | code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR); |
306 | 374k | if (code < 0) { |
307 | 0 | pdfi_pop(ctx, 1); |
308 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
309 | 0 | return code; |
310 | 0 | } |
311 | | |
312 | 374k | stream_obj->Length = 0; |
313 | 374k | stream_obj->length_valid = false; |
314 | | |
315 | 374k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
316 | 374k | if (code == 0) { |
317 | 0 | char extra_info[gp_file_name_sizeof]; |
318 | |
|
319 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum); |
320 | 0 | pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); |
321 | | /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that |
322 | | * happens give up. |
323 | | */ |
324 | 0 | if (pdfi_count_stack(ctx) == 0) { |
325 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
326 | 0 | return code; |
327 | 0 | } |
328 | 374k | } else if (code < 0) { |
329 | 0 | char extra_info[gp_file_name_sizeof]; |
330 | |
|
331 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum); |
332 | 0 | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info); |
333 | 374k | } else if (code != TOKEN_ENDSTREAM) { |
334 | 26.0k | char extra_info[gp_file_name_sizeof]; |
335 | | |
336 | 26.0k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i); |
337 | 26.0k | pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); |
338 | 348k | } else { |
339 | | /* Cache the Length in the stream object and mark it valid */ |
340 | 348k | stream_obj->Length = i; |
341 | 348k | stream_obj->length_valid = true; |
342 | 348k | } |
343 | 374k | } |
344 | | |
345 | | /* If we failed to find a valid object, or the object wasn't a keyword, or the |
346 | | * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct |
347 | | * Length for streams if we have encrypted files, because we must install a |
348 | | * SubFileDecode filter with a Length (EODString is incompatible with AES encryption) |
349 | | * Rather than mess about checking for encryption, we'll choose to just correctly |
350 | | * calculate the Length of all streams. Although this takes time, it will only |
351 | | * happen for files which are invalid. |
352 | | */ |
353 | 378k | if (stream_obj->length_valid != true) { |
354 | 30.4k | char Buffer[10]; |
355 | 30.4k | unsigned int bytes, total = 0; |
356 | 30.4k | int c = 0; |
357 | | |
358 | 30.4k | code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET); |
359 | 30.4k | if (code < 0) { |
360 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
361 | 0 | pdfi_pop(ctx, 1); |
362 | 0 | return code; |
363 | 0 | } |
364 | 30.4k | memset(Buffer, 0x00, 10); |
365 | 30.4k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream); |
366 | 30.4k | if (bytes < 9) { |
367 | 3.50k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
368 | 3.50k | return_error(gs_error_ioerror); |
369 | 3.50k | } |
370 | | |
371 | 26.9k | total = bytes; |
372 | 680M | do { |
373 | 680M | if (memcmp(Buffer, "endstream", 9) == 0) { |
374 | 23.5k | stream_obj->Length = total - 9; |
375 | 23.5k | stream_obj->length_valid = true; |
376 | 23.5k | break; |
377 | 23.5k | } |
378 | 680M | if (memcmp(Buffer, "endobj", 6) == 0) { |
379 | 1.86k | stream_obj->Length = total - 6; |
380 | 1.86k | stream_obj->length_valid = true; |
381 | 1.86k | break; |
382 | 1.86k | } |
383 | 680M | memmove(Buffer, Buffer+1, 9); |
384 | 680M | c = pdfi_read_byte(ctx, ctx->main_stream); |
385 | 680M | if (c < 0) |
386 | 1.52k | break; |
387 | 680M | Buffer[9] = (byte)c; |
388 | 680M | total++; |
389 | 680M | } while(1); |
390 | 26.9k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
391 | 26.9k | if (c < 0) |
392 | 1.52k | return_error(gs_error_ioerror); |
393 | 25.4k | return 0; |
394 | 26.9k | } |
395 | | |
396 | 348k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
397 | 348k | if (code < 0) { |
398 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
399 | 0 | if (ctx->args.pdfstoponerror) |
400 | 0 | return code; |
401 | 0 | else |
402 | | /* Something went wrong looking for endobj, but we found endstream, so assume |
403 | | * for now that will suffice. |
404 | | */ |
405 | 0 | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL); |
406 | 0 | return 0; |
407 | 0 | } |
408 | | |
409 | 348k | if (code == 0) { |
410 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
411 | 0 | return_error(gs_error_stackunderflow); |
412 | 0 | } |
413 | | |
414 | 348k | if (code != TOKEN_ENDOBJ) { |
415 | 589 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
416 | 589 | if (ctx->args.pdfstoponerror) |
417 | 0 | return_error(gs_error_typecheck); |
418 | 589 | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL); |
419 | | /* Didn't find an endobj, but we have an endstream, so assume |
420 | | * for now that will suffice |
421 | | */ |
422 | 589 | return 0; |
423 | 589 | } |
424 | 347k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
425 | | |
426 | 347k | return 0; |
427 | 348k | } |
428 | | |
429 | | /* This reads an object *after* the x y obj keyword has been found. Its broken out |
430 | | * separately for the benefit of the repair code when reading the dictionary following |
431 | | * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does |
432 | | * not have an 'endobj', we rely on the error handling to take care of that for us. |
433 | | */ |
434 | | int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen) |
435 | 797k | { |
436 | 797k | int code = 0, initial_depth = 0; |
437 | 797k | pdf_key keyword; |
438 | 797k | gs_offset_t saved_offset[3]; |
439 | 797k | pdf_obj_type type; |
440 | | |
441 | 797k | initial_depth = pdfi_count_stack(ctx); |
442 | 797k | saved_offset[0] = saved_offset[1] = saved_offset[2] = 0; |
443 | | |
444 | 797k | code = pdfi_read_token(ctx, s, objnum, gen); |
445 | 797k | if (code < 0) |
446 | 1.44k | return code; |
447 | | |
448 | 795k | if (code == 0) |
449 | | /* failed to read a token */ |
450 | 24 | return_error(gs_error_syntaxerror); |
451 | | |
452 | 21.9M | do { |
453 | | /* move all the saved offsets up by one */ |
454 | 21.9M | saved_offset[0] = saved_offset[1]; |
455 | 21.9M | saved_offset[1] = saved_offset[2]; |
456 | 21.9M | saved_offset[2] = pdfi_unread_tell(ctx); |
457 | | |
458 | 21.9M | code = pdfi_read_token(ctx, s, objnum, gen); |
459 | 21.9M | if (code < 0) { |
460 | 32.9k | pdfi_clearstack(ctx); |
461 | 32.9k | return code; |
462 | 32.9k | } |
463 | 21.8M | if (s->eof) |
464 | 442 | return_error(gs_error_syntaxerror); |
465 | 21.8M | code = 0; |
466 | 21.8M | type = pdfi_type_of(ctx->stack_top[-1]); |
467 | 21.8M | if (type == PDF_KEYWORD) |
468 | 14.6k | goto missing_endobj; |
469 | 21.8M | } while (type != PDF_FAST_KEYWORD); |
470 | | |
471 | 747k | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
472 | 747k | if (keyword == TOKEN_ENDOBJ) { |
473 | 351k | pdf_obj *o; |
474 | | |
475 | 351k | if (pdfi_count_stack(ctx) - initial_depth < 2) { |
476 | 29 | pdfi_clearstack(ctx); |
477 | 29 | return_error(gs_error_stackunderflow); |
478 | 29 | } |
479 | | |
480 | 351k | o = ctx->stack_top[-2]; |
481 | | |
482 | 351k | pdfi_pop(ctx, 1); |
483 | | |
484 | 351k | if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
485 | 351k | o->indirect_num = o->object_num = objnum; |
486 | 351k | o->indirect_gen = o->generation_num = gen; |
487 | 351k | } |
488 | 351k | return code; |
489 | 351k | } |
490 | 396k | if (keyword == TOKEN_STREAM) { |
491 | 383k | pdfi_pop(ctx, 1); |
492 | 383k | return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen); |
493 | 383k | } |
494 | 12.9k | if (keyword == TOKEN_OBJ) { |
495 | 802 | pdf_obj *o; |
496 | | |
497 | 802 | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL); |
498 | | |
499 | | /* 4 for; the object we want, the object number, generation number and 'obj' keyword */ |
500 | 802 | if (pdfi_count_stack(ctx) - initial_depth < 4) |
501 | 320 | return_error(gs_error_stackunderflow); |
502 | | |
503 | | /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */ |
504 | 482 | o = ctx->stack_top[-4]; |
505 | | |
506 | 482 | pdfi_pop(ctx, 3); |
507 | | |
508 | 482 | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
509 | 438 | o->indirect_num = o->object_num = objnum; |
510 | 438 | o->indirect_gen = o->generation_num = gen; |
511 | 438 | } |
512 | 482 | if (saved_offset[0] > 0) |
513 | 482 | (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET); |
514 | 482 | return 0; |
515 | 802 | } |
516 | | |
517 | 26.7k | missing_endobj: |
518 | | /* Assume that any other keyword means a missing 'endobj' */ |
519 | 26.7k | if (!ctx->args.pdfstoponerror) { |
520 | 26.7k | pdf_obj *o; |
521 | | |
522 | 26.7k | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL); |
523 | | |
524 | 26.7k | if (pdfi_count_stack(ctx) - initial_depth < 2) |
525 | 456 | return_error(gs_error_stackunderflow); |
526 | | |
527 | 26.3k | o = ctx->stack_top[-2]; |
528 | | |
529 | 26.3k | pdfi_pop(ctx, 1); |
530 | | |
531 | 26.3k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
532 | 26.0k | o->indirect_num = o->object_num = objnum; |
533 | 26.0k | o->indirect_gen = o->generation_num = gen; |
534 | 26.0k | } |
535 | 26.3k | return code; |
536 | 26.7k | } |
537 | 0 | pdfi_pop(ctx, 2); |
538 | 0 | return_error(gs_error_syntaxerror); |
539 | 26.7k | } |
540 | | |
541 | | static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset) |
542 | 801k | { |
543 | 801k | int code = 0; |
544 | 801k | int objnum = 0, gen = 0; |
545 | | |
546 | | /* An object consists of 'num gen obj' followed by a token, follwed by an endobj |
547 | | * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we |
548 | | * want to deal with it specially by getting the Length, jumping to the end and checking |
549 | | * for an endobj. Or not, possibly, because it would be slow. |
550 | | */ |
551 | 801k | code = pdfi_read_bare_int(ctx, s, &objnum); |
552 | 801k | if (code < 0) |
553 | 20.3k | return code; |
554 | 781k | if (code == 0) |
555 | 2.83k | return_error(gs_error_syntaxerror); |
556 | | |
557 | 778k | code = pdfi_read_bare_int(ctx, s, &gen); |
558 | 778k | if (code < 0) |
559 | 875 | return code; |
560 | 777k | if (code == 0) |
561 | 324 | return_error(gs_error_syntaxerror); |
562 | | |
563 | 777k | code = pdfi_read_bare_keyword(ctx, s); |
564 | 777k | if (code < 0) |
565 | 0 | return code; |
566 | 777k | if (code == 0) |
567 | 0 | return gs_note_error(gs_error_ioerror); |
568 | 777k | if (code != TOKEN_OBJ) { |
569 | 1.60k | return_error(gs_error_syntaxerror); |
570 | 1.60k | } |
571 | | |
572 | 775k | return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen); |
573 | 777k | } |
574 | | |
575 | | static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, |
576 | | const xref_entry *entry, bool cache) |
577 | 897k | { |
578 | 897k | int code = 0; |
579 | 897k | xref_entry *compressed_entry; |
580 | 897k | pdf_c_stream *compressed_stream = NULL; |
581 | 897k | pdf_c_stream *SubFile_stream = NULL; |
582 | 897k | pdf_c_stream *Object_stream = NULL; |
583 | 897k | int i = 0, object_length = 0; |
584 | 897k | int64_t num_entries; |
585 | 897k | int found_object; |
586 | 897k | int64_t Length, First; |
587 | 897k | gs_offset_t offset = 0; |
588 | 897k | pdf_stream *compressed_object = NULL; |
589 | 897k | pdf_dict *compressed_sdict = NULL; /* alias */ |
590 | 897k | pdf_name *Type = NULL; |
591 | | |
592 | 897k | if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1) |
593 | 49 | return_error(gs_error_undefined); |
594 | | |
595 | 897k | compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num]; |
596 | | |
597 | 897k | if (ctx->args.pdfdebug) { |
598 | 0 | dmprintf1(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj); |
599 | 0 | dmprintf1(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num); |
600 | 0 | } |
601 | | |
602 | 897k | if (compressed_entry->cache == NULL) { |
603 | | #if CACHE_STATISTICS |
604 | | ctx->compressed_misses++; |
605 | | #endif |
606 | 40.0k | code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET); |
607 | 40.0k | if (code < 0) |
608 | 0 | goto exit; |
609 | | |
610 | 40.0k | code = pdfi_read_object(ctx, ctx->main_stream, 0); |
611 | 40.0k | if (code < 0) |
612 | 5.23k | goto exit; |
613 | | |
614 | 34.7k | if (pdfi_count_stack(ctx) < 1) { |
615 | 0 | code = gs_note_error(gs_error_stackunderflow); |
616 | 0 | goto exit; |
617 | 0 | } |
618 | | |
619 | 34.7k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) { |
620 | 981 | pdfi_pop(ctx, 1); |
621 | 981 | code = gs_note_error(gs_error_typecheck); |
622 | 981 | goto exit; |
623 | 981 | } |
624 | 33.8k | if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) { |
625 | 25 | pdfi_pop(ctx, 1); |
626 | | /* Same error (undefined) as when we read an uncompressed object with the wrong number */ |
627 | 25 | code = gs_note_error(gs_error_undefined); |
628 | 25 | goto exit; |
629 | 25 | } |
630 | 33.7k | compressed_object = (pdf_stream *)ctx->stack_top[-1]; |
631 | 33.7k | pdfi_countup(compressed_object); |
632 | 33.7k | pdfi_pop(ctx, 1); |
633 | 33.7k | code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object); |
634 | 33.7k | if (code < 0) |
635 | 0 | goto exit; |
636 | 857k | } else { |
637 | | #if CACHE_STATISTICS |
638 | | ctx->compressed_hits++; |
639 | | #endif |
640 | 857k | compressed_object = (pdf_stream *)compressed_entry->cache->o; |
641 | 857k | pdfi_countup(compressed_object); |
642 | 857k | pdfi_promote_cache_entry(ctx, compressed_entry->cache); |
643 | 857k | } |
644 | 891k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict); |
645 | 891k | if (code < 0) |
646 | 0 | return code; |
647 | | |
648 | 891k | if (ctx->loop_detection != NULL) { |
649 | 882k | code = pdfi_loop_detector_mark(ctx); |
650 | 882k | if (code < 0) |
651 | 0 | goto exit; |
652 | 882k | if (compressed_sdict->object_num != 0) { |
653 | 882k | if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) { |
654 | 45 | code = gs_note_error(gs_error_circular_reference); |
655 | 882k | } else { |
656 | 882k | code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num); |
657 | 882k | } |
658 | 882k | if (code < 0) { |
659 | 45 | (void)pdfi_loop_detector_cleartomark(ctx); |
660 | 45 | goto exit; |
661 | 45 | } |
662 | 882k | } |
663 | 882k | } |
664 | | /* Check its an ObjStm ! */ |
665 | 891k | code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type); |
666 | 891k | if (code < 0) { |
667 | 227 | if (ctx->loop_detection != NULL) |
668 | 227 | (void)pdfi_loop_detector_cleartomark(ctx); |
669 | 227 | goto exit; |
670 | 227 | } |
671 | | |
672 | 891k | if (!pdfi_name_is(Type, "ObjStm")){ |
673 | 22 | if (ctx->loop_detection != NULL) |
674 | 22 | (void)pdfi_loop_detector_cleartomark(ctx); |
675 | 22 | code = gs_note_error(gs_error_syntaxerror); |
676 | 22 | goto exit; |
677 | 22 | } |
678 | | |
679 | | /* Need to check the /N entry to see if the object is actually in this stream! */ |
680 | 891k | code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries); |
681 | 891k | if (code < 0) { |
682 | 10 | if (ctx->loop_detection != NULL) |
683 | 10 | (void)pdfi_loop_detector_cleartomark(ctx); |
684 | 10 | goto exit; |
685 | 10 | } |
686 | | |
687 | 891k | if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) { |
688 | 6 | if (ctx->loop_detection != NULL) |
689 | 6 | (void)pdfi_loop_detector_cleartomark(ctx); |
690 | 6 | code = gs_note_error(gs_error_rangecheck); |
691 | 6 | goto exit; |
692 | 6 | } |
693 | | |
694 | 891k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
695 | 891k | if (code < 0) { |
696 | 6.97k | if (ctx->loop_detection != NULL) |
697 | 6.97k | (void)pdfi_loop_detector_cleartomark(ctx); |
698 | 6.97k | goto exit; |
699 | 6.97k | } |
700 | | |
701 | 884k | code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First); |
702 | 884k | if (code < 0) { |
703 | 221 | if (ctx->loop_detection != NULL) |
704 | 221 | (void)pdfi_loop_detector_cleartomark(ctx); |
705 | 221 | goto exit; |
706 | 221 | } |
707 | | |
708 | 883k | if (ctx->loop_detection != NULL) |
709 | 875k | (void)pdfi_loop_detector_cleartomark(ctx); |
710 | | |
711 | 883k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
712 | 883k | if (code < 0) |
713 | 0 | goto exit; |
714 | | |
715 | 883k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
716 | 883k | if (code < 0) |
717 | 0 | goto exit; |
718 | | |
719 | 883k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
720 | 883k | if (code < 0) |
721 | 126 | goto exit; |
722 | | |
723 | 55.7M | for (i=0;i < num_entries;i++) |
724 | 54.8M | { |
725 | 54.8M | int new_offset; |
726 | 54.8M | code = pdfi_read_bare_int(ctx, compressed_stream, &found_object); |
727 | 54.8M | if (code < 0) |
728 | 2.69k | goto exit; |
729 | 54.8M | if (code == 0) { |
730 | 21 | code = gs_note_error(gs_error_syntaxerror); |
731 | 21 | goto exit; |
732 | 21 | } |
733 | 54.8M | code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset); |
734 | 54.8M | if (code < 0) |
735 | 2.03k | goto exit; |
736 | 54.8M | if (code == 0) { |
737 | 303 | code = gs_note_error(gs_error_syntaxerror); |
738 | 303 | goto exit; |
739 | 303 | } |
740 | 54.8M | if (i == entry->u.compressed.object_index) { |
741 | 881k | if (found_object != obj) { |
742 | 325 | code = gs_note_error(gs_error_undefined); |
743 | 325 | goto exit; |
744 | 325 | } |
745 | 880k | offset = new_offset; |
746 | 880k | } |
747 | 54.8M | if (i == entry->u.compressed.object_index + 1) |
748 | 858k | object_length = new_offset - offset; |
749 | 54.8M | } |
750 | | |
751 | | /* Bug #705259 - The first object need not lie immediately after the initial |
752 | | * table of object numbers and offsets. The start of the first object is given |
753 | | * by the value of First. We don't know how many bytes we consumed getting to |
754 | | * the end of the table, unfortunately, so we close the stream, rewind the main |
755 | | * stream back to the beginning of the ObjStm, and then read and discard 'First' |
756 | | * bytes in order to get to the start of the first object. Then we read the |
757 | | * number of bytes required to get from there to the start of the object we |
758 | | * actually want. |
759 | | * If this ever looks like it's causing performance problems we could read the |
760 | | * initial table above manually instead of using the existing code, and track |
761 | | * how many bytes we'd read, which would avoid us having to tear down and |
762 | | * rebuild the stream. |
763 | | */ |
764 | 878k | if (compressed_stream) |
765 | 878k | pdfi_close_file(ctx, compressed_stream); |
766 | 878k | if (SubFile_stream) |
767 | 878k | pdfi_close_file(ctx, SubFile_stream); |
768 | | |
769 | 878k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
770 | 878k | if (code < 0) |
771 | 0 | goto exit; |
772 | | |
773 | | /* We already dereferenced this above, so we don't need the loop detection checking here */ |
774 | 878k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
775 | 878k | if (code < 0) |
776 | 0 | goto exit; |
777 | | |
778 | 878k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
779 | 878k | if (code < 0) |
780 | 0 | goto exit; |
781 | | |
782 | 878k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
783 | 878k | if (code < 0) |
784 | 0 | goto exit; |
785 | | |
786 | 478M | for (i=0;i < First;i++) |
787 | 477M | { |
788 | 477M | int c = pdfi_read_byte(ctx, compressed_stream); |
789 | 477M | if (c < 0) { |
790 | 7 | code = gs_note_error(gs_error_ioerror); |
791 | 7 | goto exit; |
792 | 7 | } |
793 | 477M | } |
794 | | |
795 | | /* Skip to the offset of the object we want to read */ |
796 | 2.96G | for (i=0;i < offset;i++) |
797 | 2.96G | { |
798 | 2.96G | int c = pdfi_read_byte(ctx, compressed_stream); |
799 | 2.96G | if (c < 0) { |
800 | 49.6k | code = gs_note_error(gs_error_ioerror); |
801 | 49.6k | goto exit; |
802 | 49.6k | } |
803 | 2.96G | } |
804 | | |
805 | | /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit |
806 | | * the number of bytes we read to the declared size of the object (difference between |
807 | | * the offsets of the object we want to read, and the next object). If it is 0 then |
808 | | * we're reading the last object in the stream, so we just rely on the SubFileDecode |
809 | | * we set up when we created compressed_stream to limit the bytes to the length of |
810 | | * that stream. |
811 | | */ |
812 | 828k | if (object_length > 0) { |
813 | 807k | code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false); |
814 | 807k | if (code < 0) |
815 | 0 | goto exit; |
816 | 807k | } else { |
817 | 21.4k | Object_stream = compressed_stream; |
818 | 21.4k | } |
819 | | |
820 | 828k | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
821 | 828k | if (code < 0) |
822 | 1.77k | goto exit; |
823 | 826k | if (code == 0) { |
824 | 7 | code = gs_note_error(gs_error_syntaxerror); |
825 | 7 | goto exit; |
826 | 7 | } |
827 | 826k | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) { |
828 | 820k | int start_depth = pdfi_count_stack(ctx); |
829 | | |
830 | | /* Need to read all the elements from COS objects */ |
831 | 28.6M | do { |
832 | 28.6M | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
833 | 28.6M | if (code < 0) |
834 | 9.96k | goto exit; |
835 | 28.6M | if (code == 0) { |
836 | 4.02k | code = gs_note_error(gs_error_syntaxerror); |
837 | 4.02k | goto exit; |
838 | 4.02k | } |
839 | 28.6M | if (compressed_stream->eof == true) { |
840 | 334 | code = gs_note_error(gs_error_ioerror); |
841 | 334 | goto exit; |
842 | 334 | } |
843 | 28.6M | } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth); |
844 | 820k | } |
845 | | |
846 | 812k | *object = ctx->stack_top[-1]; |
847 | | /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets |
848 | | * the object number for uncompressed objects. So we need to do that here. |
849 | | */ |
850 | 812k | if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
851 | 811k | (*object)->indirect_num = (*object)->object_num = obj; |
852 | 811k | (*object)->indirect_gen = (*object)->generation_num = gen; |
853 | 811k | pdfi_countup(*object); |
854 | 811k | } |
855 | 812k | pdfi_pop(ctx, 1); |
856 | | |
857 | 812k | if (cache) { |
858 | 809k | code = pdfi_add_to_cache(ctx, *object); |
859 | 809k | if (code < 0) { |
860 | 0 | pdfi_countdown(*object); |
861 | 0 | goto exit; |
862 | 0 | } |
863 | 809k | } |
864 | | |
865 | 897k | exit: |
866 | 897k | if (Object_stream) |
867 | 828k | pdfi_close_file(ctx, Object_stream); |
868 | 897k | if (Object_stream != compressed_stream) |
869 | 862k | if (compressed_stream) |
870 | 862k | pdfi_close_file(ctx, compressed_stream); |
871 | 897k | if (SubFile_stream) |
872 | 883k | pdfi_close_file(ctx, SubFile_stream); |
873 | 897k | pdfi_countdown(compressed_object); |
874 | 897k | pdfi_countdown(Type); |
875 | 897k | return code; |
876 | 812k | } |
877 | | |
878 | | /* pdfi_dereference_main: look up indirect object obj (generation gen) through the xref table and return it in *object. A cached object is returned directly; otherwise the object is read either from a compressed object stream or from its offset in the main stream. The returned object has a reference count of at least 1, this represents the |
879 | | * reference being held by the caller (in **object) when we return from this function. When cache is true a freshly read object is also added to the object cache. Returns 0 on success or a negative error code; the paths through the error label restore the stream position, the decrypt_strings flag and the stack depth seen at entry. |
880 | | */ |
881 | | static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache) |
882 | 3.64M | { |
883 | 3.64M | xref_entry *entry; |
884 | 3.64M | int code, stack_depth = pdfi_count_stack(ctx); /* stack depth at entry, used to unwind the stack on failure */ |
885 | 3.64M | gs_offset_t saved_stream_offset; |
886 | 3.64M | bool saved_decrypt_strings = ctx->encryption.decrypt_strings; /* the flag is changed below and put back before most returns */ |
887 | | |
888 | 3.64M | *object = NULL; |
889 | | |
890 | 3.64M | if (ctx->xref_table == NULL) |
891 | 13 | return_error(gs_error_typecheck); |
892 | | /* Object number outside the xref table: record the error, then (unless pdfstoponerror is set) attempt a repair and check the number again */ |
893 | 3.64M | if (obj >= ctx->xref_table->xref_size) { |
894 | 28.1k | char extra_info[gp_file_name_sizeof]; |
895 | | |
896 | 28.1k | gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj); |
897 | 28.1k | pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info); |
898 | | |
899 | 28.1k | if(ctx->args.pdfstoponerror) |
900 | 0 | return_error(gs_error_rangecheck); |
901 | | |
902 | 28.1k | code = pdfi_repair_file(ctx); |
903 | 28.1k | if (code < 0) { |
904 | 28.1k | *object = NULL; |
905 | 28.1k | return code; |
906 | 28.1k | } |
907 | 13 | if (obj >= ctx->xref_table->xref_size) { |
908 | 7 | *object = NULL; |
909 | 7 | return_error(gs_error_rangecheck); |
910 | 7 | } |
911 | 13 | } |
912 | | |
913 | 3.61M | entry = &ctx->xref_table->xref[obj]; |
914 | | |
915 | 3.61M | if(entry->object_num == 0) |
916 | 559k | return_error(gs_error_undefined); |
917 | | /* A free entry is recorded as an error but not rejected here; the code below still tries to read it */ |
918 | 3.05M | if (entry->free) { |
919 | 86 | char extra_info[gp_file_name_sizeof]; |
920 | | |
921 | 86 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", trying next object number as offset.\n", entry->object_num); |
922 | 86 | pdfi_set_error(ctx, 0, NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
923 | 86 | } |
924 | | /* With a loop detector active, fail if obj is already recorded in it; free entries are recorded immediately */ |
925 | 3.05M | if (ctx->loop_detection) { |
926 | 2.73M | if (pdfi_loop_detector_check_object(ctx, obj) == true) |
927 | 179 | return_error(gs_error_circular_reference); |
928 | 2.73M | if (entry->free) { |
929 | 86 | code = pdfi_loop_detector_add_object(ctx, obj); |
930 | 86 | if (code < 0) |
931 | 0 | return code; |
932 | 86 | } |
933 | 2.73M | } |
934 | 3.05M | if (entry->cache != NULL){ /* cache hit: hand back the cached object with an extra reference */ |
935 | 1.39M | pdf_obj_cache_entry *cache_entry = entry->cache; |
936 | | |
937 | | #if CACHE_STATISTICS |
938 | | ctx->hits++; |
939 | | #endif |
940 | 1.39M | *object = cache_entry->o; |
941 | 1.39M | pdfi_countup(*object); |
942 | | /* presumably moves the entry to the most-recently-used position - TODO confirm against pdfi_promote_cache_entry() */ |
943 | 1.39M | pdfi_promote_cache_entry(ctx, cache_entry); |
944 | 1.65M | } else { |
945 | 1.65M | saved_stream_offset = pdfi_unread_tell(ctx); /* remembered so the main stream can be sought back afterwards */ |
946 | | |
947 | 1.65M | if (entry->compressed) { |
948 | | /* This is an object in a compressed object stream */ |
949 | 897k | ctx->encryption.decrypt_strings = false; |
950 | | |
951 | 897k | code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache); |
952 | 897k | if (code < 0 || *object == NULL) |
953 | 85.0k | goto error; |
954 | 897k | } else { |
955 | 761k | pdf_c_stream *SubFile_stream = NULL; |
956 | | #if CACHE_STATISTICS |
957 | | ctx->misses++; |
958 | | #endif |
959 | 761k | ctx->encryption.decrypt_strings = true; |
960 | | |
961 | 761k | code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET); |
962 | 761k | if (code < 0) |
963 | 9 | goto error; |
964 | | |
965 | 761k | code = pdfi_apply_SubFileDecode_filter(ctx, 0, "trailer", ctx->main_stream, &SubFile_stream, false); |
966 | 761k | if (code < 0) |
967 | 0 | goto error; |
968 | | |
969 | 761k | code = pdfi_read_object(ctx, SubFile_stream, entry->u.uncompressed.offset); |
970 | | |
971 | | /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it. |
972 | | * Reload the xref entry to be certain it is valid. |
973 | | */ |
974 | 761k | entry = &ctx->xref_table->xref[obj]; |
975 | | |
976 | 761k | pdfi_close_file(ctx, SubFile_stream); |
977 | 761k | if (code < 0) { |
978 | 58.9k | int code1 = 0; |
979 | 58.9k | if (entry->free) { /* a failed read of a free entry yields the null object rather than an error */ |
980 | 5 | dmprintf2(ctx->memory, "Dereference of free object %"PRIu64", next object number as offset failed (code = %d), returning NULL object.\n", entry->object_num, code); |
981 | 5 | *object = PDF_NULL_OBJ; |
982 | 5 | goto free_obj; |
983 | 5 | } |
984 | 58.9k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
985 | 58.9k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
986 | 58.9k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
987 | | /* State is back to its entry values; try a repair and, if it succeeds, retry the whole dereference */ |
988 | 58.9k | code1 = pdfi_repair_file(ctx); |
989 | 58.9k | if (code1 == 0) |
990 | 773 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
991 | | /* Repair failed, just give up and return an error */ |
992 | 58.1k | return code; |
993 | 58.9k | } |
994 | | /* Accept the top of the stack only if it lies above PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) (presumably: a real allocated object rather than a token constant - TODO confirm) and carries the requested object number */ |
995 | 702k | if (pdfi_count_stack(ctx) > 0 && |
996 | 702k | (ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) && |
997 | 702k | (ctx->stack_top[-1])->object_num == obj)) { |
998 | 701k | *object = ctx->stack_top[-1]; |
999 | 701k | pdfi_countup(*object); /* take the caller's reference before the stack entry is popped */ |
1000 | 701k | pdfi_pop(ctx, 1); |
1001 | 701k | if (pdfi_type_of(*object) == PDF_INDIRECT) { |
1002 | 207 | pdf_indirect_ref *iref = (pdf_indirect_ref *)*object; |
1003 | | /* An indirect reference whose target is the object being dereferenced is a loop */ |
1004 | 207 | if (iref->ref_object_num == obj) { |
1005 | 0 | code = gs_note_error(gs_error_circular_reference); |
1006 | 0 | pdfi_countdown(*object); |
1007 | 0 | *object = NULL; |
1008 | 0 | goto error; |
1009 | 0 | } |
1010 | 207 | } |
1011 | 701k | if (cache) { |
1012 | 701k | code = pdfi_add_to_cache(ctx, *object); |
1013 | 701k | if (code < 0) { |
1014 | 0 | pdfi_countdown(*object); |
1015 | 0 | goto error; |
1016 | 0 | } |
1017 | 701k | } |
1018 | 701k | } else { |
1019 | 675 | pdfi_pop(ctx, 1); |
1020 | 675 | if (entry->free) { |
1021 | 78 | dmprintf1(ctx->memory, "Dereference of free object %"PRIu64", next object number as offset failed, returning NULL object.\n", entry->object_num); |
1022 | 78 | *object = PDF_NULL_OBJ; |
1023 | 78 | return 0; /* NOTE(review): unlike the free_obj path above, this return neither restores decrypt_strings nor seeks back to saved_stream_offset - confirm this is intended */ |
1024 | 78 | } |
1025 | 597 | code = gs_note_error(gs_error_undefined); |
1026 | 597 | goto error; |
1027 | 675 | } |
1028 | 702k | } |
1029 | 1.51M | free_obj: |
1030 | 1.51M | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); /* put the main stream back where it was before the read */ |
1031 | 1.51M | } |
1032 | | /* Record the resolved object with the loop detector, if one is active and the object has a number */ |
1033 | 2.91M | if (ctx->loop_detection && pdf_object_num(*object) != 0) { |
1034 | 2.59M | code = pdfi_loop_detector_add_object(ctx, (*object)->object_num); |
1035 | 2.59M | if (code < 0) { |
1036 | 0 | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1037 | 0 | return code; /* NOTE(review): *object is still set on this error return - confirm callers release it */ |
1038 | 0 | } |
1039 | 2.59M | } |
1040 | 2.91M | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1041 | 2.91M | return 0; |
1042 | | |
1043 | 85.6k | error: |
1044 | 85.6k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1045 | 85.6k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1046 | | /* Return the stack to the state at entry */ |
1047 | 85.6k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1048 | 85.6k | return code; |
1049 | 2.91M | } |
1050 | | /* Dereference object obj/gen into *object via pdfi_dereference_main() with cache = true, so a freshly read object is also added to the object cache. Returns that function's result code. */ |
1051 | | int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1052 | 3.63M | { |
1053 | 3.63M | return pdfi_dereference_main(ctx, obj, gen, object, true); |
1054 | 3.63M | } |
1055 | | /* Dereference object obj/gen into *object via pdfi_dereference_main() with cache = false, so a freshly read object is not added to the object cache. Returns that function's result code. */ |
1056 | | int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1057 | 11.1k | { |
1058 | 11.1k | return pdfi_dereference_main(ctx, obj, gen, object, false); |
1059 | 11.1k | } |
1060 | | |
1061 | | /* Do a dereference with loop detection: set a loop-detector mark, call pdfi_dereference(), then clear back to the mark whatever the outcome. Returns the dereference result, or the error from setting the mark. */ |
1062 | | int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1063 | 1.63M | { |
1064 | 1.63M | int code; |
1065 | | |
1066 | 1.63M | code = pdfi_loop_detector_mark(ctx); |
1067 | 1.63M | if (code < 0) |
1068 | 0 | return code; |
1069 | | |
1070 | 1.63M | code = pdfi_dereference(ctx, obj, gen, object); |
1071 | 1.63M | (void)pdfi_loop_detector_cleartomark(ctx); /* result deliberately ignored; the dereference code is what gets returned */ |
1072 | 1.63M | return code; |
1073 | 1.63M | } |
1074 | | /* Same sequence as pdfi_deref_loop_detect() (mark, dereference, clear to mark) but using pdfi_dereference_nocache(), so a freshly read object is not added to the object cache. */ |
1075 | | int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1076 | 11.1k | { |
1077 | 11.1k | int code; |
1078 | | |
1079 | 11.1k | code = pdfi_loop_detector_mark(ctx); |
1080 | 11.1k | if (code < 0) |
1081 | 0 | return code; |
1082 | | |
1083 | 11.1k | code = pdfi_dereference_nocache(ctx, obj, gen, object); |
1084 | 11.1k | (void)pdfi_loop_detector_cleartomark(ctx); /* result deliberately ignored; the dereference code is what gets returned */ |
1085 | 11.1k | return code; |
1086 | 11.1k | } |
1087 | | /* Resolve the elements of the array passed as obj: each element is fetched with pdfi_array_get_no_store_R() and, unless it is a stream, stored back into the array. A circular reference is left in place as an indirect ref. With recurse set, each fetched element is first resolved via pdfi_resolve_indirect_loop_detect(). Returns 0 or a negative error code. */ |
1088 | | static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1089 | 98.0k | { |
1090 | 98.0k | int code = 0; |
1091 | 98.0k | uint64_t index, arraysize; |
1092 | 98.0k | pdf_obj *object = NULL; |
1093 | 98.0k | pdf_array *array = (pdf_array *)obj; |
1094 | | |
1095 | 98.0k | arraysize = pdfi_array_size(array); |
1096 | 449k | for (index = 0; index < arraysize; index++) { |
1097 | 350k | if (ctx->loop_detection != NULL) { /* each element fetch gets its own mark, but only if a detector already exists */ |
1098 | 350k | code = pdfi_loop_detector_mark(ctx); |
1099 | 350k | if (code < 0) |
1100 | 0 | return code; |
1101 | 350k | } |
1102 | | |
1103 | 350k | code = pdfi_array_get_no_store_R(ctx, array, index, &object); |
1104 | | /* The mark is cleared before the fetch result is examined, so it is released on every outcome */ |
1105 | 350k | if (ctx->loop_detection != NULL) { |
1106 | 350k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1107 | 350k | if (code1 < 0) |
1108 | 0 | return code1; /* NOTE(review): bypasses the countdown at exit; object may hold a reference if the fetch succeeded - confirm */ |
1109 | 350k | } |
1110 | | |
1111 | 350k | if (code == gs_error_circular_reference) { |
1112 | | /* Just leave as an indirect ref */ |
1113 | 0 | code = 0; |
1114 | 350k | } else { |
1115 | 350k | if (code < 0) goto exit; |
1116 | 350k | if (recurse) |
1117 | 116 | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse); |
1118 | 350k | if (code < 0) goto exit; |
1119 | | /* don't store the object if it's a stream (leave as a ref) */ |
1120 | 350k | if (pdfi_type_of(object) != PDF_STREAM) |
1121 | 350k | code = pdfi_array_put(ctx, array, index, object); |
1122 | 350k | } |
1123 | 350k | if (code < 0) goto exit; |
1124 | | |
1125 | 350k | pdfi_countdown(object); /* drop the reference obtained by the fetch above */ |
1126 | 350k | object = NULL; |
1127 | 350k | } |
1128 | | |
1129 | 98.0k | exit: |
1130 | 98.0k | pdfi_countdown(object); |
1131 | 98.0k | return code; |
1132 | 98.0k | } |
1133 | | /* Resolve the values of the dictionary passed as obj: every key except "Parent" has its value fetched with pdfi_dict_get_no_store_R_key() and, unless the value is a stream, stored back into the dictionary. A circular reference is left in place as an indirect ref. With recurse set, each fetched value is then resolved via pdfi_resolve_indirect_loop_detect(). Returns 0 or a negative error code. */ |
1134 | | static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1135 | 11.4k | { |
1136 | 11.4k | int code = 0; |
1137 | 11.4k | pdf_dict *dict = (pdf_dict *)obj; |
1138 | 11.4k | pdf_name *Key = NULL; |
1139 | 11.4k | pdf_obj *Value = NULL; |
1140 | 11.4k | uint64_t index, dictsize; |
1141 | | |
1142 | 11.4k | dictsize = pdfi_dict_entries(dict); |
1143 | | |
1144 | | /* Note: I am not using pdfi_dict_first/next because of needing to handle |
1145 | | * circular references. |
1146 | | */ |
1147 | 23.8k | for (index=0; index<dictsize; index ++) { |
1148 | 12.4k | Key = (pdf_name *)dict->list[index].key; |
1149 | 12.4k | if (pdfi_name_is(Key, "Parent")) |
1150 | 0 | continue; /* Parent entries are never resolved here */ |
1151 | | |
1152 | 12.4k | if (ctx->loop_detection != NULL) { /* each value fetch gets its own mark, but only if a detector already exists */ |
1153 | 12.4k | code = pdfi_loop_detector_mark(ctx); |
1154 | 12.4k | if (code < 0) |
1155 | 0 | return code; |
1156 | 12.4k | } |
1157 | | |
1158 | 12.4k | code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value); |
1159 | | /* The mark is cleared before the fetch result is examined, so it is released on every outcome */ |
1160 | 12.4k | if (ctx->loop_detection != NULL) { |
1161 | 12.4k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1162 | 12.4k | if (code1 < 0) |
1163 | 0 | return code1; /* NOTE(review): bypasses the countdown at exit; Value may hold a reference if the fetch succeeded - confirm */ |
1164 | 12.4k | } |
1165 | | |
1166 | 12.4k | if (code == gs_error_circular_reference) { |
1167 | | /* Just leave as an indirect ref */ |
1168 | 0 | code = 0; |
1169 | 12.4k | } else { |
1170 | 12.4k | if (code < 0) goto exit; |
1171 | | /* don't store the object if it's a stream (leave as a ref) */ |
1172 | 12.4k | if (pdfi_type_of(Value) != PDF_STREAM) |
1173 | 12.4k | pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true); /* NOTE(review): the result of the put is not checked - confirm this is intended */ |
1174 | 12.4k | if (recurse) |
1175 | 455 | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse); |
1176 | 12.4k | } |
1177 | 12.4k | if (code < 0) goto exit; |
1178 | | |
1179 | 12.4k | pdfi_countdown(Value); /* drop the reference obtained by the fetch above */ |
1180 | 12.4k | Value = NULL; |
1181 | 12.4k | } |
1182 | | |
1183 | 11.4k | exit: |
1184 | 11.4k | pdfi_countdown(Value); |
1185 | 11.4k | return code; |
1186 | 11.4k | } |
1187 | | |
1188 | | /* Resolve all the indirect references for an object: arrays and dictionaries are handed to the matching helper, any other type is left untouched and 0 is returned. |
1189 | | * Note: This can be recursive (when recurse is true the helpers call pdfi_resolve_indirect_loop_detect(), which calls back into this function). |
1190 | | */ |
1191 | | int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse) |
1192 | 401k | { |
1193 | 401k | int code = 0; |
1194 | | |
1195 | 401k | switch(pdfi_type_of(value)) { |
1196 | 98.0k | case PDF_ARRAY: |
1197 | 98.0k | code = pdfi_resolve_indirect_array(ctx, value, recurse); |
1198 | 98.0k | break; |
1199 | 11.4k | case PDF_DICT: |
1200 | 11.4k | code = pdfi_resolve_indirect_dict(ctx, value, recurse); |
1201 | 11.4k | break; |
1202 | 291k | default: |
1203 | 291k | break; |
1204 | 401k | } |
1205 | 401k | return code; |
1206 | 401k | } |
1207 | | |
1208 | | /* Resolve all the indirect references for an object |
1209 | | * Resolve indirect references, either one level or recursively, with loop detect on |
1210 | | * the parent (can be NULL) and the value. A new loop-detector mark is set, the object numbers of parent and value (when non-zero) are recorded, pdfi_resolve_indirect() is called, and the mark is cleared again. Returns gs_error_circular_reference if value is already recorded in the loop detector, otherwise 0 or a negative error code. |
1211 | | */ |
1212 | | int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse) |
1213 | 401k | { |
1214 | 401k | int code = 0; |
1215 | | |
1216 | 401k | code = pdfi_loop_detector_mark(ctx); |
1217 | 401k | if (code < 0) goto exit; /* NOTE(review): exit still calls cleartomark; if the mark was not set this may clear an outer mark (pdfi_deref_loop_detect returns directly in this case) - confirm */ |
1218 | 401k | if (parent && parent->object_num != 0) { |
1219 | 400k | code = pdfi_loop_detector_add_object(ctx, parent->object_num); |
1220 | 400k | if (code < 0) goto exit; |
1221 | 400k | } |
1222 | | /* Only numbered values take part in loop detection; a value already recorded means we have come back round to it */ |
1223 | 401k | if (pdf_object_num(value) != 0) { |
1224 | 269 | if (pdfi_loop_detector_check_object(ctx, value->object_num)) { |
1225 | 2 | code = gs_note_error(gs_error_circular_reference); |
1226 | 2 | goto exit; |
1227 | 2 | } |
1228 | 267 | code = pdfi_loop_detector_add_object(ctx, value->object_num); |
1229 | 267 | if (code < 0) goto exit; |
1230 | 267 | } |
1231 | 401k | code = pdfi_resolve_indirect(ctx, value, recurse); |
1232 | | |
1233 | 401k | exit: |
1234 | 401k | (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */ |
1235 | 401k | return code; |
1236 | 401k | } |