/src/ghostpdl/pdf/pdf_deref.c
Line | Count | Source |
1 | | /* Copyright (C) 2020-2026 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* Functions to deal with dereferencing indirect objects |
17 | | * for the PDF interpreter. In here we also keep the code |
18 | | * for dealing with the object cache, because the dereferencing |
19 | | * functions are currently the only place that deals with it. |
20 | | */ |
21 | | |
22 | | #include "pdf_int.h" |
23 | | #include "pdf_stack.h" |
24 | | #include "pdf_loop_detect.h" |
25 | | #include "strmio.h" |
26 | | #include "stream.h" |
27 | | #include "pdf_file.h" |
28 | | #include "pdf_misc.h" |
29 | | #include "pdf_dict.h" |
30 | | #include "pdf_array.h" |
31 | | #include "pdf_deref.h" |
32 | | #include "pdf_repair.h" |
33 | | |
34 | | /* Start with the object caching functions */ |
35 | | /* Disable object caching (for easier debugging with reference counting) |
36 | | * by uncommenting the following line |
37 | | */ |
38 | | /*#define DISABLE_CACHE*/ |
39 | | |
40 | | /* given an object, create a cache entry for it. If we have too many entries |
41 | | * then delete the least-recently-used cache entry. Make the new entry be the |
42 | | * most-recently-used entry. The actual entries are attached to the xref table |
43 | | * (as well as being a double-linked list), because we detect an existing |
44 | | * cache entry by seeing that the xref table for the object number has a non-NULL |
45 | | * 'cache' member. |
46 | | * So we need to update the xref as well if we add or delete cache entries. |
47 | | */ |
48 | | static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o) |
49 | 2.08M | { |
50 | 2.08M | #ifndef DISABLE_CACHE |
51 | 2.08M | pdf_obj_cache_entry *entry; |
52 | | |
53 | 2.08M | if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) |
54 | 2.86k | return 0; |
55 | | |
56 | 2.08M | if (o->object_num >= ctx->xref_table->xref_size) |
57 | 0 | return_error(gs_error_rangecheck); |
58 | | |
59 | 2.08M | if (ctx->xref_table->xref[o->object_num].cache != NULL) { |
60 | | #if DEBUG_CACHE |
61 | | outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num); |
62 | | #endif |
63 | 0 | return_error(gs_error_unknownerror); |
64 | 0 | } |
65 | | |
66 | | #if DEBUG_CACHE |
67 | | dbgmprintf1(ctx->memory, "Adding object %d\n", o->object_num); |
68 | | #endif |
69 | 2.08M | if (ctx->cache_entries == ctx->args.PDFCacheSize) |
70 | 533k | { |
71 | | #if DEBUG_CACHE |
72 | | dbgmprintf(ctx->memory, "Cache full, evicting LRU\n"); |
73 | | #endif |
74 | 533k | if (ctx->cache_LRU) { |
75 | 533k | entry = ctx->cache_LRU; |
76 | | #if DEBUG_CACHE |
77 | | dbgmprintf1(ctx->memory, "Evicting %d\n", entry->o->object_num); |
78 | | #endif |
79 | 533k | ctx->cache_LRU = entry->next; |
80 | 533k | if (entry->next) |
81 | 533k | ((pdf_obj_cache_entry *)entry->next)->previous = NULL; |
82 | 533k | ctx->xref_table->xref[entry->o->object_num].cache = NULL; |
83 | 533k | pdfi_countdown(entry->o); |
84 | 533k | ctx->cache_entries--; |
85 | 533k | gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU"); |
86 | 533k | } else |
87 | 0 | return_error(gs_error_unknownerror); |
88 | 533k | } |
89 | 2.08M | entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache"); |
90 | 2.08M | if (entry == NULL) |
91 | 0 | return_error(gs_error_VMerror); |
92 | | |
93 | 2.08M | memset(entry, 0x00, sizeof(pdf_obj_cache_entry)); |
94 | | |
95 | 2.08M | entry->o = o; |
96 | 2.08M | pdfi_countup(o); |
97 | 2.08M | if (ctx->cache_MRU) { |
98 | 2.01M | entry->previous = ctx->cache_MRU; |
99 | 2.01M | ctx->cache_MRU->next = entry; |
100 | 2.01M | } |
101 | 2.08M | ctx->cache_MRU = entry; |
102 | 2.08M | if (ctx->cache_LRU == NULL) |
103 | 69.4k | ctx->cache_LRU = entry; |
104 | | |
105 | 2.08M | ctx->cache_entries++; |
106 | 2.08M | ctx->xref_table->xref[o->object_num].cache = entry; |
107 | 2.08M | #endif |
108 | 2.08M | return 0; |
109 | 2.08M | } |
110 | | |
111 | | /* Given an existing cache entry, promote it to be the most-recently-used |
112 | | * cache entry. |
113 | | */ |
114 | | static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry) |
115 | 3.66M | { |
116 | 3.66M | #ifndef DISABLE_CACHE |
117 | 3.66M | if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) { |
118 | 2.35M | if ((pdf_obj_cache_entry *)cache_entry->next != NULL) |
119 | 2.35M | ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous; |
120 | 2.35M | if ((pdf_obj_cache_entry *)cache_entry->previous != NULL) |
121 | 2.35M | ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next; |
122 | 1.49k | else { |
123 | | /* the existing entry is the current least recently used, we need to make the 'next' |
124 | | * cache entry into the LRU. |
125 | | */ |
126 | 1.49k | ctx->cache_LRU = cache_entry->next; |
127 | 1.49k | } |
128 | 2.35M | cache_entry->next = NULL; |
129 | 2.35M | cache_entry->previous = ctx->cache_MRU; |
130 | 2.35M | ctx->cache_MRU->next = cache_entry; |
131 | 2.35M | ctx->cache_MRU = cache_entry; |
132 | 2.35M | } |
133 | 3.66M | #endif |
134 | 3.66M | return; |
135 | 3.66M | } |
136 | | |
137 | | int pdfi_cache_object(pdf_context *ctx, pdf_obj *o) |
138 | 2.69M | { |
139 | 2.69M | if (o->object_num == 0) |
140 | 1.87M | return 0; |
141 | 820k | if (ctx->xref_table->xref[o->object_num].cache == NULL) |
142 | 12 | return pdfi_add_to_cache(ctx, o); |
143 | 820k | else |
144 | 820k | pdfi_promote_cache_entry(ctx, ctx->xref_table->xref[o->object_num].cache); |
145 | 820k | return 0; |
146 | 820k | } |
147 | | |
148 | | /* This one's a bit of an oddity, its used for fonts. When we build a PDF font object |
149 | | * we want the object cache to reference *that* object, not the dictionary which was |
150 | | * read out of the PDF file, so this allows us to replace the font dictionary in the |
151 | | * cache with the actual font object, so that later dereferences will get this font |
152 | | * object. |
153 | | */ |
154 | | int replace_cache_entry(pdf_context *ctx, pdf_obj *o) |
155 | 131k | { |
156 | 131k | #ifndef DISABLE_CACHE |
157 | 131k | xref_entry *entry; |
158 | 131k | pdf_obj_cache_entry *cache_entry; |
159 | 131k | pdf_obj *old_cached_obj = NULL; |
160 | | |
161 | | /* Limited error checking here, we assume that things like the |
162 | | * validity of the object (eg not a free oobject) have already been handled. |
163 | | */ |
164 | | |
165 | 131k | entry = &ctx->xref_table->xref[o->object_num]; |
166 | 131k | cache_entry = entry->cache; |
167 | | |
168 | 131k | if (cache_entry == NULL) { |
169 | 2.98k | return(pdfi_add_to_cache(ctx, o)); |
170 | 128k | } else { |
171 | | /* NOTE: We grab the object without decrementing, to avoid triggering |
172 | | * a warning message for freeing an object that's in the cache |
173 | | */ |
174 | 128k | if (cache_entry->o != NULL) |
175 | 128k | old_cached_obj = cache_entry->o; |
176 | | |
177 | | /* Put new entry in the cache */ |
178 | 128k | cache_entry->o = o; |
179 | 128k | pdfi_countup(o); |
180 | 128k | pdfi_promote_cache_entry(ctx, cache_entry); |
181 | | |
182 | | /* Now decrement the old cache entry, if any */ |
183 | 128k | pdfi_countdown(old_cached_obj); |
184 | 128k | } |
185 | 128k | #endif |
186 | 128k | return 0; |
187 | 131k | } |
188 | | |
189 | | /* Now the dereferencing functions */ |
190 | | |
191 | | /* |
192 | | * Technically we can accept a stream other than the main PDF file stream here. This is |
193 | | * really for the case of compressed objects where we read tokens from the compressed |
194 | | * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode |
195 | | * on top of the main file stream, which may be useful. Note that this cannot work with |
196 | | * objects in compressed object streams! They should always pass a value of 0 for the stream_offset. |
197 | | * The stream_offset is the offset from the start of the underlying uncompressed PDF file of |
198 | | * the stream we are using. See the comments below when keyword is PDF_STREAM. |
199 | | */ |
200 | | |
201 | | /* Determine if a PDF object is in a compressed ObjStm. Returns < 0 |
202 | | * for an error, 0 if it is not in a compressed ObjStm and 1 if it is. |
203 | | * Currently errors are impossible. This is only used by the decryption code |
204 | | * to determine if a string is in a compressed object stream, if it is then |
205 | | * it can't be used for decryption. |
206 | | */ |
207 | | int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen) |
208 | 14.3k | { |
209 | 14.3k | xref_entry *entry; |
210 | | |
211 | | /* Can't possibly be a compressed object before we have finished reading |
212 | | * the xref. |
213 | | */ |
214 | 14.3k | if (ctx->xref_table == NULL) |
215 | 0 | return 0; |
216 | | |
217 | 14.3k | entry = &ctx->xref_table->xref[obj]; |
218 | | |
219 | 14.3k | if (entry->compressed) |
220 | 0 | return 1; |
221 | | |
222 | 14.3k | return 0; |
223 | 14.3k | } |
224 | | |
225 | | /* We should never read a 'stream' keyword from a compressed object stream |
226 | | * so this case should never end up here. |
227 | | */ |
228 | | static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, |
229 | | uint32_t objnum, uint32_t gen) |
230 | 643k | { |
231 | 643k | int code = 0; |
232 | 643k | int64_t i; |
233 | 643k | pdf_dict *dict = NULL; |
234 | 643k | gs_offset_t offset; |
235 | 643k | pdf_stream *stream_obj = NULL; |
236 | | |
237 | | /* Strange code time.... |
238 | | * If we are using a stream which is *not* the PDF uncompressed main file stream |
239 | | * then doing stell on it will only tell us how many bytes have been read from |
240 | | * that stream, it won't tell us the underlying file position. So we add on the |
241 | | * 'unread' bytes, *and* we add on the position of the start of the stream in |
242 | | * the actual main file. This is all done so that we can check the /Length |
243 | | * of the object. Note that this will *only* work for regular objects it can |
244 | | * not be used for compressed object streams, but those don't need checking anyway |
245 | | * they have a different mechanism altogether and should never get here. |
246 | | */ |
247 | 643k | if (s != ctx->main_stream) { |
248 | 0 | offset = stell(s->s) - s->unread_size + stream_offset; |
249 | 0 | code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET); |
250 | 0 | if (code < 0) |
251 | 0 | return_error(gs_error_ioerror); |
252 | 643k | } else { |
253 | 643k | offset = stell(s->s) - s->unread_size; |
254 | 643k | } |
255 | | |
256 | 643k | if (pdfi_count_stack(ctx) < 1) |
257 | 0 | return_error(gs_error_stackunderflow); |
258 | | |
259 | 643k | dict = (pdf_dict *)ctx->stack_top[-1]; |
260 | | |
261 | 643k | if (pdfi_type_of(dict) != PDF_DICT) { |
262 | 8.42k | pdfi_pop(ctx, 1); |
263 | 8.42k | return_error(gs_error_syntaxerror); |
264 | 8.42k | } |
265 | | |
266 | 635k | dict->indirect_num = dict->object_num = objnum; |
267 | 635k | dict->indirect_gen = dict->generation_num = gen; |
268 | | |
269 | | /* Convert the dict into a stream */ |
270 | 635k | code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true); |
271 | 635k | if (code < 0) { |
272 | 0 | pdfi_pop(ctx, 1); |
273 | 0 | return code; |
274 | 0 | } |
275 | | /* Pop off the dict and push the stream */ |
276 | 635k | pdfi_pop(ctx, 1); |
277 | 635k | dict = NULL; |
278 | 635k | pdfi_push(ctx, (pdf_obj *)stream_obj); |
279 | | |
280 | 635k | stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum; |
281 | 635k | stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen; |
282 | 635k | stream_obj->stream_offset = offset; |
283 | | |
284 | | /* Exceptional code. Normally we do not need to worry about detecting circular references |
285 | | * when reading objects, because we do not dereference any indirect objects. However streams |
286 | | * are a slight exception in that we do get the Length from the stream dictionay and if that |
287 | | * is an indirect reference, then we dereference it. |
288 | | * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect |
289 | | * reference to the same stream object, and leads to infinite recursion. So deal with that |
290 | | * possibility here. |
291 | | */ |
292 | 635k | code = pdfi_loop_detector_mark(ctx); |
293 | 635k | if (code < 0) { |
294 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
295 | 0 | return code; |
296 | 0 | } |
297 | 635k | if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) { |
298 | 107 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
299 | 107 | pdfi_loop_detector_cleartomark(ctx); |
300 | 107 | return_error(gs_error_circular_reference); |
301 | 107 | } |
302 | | |
303 | 635k | code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num); |
304 | 635k | if (code < 0) { |
305 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
306 | 0 | pdfi_loop_detector_cleartomark(ctx); |
307 | 0 | return code; |
308 | 0 | } |
309 | | |
310 | | /* This code may be a performance overhead, it simply skips over the stream contents |
311 | | * and checks that the stream ends with a 'endstream endobj' pair. We could add a |
312 | | * 'go faster' flag for users who are certain their PDF files are well-formed. This |
313 | | * could also allow us to skip all kinds of other checking..... |
314 | | */ |
315 | | |
316 | 635k | code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i); |
317 | 635k | if (code < 0) { |
318 | 14.3k | char extra_info[gp_file_name_sizeof]; |
319 | | |
320 | 14.3k | (void)pdfi_loop_detector_cleartomark(ctx); |
321 | 14.3k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum); |
322 | 14.3k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info); |
323 | 14.3k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
324 | 14.3k | return code; |
325 | 14.3k | } |
326 | 621k | code = pdfi_loop_detector_cleartomark(ctx); |
327 | 621k | if (code < 0) { |
328 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
329 | 0 | return code; |
330 | 0 | } |
331 | | |
332 | 621k | if (i < 0 || (i + offset)> ctx->main_stream_length) { |
333 | 32.4k | char extra_info[gp_file_name_sizeof]; |
334 | | |
335 | 32.4k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum); |
336 | 32.4k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) { |
337 | 0 | pdfi_pop(ctx, 1); |
338 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
339 | 0 | return code; |
340 | 0 | } |
341 | 588k | } else { |
342 | 588k | code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR); |
343 | 588k | if (code < 0) { |
344 | 0 | pdfi_pop(ctx, 1); |
345 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
346 | 0 | return code; |
347 | 0 | } |
348 | | |
349 | 588k | stream_obj->Length = 0; |
350 | 588k | stream_obj->length_valid = false; |
351 | | |
352 | 588k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
353 | 588k | if (code == 0) { |
354 | 0 | char extra_info[gp_file_name_sizeof]; |
355 | |
|
356 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum); |
357 | 0 | pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); |
358 | | /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that |
359 | | * happens give up. |
360 | | */ |
361 | 0 | if (pdfi_count_stack(ctx) == 0) { |
362 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
363 | 0 | return code; |
364 | 0 | } |
365 | 588k | } else if (code < 0) { |
366 | 0 | char extra_info[gp_file_name_sizeof]; |
367 | |
|
368 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum); |
369 | 0 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) { |
370 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
371 | 0 | return code; |
372 | 0 | } |
373 | 588k | } else if (code != TOKEN_ENDSTREAM) { |
374 | 64.5k | char extra_info[gp_file_name_sizeof]; |
375 | | |
376 | 64.5k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i); |
377 | 64.5k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) { |
378 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
379 | 0 | return code; |
380 | 0 | } |
381 | 524k | } else { |
382 | | /* Cache the Length in the stream object and mark it valid */ |
383 | 524k | stream_obj->Length = i; |
384 | 524k | stream_obj->length_valid = true; |
385 | 524k | } |
386 | 588k | } |
387 | | |
388 | | /* If we failed to find a valid object, or the object wasn't a keyword, or the |
389 | | * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct |
390 | | * Length for streams if we have encrypted files, because we must install a |
391 | | * SubFileDecode filter with a Length (EODString is incompatible with AES encryption) |
392 | | * Rather than mess about checking for encryption, we'll choose to just correctly |
393 | | * calculate the Length of all streams. Although this takes time, it will only |
394 | | * happen for files which are invalid. |
395 | | */ |
396 | 621k | if (stream_obj->length_valid != true) { |
397 | 96.9k | char Buffer[10]; |
398 | 96.9k | unsigned int bytes, total = 0; |
399 | 96.9k | int c = 0; |
400 | | |
401 | 96.9k | code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET); |
402 | 96.9k | if (code < 0) { |
403 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
404 | 0 | pdfi_pop(ctx, 1); |
405 | 0 | return code; |
406 | 0 | } |
407 | 96.9k | memset(Buffer, 0x00, 10); |
408 | 96.9k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream); |
409 | 96.9k | if (bytes < 9) { |
410 | 624 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
411 | 624 | return_error(gs_error_ioerror); |
412 | 624 | } |
413 | | |
414 | 96.2k | total = bytes; |
415 | 1.67G | do { |
416 | 1.67G | if (memcmp(Buffer, "endstream", 9) == 0) { |
417 | 60.5k | if (Buffer[9] != 0x00) |
418 | 60.4k | total--; |
419 | 60.5k | stream_obj->Length = total - 9; |
420 | 60.5k | stream_obj->length_valid = true; |
421 | 60.5k | break; |
422 | 60.5k | } |
423 | 1.67G | if (memcmp(Buffer, "endobj", 6) == 0) { |
424 | 6.44k | if (Buffer[9] != 0x00) |
425 | 6.36k | total--; |
426 | 6.44k | stream_obj->Length = total - 6; |
427 | 6.44k | stream_obj->length_valid = true; |
428 | 6.44k | break; |
429 | 6.44k | } |
430 | 1.67G | memmove(Buffer, Buffer+1, 9); |
431 | 1.67G | c = pdfi_read_byte(ctx, ctx->main_stream); |
432 | 1.67G | if (c < 0) |
433 | 29.3k | break; |
434 | 1.67G | Buffer[9] = (byte)c; |
435 | 1.67G | total++; |
436 | 1.67G | } while(1); |
437 | 96.2k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
438 | 96.2k | if (c < 0) |
439 | 29.3k | return_error(gs_error_ioerror); |
440 | 66.9k | return 0; |
441 | 96.2k | } |
442 | | |
443 | 524k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
444 | 524k | if (code < 0) { |
445 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
446 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) { |
447 | 0 | return code; |
448 | 0 | } |
449 | | /* Something went wrong looking for endobj, but we found endstream, so assume |
450 | | * for now that will suffice. |
451 | | */ |
452 | 0 | return 0; |
453 | 0 | } |
454 | | |
455 | 524k | if (code == 0) { |
456 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
457 | 0 | return_error(gs_error_stackunderflow); |
458 | 0 | } |
459 | | |
460 | 524k | if (code != TOKEN_ENDOBJ) { |
461 | 1.37k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
462 | 1.37k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL); |
463 | | /* Didn't find an endobj, but we have an endstream, so assume |
464 | | * for now that will suffice |
465 | | */ |
466 | 1.37k | return code; |
467 | 1.37k | } |
468 | 522k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
469 | | |
470 | 522k | return 0; |
471 | 524k | } |
472 | | |
473 | | /* This reads an object *after* the x y obj keyword has been found. Its broken out |
474 | | * separately for the benefit of the repair code when reading the dictionary following |
475 | | * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does |
476 | | * not have an 'endobj', we rely on the error handling to take care of that for us. |
477 | | */ |
478 | | int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen) |
479 | 1.71M | { |
480 | 1.71M | int code = 0, initial_depth = 0; |
481 | 1.71M | pdf_key keyword; |
482 | 1.71M | gs_offset_t saved_offset[3]; |
483 | 1.71M | pdf_obj_type type; |
484 | | |
485 | 1.71M | initial_depth = pdfi_count_stack(ctx); |
486 | 1.71M | saved_offset[0] = saved_offset[1] = saved_offset[2] = 0; |
487 | | |
488 | 1.71M | code = pdfi_read_token(ctx, s, objnum, gen); |
489 | 1.71M | if (code < 0) |
490 | 4.43k | return code; |
491 | | |
492 | 1.71M | if (code == 0) |
493 | | /* failed to read a token */ |
494 | 59 | return_error(gs_error_syntaxerror); |
495 | | |
496 | 1.71M | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) { |
497 | 20.8k | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
498 | 20.8k | if (keyword == TOKEN_ENDOBJ) { |
499 | 331 | ctx->stack_top[-1] = PDF_NULL_OBJ; |
500 | 331 | return 0; |
501 | 331 | } |
502 | 20.8k | } |
503 | | |
504 | 56.5M | do { |
505 | | /* move all the saved offsets up by one */ |
506 | 56.5M | saved_offset[0] = saved_offset[1]; |
507 | 56.5M | saved_offset[1] = saved_offset[2]; |
508 | 56.5M | saved_offset[2] = pdfi_unread_tell(ctx); |
509 | | |
510 | 56.5M | code = pdfi_read_token(ctx, s, objnum, gen); |
511 | 56.5M | if (code < 0) { |
512 | 162k | pdfi_clearstack(ctx); |
513 | 162k | return code; |
514 | 162k | } |
515 | 56.4M | if (s->eof) |
516 | 2.25k | return_error(gs_error_syntaxerror); |
517 | 56.4M | code = 0; |
518 | 56.4M | type = pdfi_type_of(ctx->stack_top[-1]); |
519 | 56.4M | if (type == PDF_KEYWORD) |
520 | 129k | goto missing_endobj; |
521 | 56.4M | } while (type != PDF_FAST_KEYWORD); |
522 | | |
523 | 1.41M | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
524 | 1.41M | if (keyword == TOKEN_ENDOBJ) { |
525 | 728k | pdf_obj *o; |
526 | | |
527 | 728k | if (pdfi_count_stack(ctx) - initial_depth < 2) { |
528 | 221 | pdfi_clearstack(ctx); |
529 | 221 | return_error(gs_error_stackunderflow); |
530 | 221 | } |
531 | | |
532 | 728k | o = ctx->stack_top[-2]; |
533 | | |
534 | 728k | pdfi_pop(ctx, 1); |
535 | | |
536 | 728k | if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
537 | 727k | o->indirect_num = o->object_num = objnum; |
538 | 727k | o->indirect_gen = o->generation_num = gen; |
539 | 727k | } |
540 | 728k | return code; |
541 | 728k | } |
542 | 689k | if (keyword == TOKEN_STREAM) { |
543 | 643k | pdfi_pop(ctx, 1); |
544 | 643k | return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen); |
545 | 643k | } |
546 | 45.2k | if (keyword == TOKEN_OBJ) { |
547 | 5.30k | pdf_obj *o; |
548 | | |
549 | 5.30k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) { |
550 | 0 | return code; |
551 | 0 | } |
552 | | |
553 | | /* 4 for; the object we want, the object number, generation number and 'obj' keyword */ |
554 | 5.30k | if (pdfi_count_stack(ctx) - initial_depth < 4) |
555 | 1.35k | return_error(gs_error_stackunderflow); |
556 | | |
557 | | /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */ |
558 | 3.94k | o = ctx->stack_top[-4]; |
559 | | |
560 | 3.94k | pdfi_pop(ctx, 3); |
561 | | |
562 | 3.94k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
563 | 3.92k | o->indirect_num = o->object_num = objnum; |
564 | 3.92k | o->indirect_gen = o->generation_num = gen; |
565 | 3.92k | } |
566 | 3.94k | if (saved_offset[0] > 0) |
567 | 3.94k | (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET); |
568 | 3.94k | return 0; |
569 | 5.30k | } |
570 | | |
571 | 169k | missing_endobj: |
572 | | /* Assume that any other keyword means a missing 'endobj' */ |
573 | 169k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) { |
574 | 169k | pdf_obj *o; |
575 | | |
576 | 169k | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL); |
577 | | |
578 | 169k | if (pdfi_count_stack(ctx) - initial_depth < 2) |
579 | 3.09k | return_error(gs_error_stackunderflow); |
580 | | |
581 | 166k | o = ctx->stack_top[-2]; |
582 | | |
583 | 166k | pdfi_pop(ctx, 1); |
584 | | |
585 | 166k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
586 | 164k | o->indirect_num = o->object_num = objnum; |
587 | 164k | o->indirect_gen = o->generation_num = gen; |
588 | 164k | } |
589 | 166k | return code; |
590 | 169k | } |
591 | 0 | pdfi_pop(ctx, 2); |
592 | 0 | return_error(gs_error_syntaxerror); |
593 | 169k | } |
594 | | |
595 | | static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset) |
596 | 1.72M | { |
597 | 1.72M | int code = 0; |
598 | 1.72M | int objnum = 0, gen = 0; |
599 | | |
600 | | /* An object consists of 'num gen obj' followed by a token, follwed by an endobj |
601 | | * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we |
602 | | * want to deal with it specially by getting the Length, jumping to the end and checking |
603 | | * for an endobj. Or not, possibly, because it would be slow. |
604 | | */ |
605 | 1.72M | code = pdfi_read_bare_int(ctx, s, &objnum); |
606 | 1.72M | if (code < 0) |
607 | 39.4k | return code; |
608 | 1.68M | if (code == 0) |
609 | 9.71k | return_error(gs_error_syntaxerror); |
610 | | |
611 | 1.67M | code = pdfi_read_bare_int(ctx, s, &gen); |
612 | 1.67M | if (code < 0) |
613 | 3.36k | return code; |
614 | 1.66M | if (code == 0) |
615 | 1.08k | return_error(gs_error_syntaxerror); |
616 | | |
617 | 1.66M | code = pdfi_read_bare_keyword(ctx, s); |
618 | 1.66M | if (code < 0) |
619 | 0 | return code; |
620 | 1.66M | if (code == 0) |
621 | 0 | return gs_note_error(gs_error_ioerror); |
622 | 1.66M | if (code != TOKEN_OBJ) { |
623 | 4.98k | return_error(gs_error_syntaxerror); |
624 | 4.98k | } |
625 | | |
626 | 1.66M | return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen); |
627 | 1.66M | } |
628 | | |
629 | | static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, |
630 | | const xref_entry *entry, bool cache) |
631 | 1.02M | { |
632 | 1.02M | int code = 0; |
633 | 1.02M | xref_entry *compressed_entry; |
634 | 1.02M | pdf_c_stream *compressed_stream = NULL; |
635 | 1.02M | pdf_c_stream *SubFile_stream = NULL; |
636 | 1.02M | pdf_c_stream *Object_stream = NULL; |
637 | 1.02M | int i = 0, object_length = 0; |
638 | 1.02M | int64_t num_entries; |
639 | 1.02M | int found_object; |
640 | 1.02M | int64_t Length, First; |
641 | 1.02M | gs_offset_t offset = 0; |
642 | 1.02M | pdf_stream *compressed_object = NULL; |
643 | 1.02M | pdf_dict *compressed_sdict = NULL; /* alias */ |
644 | 1.02M | pdf_name *Type = NULL; |
645 | | |
646 | 1.02M | if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1) |
647 | 953 | return_error(gs_error_undefined); |
648 | | |
649 | 1.01M | compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num]; |
650 | | |
651 | 1.01M | if (ctx->args.pdfdebug) { |
652 | 0 | outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj); |
653 | 0 | outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num); |
654 | 0 | } |
655 | | |
656 | 1.01M | if (compressed_entry->cache == NULL) { |
657 | | #if CACHE_STATISTICS |
658 | | ctx->compressed_misses++; |
659 | | #endif |
660 | 61.8k | code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET); |
661 | 61.8k | if (code < 0) |
662 | 0 | goto exit; |
663 | | |
664 | 61.8k | code = pdfi_read_object(ctx, ctx->main_stream, 0); |
665 | 61.8k | if (code < 0) |
666 | 9.02k | goto exit; |
667 | | |
668 | 52.8k | if (pdfi_count_stack(ctx) < 1) { |
669 | 1 | code = gs_note_error(gs_error_stackunderflow); |
670 | 1 | goto exit; |
671 | 1 | } |
672 | | |
673 | 52.8k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) { |
674 | 11.5k | pdfi_pop(ctx, 1); |
675 | 11.5k | code = gs_note_error(gs_error_typecheck); |
676 | 11.5k | goto exit; |
677 | 11.5k | } |
678 | 41.2k | if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) { |
679 | 265 | pdfi_pop(ctx, 1); |
680 | | /* Same error (undefined) as when we read an uncompressed object with the wrong number */ |
681 | 265 | code = gs_note_error(gs_error_undefined); |
682 | 265 | goto exit; |
683 | 265 | } |
684 | 41.0k | compressed_object = (pdf_stream *)ctx->stack_top[-1]; |
685 | 41.0k | pdfi_countup(compressed_object); |
686 | 41.0k | pdfi_pop(ctx, 1); |
687 | 41.0k | code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object); |
688 | 41.0k | if (code < 0) |
689 | 0 | goto exit; |
690 | 957k | } else { |
691 | | #if CACHE_STATISTICS |
692 | | ctx->compressed_hits++; |
693 | | #endif |
694 | 957k | compressed_object = (pdf_stream *)compressed_entry->cache->o; |
695 | 957k | pdfi_countup(compressed_object); |
696 | 957k | pdfi_promote_cache_entry(ctx, compressed_entry->cache); |
697 | 957k | } |
698 | 998k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict); |
699 | 998k | if (code < 0) |
700 | 20 | return code; |
701 | | |
702 | 998k | if (ctx->loop_detection != NULL) { |
703 | 997k | code = pdfi_loop_detector_mark(ctx); |
704 | 997k | if (code < 0) |
705 | 0 | goto exit; |
706 | 997k | if (compressed_sdict->object_num != 0) { |
707 | 997k | if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) { |
708 | 212 | code = gs_note_error(gs_error_circular_reference); |
709 | 997k | } else { |
710 | 997k | code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num); |
711 | 997k | } |
712 | 997k | if (code < 0) { |
713 | 212 | (void)pdfi_loop_detector_cleartomark(ctx); |
714 | 212 | goto exit; |
715 | 212 | } |
716 | 997k | } |
717 | 997k | } |
718 | | /* Check its an ObjStm ! */ |
719 | 998k | code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type); |
720 | 998k | if (code < 0) { |
721 | 264 | if (ctx->loop_detection != NULL) |
722 | 264 | (void)pdfi_loop_detector_cleartomark(ctx); |
723 | 264 | goto exit; |
724 | 264 | } |
725 | | |
726 | 998k | if (!pdfi_name_is(Type, "ObjStm")){ |
727 | 1.18k | if (ctx->loop_detection != NULL) |
728 | 1.18k | (void)pdfi_loop_detector_cleartomark(ctx); |
729 | 1.18k | code = gs_note_error(gs_error_syntaxerror); |
730 | 1.18k | goto exit; |
731 | 1.18k | } |
732 | | |
733 | | /* Need to check the /N entry to see if the object is actually in this stream! */ |
734 | 997k | code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries); |
735 | 997k | if (code < 0) { |
736 | 246 | if (ctx->loop_detection != NULL) |
737 | 246 | (void)pdfi_loop_detector_cleartomark(ctx); |
738 | 246 | goto exit; |
739 | 246 | } |
740 | | |
741 | 996k | if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) { |
742 | 81 | if (ctx->loop_detection != NULL) |
743 | 81 | (void)pdfi_loop_detector_cleartomark(ctx); |
744 | 81 | code = gs_note_error(gs_error_rangecheck); |
745 | 81 | goto exit; |
746 | 81 | } |
747 | | |
748 | 996k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
749 | 996k | if (code < 0) { |
750 | 130k | if (ctx->loop_detection != NULL) |
751 | 130k | (void)pdfi_loop_detector_cleartomark(ctx); |
752 | 130k | goto exit; |
753 | 130k | } |
754 | | |
755 | 866k | code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First); |
756 | 866k | if (code < 0) { |
757 | 1.14k | if (ctx->loop_detection != NULL) |
758 | 1.14k | (void)pdfi_loop_detector_cleartomark(ctx); |
759 | 1.14k | goto exit; |
760 | 1.14k | } |
761 | | |
762 | 865k | if (ctx->loop_detection != NULL) |
763 | 864k | (void)pdfi_loop_detector_cleartomark(ctx); |
764 | | |
765 | 865k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
766 | 865k | if (code < 0) |
767 | 0 | goto exit; |
768 | | |
769 | 865k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
770 | 865k | if (code < 0) |
771 | 0 | goto exit; |
772 | | |
773 | 865k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
774 | 865k | if (code < 0) |
775 | 1.42k | goto exit; |
776 | | |
777 | 53.5M | for (i=0;i < num_entries;i++) |
778 | 52.6M | { |
779 | 52.6M | int new_offset; |
780 | 52.6M | code = pdfi_read_bare_int(ctx, compressed_stream, &found_object); |
781 | 52.6M | if (code < 0) |
782 | 5.24k | goto exit; |
783 | 52.6M | if (code == 0) { |
784 | 590 | code = gs_note_error(gs_error_syntaxerror); |
785 | 590 | goto exit; |
786 | 590 | } |
787 | 52.6M | code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset); |
788 | 52.6M | if (code < 0) |
789 | 5.26k | goto exit; |
790 | 52.6M | if (code == 0) { |
791 | 477 | code = gs_note_error(gs_error_syntaxerror); |
792 | 477 | goto exit; |
793 | 477 | } |
794 | 52.6M | if (i == entry->u.compressed.object_index) { |
795 | 858k | if (found_object != obj) { |
796 | 1.77k | code = gs_note_error(gs_error_undefined); |
797 | 1.77k | goto exit; |
798 | 1.77k | } |
799 | 856k | offset = new_offset; |
800 | 856k | } |
801 | 52.6M | if (i == entry->u.compressed.object_index + 1) |
802 | 826k | object_length = new_offset - offset; |
803 | 52.6M | } |
804 | | |
805 | | /* Bug #705259 - The first object need not lie immediately after the initial |
806 | | * table of object numbers and offsets. The start of the first object is given |
807 | | * by the value of First. We don't know how many bytes we consumed getting to |
808 | | * the end of the table, unfortunately, so we close the stream, rewind the main |
809 | | * stream back to the beginning of the ObjStm, and then read and discard 'First' |
810 | | * bytes in order to get to the start of the first object. Then we read the |
811 | | * number of bytes required to get from there to the start of the object we |
812 | | * actually want. |
813 | | * If this ever looks like it's causing performance problems we could read the |
814 | | * initial table above manually instead of using the existing code, and track |
815 | | * how many bytes we'd read, which would avoid us having to tear down and |
816 | | * rebuild the stream. |
817 | | */ |
818 | 850k | if (compressed_stream) { |
819 | 850k | pdfi_close_file(ctx, compressed_stream); |
820 | 850k | compressed_stream = NULL; |
821 | 850k | } |
822 | 850k | if (SubFile_stream) { |
823 | 850k | pdfi_close_file(ctx, SubFile_stream); |
824 | 850k | SubFile_stream = NULL; |
825 | 850k | } |
826 | | |
827 | 850k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
828 | 850k | if (code < 0) |
829 | 0 | goto exit; |
830 | | |
831 | | /* We already dereferenced this above, so we don't need the loop detection checking here */ |
832 | 850k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
833 | 850k | if (code < 0) |
834 | 0 | goto exit; |
835 | | |
836 | 850k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
837 | 850k | if (code < 0) |
838 | 0 | goto exit; |
839 | | |
840 | 850k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
841 | 850k | if (code < 0) |
842 | 0 | goto exit; |
843 | | |
844 | 455M | for (i=0;i < First;i++) |
845 | 454M | { |
846 | 454M | int c = pdfi_read_byte(ctx, compressed_stream); |
847 | 454M | if (c < 0) { |
848 | 34 | code = gs_note_error(gs_error_ioerror); |
849 | 34 | goto exit; |
850 | 34 | } |
851 | 454M | } |
852 | | |
853 | | /* Skip to the offset of the object we want to read */ |
854 | 2.86G | for (i=0;i < offset;i++) |
855 | 2.86G | { |
856 | 2.86G | int c = pdfi_read_byte(ctx, compressed_stream); |
857 | 2.86G | if (c < 0) { |
858 | 43.3k | code = gs_note_error(gs_error_ioerror); |
859 | 43.3k | goto exit; |
860 | 43.3k | } |
861 | 2.86G | } |
862 | | |
863 | | /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit |
864 | | * the number of bytes we read to the declared size of the object (difference between |
865 | | * the offsets of the object we want to read, and the next object). If it is 0 then |
866 | | * we're reading the last object in the stream, so we just rely on the SubFileDecode |
867 | | * we set up when we created compressed_stream to limit the bytes to the length of |
868 | | * that stream. |
869 | | */ |
870 | 807k | if (object_length > 0) { |
871 | 778k | code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false); |
872 | 778k | if (code < 0) |
873 | 0 | goto exit; |
874 | 778k | } else { |
875 | 28.4k | Object_stream = compressed_stream; |
876 | 28.4k | } |
877 | | |
878 | 807k | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
879 | 807k | if (code < 0) |
880 | 3.47k | goto exit; |
881 | 803k | if (code == 0) { |
882 | 84 | code = gs_note_error(gs_error_syntaxerror); |
883 | 84 | goto exit; |
884 | 84 | } |
885 | 803k | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) { |
886 | 793k | int start_depth = pdfi_count_stack(ctx); |
887 | | |
888 | | /* Need to read all the elements from COS objects */ |
889 | 27.9M | do { |
890 | 27.9M | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
891 | 27.9M | if (code < 0) |
892 | 21.9k | goto exit; |
893 | 27.9M | if (code == 0) { |
894 | 4.60k | code = gs_note_error(gs_error_syntaxerror); |
895 | 4.60k | goto exit; |
896 | 4.60k | } |
897 | 27.9M | if (compressed_stream->eof == true) { |
898 | 336 | code = gs_note_error(gs_error_ioerror); |
899 | 336 | goto exit; |
900 | 336 | } |
901 | 27.9M | } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth); |
902 | 793k | } |
903 | | |
904 | 776k | *object = ctx->stack_top[-1]; |
905 | | /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets |
906 | | * the object number for uncompressed objects. So we need to do that here. |
907 | | */ |
908 | 776k | if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
909 | 774k | (*object)->indirect_num = (*object)->object_num = obj; |
910 | 774k | (*object)->indirect_gen = (*object)->generation_num = gen; |
911 | 774k | pdfi_countup(*object); |
912 | 774k | } |
913 | 776k | pdfi_pop(ctx, 1); |
914 | | |
915 | 776k | if (cache) { |
916 | 760k | code = pdfi_add_to_cache(ctx, *object); |
917 | 760k | if (code < 0) { |
918 | 0 | pdfi_countdown(*object); |
919 | 0 | goto exit; |
920 | 0 | } |
921 | 760k | } |
922 | | |
923 | 1.01M | exit: |
924 | 1.01M | if (Object_stream) |
925 | 807k | pdfi_close_file(ctx, Object_stream); |
926 | 1.01M | if (Object_stream != compressed_stream) |
927 | 835k | if (compressed_stream) |
928 | 835k | pdfi_close_file(ctx, compressed_stream); |
929 | 1.01M | if (SubFile_stream) |
930 | 865k | pdfi_close_file(ctx, SubFile_stream); |
931 | 1.01M | pdfi_countdown(compressed_object); |
932 | 1.01M | pdfi_countdown(Type); |
933 | 1.01M | return code; |
934 | 776k | } |
935 | | |
936 | | /* pdf_dereference returns an object with a reference count of at least 1, this represents the |
937 | | * reference being held by the caller (in **object) when we return from this function. |
938 | | */ |
939 | | static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache) |
940 | 6.06M | { |
941 | 6.06M | xref_entry *entry; |
942 | 6.06M | int code, stack_depth = pdfi_count_stack(ctx); |
943 | 6.06M | gs_offset_t saved_stream_offset; |
944 | 6.06M | bool saved_decrypt_strings = ctx->encryption.decrypt_strings; |
945 | | |
946 | 6.06M | *object = NULL; |
947 | | |
948 | 6.06M | if (ctx->xref_table == NULL) |
949 | 51 | return_error(gs_error_typecheck); |
950 | | |
951 | 6.06M | if (ctx->main_stream == NULL || ctx->main_stream->s == NULL) |
952 | 0 | return_error(gs_error_ioerror); |
953 | | |
954 | 6.06M | if (obj >= ctx->xref_table->xref_size) { |
955 | 246k | char extra_info[gp_file_name_sizeof]; |
956 | | |
957 | 246k | gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj); |
958 | 246k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) { |
959 | 0 | return code; |
960 | 0 | } |
961 | | |
962 | 246k | code = pdfi_repair_file(ctx); |
963 | 246k | if (code < 0) { |
964 | 246k | *object = NULL; |
965 | 246k | return code; |
966 | 246k | } |
967 | 28 | if (obj >= ctx->xref_table->xref_size) { |
968 | 17 | *object = NULL; |
969 | 17 | return_error(gs_error_rangecheck); |
970 | 17 | } |
971 | 28 | } |
972 | | |
973 | 5.81M | entry = &ctx->xref_table->xref[obj]; |
974 | | |
975 | 5.81M | if(entry->object_num == 0) { |
976 | 1.37M | pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0"); |
977 | 1.37M | return_error(gs_error_undefined); |
978 | 1.37M | } |
979 | | |
980 | 4.44M | if (entry->free) { |
981 | 6.76k | char extra_info[gp_file_name_sizeof]; |
982 | | |
983 | 6.76k | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
984 | 6.76k | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
985 | 6.76k | *object = PDF_NULL_OBJ; |
986 | 6.76k | return code; |
987 | 4.43M | }else { |
988 | 4.43M | if (!entry->compressed) { |
989 | 3.18M | if(entry->u.uncompressed.generation_num != gen) |
990 | 3.47k | pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", ""); |
991 | 3.18M | } |
992 | 4.43M | } |
993 | | |
994 | 4.43M | if (ctx->loop_detection) { |
995 | 4.11M | if (pdfi_loop_detector_check_object(ctx, obj) == true) |
996 | 770 | return_error(gs_error_circular_reference); |
997 | 4.11M | if (entry->free) { |
998 | 0 | code = pdfi_loop_detector_add_object(ctx, obj); |
999 | 0 | if (code < 0) |
1000 | 0 | return code; |
1001 | 0 | } |
1002 | 4.11M | } |
1003 | 4.43M | if (entry->cache != NULL){ |
1004 | 1.75M | pdf_obj_cache_entry *cache_entry = entry->cache; |
1005 | | |
1006 | | #if CACHE_STATISTICS |
1007 | | ctx->hits++; |
1008 | | #endif |
1009 | 1.75M | *object = cache_entry->o; |
1010 | 1.75M | pdfi_countup(*object); |
1011 | | |
1012 | 1.75M | pdfi_promote_cache_entry(ctx, cache_entry); |
1013 | 2.67M | } else { |
1014 | 2.67M | saved_stream_offset = pdfi_unread_tell(ctx); |
1015 | | |
1016 | 2.67M | if (entry->compressed) { |
1017 | | /* This is an object in a compressed object stream */ |
1018 | 1.02M | ctx->encryption.decrypt_strings = false; |
1019 | | |
1020 | 1.02M | code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache); |
1021 | 1.02M | if (code < 0 || *object == NULL) |
1022 | 243k | goto error; |
1023 | 1.65M | } else { |
1024 | | #if CACHE_STATISTICS |
1025 | | ctx->misses++; |
1026 | | #endif |
1027 | 1.65M | ctx->encryption.decrypt_strings = true; |
1028 | | |
1029 | 1.65M | code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET); |
1030 | 1.65M | if (code < 0) |
1031 | 100 | goto error; |
1032 | | |
1033 | 1.65M | code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset); |
1034 | | |
1035 | | /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it. |
1036 | | * reload the xref entry to be certain it is valid. |
1037 | | */ |
1038 | 1.65M | entry = &ctx->xref_table->xref[obj]; |
1039 | 1.65M | if (code < 0) { |
1040 | 253k | int code1 = 0; |
1041 | 253k | if (entry->free) { |
1042 | 0 | char extra_info[gp_file_name_sizeof]; |
1043 | |
|
1044 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1045 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1046 | 0 | *object = PDF_NULL_OBJ; |
1047 | 0 | if (code < 0) |
1048 | 0 | goto error; |
1049 | 0 | goto free_obj; |
1050 | 0 | } |
1051 | 253k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1052 | 253k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1053 | 253k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1054 | | |
1055 | 253k | code1 = pdfi_repair_file(ctx); |
1056 | 253k | if (code1 == 0) |
1057 | 1.56k | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1058 | | /* Repair failed, just give up and return an error */ |
1059 | 251k | goto error; |
1060 | 253k | } |
1061 | | |
1062 | | /* We only expect a single object back when dereferencing an indirect reference |
1063 | | * The only way (I think) we can end up with more than one is if the object initially |
1064 | | * appears to be a dictionary or array, but the object terminates (with endobj or |
1065 | | * simply reaching EOF) without terminating the array or dictionary. That's clearly |
1066 | | * an error. We might, as a future 'improvement' choose to walk back through |
1067 | | * the stack looking for unterminated dictionary or array markers, and closing them |
1068 | | * so that (hopefully!) we end up with a single 'repaired' object on the stack. |
1069 | | * But for now I'm simply going to treat these as errors. We will try a repair on the |
1070 | | * file to see if we end up using a different (hopefully intact) object from the file. |
1071 | | */ |
1072 | 1.40M | if (pdfi_count_stack(ctx) - stack_depth > 1) { |
1073 | 116k | int code1 = 0; |
1074 | | |
1075 | 116k | code1 = pdfi_repair_file(ctx); |
1076 | 116k | if (code1 == 0) |
1077 | 474 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1078 | | /* Repair failed, just give up and return an error */ |
1079 | 115k | code = gs_note_error(gs_error_syntaxerror); |
1080 | 115k | goto error; |
1081 | 116k | } |
1082 | | |
1083 | 1.28M | if (pdfi_count_stack(ctx) > 0 && |
1084 | 1.28M | ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) && |
1085 | 1.28M | (ctx->stack_top[-1])->object_num == obj) |
1086 | 1.28M | || ctx->stack_top[-1] == PDF_NULL_OBJ)) { |
1087 | 1.28M | *object = ctx->stack_top[-1]; |
1088 | 1.28M | pdfi_countup(*object); |
1089 | 1.28M | pdfi_pop(ctx, 1); |
1090 | 1.28M | if (pdfi_type_of(*object) == PDF_INDIRECT) { |
1091 | 0 | pdf_indirect_ref *iref = (pdf_indirect_ref *)*object; |
1092 | |
|
1093 | 0 | if (iref->ref_object_num == obj) { |
1094 | 0 | code = gs_note_error(gs_error_circular_reference); |
1095 | 0 | pdfi_countdown(*object); |
1096 | 0 | *object = NULL; |
1097 | 0 | goto error; |
1098 | 0 | } |
1099 | 0 | } |
1100 | | /* There's really no point in caching an indirect reference and |
1101 | | * I think it could be potentially confusing to later calls. |
1102 | | */ |
1103 | 1.28M | if (cache && pdfi_type_of(*object) != PDF_INDIRECT) { |
1104 | 1.28M | code = pdfi_add_to_cache(ctx, *object); |
1105 | 1.28M | if (code < 0) { |
1106 | 0 | pdfi_countdown(*object); |
1107 | 0 | goto error; |
1108 | 0 | } |
1109 | 1.28M | } |
1110 | 1.28M | } else { |
1111 | 1.53k | int code1 = 0; |
1112 | | |
1113 | 1.53k | if (pdfi_count_stack(ctx) > 0) |
1114 | 1.43k | pdfi_pop(ctx, 1); |
1115 | | |
1116 | 1.53k | if (entry->free) { |
1117 | 0 | char extra_info[gp_file_name_sizeof]; |
1118 | |
|
1119 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1120 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1121 | 0 | *object = PDF_NULL_OBJ; |
1122 | 0 | if (code < 0) |
1123 | 0 | goto error; |
1124 | 0 | return code; |
1125 | 0 | } |
1126 | 1.53k | code1 = pdfi_repair_file(ctx); |
1127 | 1.53k | if (code1 == 0) |
1128 | 180 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1129 | | /* Repair failed, just give up and return an error */ |
1130 | 1.35k | code = gs_note_error(gs_error_undefined); |
1131 | 1.35k | goto error; |
1132 | 1.53k | } |
1133 | 1.28M | } |
1134 | 2.06M | free_obj: |
1135 | 2.06M | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1136 | 2.06M | } |
1137 | | |
1138 | 3.81M | if (ctx->loop_detection && pdf_object_num(*object) != 0) { |
1139 | 3.50M | code = pdfi_loop_detector_add_object(ctx, (*object)->object_num); |
1140 | 3.50M | if (code < 0) { |
1141 | 0 | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1142 | 0 | return code; |
1143 | 0 | } |
1144 | 3.50M | } |
1145 | 3.81M | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1146 | 3.81M | return 0; |
1147 | | |
1148 | 612k | error: |
1149 | 612k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1150 | 612k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1151 | | /* Return the stack to the state at entry */ |
1152 | 612k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1153 | 612k | return code; |
1154 | 3.81M | } |
1155 | | |
1156 | | int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1157 | 6.03M | { |
1158 | 6.03M | return pdfi_dereference_main(ctx, obj, gen, object, true); |
1159 | 6.03M | } |
1160 | | |
1161 | | int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1162 | 26.2k | { |
1163 | 26.2k | return pdfi_dereference_main(ctx, obj, gen, object, false); |
1164 | 26.2k | } |
1165 | | |
1166 | | /* do a derefence with loop detection */ |
1167 | | int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1168 | 2.54M | { |
1169 | 2.54M | int code; |
1170 | | |
1171 | 2.54M | code = pdfi_loop_detector_mark(ctx); |
1172 | 2.54M | if (code < 0) |
1173 | 0 | return code; |
1174 | | |
1175 | 2.54M | code = pdfi_dereference(ctx, obj, gen, object); |
1176 | 2.54M | (void)pdfi_loop_detector_cleartomark(ctx); |
1177 | 2.54M | return code; |
1178 | 2.54M | } |
1179 | | |
1180 | | int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1181 | 26.2k | { |
1182 | 26.2k | int code; |
1183 | | |
1184 | 26.2k | code = pdfi_loop_detector_mark(ctx); |
1185 | 26.2k | if (code < 0) |
1186 | 0 | return code; |
1187 | | |
1188 | 26.2k | code = pdfi_dereference_nocache(ctx, obj, gen, object); |
1189 | 26.2k | (void)pdfi_loop_detector_cleartomark(ctx); |
1190 | 26.2k | return code; |
1191 | 26.2k | } |
1192 | | |
1193 | | static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1194 | 13.3k | { |
1195 | 13.3k | int code = 0; |
1196 | 13.3k | uint64_t index, arraysize; |
1197 | 13.3k | pdf_obj *object = NULL; |
1198 | 13.3k | pdf_array *array = (pdf_array *)obj; |
1199 | | |
1200 | 13.3k | arraysize = pdfi_array_size(array); |
1201 | 60.6k | for (index = 0; index < arraysize; index++) { |
1202 | 47.3k | if (ctx->loop_detection != NULL) { |
1203 | 47.3k | code = pdfi_loop_detector_mark(ctx); |
1204 | 47.3k | if (code < 0) |
1205 | 0 | return code; |
1206 | 47.3k | } |
1207 | | |
1208 | 47.3k | code = pdfi_array_get_no_store_R(ctx, array, index, &object); |
1209 | | |
1210 | 47.3k | if (ctx->loop_detection != NULL) { |
1211 | 47.3k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1212 | 47.3k | if (code1 < 0) |
1213 | 0 | return code1; |
1214 | 47.3k | } |
1215 | | |
1216 | 47.3k | if (code == gs_error_circular_reference) { |
1217 | | /* Previously we just left as an indirect reference, but now we want |
1218 | | * to return the error so we don't end up replacing indirect references |
1219 | | * to objects with circular references. |
1220 | | */ |
1221 | 47.3k | } else { |
1222 | 47.3k | if (code < 0) goto exit; |
1223 | 47.3k | if (recurse) { |
1224 | 946 | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse); |
1225 | 946 | if (code < 0) goto exit; |
1226 | 946 | } |
1227 | | /* don't store the object if it's a stream (leave as a ref) */ |
1228 | 47.3k | if (pdfi_type_of(object) != PDF_STREAM) |
1229 | 47.3k | code = pdfi_array_put(ctx, array, index, object); |
1230 | 47.3k | } |
1231 | 47.3k | if (code < 0) goto exit; |
1232 | | |
1233 | 47.3k | pdfi_countdown(object); |
1234 | 47.3k | object = NULL; |
1235 | 47.3k | } |
1236 | | |
1237 | 13.3k | exit: |
1238 | 13.3k | pdfi_countdown(object); |
1239 | 13.3k | return code; |
1240 | 13.3k | } |
1241 | | |
1242 | | static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1243 | 2.35k | { |
1244 | 2.35k | int code = 0; |
1245 | 2.35k | pdf_dict *dict = (pdf_dict *)obj; |
1246 | 2.35k | pdf_name *Key = NULL; |
1247 | 2.35k | pdf_obj *Value = NULL; |
1248 | 2.35k | uint64_t index, dictsize; |
1249 | | |
1250 | 2.35k | dictsize = pdfi_dict_entries(dict); |
1251 | | |
1252 | | /* Note: I am not using pdfi_dict_first/next because of needing to handle |
1253 | | * circular references. |
1254 | | */ |
1255 | 5.50k | for (index=0; index<dictsize; index ++) { |
1256 | 3.20k | Key = (pdf_name *)dict->list[index].key; |
1257 | 3.20k | if (pdfi_name_is(Key, "Parent")) |
1258 | 6 | continue; |
1259 | | |
1260 | 3.19k | if (ctx->loop_detection != NULL) { |
1261 | 3.14k | code = pdfi_loop_detector_mark(ctx); |
1262 | 3.14k | if (code < 0) |
1263 | 0 | return code; |
1264 | 3.14k | } |
1265 | | |
1266 | 3.19k | code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value); |
1267 | | |
1268 | 3.19k | if (ctx->loop_detection != NULL) { |
1269 | 3.14k | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1270 | 3.14k | if (code1 < 0) |
1271 | 0 | return code1; |
1272 | 3.14k | } |
1273 | | |
1274 | 3.19k | if (code == gs_error_circular_reference) { |
1275 | | /* Just leave as an indirect ref */ |
1276 | 7 | code = 0; |
1277 | 3.19k | } else { |
1278 | 3.19k | if (code < 0) goto exit; |
1279 | 3.18k | if (recurse) { |
1280 | 1.56k | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse); |
1281 | 1.56k | if (code < 0) |
1282 | 46 | goto exit; |
1283 | 1.56k | } |
1284 | | /* don't store the object if it's a stream (leave as a ref) */ |
1285 | 3.13k | if (pdfi_type_of(Value) != PDF_STREAM) |
1286 | 3.11k | code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true); |
1287 | 3.13k | } |
1288 | 3.14k | if (code < 0) goto exit; |
1289 | | |
1290 | 3.14k | pdfi_countdown(Value); |
1291 | 3.14k | Value = NULL; |
1292 | 3.14k | } |
1293 | | |
1294 | 2.35k | exit: |
1295 | 2.35k | pdfi_countdown(Value); |
1296 | 2.35k | return code; |
1297 | 2.35k | } |
1298 | | |
1299 | | /* Resolve all the indirect references for an object |
1300 | | * Note: This can be recursive |
1301 | | */ |
1302 | | int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse) |
1303 | 56.5k | { |
1304 | 56.5k | int code = 0; |
1305 | | |
1306 | 56.5k | switch(pdfi_type_of(value)) { |
1307 | 13.3k | case PDF_ARRAY: |
1308 | 13.3k | code = pdfi_resolve_indirect_array(ctx, value, recurse); |
1309 | 13.3k | break; |
1310 | 2.35k | case PDF_DICT: |
1311 | 2.35k | code = pdfi_resolve_indirect_dict(ctx, value, recurse); |
1312 | 2.35k | break; |
1313 | 40.8k | default: |
1314 | 40.8k | break; |
1315 | 56.5k | } |
1316 | 56.5k | return code; |
1317 | 56.5k | } |
1318 | | |
1319 | | /* Resolve all the indirect references for an object |
1320 | | * Resolve indirect references, either one level or recursively, with loop detect on |
1321 | | * the parent (can by NULL) and the value. |
1322 | | */ |
1323 | | int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse) |
1324 | 56.4k | { |
1325 | 56.4k | int code = 0; |
1326 | | |
1327 | 56.4k | code = pdfi_loop_detector_mark(ctx); |
1328 | 56.4k | if (code < 0) goto exit; |
1329 | 56.4k | if (parent && parent->object_num != 0) { |
1330 | 53.5k | code = pdfi_loop_detector_add_object(ctx, parent->object_num); |
1331 | 53.5k | if (code < 0) goto exit; |
1332 | 53.5k | } |
1333 | | |
1334 | 56.4k | if (pdf_object_num(value) != 0) { |
1335 | 435 | if (pdfi_loop_detector_check_object(ctx, value->object_num)) { |
1336 | 0 | code = gs_note_error(gs_error_circular_reference); |
1337 | 0 | goto exit; |
1338 | 0 | } |
1339 | 435 | code = pdfi_loop_detector_add_object(ctx, value->object_num); |
1340 | 435 | if (code < 0) goto exit; |
1341 | 435 | } |
1342 | 56.4k | code = pdfi_resolve_indirect(ctx, value, recurse); |
1343 | | |
1344 | 56.4k | exit: |
1345 | 56.4k | (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */ |
1346 | 56.4k | return code; |
1347 | 56.4k | } |