/src/ghostpdl/pdf/pdf_deref.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2020-2025 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* Functions to deal with dereferencing indirect objects |
17 | | * for the PDF interpreter. In here we also keep the code |
18 | | * for dealing with the object cache, because the dereferencing |
19 | | * functions are currently the only place that deals with it. |
20 | | */ |
21 | | |
22 | | #include "pdf_int.h" |
23 | | #include "pdf_stack.h" |
24 | | #include "pdf_loop_detect.h" |
25 | | #include "strmio.h" |
26 | | #include "stream.h" |
27 | | #include "pdf_file.h" |
28 | | #include "pdf_misc.h" |
29 | | #include "pdf_dict.h" |
30 | | #include "pdf_array.h" |
31 | | #include "pdf_deref.h" |
32 | | #include "pdf_repair.h" |
33 | | |
34 | | /* Start with the object caching functions */ |
35 | | /* Disable object caching (for easier debugging with reference counting) |
36 | | * by uncommenting the following line |
37 | | */ |
38 | | /*#define DISABLE_CACHE*/ |
39 | | |
40 | | /* given an object, create a cache entry for it. If we have too many entries |
41 | | * then delete the leat-recently-used cache entry. Make the new entry be the |
42 | | * most-recently-used entry. The actual entries are attached to the xref table |
43 | | * (as well as being a double-linked list), because we detect an existing |
44 | | * cache entry by seeing that the xref table for the object number has a non-NULL |
45 | | * 'cache' member. |
46 | | * So we need to update the xref as well if we add or delete cache entries. |
47 | | */ |
48 | | static int pdfi_add_to_cache(pdf_context *ctx, pdf_obj *o) |
49 | 116k | { |
50 | 116k | #ifndef DISABLE_CACHE |
51 | 116k | pdf_obj_cache_entry *entry; |
52 | | |
53 | 116k | if (o < PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) |
54 | 325 | return 0; |
55 | | |
56 | 116k | if (ctx->xref_table->xref[o->object_num].cache != NULL) { |
57 | | #if DEBUG_CACHE |
58 | | outprintf(ctx->memory, "Attempting to add object %d to cache when the object is already cached!\n", o->object_num); |
59 | | #endif |
60 | 0 | return_error(gs_error_unknownerror); |
61 | 0 | } |
62 | | |
63 | 116k | if (o->object_num > ctx->xref_table->xref_size) |
64 | 0 | return_error(gs_error_rangecheck); |
65 | | |
66 | 116k | if (ctx->cache_entries == MAX_OBJECT_CACHE_SIZE) |
67 | 16.2k | { |
68 | | #if DEBUG_CACHE |
69 | | dbgmprintf(ctx->memory, "Cache full, evicting LRU\n"); |
70 | | #endif |
71 | 16.2k | if (ctx->cache_LRU) { |
72 | 16.2k | entry = ctx->cache_LRU; |
73 | 16.2k | ctx->cache_LRU = entry->next; |
74 | 16.2k | if (entry->next) |
75 | 16.2k | ((pdf_obj_cache_entry *)entry->next)->previous = NULL; |
76 | 16.2k | ctx->xref_table->xref[entry->o->object_num].cache = NULL; |
77 | 16.2k | pdfi_countdown(entry->o); |
78 | 16.2k | ctx->cache_entries--; |
79 | 16.2k | gs_free_object(ctx->memory, entry, "pdfi_add_to_cache, free LRU"); |
80 | 16.2k | } else |
81 | 0 | return_error(gs_error_unknownerror); |
82 | 16.2k | } |
83 | 116k | entry = (pdf_obj_cache_entry *)gs_alloc_bytes(ctx->memory, sizeof(pdf_obj_cache_entry), "pdfi_add_to_cache"); |
84 | 116k | if (entry == NULL) |
85 | 0 | return_error(gs_error_VMerror); |
86 | | |
87 | 116k | memset(entry, 0x00, sizeof(pdf_obj_cache_entry)); |
88 | | |
89 | 116k | entry->o = o; |
90 | 116k | pdfi_countup(o); |
91 | 116k | if (ctx->cache_MRU) { |
92 | 111k | entry->previous = ctx->cache_MRU; |
93 | 111k | ctx->cache_MRU->next = entry; |
94 | 111k | } |
95 | 116k | ctx->cache_MRU = entry; |
96 | 116k | if (ctx->cache_LRU == NULL) |
97 | 4.54k | ctx->cache_LRU = entry; |
98 | | |
99 | 116k | ctx->cache_entries++; |
100 | 116k | ctx->xref_table->xref[o->object_num].cache = entry; |
101 | 116k | #endif |
102 | 116k | return 0; |
103 | 116k | } |
104 | | |
105 | | /* Given an existing cache entry, promote it to be the most-recently-used |
106 | | * cache entry. |
107 | | */ |
108 | | static void pdfi_promote_cache_entry(pdf_context *ctx, pdf_obj_cache_entry *cache_entry) |
109 | 233k | { |
110 | 233k | #ifndef DISABLE_CACHE |
111 | 233k | if (ctx->cache_MRU && cache_entry != ctx->cache_MRU) { |
112 | 154k | if ((pdf_obj_cache_entry *)cache_entry->next != NULL) |
113 | 154k | ((pdf_obj_cache_entry *)cache_entry->next)->previous = cache_entry->previous; |
114 | 154k | if ((pdf_obj_cache_entry *)cache_entry->previous != NULL) |
115 | 154k | ((pdf_obj_cache_entry *)cache_entry->previous)->next = cache_entry->next; |
116 | 137 | else { |
117 | | /* the existing entry is the current least recently used, we need to make the 'next' |
118 | | * cache entry into the LRU. |
119 | | */ |
120 | 137 | ctx->cache_LRU = cache_entry->next; |
121 | 137 | } |
122 | 154k | cache_entry->next = NULL; |
123 | 154k | cache_entry->previous = ctx->cache_MRU; |
124 | 154k | ctx->cache_MRU->next = cache_entry; |
125 | 154k | ctx->cache_MRU = cache_entry; |
126 | 154k | } |
127 | 233k | #endif |
128 | 233k | return; |
129 | 233k | } |
130 | | |
131 | | /* This one's a bit of an oddity, its used for fonts. When we build a PDF font object |
132 | | * we want the object cache to reference *that* object, not the dictionary which was |
133 | | * read out of the PDF file, so this allows us to replace the font dictionary in the |
134 | | * cache with the actual font object, so that later dereferences will get this font |
135 | | * object. |
136 | | */ |
137 | | int replace_cache_entry(pdf_context *ctx, pdf_obj *o) |
138 | 10.2k | { |
139 | 10.2k | #ifndef DISABLE_CACHE |
140 | 10.2k | xref_entry *entry; |
141 | 10.2k | pdf_obj_cache_entry *cache_entry; |
142 | 10.2k | pdf_obj *old_cached_obj = NULL; |
143 | | |
144 | | /* Limited error checking here, we assume that things like the |
145 | | * validity of the object (eg not a free oobject) have already been handled. |
146 | | */ |
147 | | |
148 | 10.2k | entry = &ctx->xref_table->xref[o->object_num]; |
149 | 10.2k | cache_entry = entry->cache; |
150 | | |
151 | 10.2k | if (cache_entry == NULL) { |
152 | 225 | return(pdfi_add_to_cache(ctx, o)); |
153 | 10.0k | } else { |
154 | | /* NOTE: We grab the object without decrementing, to avoid triggering |
155 | | * a warning message for freeing an object that's in the cache |
156 | | */ |
157 | 10.0k | if (cache_entry->o != NULL) |
158 | 10.0k | old_cached_obj = cache_entry->o; |
159 | | |
160 | | /* Put new entry in the cache */ |
161 | 10.0k | cache_entry->o = o; |
162 | 10.0k | pdfi_countup(o); |
163 | 10.0k | pdfi_promote_cache_entry(ctx, cache_entry); |
164 | | |
165 | | /* Now decrement the old cache entry, if any */ |
166 | 10.0k | pdfi_countdown(old_cached_obj); |
167 | 10.0k | } |
168 | 10.0k | #endif |
169 | 10.0k | return 0; |
170 | 10.2k | } |
171 | | |
172 | | /* Now the dereferencing functions */ |
173 | | |
174 | | /* |
175 | | * Technically we can accept a stream other than the main PDF file stream here. This is |
176 | | * really for the case of compressed objects where we read tokens from the compressed |
177 | | * stream, but it also (with some judicious tinkering) allows us to layer a SubFileDecode |
178 | | * on top of the main file stream, which may be useful. Note that this cannot work with |
179 | | * objects in compressed object streams! They should always pass a value of 0 for the stream_offset. |
180 | | * The stream_offset is the offset from the start of the underlying uncompressed PDF file of |
181 | | * the stream we are using. See the comments below when keyword is PDF_STREAM. |
182 | | */ |
183 | | |
184 | | /* Determine if a PDF object is in a compressed ObjStm. Returns < 0 |
185 | | * for an error, 0 if it is not in a compressed ObjStm and 1 if it is. |
186 | | * Currently errors are inmpossible. This is only used by the decryption code |
187 | | * to determine if a string is in a compressed object stream, if it is then |
188 | | * it can't be used for decryption. |
189 | | */ |
190 | | int is_compressed_object(pdf_context *ctx, uint32_t obj, uint32_t gen) |
191 | 1.44k | { |
192 | 1.44k | xref_entry *entry; |
193 | | |
194 | | /* Can't possibly be a compressed object before we have finished reading |
195 | | * the xref. |
196 | | */ |
197 | 1.44k | if (ctx->xref_table == NULL) |
198 | 0 | return 0; |
199 | | |
200 | 1.44k | entry = &ctx->xref_table->xref[obj]; |
201 | | |
202 | 1.44k | if (entry->compressed) |
203 | 0 | return 1; |
204 | | |
205 | 1.44k | return 0; |
206 | 1.44k | } |
207 | | |
208 | | /* We should never read a 'stream' keyword from a compressed object stream |
209 | | * so this case should never end up here. |
210 | | */ |
211 | | static int pdfi_read_stream_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, |
212 | | uint32_t objnum, uint32_t gen) |
213 | 38.6k | { |
214 | 38.6k | int code = 0; |
215 | 38.6k | int64_t i; |
216 | 38.6k | pdf_dict *dict = NULL; |
217 | 38.6k | gs_offset_t offset; |
218 | 38.6k | pdf_stream *stream_obj = NULL; |
219 | | |
220 | | /* Strange code time.... |
221 | | * If we are using a stream which is *not* the PDF uncompressed main file stream |
222 | | * then doing stell on it will only tell us how many bytes have been read from |
223 | | * that stream, it won't tell us the underlying file position. So we add on the |
224 | | * 'unread' bytes, *and* we add on the position of the start of the stream in |
225 | | * the actual main file. This is all done so that we can check the /Length |
226 | | * of the object. Note that this will *only* work for regular objects it can |
227 | | * not be used for compressed object streams, but those don't need checking anyway |
228 | | * they have a different mechanism altogether and should never get here. |
229 | | */ |
230 | 38.6k | if (s != ctx->main_stream) { |
231 | 0 | offset = stell(s->s) - s->unread_size + stream_offset; |
232 | 0 | code = pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET); |
233 | 0 | if (code < 0) |
234 | 0 | return_error(gs_error_ioerror); |
235 | 38.6k | } else { |
236 | 38.6k | offset = stell(s->s) - s->unread_size; |
237 | 38.6k | } |
238 | | |
239 | 38.6k | if (pdfi_count_stack(ctx) < 1) |
240 | 0 | return_error(gs_error_stackunderflow); |
241 | | |
242 | 38.6k | dict = (pdf_dict *)ctx->stack_top[-1]; |
243 | | |
244 | 38.6k | if (pdfi_type_of(dict) != PDF_DICT) { |
245 | 90 | pdfi_pop(ctx, 1); |
246 | 90 | return_error(gs_error_syntaxerror); |
247 | 90 | } |
248 | | |
249 | 38.5k | dict->indirect_num = dict->object_num = objnum; |
250 | 38.5k | dict->indirect_gen = dict->generation_num = gen; |
251 | | |
252 | | /* Convert the dict into a stream */ |
253 | 38.5k | code = pdfi_obj_dict_to_stream(ctx, dict, &stream_obj, true); |
254 | 38.5k | if (code < 0) { |
255 | 0 | pdfi_pop(ctx, 1); |
256 | 0 | return code; |
257 | 0 | } |
258 | | /* Pop off the dict and push the stream */ |
259 | 38.5k | pdfi_pop(ctx, 1); |
260 | 38.5k | dict = NULL; |
261 | 38.5k | pdfi_push(ctx, (pdf_obj *)stream_obj); |
262 | | |
263 | 38.5k | stream_obj->stream_dict->indirect_num = stream_obj->stream_dict->object_num = objnum; |
264 | 38.5k | stream_obj->stream_dict->indirect_gen = stream_obj->stream_dict->generation_num = gen; |
265 | 38.5k | stream_obj->stream_offset = offset; |
266 | | |
267 | | /* Exceptional code. Normally we do not need to worry about detecting circular references |
268 | | * when reading objects, because we do not dereference any indirect objects. However streams |
269 | | * are a slight exception in that we do get the Length from the stream dictionay and if that |
270 | | * is an indirect reference, then we dereference it. |
271 | | * OSS-fuzz bug 43247 has a stream where the value associated iwht the /Length is an indirect |
272 | | * reference to the same stream object, and leads to infinite recursion. So deal with that |
273 | | * possibility here. |
274 | | */ |
275 | 38.5k | code = pdfi_loop_detector_mark(ctx); |
276 | 38.5k | if (code < 0) { |
277 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
278 | 0 | return code; |
279 | 0 | } |
280 | 38.5k | if (pdfi_loop_detector_check_object(ctx, stream_obj->object_num)) { |
281 | 9 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
282 | 9 | pdfi_loop_detector_cleartomark(ctx); |
283 | 9 | return_error(gs_error_circular_reference); |
284 | 9 | } |
285 | | |
286 | 38.5k | code = pdfi_loop_detector_add_object(ctx, stream_obj->object_num); |
287 | 38.5k | if (code < 0) { |
288 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
289 | 0 | pdfi_loop_detector_cleartomark(ctx); |
290 | 0 | return code; |
291 | 0 | } |
292 | | |
293 | | /* This code may be a performance overhead, it simply skips over the stream contents |
294 | | * and checks that the stream ends with a 'endstream endobj' pair. We could add a |
295 | | * 'go faster' flag for users who are certain their PDF files are well-formed. This |
296 | | * could also allow us to skip all kinds of other checking..... |
297 | | */ |
298 | | |
299 | 38.5k | code = pdfi_dict_get_int(ctx, (pdf_dict *)stream_obj->stream_dict, "Length", &i); |
300 | 38.5k | if (code < 0) { |
301 | 890 | char extra_info[gp_file_name_sizeof]; |
302 | | |
303 | 890 | (void)pdfi_loop_detector_cleartomark(ctx); |
304 | 890 | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u missing mandatory keyword /Length, unable to verify the stream length.\n", objnum); |
305 | 890 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info); |
306 | 890 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
307 | 890 | return code; |
308 | 890 | } |
309 | 37.6k | code = pdfi_loop_detector_cleartomark(ctx); |
310 | 37.6k | if (code < 0) { |
311 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
312 | 0 | return code; |
313 | 0 | } |
314 | | |
315 | 37.6k | if (i < 0 || (i + offset)> ctx->main_stream_length) { |
316 | 2.34k | char extra_info[gp_file_name_sizeof]; |
317 | | |
318 | 2.34k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has /Length which, when added to offset of object, exceeds file size.\n", objnum); |
319 | 2.34k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_ioerror), NULL, E_PDF_BADSTREAM, "pdfi_read_stream_object", extra_info))< 0) { |
320 | 0 | pdfi_pop(ctx, 1); |
321 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
322 | 0 | return code; |
323 | 0 | } |
324 | 35.3k | } else { |
325 | 35.3k | code = pdfi_seek(ctx, ctx->main_stream, i, SEEK_CUR); |
326 | 35.3k | if (code < 0) { |
327 | 0 | pdfi_pop(ctx, 1); |
328 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
329 | 0 | return code; |
330 | 0 | } |
331 | | |
332 | 35.3k | stream_obj->Length = 0; |
333 | 35.3k | stream_obj->length_valid = false; |
334 | | |
335 | 35.3k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
336 | 35.3k | if (code == 0) { |
337 | 0 | char extra_info[gp_file_name_sizeof]; |
338 | |
|
339 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find a valid object at end of stream object %u.\n", objnum); |
340 | 0 | pdfi_log_info(ctx, "pdfi_read_stream_object", extra_info); |
341 | | /* It is possible for pdfi_read_token to clear the stack, losing the stream object. If that |
342 | | * happens give up. |
343 | | */ |
344 | 0 | if (pdfi_count_stack(ctx) == 0) { |
345 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
346 | 0 | return code; |
347 | 0 | } |
348 | 35.3k | } else if (code < 0) { |
349 | 0 | char extra_info[gp_file_name_sizeof]; |
350 | |
|
351 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Failed to find 'endstream' keyword at end of stream object %u.\n", objnum); |
352 | 0 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", extra_info)) < 0) { |
353 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
354 | 0 | return code; |
355 | 0 | } |
356 | 35.3k | } else if (code != TOKEN_ENDSTREAM) { |
357 | 4.38k | char extra_info[gp_file_name_sizeof]; |
358 | | |
359 | 4.38k | gs_snprintf(extra_info, sizeof(extra_info), "Stream object %u has an incorrect /Length of %"PRIu64"\n", objnum, i); |
360 | 4.38k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BAD_LENGTH, "pdfi_read_stream_object", extra_info)) < 0) { |
361 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
362 | 0 | return code; |
363 | 0 | } |
364 | 30.9k | } else { |
365 | | /* Cache the Length in the stream object and mark it valid */ |
366 | 30.9k | stream_obj->Length = i; |
367 | 30.9k | stream_obj->length_valid = true; |
368 | 30.9k | } |
369 | 35.3k | } |
370 | | |
371 | | /* If we failed to find a valid object, or the object wasn't a keyword, or the |
372 | | * keywrod wasn't 'endstream' then the Length is wrong. We need to have the correct |
373 | | * Length for streams if we have encrypted files, because we must install a |
374 | | * SubFileDecode filter with a Length (EODString is incompatible with AES encryption) |
375 | | * Rather than mess about checking for encryption, we'll choose to just correctly |
376 | | * calculate the Length of all streams. Although this takes time, it will only |
377 | | * happen for files which are invalid. |
378 | | */ |
379 | 37.6k | if (stream_obj->length_valid != true) { |
380 | 6.73k | char Buffer[10]; |
381 | 6.73k | unsigned int bytes, total = 0; |
382 | 6.73k | int c = 0; |
383 | | |
384 | 6.73k | code = pdfi_seek(ctx, ctx->main_stream, stream_obj->stream_offset, SEEK_SET); |
385 | 6.73k | if (code < 0) { |
386 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
387 | 0 | pdfi_pop(ctx, 1); |
388 | 0 | return code; |
389 | 0 | } |
390 | 6.73k | memset(Buffer, 0x00, 10); |
391 | 6.73k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 9, ctx->main_stream); |
392 | 6.73k | if (bytes < 9) { |
393 | 9 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
394 | 9 | return_error(gs_error_ioerror); |
395 | 9 | } |
396 | | |
397 | 6.72k | total = bytes; |
398 | 132M | do { |
399 | 132M | if (memcmp(Buffer, "endstream", 9) == 0) { |
400 | 4.50k | if (Buffer[9] != 0x00) |
401 | 4.50k | total--; |
402 | 4.50k | stream_obj->Length = total - 9; |
403 | 4.50k | stream_obj->length_valid = true; |
404 | 4.50k | break; |
405 | 4.50k | } |
406 | 132M | if (memcmp(Buffer, "endobj", 6) == 0) { |
407 | 323 | if (Buffer[9] != 0x00) |
408 | 316 | total--; |
409 | 323 | stream_obj->Length = total - 6; |
410 | 323 | stream_obj->length_valid = true; |
411 | 323 | break; |
412 | 323 | } |
413 | 132M | memmove(Buffer, Buffer+1, 9); |
414 | 132M | c = pdfi_read_byte(ctx, ctx->main_stream); |
415 | 132M | if (c < 0) |
416 | 1.89k | break; |
417 | 132M | Buffer[9] = (byte)c; |
418 | 132M | total++; |
419 | 132M | } while(1); |
420 | 6.72k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
421 | 6.72k | if (c < 0) |
422 | 1.89k | return_error(gs_error_ioerror); |
423 | 4.82k | return 0; |
424 | 6.72k | } |
425 | | |
426 | 30.9k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
427 | 30.9k | if (code < 0) { |
428 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
429 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", "")) < 0) { |
430 | 0 | return code; |
431 | 0 | } |
432 | | /* Something went wrong looking for endobj, but we found endstream, so assume |
433 | | * for now that will suffice. |
434 | | */ |
435 | 0 | return 0; |
436 | 0 | } |
437 | | |
438 | 30.9k | if (code == 0) { |
439 | 0 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
440 | 0 | return_error(gs_error_stackunderflow); |
441 | 0 | } |
442 | | |
443 | 30.9k | if (code != TOKEN_ENDOBJ) { |
444 | 124 | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
445 | 124 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_typecheck), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_stream_object", NULL); |
446 | | /* Didn't find an endobj, but we have an endstream, so assume |
447 | | * for now that will suffice |
448 | | */ |
449 | 124 | return code; |
450 | 124 | } |
451 | 30.7k | pdfi_countdown(stream_obj); /* get rid of extra ref */ |
452 | | |
453 | 30.7k | return 0; |
454 | 30.9k | } |
455 | | |
456 | | /* This reads an object *after* the x y obj keyword has been found. Its broken out |
457 | | * separately for the benefit of the repair code when reading the dictionary following |
458 | | * the 'trailer' keyword, which does not have a 'obj' keyword. Note that it also does |
459 | | * not have an 'endobj', we rely on the error handling to take care of that for us. |
460 | | */ |
461 | | int pdfi_read_bare_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset, uint32_t objnum, uint32_t gen) |
462 | 131k | { |
463 | 131k | int code = 0, initial_depth = 0; |
464 | 131k | pdf_key keyword; |
465 | 131k | gs_offset_t saved_offset[3]; |
466 | 131k | pdf_obj_type type; |
467 | | |
468 | 131k | initial_depth = pdfi_count_stack(ctx); |
469 | 131k | saved_offset[0] = saved_offset[1] = saved_offset[2] = 0; |
470 | | |
471 | 131k | code = pdfi_read_token(ctx, s, objnum, gen); |
472 | 131k | if (code < 0) |
473 | 260 | return code; |
474 | | |
475 | 131k | if (code == 0) |
476 | | /* failed to read a token */ |
477 | 3 | return_error(gs_error_syntaxerror); |
478 | | |
479 | 131k | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) { |
480 | 1.02k | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
481 | 1.02k | if (keyword == TOKEN_ENDOBJ) { |
482 | 74 | ctx->stack_top[-1] = PDF_NULL_OBJ; |
483 | 74 | return 0; |
484 | 74 | } |
485 | 1.02k | } |
486 | | |
487 | 4.61M | do { |
488 | | /* move all the saved offsets up by one */ |
489 | 4.61M | saved_offset[0] = saved_offset[1]; |
490 | 4.61M | saved_offset[1] = saved_offset[2]; |
491 | 4.61M | saved_offset[2] = pdfi_unread_tell(ctx); |
492 | | |
493 | 4.61M | code = pdfi_read_token(ctx, s, objnum, gen); |
494 | 4.61M | if (code < 0) { |
495 | 12.8k | pdfi_clearstack(ctx); |
496 | 12.8k | return code; |
497 | 12.8k | } |
498 | 4.60M | if (s->eof) |
499 | 180 | return_error(gs_error_syntaxerror); |
500 | 4.60M | code = 0; |
501 | 4.60M | type = pdfi_type_of(ctx->stack_top[-1]); |
502 | 4.60M | if (type == PDF_KEYWORD) |
503 | 6.19k | goto missing_endobj; |
504 | 4.60M | } while (type != PDF_FAST_KEYWORD); |
505 | | |
506 | 112k | keyword = (pdf_key)(uintptr_t)(ctx->stack_top[-1]); |
507 | 112k | if (keyword == TOKEN_ENDOBJ) { |
508 | 69.9k | pdf_obj *o; |
509 | | |
510 | 69.9k | if (pdfi_count_stack(ctx) - initial_depth < 2) { |
511 | 78 | pdfi_clearstack(ctx); |
512 | 78 | return_error(gs_error_stackunderflow); |
513 | 78 | } |
514 | | |
515 | 69.8k | o = ctx->stack_top[-2]; |
516 | | |
517 | 69.8k | pdfi_pop(ctx, 1); |
518 | | |
519 | 69.8k | if (o >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
520 | 69.8k | o->indirect_num = o->object_num = objnum; |
521 | 69.8k | o->indirect_gen = o->generation_num = gen; |
522 | 69.8k | } |
523 | 69.8k | return code; |
524 | 69.9k | } |
525 | 42.1k | if (keyword == TOKEN_STREAM) { |
526 | 38.6k | pdfi_pop(ctx, 1); |
527 | 38.6k | return pdfi_read_stream_object(ctx, s, stream_offset, objnum, gen); |
528 | 38.6k | } |
529 | 3.51k | if (keyword == TOKEN_OBJ) { |
530 | 310 | pdf_obj *o; |
531 | | |
532 | 310 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL)) < 0) { |
533 | 0 | return code; |
534 | 0 | } |
535 | | |
536 | | /* 4 for; the object we want, the object number, generation number and 'obj' keyword */ |
537 | 310 | if (pdfi_count_stack(ctx) - initial_depth < 4) |
538 | 76 | return_error(gs_error_stackunderflow); |
539 | | |
540 | | /* If we have that many objects, assume that we can throw away the x y obj and just use the remaining object */ |
541 | 234 | o = ctx->stack_top[-4]; |
542 | | |
543 | 234 | pdfi_pop(ctx, 3); |
544 | | |
545 | 234 | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
546 | 233 | o->indirect_num = o->object_num = objnum; |
547 | 233 | o->indirect_gen = o->generation_num = gen; |
548 | 233 | } |
549 | 234 | if (saved_offset[0] > 0) |
550 | 234 | (void)pdfi_seek(ctx, s, saved_offset[0], SEEK_SET); |
551 | 234 | return 0; |
552 | 310 | } |
553 | | |
554 | 9.39k | missing_endobj: |
555 | | /* Assume that any other keyword means a missing 'endobj' */ |
556 | 9.39k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_xref_stream_dict", "")) == 0) { |
557 | 9.39k | pdf_obj *o; |
558 | | |
559 | 9.39k | pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGENDOBJ, "pdfi_read_bare_object", NULL); |
560 | | |
561 | 9.39k | if (pdfi_count_stack(ctx) - initial_depth < 2) |
562 | 90 | return_error(gs_error_stackunderflow); |
563 | | |
564 | 9.30k | o = ctx->stack_top[-2]; |
565 | | |
566 | 9.30k | pdfi_pop(ctx, 1); |
567 | | |
568 | 9.30k | if (pdfi_type_of(o) != PDF_BOOL && pdfi_type_of(o) != PDF_NULL && pdfi_type_of(o) != PDF_FAST_KEYWORD) { |
569 | 9.20k | o->indirect_num = o->object_num = objnum; |
570 | 9.20k | o->indirect_gen = o->generation_num = gen; |
571 | 9.20k | } |
572 | 9.30k | return code; |
573 | 9.39k | } |
574 | 0 | pdfi_pop(ctx, 2); |
575 | 0 | return_error(gs_error_syntaxerror); |
576 | 9.39k | } |
577 | | |
578 | | static int pdfi_read_object(pdf_context *ctx, pdf_c_stream *s, gs_offset_t stream_offset) |
579 | 129k | { |
580 | 129k | int code = 0; |
581 | 129k | int objnum = 0, gen = 0; |
582 | | |
583 | | /* An object consists of 'num gen obj' followed by a token, follwed by an endobj |
584 | | * A stream dictionary might have a 'stream' instead of an 'endobj', in which case we |
585 | | * want to deal with it specially by getting the Length, jumping to the end and checking |
586 | | * for an endobj. Or not, possibly, because it would be slow. |
587 | | */ |
588 | 129k | code = pdfi_read_bare_int(ctx, s, &objnum); |
589 | 129k | if (code < 0) |
590 | 808 | return code; |
591 | 128k | if (code == 0) |
592 | 690 | return_error(gs_error_syntaxerror); |
593 | | |
594 | 127k | code = pdfi_read_bare_int(ctx, s, &gen); |
595 | 127k | if (code < 0) |
596 | 43 | return code; |
597 | 127k | if (code == 0) |
598 | 14 | return_error(gs_error_syntaxerror); |
599 | | |
600 | 127k | code = pdfi_read_bare_keyword(ctx, s); |
601 | 127k | if (code < 0) |
602 | 0 | return code; |
603 | 127k | if (code == 0) |
604 | 0 | return gs_note_error(gs_error_ioerror); |
605 | 127k | if (code != TOKEN_OBJ) { |
606 | 187 | return_error(gs_error_syntaxerror); |
607 | 187 | } |
608 | | |
609 | 127k | return pdfi_read_bare_object(ctx, s, stream_offset, objnum, gen); |
610 | 127k | } |
611 | | |
612 | | static int pdfi_deref_compressed(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, |
613 | | const xref_entry *entry, bool cache) |
614 | 40.3k | { |
615 | 40.3k | int code = 0; |
616 | 40.3k | xref_entry *compressed_entry; |
617 | 40.3k | pdf_c_stream *compressed_stream = NULL; |
618 | 40.3k | pdf_c_stream *SubFile_stream = NULL; |
619 | 40.3k | pdf_c_stream *Object_stream = NULL; |
620 | 40.3k | int i = 0, object_length = 0; |
621 | 40.3k | int64_t num_entries; |
622 | 40.3k | int found_object; |
623 | 40.3k | int64_t Length, First; |
624 | 40.3k | gs_offset_t offset = 0; |
625 | 40.3k | pdf_stream *compressed_object = NULL; |
626 | 40.3k | pdf_dict *compressed_sdict = NULL; /* alias */ |
627 | 40.3k | pdf_name *Type = NULL; |
628 | | |
629 | 40.3k | if (entry->u.compressed.compressed_stream_num > ctx->xref_table->xref_size - 1) |
630 | 13 | return_error(gs_error_undefined); |
631 | | |
632 | 40.3k | compressed_entry = &ctx->xref_table->xref[entry->u.compressed.compressed_stream_num]; |
633 | | |
634 | 40.3k | if (ctx->args.pdfdebug) { |
635 | 0 | outprintf(ctx->memory, "%% Reading compressed object (%"PRIi64" 0 obj)", obj); |
636 | 0 | outprintf(ctx->memory, " from ObjStm with object number %"PRIi64"\n", compressed_entry->object_num); |
637 | 0 | } |
638 | | |
639 | 40.3k | if (compressed_entry->cache == NULL) { |
640 | | #if CACHE_STATISTICS |
641 | | ctx->compressed_misses++; |
642 | | #endif |
643 | 2.86k | code = pdfi_seek(ctx, ctx->main_stream, compressed_entry->u.uncompressed.offset, SEEK_SET); |
644 | 2.86k | if (code < 0) |
645 | 0 | goto exit; |
646 | | |
647 | 2.86k | code = pdfi_read_object(ctx, ctx->main_stream, 0); |
648 | 2.86k | if (code < 0) |
649 | 801 | goto exit; |
650 | | |
651 | 2.06k | if (pdfi_count_stack(ctx) < 1) { |
652 | 0 | code = gs_note_error(gs_error_stackunderflow); |
653 | 0 | goto exit; |
654 | 0 | } |
655 | | |
656 | 2.06k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_STREAM) { |
657 | 275 | pdfi_pop(ctx, 1); |
658 | 275 | code = gs_note_error(gs_error_typecheck); |
659 | 275 | goto exit; |
660 | 275 | } |
661 | 1.79k | if (ctx->stack_top[-1]->object_num != compressed_entry->object_num) { |
662 | 21 | pdfi_pop(ctx, 1); |
663 | | /* Same error (undefined) as when we read an uncompressed object with the wrong number */ |
664 | 21 | code = gs_note_error(gs_error_undefined); |
665 | 21 | goto exit; |
666 | 21 | } |
667 | 1.77k | compressed_object = (pdf_stream *)ctx->stack_top[-1]; |
668 | 1.77k | pdfi_countup(compressed_object); |
669 | 1.77k | pdfi_pop(ctx, 1); |
670 | 1.77k | code = pdfi_add_to_cache(ctx, (pdf_obj *)compressed_object); |
671 | 1.77k | if (code < 0) |
672 | 0 | goto exit; |
673 | 37.4k | } else { |
674 | | #if CACHE_STATISTICS |
675 | | ctx->compressed_hits++; |
676 | | #endif |
677 | 37.4k | compressed_object = (pdf_stream *)compressed_entry->cache->o; |
678 | 37.4k | pdfi_countup(compressed_object); |
679 | 37.4k | pdfi_promote_cache_entry(ctx, compressed_entry->cache); |
680 | 37.4k | } |
681 | 39.2k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)compressed_object, &compressed_sdict); |
682 | 39.2k | if (code < 0) |
683 | 0 | return code; |
684 | | |
685 | 39.2k | if (ctx->loop_detection != NULL) { |
686 | 39.2k | code = pdfi_loop_detector_mark(ctx); |
687 | 39.2k | if (code < 0) |
688 | 0 | goto exit; |
689 | 39.2k | if (compressed_sdict->object_num != 0) { |
690 | 39.2k | if (pdfi_loop_detector_check_object(ctx, compressed_sdict->object_num)) { |
691 | 15 | code = gs_note_error(gs_error_circular_reference); |
692 | 39.2k | } else { |
693 | 39.2k | code = pdfi_loop_detector_add_object(ctx, compressed_sdict->object_num); |
694 | 39.2k | } |
695 | 39.2k | if (code < 0) { |
696 | 15 | (void)pdfi_loop_detector_cleartomark(ctx); |
697 | 15 | goto exit; |
698 | 15 | } |
699 | 39.2k | } |
700 | 39.2k | } |
701 | | /* Check its an ObjStm ! */ |
702 | 39.2k | code = pdfi_dict_get_type(ctx, compressed_sdict, "Type", PDF_NAME, (pdf_obj **)&Type); |
703 | 39.2k | if (code < 0) { |
704 | 27 | if (ctx->loop_detection != NULL) |
705 | 27 | (void)pdfi_loop_detector_cleartomark(ctx); |
706 | 27 | goto exit; |
707 | 27 | } |
708 | | |
709 | 39.2k | if (!pdfi_name_is(Type, "ObjStm")){ |
710 | 11 | if (ctx->loop_detection != NULL) |
711 | 11 | (void)pdfi_loop_detector_cleartomark(ctx); |
712 | 11 | code = gs_note_error(gs_error_syntaxerror); |
713 | 11 | goto exit; |
714 | 11 | } |
715 | | |
716 | | /* Need to check the /N entry to see if the object is actually in this stream! */ |
717 | 39.1k | code = pdfi_dict_get_int(ctx, compressed_sdict, "N", &num_entries); |
718 | 39.1k | if (code < 0) { |
719 | 30 | if (ctx->loop_detection != NULL) |
720 | 30 | (void)pdfi_loop_detector_cleartomark(ctx); |
721 | 30 | goto exit; |
722 | 30 | } |
723 | | |
724 | 39.1k | if (num_entries < 0 || num_entries > ctx->xref_table->xref_size) { |
725 | 0 | if (ctx->loop_detection != NULL) |
726 | 0 | (void)pdfi_loop_detector_cleartomark(ctx); |
727 | 0 | code = gs_note_error(gs_error_rangecheck); |
728 | 0 | goto exit; |
729 | 0 | } |
730 | | |
731 | 39.1k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
732 | 39.1k | if (code < 0) { |
733 | 21.6k | if (ctx->loop_detection != NULL) |
734 | 21.6k | (void)pdfi_loop_detector_cleartomark(ctx); |
735 | 21.6k | goto exit; |
736 | 21.6k | } |
737 | | |
738 | 17.4k | code = pdfi_dict_get_int(ctx, compressed_sdict, "First", &First); |
739 | 17.4k | if (code < 0) { |
740 | 68 | if (ctx->loop_detection != NULL) |
741 | 68 | (void)pdfi_loop_detector_cleartomark(ctx); |
742 | 68 | goto exit; |
743 | 68 | } |
744 | | |
745 | 17.4k | if (ctx->loop_detection != NULL) |
746 | 17.4k | (void)pdfi_loop_detector_cleartomark(ctx); |
747 | | |
748 | 17.4k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
749 | 17.4k | if (code < 0) |
750 | 0 | goto exit; |
751 | | |
752 | 17.4k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
753 | 17.4k | if (code < 0) |
754 | 0 | goto exit; |
755 | | |
756 | 17.4k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
757 | 17.4k | if (code < 0) |
758 | 134 | goto exit; |
759 | | |
760 | 832k | for (i=0;i < num_entries;i++) |
761 | 816k | { |
762 | 816k | int new_offset; |
763 | 816k | code = pdfi_read_bare_int(ctx, compressed_stream, &found_object); |
764 | 816k | if (code < 0) |
765 | 682 | goto exit; |
766 | 816k | if (code == 0) { |
767 | 9 | code = gs_note_error(gs_error_syntaxerror); |
768 | 9 | goto exit; |
769 | 9 | } |
770 | 816k | code = pdfi_read_bare_int(ctx, compressed_stream, &new_offset); |
771 | 816k | if (code < 0) |
772 | 331 | goto exit; |
773 | 815k | if (code == 0) { |
774 | 5 | code = gs_note_error(gs_error_syntaxerror); |
775 | 5 | goto exit; |
776 | 5 | } |
777 | 815k | if (i == entry->u.compressed.object_index) { |
778 | 16.3k | if (found_object != obj) { |
779 | 831 | code = gs_note_error(gs_error_undefined); |
780 | 831 | goto exit; |
781 | 831 | } |
782 | 15.5k | offset = new_offset; |
783 | 15.5k | } |
784 | 814k | if (i == entry->u.compressed.object_index + 1) |
785 | 14.0k | object_length = new_offset - offset; |
786 | 814k | } |
787 | | |
788 | | /* Bug #705259 - The first object need not lie immediately after the initial |
789 | | * table of object numbers and offsets. The start of the first object is given |
790 | | * by the value of First. We don't know how many bytes we consumed getting to |
791 | | * the end of the table, unfortunately, so we close the stream, rewind the main |
792 | | * stream back to the beginning of the ObjStm, and then read and discard 'First' |
793 | | * bytes in order to get to the start of the first object. Then we read the |
794 | | * number of bytes required to get from there to the start of the object we |
795 | | * actually want. |
796 | | * If this ever looks like it's causing performance problems we could read the |
797 | | * initial table above manually instead of using the existing code, and track |
798 | | * how many bytes we'd read, which would avoid us having to tear down and |
799 | | * rebuild the stream. |
800 | | */ |
801 | 15.4k | if (compressed_stream) |
802 | 15.4k | pdfi_close_file(ctx, compressed_stream); |
803 | 15.4k | if (SubFile_stream) |
804 | 15.4k | pdfi_close_file(ctx, SubFile_stream); |
805 | | |
806 | 15.4k | code = pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, compressed_object), SEEK_SET); |
807 | 15.4k | if (code < 0) |
808 | 0 | goto exit; |
809 | | |
810 | | /* We already dereferenced this above, so we don't need the loop detection checking here */ |
811 | 15.4k | code = pdfi_dict_get_int(ctx, compressed_sdict, "Length", &Length); |
812 | 15.4k | if (code < 0) |
813 | 0 | goto exit; |
814 | | |
815 | 15.4k | code = pdfi_apply_SubFileDecode_filter(ctx, Length, NULL, ctx->main_stream, &SubFile_stream, false); |
816 | 15.4k | if (code < 0) |
817 | 0 | goto exit; |
818 | | |
819 | 15.4k | code = pdfi_filter(ctx, compressed_object, SubFile_stream, &compressed_stream, false); |
820 | 15.4k | if (code < 0) |
821 | 0 | goto exit; |
822 | | |
823 | 6.69M | for (i=0;i < First;i++) |
824 | 6.67M | { |
825 | 6.67M | int c = pdfi_read_byte(ctx, compressed_stream); |
826 | 6.67M | if (c < 0) { |
827 | 4 | code = gs_note_error(gs_error_ioerror); |
828 | 4 | goto exit; |
829 | 4 | } |
830 | 6.67M | } |
831 | | |
832 | | /* Skip to the offset of the object we want to read */ |
833 | 47.5M | for (i=0;i < offset;i++) |
834 | 47.5M | { |
835 | 47.5M | int c = pdfi_read_byte(ctx, compressed_stream); |
836 | 47.5M | if (c < 0) { |
837 | 2.00k | code = gs_note_error(gs_error_ioerror); |
838 | 2.00k | goto exit; |
839 | 2.00k | } |
840 | 47.5M | } |
841 | | |
842 | | /* If object_length is not 0, then we want to apply a SubFileDecode filter to limit |
843 | | * the number of bytes we read to the declared size of the object (difference between |
844 | | * the offsets of the object we want to read, and the next object). If it is 0 then |
845 | | * we're reading the last object in the stream, so we just rely on the SubFileDecode |
846 | | * we set up when we created compressed_stream to limit the bytes to the length of |
847 | | * that stream. |
848 | | */ |
849 | 13.4k | if (object_length > 0) { |
850 | 12.0k | code = pdfi_apply_SubFileDecode_filter(ctx, object_length, NULL, compressed_stream, &Object_stream, false); |
851 | 12.0k | if (code < 0) |
852 | 0 | goto exit; |
853 | 12.0k | } else { |
854 | 1.41k | Object_stream = compressed_stream; |
855 | 1.41k | } |
856 | | |
857 | 13.4k | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
858 | 13.4k | if (code < 0) |
859 | 340 | goto exit; |
860 | 13.0k | if (code == 0) { |
861 | 13 | code = gs_note_error(gs_error_syntaxerror); |
862 | 13 | goto exit; |
863 | 13 | } |
864 | 13.0k | if (pdfi_type_of(ctx->stack_top[-1]) == PDF_ARRAY_MARK || pdfi_type_of(ctx->stack_top[-1]) == PDF_DICT_MARK) { |
865 | 12.5k | int start_depth = pdfi_count_stack(ctx); |
866 | | |
867 | | /* Need to read all the elements from COS objects */ |
868 | 493k | do { |
869 | 493k | code = pdfi_read_token(ctx, Object_stream, obj, gen); |
870 | 493k | if (code < 0) |
871 | 706 | goto exit; |
872 | 492k | if (code == 0) { |
873 | 73 | code = gs_note_error(gs_error_syntaxerror); |
874 | 73 | goto exit; |
875 | 73 | } |
876 | 492k | if (compressed_stream->eof == true) { |
877 | 14 | code = gs_note_error(gs_error_ioerror); |
878 | 14 | goto exit; |
879 | 14 | } |
880 | 492k | } while ((pdfi_type_of(ctx->stack_top[-1]) != PDF_ARRAY && pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) || pdfi_count_stack(ctx) > start_depth); |
881 | 12.5k | } |
882 | | |
883 | 12.2k | *object = ctx->stack_top[-1]; |
884 | | /* For compressed objects we don't get a 'obj gen obj' sequence which is what sets |
885 | | * the object number for uncompressed objects. So we need to do that here. |
886 | | */ |
887 | 12.2k | if (*object >= PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY)) { |
888 | 12.0k | (*object)->indirect_num = (*object)->object_num = obj; |
889 | 12.0k | (*object)->indirect_gen = (*object)->generation_num = gen; |
890 | 12.0k | pdfi_countup(*object); |
891 | 12.0k | } |
892 | 12.2k | pdfi_pop(ctx, 1); |
893 | | |
894 | 12.2k | if (cache) { |
895 | 10.3k | code = pdfi_add_to_cache(ctx, *object); |
896 | 10.3k | if (code < 0) { |
897 | 0 | pdfi_countdown(*object); |
898 | 0 | goto exit; |
899 | 0 | } |
900 | 10.3k | } |
901 | | |
902 | 40.3k | exit: |
903 | 40.3k | if (Object_stream) |
904 | 13.4k | pdfi_close_file(ctx, Object_stream); |
905 | 40.3k | if (Object_stream != compressed_stream) |
906 | 15.8k | if (compressed_stream) |
907 | 15.8k | pdfi_close_file(ctx, compressed_stream); |
908 | 40.3k | if (SubFile_stream) |
909 | 17.4k | pdfi_close_file(ctx, SubFile_stream); |
910 | 40.3k | pdfi_countdown(compressed_object); |
911 | 40.3k | pdfi_countdown(Type); |
912 | 40.3k | return code; |
913 | 12.2k | } |
914 | | |
915 | | /* pdf_dereference returns an object with a reference count of at least 1, this represents the |
916 | | * reference being held by the caller (in **object) when we return from this function. |
917 | | */ |
918 | | static int pdfi_dereference_main(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object, bool cache) |
919 | 556k | { |
920 | 556k | xref_entry *entry; |
921 | 556k | int code, stack_depth = pdfi_count_stack(ctx); |
922 | 556k | gs_offset_t saved_stream_offset; |
923 | 556k | bool saved_decrypt_strings = ctx->encryption.decrypt_strings; |
924 | | |
925 | 556k | *object = NULL; |
926 | | |
927 | 556k | if (ctx->xref_table == NULL) |
928 | 4 | return_error(gs_error_typecheck); |
929 | | |
930 | 556k | if (ctx->main_stream == NULL || ctx->main_stream->s == NULL) |
931 | 0 | return_error(gs_error_ioerror); |
932 | | |
933 | 556k | if (obj >= ctx->xref_table->xref_size) { |
934 | 11.6k | char extra_info[gp_file_name_sizeof]; |
935 | | |
936 | 11.6k | gs_snprintf(extra_info, sizeof(extra_info), "Error, attempted to dereference object %"PRIu64", which is not present in the xref table\n", obj); |
937 | 11.6k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference", extra_info)) < 0) { |
938 | 0 | return code; |
939 | 0 | } |
940 | | |
941 | 11.6k | code = pdfi_repair_file(ctx); |
942 | 11.6k | if (code < 0) { |
943 | 11.6k | *object = NULL; |
944 | 11.6k | return code; |
945 | 11.6k | } |
946 | 2 | if (obj >= ctx->xref_table->xref_size) { |
947 | 1 | *object = NULL; |
948 | 1 | return_error(gs_error_rangecheck); |
949 | 1 | } |
950 | 2 | } |
951 | | |
952 | 545k | entry = &ctx->xref_table->xref[obj]; |
953 | | |
954 | 545k | if(entry->object_num == 0) { |
955 | 192k | pdfi_set_error(ctx, 0, NULL, E_PDF_BADOBJNUMBER, "pdfi_dereference_main", "Attempt to dereference object 0"); |
956 | 192k | return_error(gs_error_undefined); |
957 | 192k | } |
958 | | |
959 | 353k | if (entry->free) { |
960 | 282 | char extra_info[gp_file_name_sizeof]; |
961 | | |
962 | 282 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
963 | 282 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
964 | 282 | *object = PDF_NULL_OBJ; |
965 | 282 | return code; |
966 | 352k | }else { |
967 | 352k | if (!entry->compressed) { |
968 | 297k | if(entry->u.uncompressed.generation_num != gen) |
969 | 932 | pdfi_set_warning(ctx, 0, NULL, W_PDF_MISMATCH_GENERATION, "pdfi_dereference_main", ""); |
970 | 297k | } |
971 | 352k | } |
972 | | |
973 | 352k | if (ctx->loop_detection) { |
974 | 343k | if (pdfi_loop_detector_check_object(ctx, obj) == true) |
975 | 502 | return_error(gs_error_circular_reference); |
976 | 343k | if (entry->free) { |
977 | 0 | code = pdfi_loop_detector_add_object(ctx, obj); |
978 | 0 | if (code < 0) |
979 | 0 | return code; |
980 | 0 | } |
981 | 343k | } |
982 | 352k | if (entry->cache != NULL){ |
983 | 185k | pdf_obj_cache_entry *cache_entry = entry->cache; |
984 | | |
985 | | #if CACHE_STATISTICS |
986 | | ctx->hits++; |
987 | | #endif |
988 | 185k | *object = cache_entry->o; |
989 | 185k | pdfi_countup(*object); |
990 | | |
991 | 185k | pdfi_promote_cache_entry(ctx, cache_entry); |
992 | 185k | } else { |
993 | 166k | saved_stream_offset = pdfi_unread_tell(ctx); |
994 | | |
995 | 166k | if (entry->compressed) { |
996 | | /* This is an object in a compressed object stream */ |
997 | 40.3k | ctx->encryption.decrypt_strings = false; |
998 | | |
999 | 40.3k | code = pdfi_deref_compressed(ctx, obj, gen, object, entry, cache); |
1000 | 40.3k | if (code < 0 || *object == NULL) |
1001 | 28.0k | goto error; |
1002 | 126k | } else { |
1003 | | #if CACHE_STATISTICS |
1004 | | ctx->misses++; |
1005 | | #endif |
1006 | 126k | ctx->encryption.decrypt_strings = true; |
1007 | | |
1008 | 126k | code = pdfi_seek(ctx, ctx->main_stream, entry->u.uncompressed.offset, SEEK_SET); |
1009 | 126k | if (code < 0) |
1010 | 3 | goto error; |
1011 | | |
1012 | 126k | code = pdfi_read_object(ctx, ctx->main_stream, entry->u.uncompressed.offset); |
1013 | | |
1014 | | /* pdfi_read_object() could do a repair, which would invalidate the xref and rebuild it. |
1015 | | * reload the xref entry to be certain it is valid. |
1016 | | */ |
1017 | 126k | entry = &ctx->xref_table->xref[obj]; |
1018 | 126k | if (code < 0) { |
1019 | 15.9k | int code1 = 0; |
1020 | 15.9k | if (entry->free) { |
1021 | 0 | char extra_info[gp_file_name_sizeof]; |
1022 | |
|
1023 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1024 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1025 | 0 | *object = PDF_NULL_OBJ; |
1026 | 0 | if (code < 0) |
1027 | 0 | goto error; |
1028 | 0 | goto free_obj; |
1029 | 0 | } |
1030 | 15.9k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1031 | 15.9k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1032 | 15.9k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1033 | | |
1034 | 15.9k | code1 = pdfi_repair_file(ctx); |
1035 | 15.9k | if (code1 == 0) |
1036 | 94 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1037 | | /* Repair failed, just give up and return an error */ |
1038 | 15.8k | goto error; |
1039 | 15.9k | } |
1040 | | |
1041 | | /* We only expect a single object back when dereferencing an indirect reference |
1042 | | * The only way (I think) we can end up with more than one is if the object initially |
1043 | | * appears to be a dictionary or array, but the object terminates (with endobj or |
1044 | | * simply reaching EOF) without terminating the array or dictionary. That's clearly |
1045 | | * an error. We might, as a future 'improvement' choose to walk back through |
1046 | | * the stack looking for unterminated dictionary or array markers, and closing them |
1047 | | * so that (hopefully!) we end up with a single 'repaired' object on the stack. |
1048 | | * But for now I'm simply going to treat these as errors. We will try a repair on the |
1049 | | * file to see if we end up using a different (hopefully intact) object from the file. |
1050 | | */ |
1051 | 110k | if (pdfi_count_stack(ctx) - stack_depth > 1) { |
1052 | 5.61k | int code1 = 0; |
1053 | | |
1054 | 5.61k | code1 = pdfi_repair_file(ctx); |
1055 | 5.61k | if (code1 == 0) |
1056 | 37 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1057 | | /* Repair failed, just give up and return an error */ |
1058 | 5.58k | code = gs_note_error(gs_error_syntaxerror); |
1059 | 5.58k | goto error; |
1060 | 5.61k | } |
1061 | | |
1062 | 104k | if (pdfi_count_stack(ctx) > 0 && |
1063 | 104k | ((ctx->stack_top[-1] > PDF_TOKEN_AS_OBJ(TOKEN__LAST_KEY) && |
1064 | 104k | (ctx->stack_top[-1])->object_num == obj) |
1065 | 104k | || ctx->stack_top[-1] == PDF_NULL_OBJ)) { |
1066 | 104k | *object = ctx->stack_top[-1]; |
1067 | 104k | pdfi_countup(*object); |
1068 | 104k | pdfi_pop(ctx, 1); |
1069 | 104k | if (pdfi_type_of(*object) == PDF_INDIRECT) { |
1070 | 0 | pdf_indirect_ref *iref = (pdf_indirect_ref *)*object; |
1071 | |
|
1072 | 0 | if (iref->ref_object_num == obj) { |
1073 | 0 | code = gs_note_error(gs_error_circular_reference); |
1074 | 0 | pdfi_countdown(*object); |
1075 | 0 | *object = NULL; |
1076 | 0 | goto error; |
1077 | 0 | } |
1078 | 0 | } |
1079 | | /* There's really no point in caching an indirect reference and |
1080 | | * I think it could be potentially confusing to later calls. |
1081 | | */ |
1082 | 104k | if (cache && pdfi_type_of(*object) != PDF_INDIRECT) { |
1083 | 104k | code = pdfi_add_to_cache(ctx, *object); |
1084 | 104k | if (code < 0) { |
1085 | 0 | pdfi_countdown(*object); |
1086 | 0 | goto error; |
1087 | 0 | } |
1088 | 104k | } |
1089 | 104k | } else { |
1090 | 122 | int code1 = 0; |
1091 | | |
1092 | 122 | if (pdfi_count_stack(ctx) > 0) |
1093 | 102 | pdfi_pop(ctx, 1); |
1094 | | |
1095 | 122 | if (entry->free) { |
1096 | 0 | char extra_info[gp_file_name_sizeof]; |
1097 | |
|
1098 | 0 | gs_snprintf(extra_info, sizeof(extra_info), "Attempt to dereference free object %"PRIu64", treating as NULL object.\n", entry->object_num); |
1099 | 0 | code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_undefined), NULL, E_PDF_DEREF_FREE_OBJ, "pdfi_dereference", extra_info); |
1100 | 0 | *object = PDF_NULL_OBJ; |
1101 | 0 | if (code < 0) |
1102 | 0 | goto error; |
1103 | 0 | return code; |
1104 | 0 | } |
1105 | 122 | code1 = pdfi_repair_file(ctx); |
1106 | 122 | if (code1 == 0) |
1107 | 17 | return pdfi_dereference_main(ctx, obj, gen, object, cache); |
1108 | | /* Repair failed, just give up and return an error */ |
1109 | 105 | code = gs_note_error(gs_error_undefined); |
1110 | 105 | goto error; |
1111 | 122 | } |
1112 | 104k | } |
1113 | 116k | free_obj: |
1114 | 116k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1115 | 116k | } |
1116 | | |
1117 | 302k | if (ctx->loop_detection && pdf_object_num(*object) != 0) { |
1118 | 293k | code = pdfi_loop_detector_add_object(ctx, (*object)->object_num); |
1119 | 293k | if (code < 0) { |
1120 | 0 | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1121 | 0 | return code; |
1122 | 0 | } |
1123 | 293k | } |
1124 | 302k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1125 | 302k | return 0; |
1126 | | |
1127 | 49.5k | error: |
1128 | 49.5k | ctx->encryption.decrypt_strings = saved_decrypt_strings; |
1129 | 49.5k | (void)pdfi_seek(ctx, ctx->main_stream, saved_stream_offset, SEEK_SET); |
1130 | | /* Return the stack to the state at entry */ |
1131 | 49.5k | pdfi_pop(ctx, pdfi_count_stack(ctx) - stack_depth); |
1132 | 49.5k | return code; |
1133 | 302k | } |
1134 | | |
1135 | | int pdfi_dereference(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1136 | 554k | { |
1137 | 554k | return pdfi_dereference_main(ctx, obj, gen, object, true); |
1138 | 554k | } |
1139 | | |
1140 | | int pdfi_dereference_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1141 | 2.45k | { |
1142 | 2.45k | return pdfi_dereference_main(ctx, obj, gen, object, false); |
1143 | 2.45k | } |
1144 | | |
1145 | | /* do a derefence with loop detection */ |
1146 | | int pdfi_deref_loop_detect(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1147 | 228k | { |
1148 | 228k | int code; |
1149 | | |
1150 | 228k | code = pdfi_loop_detector_mark(ctx); |
1151 | 228k | if (code < 0) |
1152 | 0 | return code; |
1153 | | |
1154 | 228k | code = pdfi_dereference(ctx, obj, gen, object); |
1155 | 228k | (void)pdfi_loop_detector_cleartomark(ctx); |
1156 | 228k | return code; |
1157 | 228k | } |
1158 | | |
1159 | | int pdfi_deref_loop_detect_nocache(pdf_context *ctx, uint64_t obj, uint64_t gen, pdf_obj **object) |
1160 | 2.45k | { |
1161 | 2.45k | int code; |
1162 | | |
1163 | 2.45k | code = pdfi_loop_detector_mark(ctx); |
1164 | 2.45k | if (code < 0) |
1165 | 0 | return code; |
1166 | | |
1167 | 2.45k | code = pdfi_dereference_nocache(ctx, obj, gen, object); |
1168 | 2.45k | (void)pdfi_loop_detector_cleartomark(ctx); |
1169 | 2.45k | return code; |
1170 | 2.45k | } |
1171 | | |
1172 | | static int pdfi_resolve_indirect_array(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1173 | 0 | { |
1174 | 0 | int code = 0; |
1175 | 0 | uint64_t index, arraysize; |
1176 | 0 | pdf_obj *object = NULL; |
1177 | 0 | pdf_array *array = (pdf_array *)obj; |
1178 | |
|
1179 | 0 | arraysize = pdfi_array_size(array); |
1180 | 0 | for (index = 0; index < arraysize; index++) { |
1181 | 0 | if (ctx->loop_detection != NULL) { |
1182 | 0 | code = pdfi_loop_detector_mark(ctx); |
1183 | 0 | if (code < 0) |
1184 | 0 | return code; |
1185 | 0 | } |
1186 | | |
1187 | 0 | code = pdfi_array_get_no_store_R(ctx, array, index, &object); |
1188 | |
|
1189 | 0 | if (ctx->loop_detection != NULL) { |
1190 | 0 | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1191 | 0 | if (code1 < 0) |
1192 | 0 | return code1; |
1193 | 0 | } |
1194 | | |
1195 | 0 | if (code == gs_error_circular_reference) { |
1196 | | /* Previously we just left as an indirect reference, but now we want |
1197 | | * to return the error so we don't end up replacing indirect references |
1198 | | * to objects with circular references. |
1199 | | */ |
1200 | 0 | } else { |
1201 | 0 | if (code < 0) goto exit; |
1202 | 0 | if (recurse) { |
1203 | 0 | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, object, recurse); |
1204 | 0 | if (code < 0) goto exit; |
1205 | 0 | } |
1206 | | /* don't store the object if it's a stream (leave as a ref) */ |
1207 | 0 | if (pdfi_type_of(object) != PDF_STREAM) |
1208 | 0 | code = pdfi_array_put(ctx, array, index, object); |
1209 | 0 | } |
1210 | 0 | if (code < 0) goto exit; |
1211 | | |
1212 | 0 | pdfi_countdown(object); |
1213 | 0 | object = NULL; |
1214 | 0 | } |
1215 | | |
1216 | 0 | exit: |
1217 | 0 | pdfi_countdown(object); |
1218 | 0 | return code; |
1219 | 0 | } |
1220 | | |
1221 | | static int pdfi_resolve_indirect_dict(pdf_context *ctx, pdf_obj *obj, bool recurse) |
1222 | 0 | { |
1223 | 0 | int code = 0; |
1224 | 0 | pdf_dict *dict = (pdf_dict *)obj; |
1225 | 0 | pdf_name *Key = NULL; |
1226 | 0 | pdf_obj *Value = NULL; |
1227 | 0 | uint64_t index, dictsize; |
1228 | |
|
1229 | 0 | dictsize = pdfi_dict_entries(dict); |
1230 | | |
1231 | | /* Note: I am not using pdfi_dict_first/next because of needing to handle |
1232 | | * circular references. |
1233 | | */ |
1234 | 0 | for (index=0; index<dictsize; index ++) { |
1235 | 0 | Key = (pdf_name *)dict->list[index].key; |
1236 | 0 | if (pdfi_name_is(Key, "Parent")) |
1237 | 0 | continue; |
1238 | | |
1239 | 0 | if (ctx->loop_detection != NULL) { |
1240 | 0 | code = pdfi_loop_detector_mark(ctx); |
1241 | 0 | if (code < 0) |
1242 | 0 | return code; |
1243 | 0 | } |
1244 | | |
1245 | 0 | code = pdfi_dict_get_no_store_R_key(ctx, dict, Key, &Value); |
1246 | |
|
1247 | 0 | if (ctx->loop_detection != NULL) { |
1248 | 0 | int code1 = pdfi_loop_detector_cleartomark(ctx); |
1249 | 0 | if (code1 < 0) |
1250 | 0 | return code1; |
1251 | 0 | } |
1252 | | |
1253 | 0 | if (code == gs_error_circular_reference) { |
1254 | | /* Just leave as an indirect ref */ |
1255 | 0 | code = 0; |
1256 | 0 | } else { |
1257 | 0 | if (code < 0) goto exit; |
1258 | 0 | if (recurse) { |
1259 | 0 | code = pdfi_resolve_indirect_loop_detect(ctx, NULL, Value, recurse); |
1260 | 0 | if (code < 0) |
1261 | 0 | goto exit; |
1262 | 0 | } |
1263 | | /* don't store the object if it's a stream (leave as a ref) */ |
1264 | 0 | if (pdfi_type_of(Value) != PDF_STREAM) |
1265 | 0 | code = pdfi_dict_put_obj(ctx, dict, (pdf_obj *)Key, Value, true); |
1266 | 0 | } |
1267 | 0 | if (code < 0) goto exit; |
1268 | | |
1269 | 0 | pdfi_countdown(Value); |
1270 | 0 | Value = NULL; |
1271 | 0 | } |
1272 | | |
1273 | 0 | exit: |
1274 | 0 | pdfi_countdown(Value); |
1275 | 0 | return code; |
1276 | 0 | } |
1277 | | |
1278 | | /* Resolve all the indirect references for an object |
1279 | | * Note: This can be recursive |
1280 | | */ |
1281 | | int pdfi_resolve_indirect(pdf_context *ctx, pdf_obj *value, bool recurse) |
1282 | 0 | { |
1283 | 0 | int code = 0; |
1284 | |
|
1285 | 0 | switch(pdfi_type_of(value)) { |
1286 | 0 | case PDF_ARRAY: |
1287 | 0 | code = pdfi_resolve_indirect_array(ctx, value, recurse); |
1288 | 0 | break; |
1289 | 0 | case PDF_DICT: |
1290 | 0 | code = pdfi_resolve_indirect_dict(ctx, value, recurse); |
1291 | 0 | break; |
1292 | 0 | default: |
1293 | 0 | break; |
1294 | 0 | } |
1295 | 0 | return code; |
1296 | 0 | } |
1297 | | |
1298 | | /* Resolve all the indirect references for an object |
1299 | | * Resolve indirect references, either one level or recursively, with loop detect on |
1300 | | * the parent (can by NULL) and the value. |
1301 | | */ |
1302 | | int pdfi_resolve_indirect_loop_detect(pdf_context *ctx, pdf_obj *parent, pdf_obj *value, bool recurse) |
1303 | 0 | { |
1304 | 0 | int code = 0; |
1305 | |
|
1306 | 0 | code = pdfi_loop_detector_mark(ctx); |
1307 | 0 | if (code < 0) goto exit; |
1308 | 0 | if (parent && parent->object_num != 0) { |
1309 | 0 | code = pdfi_loop_detector_add_object(ctx, parent->object_num); |
1310 | 0 | if (code < 0) goto exit; |
1311 | 0 | } |
1312 | | |
1313 | 0 | if (pdf_object_num(value) != 0) { |
1314 | 0 | if (pdfi_loop_detector_check_object(ctx, value->object_num)) { |
1315 | 0 | code = gs_note_error(gs_error_circular_reference); |
1316 | 0 | goto exit; |
1317 | 0 | } |
1318 | 0 | code = pdfi_loop_detector_add_object(ctx, value->object_num); |
1319 | 0 | if (code < 0) goto exit; |
1320 | 0 | } |
1321 | 0 | code = pdfi_resolve_indirect(ctx, value, recurse); |
1322 | |
|
1323 | 0 | exit: |
1324 | 0 | (void)pdfi_loop_detector_cleartomark(ctx); /* Clear to the mark for the current loop */ |
1325 | 0 | return code; |
1326 | 0 | } |