/src/ghostpdl/pdf/pdf_xref.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2018-2025 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* xref parsing */ |
17 | | |
18 | | #include "pdf_int.h" |
19 | | #include "pdf_stack.h" |
20 | | #include "pdf_xref.h" |
21 | | #include "pdf_file.h" |
22 | | #include "pdf_loop_detect.h" |
23 | | #include "pdf_dict.h" |
24 | | #include "pdf_array.h" |
25 | | #include "pdf_repair.h" |
26 | | |
27 | | static int resize_xref(pdf_context *ctx, uint64_t new_size) |
28 | 19.3k | { |
29 | 19.3k | xref_entry *new_xrefs; |
30 | | |
31 | | /* Although we can technically handle object numbers larger than this, on some systems (32-bit Windows) |
32 | | * memset is limited to a (signed!) integer for the size of memory to clear. We could deal |
33 | | * with this by clearing the memory in blocks, but really, this is almost certainly a |
34 | | * corrupted file or something. |
35 | | */ |
36 | 19.3k | if (new_size >= (0x7ffffff / sizeof(xref_entry))) |
37 | 20 | return_error(gs_error_rangecheck); |
38 | | |
39 | 19.3k | new_xrefs = (xref_entry *)gs_alloc_bytes(ctx->memory, (new_size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
40 | 19.3k | if (new_xrefs == NULL){ |
41 | 0 | pdfi_countdown(ctx->xref_table); |
42 | 0 | ctx->xref_table = NULL; |
43 | 0 | return_error(gs_error_VMerror); |
44 | 0 | } |
45 | 19.3k | memset(new_xrefs, 0x00, (new_size) * sizeof(xref_entry)); |
46 | 19.3k | memcpy(new_xrefs, ctx->xref_table->xref, ctx->xref_table->xref_size * sizeof(xref_entry)); |
47 | 19.3k | gs_free_object(ctx->memory, ctx->xref_table->xref, "reallocated xref entries"); |
48 | 19.3k | ctx->xref_table->xref = new_xrefs; |
49 | 19.3k | ctx->xref_table->xref_size = new_size; |
50 | 19.3k | return 0; |
51 | 19.3k | } |
52 | | |
53 | | static int read_xref_stream_entries(pdf_context *ctx, pdf_c_stream *s, int64_t first, int64_t last, int64_t *W) |
54 | 16.7k | { |
55 | 16.7k | uint i, j; |
56 | 16.7k | uint64_t field_width = 0; |
57 | 16.7k | uint32_t type = 0; |
58 | 16.7k | uint64_t objnum = 0, gen = 0; |
59 | 16.7k | byte *Buffer; |
60 | 16.7k | int64_t bytes = 0; |
61 | 16.7k | xref_entry *entry; |
62 | | |
63 | | /* Find max number of bytes to be read */ |
64 | 16.7k | field_width = W[0]; |
65 | 16.7k | if (W[1] > field_width) |
66 | 16.6k | field_width = W[1]; |
67 | 16.7k | if (W[2] > field_width) |
68 | 18 | field_width = W[2]; |
69 | | |
70 | 16.7k | Buffer = gs_alloc_bytes(ctx->memory, field_width, "read_xref_stream_entry working buffer"); |
71 | 16.7k | if (Buffer == NULL) |
72 | 0 | return_error(gs_error_VMerror); |
73 | | |
74 | 710k | for (i=first;i<=last; i++){ |
75 | | /* Defaults if W[n] = 0 */ |
76 | 694k | type = 1; |
77 | 694k | objnum = gen = 0; |
78 | | |
79 | 694k | if (W[0] != 0) { |
80 | 693k | type = 0; |
81 | 693k | bytes = pdfi_read_bytes(ctx, Buffer, 1, W[0], s); |
82 | 693k | if (bytes < W[0]){ |
83 | 129 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)"); |
84 | 129 | return_error(gs_error_ioerror); |
85 | 129 | } |
86 | 1.38M | for (j=0;j<W[0];j++) |
87 | 693k | type = (type << 8) + Buffer[j]; |
88 | 693k | } |
89 | | |
90 | 694k | if (W[1] != 0) { |
91 | 693k | bytes = pdfi_read_bytes(ctx, Buffer, 1, W[1], s); |
92 | 693k | if (bytes < W[1]){ |
93 | 32 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry free working buffer (error)"); |
94 | 32 | return_error(gs_error_ioerror); |
95 | 32 | } |
96 | 2.38M | for (j=0;j<W[1];j++) |
97 | 1.68M | objnum = (objnum << 8) + Buffer[j]; |
98 | 693k | } |
99 | | |
100 | 693k | if (W[2] != 0) { |
101 | 674k | bytes = pdfi_read_bytes(ctx, Buffer, 1, W[2], s); |
102 | 674k | if (bytes < W[2]){ |
103 | 40 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)"); |
104 | 40 | return_error(gs_error_ioerror); |
105 | 40 | } |
106 | 1.36M | for (j=0;j<W[2];j++) |
107 | 689k | gen = (gen << 8) + Buffer[j]; |
108 | 674k | } |
109 | | |
110 | 693k | entry = &ctx->xref_table->xref[i]; |
111 | 693k | if (entry->object_num != 0 && !entry->free) |
112 | 3.92k | continue; |
113 | | |
114 | 690k | entry->compressed = false; |
115 | 690k | entry->free = false; |
116 | 690k | entry->object_num = i; |
117 | 690k | entry->cache = NULL; |
118 | | |
119 | 690k | switch(type) { |
120 | 20.1k | case 0: |
121 | 20.1k | entry->free = true; |
122 | 20.1k | entry->u.uncompressed.offset = objnum; /* For free objects we use the offset to store the object number of the next free object */ |
123 | 20.1k | entry->u.uncompressed.generation_num = gen; /* And the generation number is the numebr to use if this object is used again */ |
124 | 20.1k | break; |
125 | 203k | case 1: |
126 | 203k | entry->u.uncompressed.offset = objnum; |
127 | 203k | entry->u.uncompressed.generation_num = gen; |
128 | 203k | break; |
129 | 466k | case 2: |
130 | 466k | entry->compressed = true; |
131 | 466k | entry->u.compressed.compressed_stream_num = objnum; /* The object number of the compressed stream */ |
132 | 466k | entry->u.compressed.object_index = gen; /* And the index of the object within the stream */ |
133 | 466k | break; |
134 | 146 | default: |
135 | 146 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer"); |
136 | 146 | return_error(gs_error_rangecheck); |
137 | 0 | break; |
138 | 690k | } |
139 | 690k | } |
140 | 16.4k | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer"); |
141 | 16.4k | return 0; |
142 | 16.7k | } |
143 | | |
144 | | /* Forward definition */ |
145 | | static int read_xref(pdf_context *ctx, pdf_c_stream *s); |
146 | | static int pdfi_check_xref_stream(pdf_context *ctx); |
147 | | /* These two routines are recursive.... */ |
148 | | static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num); |
149 | | |
150 | | static int pdfi_process_xref_stream(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *s) |
151 | 12.9k | { |
152 | 12.9k | pdf_c_stream *XRefStrm; |
153 | 12.9k | int code, i; |
154 | 12.9k | pdf_dict *sdict = NULL; |
155 | 12.9k | pdf_name *n; |
156 | 12.9k | pdf_array *a; |
157 | 12.9k | int64_t size; |
158 | 12.9k | int64_t num; |
159 | 12.9k | int64_t W[3] = {0, 0, 0}; |
160 | 12.9k | int objnum; |
161 | 12.9k | bool known = false; |
162 | | |
163 | 12.9k | if (pdfi_type_of(stream_obj) != PDF_STREAM) |
164 | 0 | return_error(gs_error_typecheck); |
165 | | |
166 | 12.9k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)stream_obj, &sdict); |
167 | 12.9k | if (code < 0) |
168 | 0 | return code; |
169 | | |
170 | 12.9k | code = pdfi_dict_get_type(ctx, sdict, "Type", PDF_NAME, (pdf_obj **)&n); |
171 | 12.9k | if (code < 0) |
172 | 60 | return code; |
173 | | |
174 | 12.8k | if (n->length != 4 || memcmp(n->data, "XRef", 4) != 0) { |
175 | 21 | pdfi_countdown(n); |
176 | 21 | return_error(gs_error_syntaxerror); |
177 | 21 | } |
178 | 12.8k | pdfi_countdown(n); |
179 | | |
180 | 12.8k | code = pdfi_dict_get_int(ctx, sdict, "Size", &size); |
181 | 12.8k | if (code < 0) |
182 | 11 | return code; |
183 | 12.8k | if (size < 1) |
184 | 11 | return 0; |
185 | | |
186 | 12.8k | if (size < 0 || size > floor((double)ARCH_MAX_SIZE_T / (double)sizeof(xref_entry))) |
187 | 0 | return_error(gs_error_rangecheck); |
188 | | |
189 | | /* If this is the first xref stream then allocate the xref table and store the trailer */ |
190 | 12.8k | if (ctx->xref_table == NULL) { |
191 | 7.90k | ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table"); |
192 | 7.90k | if (ctx->xref_table == NULL) { |
193 | 0 | return_error(gs_error_VMerror); |
194 | 0 | } |
195 | 7.90k | memset(ctx->xref_table, 0x00, sizeof(xref_table_t)); |
196 | 7.90k | ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
197 | 7.90k | if (ctx->xref_table->xref == NULL){ |
198 | 3 | gs_free_object(ctx->memory, ctx->xref_table, "failed to allocate xref table entries"); |
199 | 3 | ctx->xref_table = NULL; |
200 | 3 | return_error(gs_error_VMerror); |
201 | 3 | } |
202 | 7.90k | memset(ctx->xref_table->xref, 0x00, size * sizeof(xref_entry)); |
203 | 7.90k | ctx->xref_table->ctx = ctx; |
204 | 7.90k | ctx->xref_table->type = PDF_XREF_TABLE; |
205 | 7.90k | ctx->xref_table->xref_size = size; |
206 | | #if REFCNT_DEBUG |
207 | | ctx->xref_table->UID = ctx->ref_UID++; |
208 | | outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID); |
209 | | #endif |
210 | 7.90k | pdfi_countup(ctx->xref_table); |
211 | | |
212 | 7.90k | pdfi_countdown(ctx->Trailer); |
213 | | |
214 | 7.90k | ctx->Trailer = sdict; |
215 | 7.90k | pdfi_countup(sdict); |
216 | 7.90k | } else { |
217 | 4.91k | if (size > ctx->xref_table->xref_size) |
218 | 4 | return_error(gs_error_rangecheck); |
219 | | |
220 | 4.90k | code = pdfi_merge_dicts(ctx, ctx->Trailer, sdict); |
221 | 4.90k | if (code < 0 && (code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "pdfi_process_xref_stream", NULL)) < 0) { |
222 | 0 | goto exit; |
223 | 0 | } |
224 | 4.90k | } |
225 | | |
226 | 12.8k | pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, stream_obj), SEEK_SET); |
227 | | |
228 | | /* Bug #691220 has a PDF file with a compressed XRef, the stream dictionary has |
229 | | * a /DecodeParms entry for the stream, which has a /Colors value of 5, which makes |
230 | | * *no* sense whatever. If we try to apply a Predictor then we end up in a loop trying |
231 | | * to read 5 colour samples. Rather than meddles with more parameters to the filter |
232 | | * code, we'll just remove the Colors entry from the DecodeParms dictionary, |
233 | | * because it is nonsense. This means we'll get the (sensible) default value of 1. |
234 | | */ |
235 | 12.8k | code = pdfi_dict_known(ctx, sdict, "DecodeParms", &known); |
236 | 12.8k | if (code < 0) |
237 | 0 | return code; |
238 | | |
239 | 12.8k | if (known) { |
240 | 11.8k | pdf_dict *DP; |
241 | 11.8k | double f; |
242 | 11.8k | pdf_obj *name; |
243 | | |
244 | 11.8k | code = pdfi_dict_get_type(ctx, sdict, "DecodeParms", PDF_DICT, (pdf_obj **)&DP); |
245 | 11.8k | if (code < 0) |
246 | 1 | return code; |
247 | | |
248 | 11.8k | code = pdfi_dict_knownget_number(ctx, DP, "Colors", &f); |
249 | 11.8k | if (code < 0) { |
250 | 0 | pdfi_countdown(DP); |
251 | 0 | return code; |
252 | 0 | } |
253 | 11.8k | if (code > 0 && f != (double)1) |
254 | 0 | { |
255 | 0 | code = pdfi_name_alloc(ctx, (byte *)"Colors", 6, &name); |
256 | 0 | if (code < 0) { |
257 | 0 | pdfi_countdown(DP); |
258 | 0 | return code; |
259 | 0 | } |
260 | 0 | pdfi_countup(name); |
261 | |
|
262 | 0 | code = pdfi_dict_delete_pair(ctx, DP, (pdf_name *)name); |
263 | 0 | pdfi_countdown(name); |
264 | 0 | if (code < 0) { |
265 | 0 | pdfi_countdown(DP); |
266 | 0 | return code; |
267 | 0 | } |
268 | 0 | } |
269 | 11.8k | pdfi_countdown(DP); |
270 | 11.8k | } |
271 | | |
272 | 12.8k | code = pdfi_filter_no_decryption(ctx, stream_obj, s, &XRefStrm, false); |
273 | 12.8k | if (code < 0) { |
274 | 45 | pdfi_countdown(ctx->xref_table); |
275 | 45 | ctx->xref_table = NULL; |
276 | 45 | return code; |
277 | 45 | } |
278 | | |
279 | 12.7k | code = pdfi_dict_get_type(ctx, sdict, "W", PDF_ARRAY, (pdf_obj **)&a); |
280 | 12.7k | if (code < 0) { |
281 | 8 | pdfi_close_file(ctx, XRefStrm); |
282 | 8 | pdfi_countdown(ctx->xref_table); |
283 | 8 | ctx->xref_table = NULL; |
284 | 8 | return code; |
285 | 8 | } |
286 | | |
287 | 12.7k | if (pdfi_array_size(a) != 3) { |
288 | 11 | pdfi_countdown(a); |
289 | 11 | pdfi_close_file(ctx, XRefStrm); |
290 | 11 | pdfi_countdown(ctx->xref_table); |
291 | 11 | ctx->xref_table = NULL; |
292 | 11 | return_error(gs_error_rangecheck); |
293 | 11 | } |
294 | 50.8k | for (i=0;i<3;i++) { |
295 | 38.1k | code = pdfi_array_get_int(ctx, a, (uint64_t)i, (int64_t *)&W[i]); |
296 | 38.1k | if (code < 0 || W[i] < 0) { |
297 | 34 | pdfi_countdown(a); |
298 | 34 | pdfi_close_file(ctx, XRefStrm); |
299 | 34 | pdfi_countdown(ctx->xref_table); |
300 | 34 | ctx->xref_table = NULL; |
301 | 34 | if (W[i] < 0) |
302 | 9 | code = gs_note_error(gs_error_rangecheck); |
303 | 34 | return code; |
304 | 34 | } |
305 | 38.1k | } |
306 | 12.7k | pdfi_countdown(a); |
307 | | |
308 | | /* W[0] is either: |
309 | | * 0 (no type field) or a single byte with the type. |
310 | | * W[1] is either: |
311 | | * The object number of the next free object, the byte offset of this object in the file or the object5 number of the object stream where this object is stored. |
312 | | * W[2] is either: |
313 | | * The generation number to use if this object is used again, the generation number of the object or the index of this object within the object stream. |
314 | | * |
315 | | * Object and generation numbers are limited to unsigned 64-bit values, as are bytes offsets in the file, indexes of objects within the stream likewise (actually |
316 | | * most of these are generally 32-bit max). So we can limit the field widths to 8 bytes, enough to hold a 64-bit number. |
317 | | * Even if a later version of the spec makes these larger (which seems unlikely!) we still cna't cope with integers > 64-bits. |
318 | | */ |
319 | 12.7k | if (W[0] > 1 || W[1] > 8 || W[2] > 8) { |
320 | 29 | pdfi_close_file(ctx, XRefStrm); |
321 | 29 | pdfi_countdown(ctx->xref_table); |
322 | 29 | ctx->xref_table = NULL; |
323 | 29 | return code; |
324 | 29 | } |
325 | | |
326 | 12.6k | code = pdfi_dict_get_type(ctx, sdict, "Index", PDF_ARRAY, (pdf_obj **)&a); |
327 | 12.6k | if (code == gs_error_undefined) { |
328 | 4.63k | code = read_xref_stream_entries(ctx, XRefStrm, 0, size - 1, W); |
329 | 4.63k | if (code < 0) { |
330 | 113 | pdfi_close_file(ctx, XRefStrm); |
331 | 113 | pdfi_countdown(ctx->xref_table); |
332 | 113 | ctx->xref_table = NULL; |
333 | 113 | return code; |
334 | 113 | } |
335 | 8.04k | } else { |
336 | 8.04k | int64_t start, size; |
337 | | |
338 | 8.04k | if (code < 0) { |
339 | 2 | pdfi_close_file(ctx, XRefStrm); |
340 | 2 | pdfi_countdown(ctx->xref_table); |
341 | 2 | ctx->xref_table = NULL; |
342 | 2 | return code; |
343 | 2 | } |
344 | | |
345 | 8.04k | if (pdfi_array_size(a) & 1) { |
346 | 9 | pdfi_countdown(a); |
347 | 9 | pdfi_close_file(ctx, XRefStrm); |
348 | 9 | pdfi_countdown(ctx->xref_table); |
349 | 9 | ctx->xref_table = NULL; |
350 | 9 | return_error(gs_error_rangecheck); |
351 | 9 | } |
352 | | |
353 | 19.9k | for (i=0;i < pdfi_array_size(a);i+=2){ |
354 | 12.1k | code = pdfi_array_get_int(ctx, a, (uint64_t)i, &start); |
355 | 12.1k | if (code < 0 || start < 0) { |
356 | 18 | pdfi_countdown(a); |
357 | 18 | pdfi_close_file(ctx, XRefStrm); |
358 | 18 | pdfi_countdown(ctx->xref_table); |
359 | 18 | ctx->xref_table = NULL; |
360 | 18 | return code; |
361 | 18 | } |
362 | | |
363 | 12.1k | code = pdfi_array_get_int(ctx, a, (uint64_t)i+1, &size); |
364 | 12.1k | if (code < 0) { |
365 | 13 | pdfi_countdown(a); |
366 | 13 | pdfi_close_file(ctx, XRefStrm); |
367 | 13 | pdfi_countdown(ctx->xref_table); |
368 | 13 | ctx->xref_table = NULL; |
369 | 13 | return code; |
370 | 13 | } |
371 | | |
372 | 12.1k | if (size < 1) |
373 | 11 | continue; |
374 | | |
375 | 12.1k | if (start + size >= ctx->xref_table->xref_size) { |
376 | 7.22k | code = resize_xref(ctx, start + size); |
377 | 7.22k | if (code < 0) { |
378 | 7 | pdfi_countdown(a); |
379 | 7 | pdfi_close_file(ctx, XRefStrm); |
380 | 7 | pdfi_countdown(ctx->xref_table); |
381 | 7 | ctx->xref_table = NULL; |
382 | 7 | return code; |
383 | 7 | } |
384 | 7.22k | } |
385 | | |
386 | 12.1k | code = read_xref_stream_entries(ctx, XRefStrm, start, start + size - 1, W); |
387 | 12.1k | if (code < 0) { |
388 | 234 | pdfi_countdown(a); |
389 | 234 | pdfi_close_file(ctx, XRefStrm); |
390 | 234 | pdfi_countdown(ctx->xref_table); |
391 | 234 | ctx->xref_table = NULL; |
392 | 234 | return code; |
393 | 234 | } |
394 | 12.1k | } |
395 | 8.03k | } |
396 | 12.2k | pdfi_countdown(a); |
397 | | |
398 | 12.2k | pdfi_close_file(ctx, XRefStrm); |
399 | | |
400 | 12.2k | code = pdfi_dict_get_int(ctx, sdict, "Prev", &num); |
401 | 12.2k | if (code == gs_error_undefined) |
402 | 5.12k | return 0; |
403 | | |
404 | 7.16k | if (code < 0) |
405 | 12 | return code; |
406 | | |
407 | 7.15k | if (num < 0 || num > ctx->main_stream_length) |
408 | 1.76k | return_error(gs_error_rangecheck); |
409 | | |
410 | 5.38k | if (pdfi_loop_detector_check_object(ctx, num) == true) |
411 | 22 | return_error(gs_error_circular_reference); |
412 | 5.36k | else { |
413 | 5.36k | code = pdfi_loop_detector_add_object(ctx, num); |
414 | 5.36k | if (code < 0) |
415 | 0 | return code; |
416 | 5.36k | } |
417 | | |
418 | 5.36k | if(ctx->args.pdfdebug) |
419 | 0 | outprintf(ctx->memory, "%% Reading /Prev xref\n"); |
420 | | |
421 | 5.36k | pdfi_seek(ctx, s, num, SEEK_SET); |
422 | | |
423 | 5.36k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &objnum); |
424 | 5.36k | if (code == 1) { |
425 | 4.92k | if (pdfi_check_xref_stream(ctx)) |
426 | 4.89k | return pdfi_read_xref_stream_dict(ctx, s, objnum); |
427 | 4.92k | } |
428 | | |
429 | 471 | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
430 | 471 | if (code < 0) |
431 | 0 | return code; |
432 | 471 | if (code == TOKEN_XREF) { |
433 | 53 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_process_xref_stream", NULL)) < 0) { |
434 | 0 | goto exit; |
435 | 0 | } |
436 | | /* Read old-style xref table */ |
437 | 53 | return(read_xref(ctx, ctx->main_stream)); |
438 | 53 | } |
439 | 418 | exit: |
440 | 418 | return_error(gs_error_syntaxerror); |
441 | 471 | } |
442 | | |
443 | | static int pdfi_check_xref_stream(pdf_context *ctx) |
444 | 16.3k | { |
445 | 16.3k | gs_offset_t offset; |
446 | 16.3k | int gen_num, code = 0; |
447 | | |
448 | 16.3k | offset = pdfi_unread_tell(ctx); |
449 | | |
450 | 16.3k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num); |
451 | 16.3k | if (code <= 0) { |
452 | 1.16k | code = 0; |
453 | 1.16k | goto exit; |
454 | 1.16k | } |
455 | | |
456 | | /* Try to read 'obj' */ |
457 | 15.1k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
458 | 15.1k | if (code <= 0) { |
459 | 0 | code = 0; |
460 | 0 | goto exit; |
461 | 0 | } |
462 | | |
463 | | /* Third element must be obj, or it's not a valid xref */ |
464 | 15.1k | if (code != TOKEN_OBJ) |
465 | 1.75k | code = 0; |
466 | 13.4k | else |
467 | 13.4k | code = 1; |
468 | | |
469 | 16.3k | exit: |
470 | 16.3k | pdfi_seek(ctx, ctx->main_stream, offset, SEEK_SET); |
471 | 16.3k | return code; |
472 | 15.1k | } |
473 | | |
474 | | static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num) |
475 | 13.6k | { |
476 | 13.6k | int code; |
477 | 13.6k | int gen_num; |
478 | | |
479 | 13.6k | if (ctx->args.pdfdebug) |
480 | 0 | outprintf(ctx->memory, "\n%% Reading PDF 1.5+ xref stream\n"); |
481 | | |
482 | | /* We have the obj_num. Lets try for obj_num gen obj as a XRef stream */ |
483 | 13.6k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num); |
484 | 13.6k | if (code <= 0) { |
485 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", "")) < 0) { |
486 | 0 | return code; |
487 | 0 | } |
488 | 0 | return(pdfi_repair_file(ctx)); |
489 | 0 | } |
490 | | |
491 | | /* Try to read 'obj' */ |
492 | 13.6k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
493 | 13.6k | if (code < 0) |
494 | 0 | return code; |
495 | 13.6k | if (code == 0) |
496 | 0 | return_error(gs_error_syntaxerror); |
497 | | |
498 | | /* Third element must be obj, or it's not a valid xref */ |
499 | 13.6k | if (code != TOKEN_OBJ) { |
500 | 0 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BAD_XREFSTMOFFSET, "pdfi_read_xref_stream_dict", "")) < 0) { |
501 | 0 | return code; |
502 | 0 | } |
503 | 0 | return(pdfi_repair_file(ctx)); |
504 | 0 | } |
505 | | |
506 | 556k | do { |
507 | 556k | code = pdfi_read_token(ctx, ctx->main_stream, obj_num, gen_num); |
508 | 556k | if (code <= 0) { |
509 | 548 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) { |
510 | 0 | return code; |
511 | 0 | } |
512 | 548 | return pdfi_repair_file(ctx); |
513 | 548 | } |
514 | | |
515 | 556k | if (pdfi_count_stack(ctx) >= 2 && pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) { |
516 | 14.8k | uintptr_t keyword = (uintptr_t)ctx->stack_top[-1]; |
517 | 14.8k | if (keyword == TOKEN_STREAM) { |
518 | 12.9k | pdf_dict *dict; |
519 | 12.9k | pdf_stream *sdict = NULL; |
520 | 12.9k | int64_t Length; |
521 | | |
522 | | /* Remove the 'stream' token from the stack, should leave a dictionary object on the stack */ |
523 | 12.9k | pdfi_pop(ctx, 1); |
524 | 12.9k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) { |
525 | 22 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) { |
526 | 0 | return code; |
527 | 0 | } |
528 | 22 | return pdfi_repair_file(ctx); |
529 | 22 | } |
530 | 12.9k | dict = (pdf_dict *)ctx->stack_top[-1]; |
531 | | |
532 | | /* Convert the dict into a stream (sdict comes back with at least one ref) */ |
533 | 12.9k | code = pdfi_obj_dict_to_stream(ctx, dict, &sdict, true); |
534 | | /* Pop off the dict */ |
535 | 12.9k | pdfi_pop(ctx, 1); |
536 | 12.9k | if (code < 0) { |
537 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREFSTREAM, "pdfi_read_xref_stream_dict", NULL)) < 0) { |
538 | 0 | return code; |
539 | 0 | } |
540 | | /* TODO: should I return code instead of trying to repair? |
541 | | * Normally the above routine should not fail so something is |
542 | | * probably seriously fubar. |
543 | | */ |
544 | 0 | return pdfi_repair_file(ctx); |
545 | 0 | } |
546 | 12.9k | dict = NULL; |
547 | | |
548 | | /* Init the stuff for the stream */ |
549 | 12.9k | sdict->stream_offset = pdfi_unread_tell(ctx); |
550 | 12.9k | sdict->object_num = obj_num; |
551 | 12.9k | sdict->generation_num = gen_num; |
552 | | |
553 | 12.9k | code = pdfi_dict_get_int(ctx, sdict->stream_dict, "Length", &Length); |
554 | 12.9k | if (code < 0) { |
555 | | /* TODO: Not positive this will actually have a length -- just use 0 */ |
556 | 63 | (void)pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num); |
557 | 63 | code = 0; |
558 | 63 | Length = 0; |
559 | 63 | } |
560 | 12.9k | sdict->Length = Length; |
561 | 12.9k | sdict->length_valid = true; |
562 | | |
563 | 12.9k | code = pdfi_process_xref_stream(ctx, sdict, ctx->main_stream); |
564 | 12.9k | pdfi_countdown(sdict); |
565 | 12.9k | if (code < 0) { |
566 | 3.06k | pdfi_set_error(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_read_xref_stream_dict", NULL); |
567 | 3.06k | return code; |
568 | 3.06k | } |
569 | 9.85k | break; |
570 | 12.9k | } else if (keyword == TOKEN_ENDOBJ) { |
571 | | /* Something went wrong, this is not a stream dictionary */ |
572 | 159 | if ((code = pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num)) < 0) { |
573 | 0 | return code; |
574 | 0 | } |
575 | 159 | return(pdfi_repair_file(ctx)); |
576 | 159 | } |
577 | 14.8k | } |
578 | 556k | } while(1); |
579 | 9.85k | return 0; |
580 | 13.6k | } |
581 | | |
582 | | static int skip_to_digit(pdf_context *ctx, pdf_c_stream *s, unsigned int limit) |
583 | 3.23k | { |
584 | 3.23k | int c, read = 0; |
585 | | |
586 | 11.9k | do { |
587 | 11.9k | c = pdfi_read_byte(ctx, s); |
588 | 11.9k | if (c < 0) |
589 | 0 | return_error(gs_error_ioerror); |
590 | 11.9k | if (c >= '0' && c <= '9') { |
591 | 2.93k | pdfi_unread_byte(ctx, s, (byte)c); |
592 | 2.93k | return read; |
593 | 2.93k | } |
594 | 8.96k | read++; |
595 | 8.96k | } while (read < limit); |
596 | | |
597 | 297 | return read; |
598 | 3.23k | } |
599 | | |
600 | | static int read_digits(pdf_context *ctx, pdf_c_stream *s, byte *Buffer, int limit) |
601 | 3.23k | { |
602 | 3.23k | int c, read = 0; |
603 | | |
604 | | /* Since the "limit" is a value calculated by the caller, |
605 | | it's easier to check it in one place (here) than before |
606 | | every call. |
607 | | */ |
608 | 3.23k | if (limit <= 0) |
609 | 309 | return_error(gs_error_syntaxerror); |
610 | | |
611 | | /* We assume that Buffer always has limit+1 bytes available, so we can |
612 | | * safely terminate it. */ |
613 | | |
614 | 17.5k | do { |
615 | 17.5k | c = pdfi_read_byte(ctx, s); |
616 | 17.5k | if (c < 0) |
617 | 0 | return_error(gs_error_ioerror); |
618 | 17.5k | if (c < '0' || c > '9') { |
619 | 1.27k | pdfi_unread_byte(ctx, s, c); |
620 | 1.27k | break; |
621 | 1.27k | } |
622 | 16.2k | *Buffer++ = (byte)c; |
623 | 16.2k | read++; |
624 | 16.2k | } while (read < limit); |
625 | 2.92k | *Buffer = 0; |
626 | | |
627 | 2.92k | return read; |
628 | 2.92k | } |
629 | | |
630 | | |
631 | | static int read_xref_entry_slow(pdf_context *ctx, pdf_c_stream *s, gs_offset_t *offset, uint32_t *generation_num, unsigned char *free) |
632 | 1.64k | { |
633 | 1.64k | byte Buffer[20]; |
634 | 1.64k | int c, code, read = 0; |
635 | | |
636 | | /* First off, find a number. If we don't find one, and read 20 bytes, throw an error */ |
637 | 1.64k | code = skip_to_digit(ctx, s, 20); |
638 | 1.64k | if (code < 0) |
639 | 0 | return code; |
640 | 1.64k | read += code; |
641 | | |
642 | | /* Now read a number */ |
643 | 1.64k | code = read_digits(ctx, s, (byte *)&Buffer, (read > 10 ? 20 - read : 10)); |
644 | 1.64k | if (code < 0) |
645 | 49 | return code; |
646 | 1.59k | read += code; |
647 | | |
648 | 1.59k | *offset = atol((const char *)Buffer); |
649 | | |
650 | | /* find next number */ |
651 | 1.59k | code = skip_to_digit(ctx, s, 20 - read); |
652 | 1.59k | if (code < 0) |
653 | 0 | return code; |
654 | 1.59k | read += code; |
655 | | |
656 | | /* and read it */ |
657 | 1.59k | code = read_digits(ctx, s, (byte *)&Buffer, (read > 15 ? 20 - read : 5)); |
658 | 1.59k | if (code < 0) |
659 | 260 | return code; |
660 | 1.33k | read += code; |
661 | | |
662 | 1.33k | *generation_num = atol((const char *)Buffer); |
663 | | |
664 | 2.30k | do { |
665 | 2.30k | c = pdfi_read_byte(ctx, s); |
666 | 2.30k | if (c < 0) |
667 | 0 | return_error(gs_error_ioerror); |
668 | 2.30k | read ++; |
669 | 2.30k | if (c == 0x09 || c == 0x20) |
670 | 993 | continue; |
671 | 1.30k | if (c == 'n' || c == 'f') { |
672 | 737 | *free = (unsigned char)c; |
673 | 737 | break; |
674 | 737 | } else { |
675 | 570 | return_error(gs_error_syntaxerror); |
676 | 570 | } |
677 | 1.30k | } while (read < 20); |
678 | 762 | if (read >= 20) |
679 | 33 | return_error(gs_error_syntaxerror); |
680 | | |
681 | 1.83k | do { |
682 | 1.83k | c = pdfi_read_byte(ctx, s); |
683 | 1.83k | if (c < 0) |
684 | 0 | return_error(gs_error_syntaxerror); |
685 | 1.83k | read++; |
686 | 1.83k | if (c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a) |
687 | 868 | continue; |
688 | 1.83k | } while (read < 20); |
689 | 729 | return 0; |
690 | 729 | } |
691 | | |
692 | | static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char free) |
693 | 729 | { |
694 | 729 | byte b[20], *ptr = B; |
695 | 729 | int index = 0; |
696 | | |
697 | 729 | gs_snprintf((char *)b, sizeof(b), "%"PRIdOFFSET"", o); |
698 | 729 | if (strlen((const char *)b) > 10) |
699 | 0 | return_error(gs_error_rangecheck); |
700 | 5.98k | for(index=0;index < 10 - strlen((const char *)b); index++) { |
701 | 5.25k | *ptr++ = 0x30; |
702 | 5.25k | } |
703 | 729 | memcpy(ptr, b, strlen((const char *)b)); |
704 | 729 | ptr += strlen((const char *)b); |
705 | 729 | *ptr++ = 0x20; |
706 | | |
707 | 729 | gs_snprintf((char *)b, sizeof(b), "%d", g); |
708 | 729 | if (strlen((const char *)b) > 5) |
709 | 0 | return_error(gs_error_rangecheck); |
710 | 3.29k | for(index=0;index < 5 - strlen((const char *)b);index++) { |
711 | 2.56k | *ptr++ = 0x30; |
712 | 2.56k | } |
713 | 729 | memcpy(ptr, b, strlen((const char *)b)); |
714 | 729 | ptr += strlen((const char *)b); |
715 | 729 | *ptr++ = 0x20; |
716 | 729 | *ptr++ = free; |
717 | 729 | *ptr++ = 0x20; |
718 | 729 | *ptr++ = 0x0d; |
719 | 729 | return 0; |
720 | 729 | } |
721 | | |
722 | | static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size) |
723 | 36.4k | { |
724 | 36.4k | int code = 0, i, j; |
725 | 36.4k | int start = 0; |
726 | 36.4k | int size = 0; |
727 | 36.4k | int64_t bytes = 0; |
728 | 36.4k | char Buffer[21]; |
729 | | |
730 | 36.4k | *section_start = *section_size = 0; |
731 | | |
732 | 36.4k | if (ctx->args.pdfdebug) |
733 | 0 | outprintf(ctx->memory, "\n%% Reading xref section\n"); |
734 | | |
735 | 36.4k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &start); |
736 | 36.4k | if (code < 0) { |
737 | | /* Not an int, might be a keyword */ |
738 | 9.76k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
739 | 9.76k | if (code < 0) |
740 | 0 | return code; |
741 | | |
742 | 9.76k | if (code != TOKEN_TRAILER) { |
743 | | /* element is not an integer, and not a keyword - not a valid xref */ |
744 | 150 | return_error(gs_error_typecheck); |
745 | 150 | } |
746 | 9.61k | return 1; |
747 | 9.76k | } |
748 | | |
749 | 26.6k | if (start < 0) |
750 | 23 | return_error(gs_error_rangecheck); |
751 | | |
752 | 26.6k | *section_start = start; |
753 | | |
754 | 26.6k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &size); |
755 | 26.6k | if (code < 0) |
756 | 20 | return code; |
757 | 26.6k | if (code == 0) |
758 | 46 | return_error(gs_error_syntaxerror); |
759 | | |
760 | | /* Zero sized xref sections are valid; see the file attached to |
761 | | * bug 704947 for an example. */ |
762 | 26.5k | if (size < 0) |
763 | 12 | return_error(gs_error_rangecheck); |
764 | | |
765 | 26.5k | *section_size = size; |
766 | | |
767 | 26.5k | if (ctx->args.pdfdebug) |
768 | 0 | outprintf(ctx->memory, "\n%% Section starts at %d and has %d entries\n", (unsigned int) start, (unsigned int)size); |
769 | | |
770 | 26.5k | if (size > 0) { |
771 | 26.1k | if (ctx->xref_table == NULL) { |
772 | 9.43k | ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table"); |
773 | 9.43k | if (ctx->xref_table == NULL) |
774 | 0 | return_error(gs_error_VMerror); |
775 | 9.43k | memset(ctx->xref_table, 0x00, sizeof(xref_table_t)); |
776 | | |
777 | 9.43k | ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, (start + size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
778 | 9.43k | if (ctx->xref_table->xref == NULL){ |
779 | 27 | gs_free_object(ctx->memory, ctx->xref_table, "free xref table on error allocating entries"); |
780 | 27 | ctx->xref_table = NULL; |
781 | 27 | return_error(gs_error_VMerror); |
782 | 27 | } |
783 | | #if REFCNT_DEBUG |
784 | | ctx->xref_table->UID = ctx->ref_UID++; |
785 | | outprintf(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID); |
786 | | #endif |
787 | | |
788 | 9.41k | memset(ctx->xref_table->xref, 0x00, (start + size) * sizeof(xref_entry)); |
789 | 9.41k | ctx->xref_table->ctx = ctx; |
790 | 9.41k | ctx->xref_table->type = PDF_XREF_TABLE; |
791 | 9.41k | ctx->xref_table->xref_size = start + size; |
792 | 9.41k | pdfi_countup(ctx->xref_table); |
793 | 16.6k | } else { |
794 | 16.6k | if (start + size > ctx->xref_table->xref_size) { |
795 | 12.1k | code = resize_xref(ctx, start + size); |
796 | 12.1k | if (code < 0) |
797 | 13 | return code; |
798 | 12.1k | } |
799 | 16.6k | } |
800 | 26.1k | } |
801 | | |
802 | 26.5k | pdfi_skip_white(ctx, s); |
803 | 475k | for (i=0;i< size;i++){ |
804 | 449k | xref_entry *entry = &ctx->xref_table->xref[i + start]; |
805 | 449k | unsigned char free; |
806 | 449k | gs_offset_t off; |
807 | 449k | unsigned int gen; |
808 | | |
809 | 449k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 20, s); |
810 | 449k | if (bytes < 20) |
811 | 2 | return_error(gs_error_ioerror); |
812 | 449k | j = 19; |
813 | 449k | if ((Buffer[19] != 0x0a && Buffer[19] != 0x0d) || (Buffer[18] != 0x0d && Buffer[18] != 0x0a && Buffer[18] != 0x20)) |
814 | 18.9k | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_SIZE, "read_xref_section", NULL); |
815 | 472k | while (Buffer[j] != 0x0D && Buffer[j] != 0x0A) { |
816 | 23.3k | pdfi_unread_byte(ctx, s, (byte)Buffer[j]); |
817 | 23.3k | if (--j < 0) { |
818 | 783 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_NO_EOL, "read_xref_section", NULL); |
819 | 783 | outprintf(ctx->memory, "Invalid xref entry, line terminator missing.\n"); |
820 | 783 | code = read_xref_entry_slow(ctx, s, &off, &gen, &free); |
821 | 783 | if (code < 0) |
822 | 402 | return code; |
823 | 381 | code = write_offset((byte *)Buffer, off, gen, free); |
824 | 381 | if (code < 0) |
825 | 0 | return code; |
826 | 381 | j = 19; |
827 | 381 | break; |
828 | 381 | } |
829 | 23.3k | } |
830 | 449k | Buffer[j] = 0x00; |
831 | 449k | if (entry->object_num != 0) |
832 | 9.03k | continue; |
833 | | |
834 | 440k | if (sscanf(Buffer, "%"PRIdOFFSET" %d %c", &entry->u.uncompressed.offset, &entry->u.uncompressed.generation_num, &free) != 3) { |
835 | 858 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_FORMAT, "read_xref_section", NULL); |
836 | 858 | outprintf(ctx->memory, "Invalid xref entry, incorrect format.\n"); |
837 | 858 | pdfi_unread(ctx, s, (byte *)Buffer, 20); |
838 | 858 | code = read_xref_entry_slow(ctx, s, &off, &gen, &free); |
839 | 858 | if (code < 0) |
840 | 510 | return code; |
841 | 348 | code = write_offset((byte *)Buffer, off, gen, free); |
842 | 348 | if (code < 0) |
843 | 0 | return code; |
844 | 348 | } |
845 | | |
846 | 439k | entry->compressed = false; |
847 | 439k | entry->object_num = i + start; |
848 | 439k | if (free == 'f') |
849 | 134k | entry->free = true; |
850 | 439k | if(free == 'n') |
851 | 304k | entry->free = false; |
852 | 439k | if (entry->object_num == 0) { |
853 | 6.35k | if (!entry->free) { |
854 | 78 | pdfi_set_warning(ctx, 0, NULL, W_PDF_XREF_OBJECT0_NOT_FREE, "read_xref_section", NULL); |
855 | 78 | } |
856 | 6.35k | } |
857 | 439k | } |
858 | | |
859 | 25.6k | return 0; |
860 | 26.5k | } |
861 | | |
862 | | static int read_xref(pdf_context *ctx, pdf_c_stream *s) |
863 | 10.8k | { |
864 | 10.8k | int code = 0; |
865 | 10.8k | pdf_dict *d = NULL; |
866 | 10.8k | uint64_t max_obj = 0; |
867 | 10.8k | int64_t num, XRefStm = 0; |
868 | 10.8k | int obj_num; |
869 | 10.8k | bool known = false; |
870 | | |
871 | 10.8k | if (ctx->repaired) |
872 | 5 | return 0; |
873 | | |
874 | 36.4k | do { |
875 | 36.4k | uint64_t section_start, section_size; |
876 | | |
877 | 36.4k | code = read_xref_section(ctx, s, §ion_start, §ion_size); |
878 | 36.4k | if (code < 0) |
879 | 1.20k | return code; |
880 | | |
881 | 35.2k | if (section_size > 0 && section_start + section_size - 1 > max_obj) |
882 | 22.8k | max_obj = section_start + section_size - 1; |
883 | | |
884 | | /* code == 1 => read_xref_section ended with a trailer. */ |
885 | 35.2k | } while (code != 1); |
886 | | |
887 | 9.61k | code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0); |
888 | 9.61k | if (code < 0) |
889 | 177 | return code; |
890 | | |
891 | 9.43k | d = (pdf_dict *)ctx->stack_top[-1]; |
892 | 9.43k | if (pdfi_type_of(d) != PDF_DICT) { |
893 | 12 | pdfi_pop(ctx, 1); |
894 | 12 | return_error(gs_error_typecheck); |
895 | 12 | } |
896 | 9.42k | pdfi_countup(d); |
897 | 9.42k | pdfi_pop(ctx, 1); |
898 | | |
899 | | /* We don't want to pollute the Trailer dictionary with any XRefStm key/value pairs |
900 | | * which will happen when we do pdfi_merge_dicts(). So we get any XRefStm here and |
901 | | * if there was one, remove it from the dictionary before we merge with the |
902 | | * primary trailer. |
903 | | */ |
904 | 9.42k | code = pdfi_dict_get_int(ctx, d, "XRefStm", &XRefStm); |
905 | 9.42k | if (code < 0 && code != gs_error_undefined) |
906 | 2 | goto error; |
907 | | |
908 | 9.42k | if (code == 0) { |
909 | 413 | code = pdfi_dict_delete(ctx, d, "XRefStm"); |
910 | 413 | if (code < 0) |
911 | 0 | goto error; |
912 | 413 | } |
913 | | |
914 | 9.42k | if (ctx->Trailer == NULL) { |
915 | 8.24k | ctx->Trailer = d; |
916 | 8.24k | pdfi_countup(d); |
917 | 8.24k | } else { |
918 | 1.18k | code = pdfi_merge_dicts(ctx, ctx->Trailer, d); |
919 | 1.18k | if (code < 0) { |
920 | 0 | if ((code = pdfi_set_error_stop(ctx, code, NULL, E_PDF_BADXREF, "read_xref", "")) < 0) { |
921 | 0 | return code; |
922 | 0 | } |
923 | 0 | } |
924 | 1.18k | } |
925 | | |
926 | | /* Check if the highest subsection + size exceeds the /Size in the |
927 | | * trailer dictionary and set a warning flag if it does |
928 | | */ |
929 | 9.42k | code = pdfi_dict_get_int(ctx, d, "Size", &num); |
930 | 9.42k | if (code < 0) |
931 | 17 | goto error; |
932 | | |
933 | 9.40k | if (max_obj >= num) |
934 | 561 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_SIZE, "read_xref", NULL); |
935 | | |
936 | | /* Check if this is a modified file and has any |
937 | | * previous xref entries. |
938 | | */ |
939 | 9.40k | code = pdfi_dict_known(ctx, d, "Prev", &known); |
940 | 9.40k | if (known) { |
941 | 4.17k | code = pdfi_dict_get_int(ctx, d, "Prev", &num); |
942 | 4.17k | if (code < 0) |
943 | 17 | goto error; |
944 | | |
945 | 4.16k | if (num < 0 || num > ctx->main_stream_length) { |
946 | 1.31k | code = gs_note_error(gs_error_rangecheck); |
947 | 1.31k | goto error; |
948 | 1.31k | } |
949 | | |
950 | 2.84k | if (pdfi_loop_detector_check_object(ctx, num) == true) { |
951 | 7 | code = gs_note_error(gs_error_circular_reference); |
952 | 7 | goto error; |
953 | 7 | } |
954 | 2.83k | else { |
955 | 2.83k | code = pdfi_loop_detector_add_object(ctx, num); |
956 | 2.83k | if (code < 0) |
957 | 0 | goto error; |
958 | 2.83k | } |
959 | | |
960 | 2.83k | code = pdfi_seek(ctx, s, num, SEEK_SET); |
961 | 2.83k | if (code < 0) |
962 | 0 | goto error; |
963 | | |
964 | 2.83k | if (!ctx->repaired) { |
965 | 2.83k | code = pdfi_read_token(ctx, ctx->main_stream, 0, 0); |
966 | 2.83k | if (code < 0) |
967 | 118 | goto error; |
968 | | |
969 | 2.72k | if (code == 0) { |
970 | 2 | code = gs_note_error(gs_error_syntaxerror); |
971 | 2 | goto error; |
972 | 2 | } |
973 | 2.72k | } else { |
974 | 0 | code = 0; |
975 | 0 | goto error; |
976 | 0 | } |
977 | | |
978 | 2.71k | if ((intptr_t)(ctx->stack_top[-1]) == (intptr_t)TOKEN_XREF) { |
979 | | /* Read old-style xref table */ |
980 | 1.24k | pdfi_pop(ctx, 1); |
981 | 1.24k | code = read_xref(ctx, ctx->main_stream); |
982 | 1.24k | if (code < 0) |
983 | 223 | goto error; |
984 | 1.47k | } else { |
985 | 1.47k | pdfi_pop(ctx, 1); |
986 | 1.47k | code = gs_note_error(gs_error_typecheck); |
987 | 1.47k | goto error; |
988 | 1.47k | } |
989 | 2.71k | } |
990 | | |
991 | | /* Now check if this is a hybrid file. */ |
992 | 6.25k | if (XRefStm != 0) { |
993 | 213 | ctx->is_hybrid = true; |
994 | | |
995 | 213 | if (ctx->args.pdfdebug) |
996 | 0 | outprintf(ctx->memory, "%% File is a hybrid, containing xref table and xref stream. Reading the stream.\n"); |
997 | | |
998 | | |
999 | 213 | if (pdfi_loop_detector_check_object(ctx, XRefStm) == true) { |
1000 | 0 | code = gs_note_error(gs_error_circular_reference); |
1001 | 0 | goto error; |
1002 | 0 | } |
1003 | 213 | else { |
1004 | 213 | code = pdfi_loop_detector_add_object(ctx, XRefStm); |
1005 | 213 | if (code < 0) |
1006 | 0 | goto error; |
1007 | 213 | } |
1008 | | |
1009 | 213 | code = pdfi_loop_detector_mark(ctx); |
1010 | 213 | if (code < 0) |
1011 | 0 | goto error; |
1012 | | |
1013 | | /* Because of the way the code works when we read a file which is a pure |
1014 | | * xref stream file, we need to read the first integer of 'x y obj' |
1015 | | * because the xref stream decoding code expects that to be on the stack. |
1016 | | */ |
1017 | 213 | pdfi_seek(ctx, s, XRefStm, SEEK_SET); |
1018 | | |
1019 | 213 | code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num); |
1020 | 213 | if (code < 0) { |
1021 | 0 | pdfi_set_error(ctx, 0, NULL, E_PDF_BADXREFSTREAM, "read_xref", ""); |
1022 | 0 | pdfi_loop_detector_cleartomark(ctx); |
1023 | 0 | goto error; |
1024 | 0 | } |
1025 | | |
1026 | 213 | code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num); |
1027 | | /* We could just fall through to the exit here, but choose not to in order to avoid possible mistakes in future */ |
1028 | 213 | if (code < 0) { |
1029 | 25 | pdfi_loop_detector_cleartomark(ctx); |
1030 | 25 | goto error; |
1031 | 25 | } |
1032 | | |
1033 | 188 | pdfi_loop_detector_cleartomark(ctx); |
1034 | 188 | } else |
1035 | 6.03k | code = 0; |
1036 | | |
1037 | 9.42k | error: |
1038 | 9.42k | pdfi_countdown(d); |
1039 | 9.42k | return code; |
1040 | 6.25k | } |
1041 | | |
1042 | | int pdfi_read_xref(pdf_context *ctx) |
1043 | 94.5k | { |
1044 | 94.5k | int code = 0; |
1045 | 94.5k | int obj_num; |
1046 | | |
1047 | 94.5k | code = pdfi_loop_detector_mark(ctx); |
1048 | 94.5k | if (code < 0) |
1049 | 0 | return code; |
1050 | | |
1051 | 94.5k | if (ctx->startxref == 0) |
1052 | 53.6k | goto repair; |
1053 | | |
1054 | 40.8k | code = pdfi_loop_detector_add_object(ctx, ctx->startxref); |
1055 | 40.8k | if (code < 0) |
1056 | 0 | goto exit; |
1057 | | |
1058 | 40.8k | if (ctx->args.pdfdebug) |
1059 | 0 | outprintf(ctx->memory, "%% Trying to read 'xref' token for xref table, or 'int int obj' for an xref stream\n"); |
1060 | | |
1061 | 40.8k | if (ctx->startxref > ctx->main_stream_length - 5) { |
1062 | 10.4k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is beyond end of file")) < 0) |
1063 | 0 | goto exit; |
1064 | | |
1065 | 10.4k | goto repair; |
1066 | 10.4k | } |
1067 | 30.3k | if (ctx->startxref < 0) { |
1068 | 485 | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_rangecheck), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is before start of file")) < 0) |
1069 | 0 | goto exit; |
1070 | | |
1071 | 485 | goto repair; |
1072 | 485 | } |
1073 | | |
1074 | | /* Read the xref(s) */ |
1075 | 29.8k | pdfi_seek(ctx, ctx->main_stream, ctx->startxref, SEEK_SET); |
1076 | | |
1077 | | /* If it starts with an int, it's an xref stream dict */ |
1078 | 29.8k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num); |
1079 | 29.8k | if (code == 1) { |
1080 | 11.4k | if (pdfi_check_xref_stream(ctx)) { |
1081 | 8.54k | code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num); |
1082 | 8.54k | if (code < 0) |
1083 | 2.86k | goto repair; |
1084 | 8.54k | } else |
1085 | 2.89k | goto repair; |
1086 | 18.4k | } else { |
1087 | | /* If not, it had better start 'xref', and be an old-style xref table */ |
1088 | 18.4k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
1089 | 18.4k | if (code != TOKEN_XREF) { |
1090 | 8.92k | if ((code = pdfi_set_error_stop(ctx, gs_note_error(gs_error_syntaxerror), NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"Failed to read any token at the startxref location")) < 0) |
1091 | 0 | goto exit; |
1092 | | |
1093 | 8.92k | goto repair; |
1094 | 8.92k | } |
1095 | | |
1096 | 9.52k | code = read_xref(ctx, ctx->main_stream); |
1097 | 9.52k | if (code < 0) |
1098 | 4.33k | goto repair; |
1099 | 9.52k | } |
1100 | | |
1101 | 10.8k | if(ctx->args.pdfdebug && ctx->xref_table) { |
1102 | 0 | int i, j; |
1103 | 0 | xref_entry *entry; |
1104 | 0 | char Buffer[32]; |
1105 | |
|
1106 | 0 | outprintf(ctx->memory, "\n%% Dumping xref table\n"); |
1107 | 0 | for (i=0;i < ctx->xref_table->xref_size;i++) { |
1108 | 0 | entry = &ctx->xref_table->xref[i]; |
1109 | 0 | if(entry->compressed) { |
1110 | 0 | outprintf(ctx->memory, "*"); |
1111 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%"PRId64"", entry->object_num); |
1112 | 0 | j = 10 - strlen(Buffer); |
1113 | 0 | while(j--) { |
1114 | 0 | outprintf(ctx->memory, " "); |
1115 | 0 | } |
1116 | 0 | outprintf(ctx->memory, "%s ", Buffer); |
1117 | |
|
1118 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.compressed_stream_num); |
1119 | 0 | j = 10 - strlen(Buffer); |
1120 | 0 | while(j--) { |
1121 | 0 | outprintf(ctx->memory, " "); |
1122 | 0 | } |
1123 | 0 | outprintf(ctx->memory, "%s ", Buffer); |
1124 | |
|
1125 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.object_index); |
1126 | 0 | j = 10 - strlen(Buffer); |
1127 | 0 | while(j--) { |
1128 | 0 | outprintf(ctx->memory, " "); |
1129 | 0 | } |
1130 | 0 | outprintf(ctx->memory, "%s ", Buffer); |
1131 | 0 | } |
1132 | 0 | else { |
1133 | 0 | outprintf(ctx->memory, " "); |
1134 | |
|
1135 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->object_num); |
1136 | 0 | j = 10 - strlen(Buffer); |
1137 | 0 | while(j--) { |
1138 | 0 | outprintf(ctx->memory, " "); |
1139 | 0 | } |
1140 | 0 | outprintf(ctx->memory, "%s ", Buffer); |
1141 | |
|
1142 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%"PRIdOFFSET"", entry->u.uncompressed.offset); |
1143 | 0 | j = 10 - strlen(Buffer); |
1144 | 0 | while(j--) { |
1145 | 0 | outprintf(ctx->memory, " "); |
1146 | 0 | } |
1147 | 0 | outprintf(ctx->memory, "%s ", Buffer); |
1148 | |
|
1149 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.uncompressed.generation_num); |
1150 | 0 | j = 10 - strlen(Buffer); |
1151 | 0 | while(j--) { |
1152 | 0 | outprintf(ctx->memory, " "); |
1153 | 0 | } |
1154 | 0 | outprintf(ctx->memory, "%s ", Buffer); |
1155 | 0 | } |
1156 | 0 | if (entry->free) |
1157 | 0 | outprintf(ctx->memory, "f\n"); |
1158 | 0 | else |
1159 | 0 | outprintf(ctx->memory, "n\n"); |
1160 | 0 | } |
1161 | 0 | } |
1162 | 10.8k | if (ctx->args.pdfdebug) |
1163 | 0 | outprintf(ctx->memory, "\n"); |
1164 | | |
1165 | 10.8k | exit: |
1166 | 10.8k | (void)pdfi_loop_detector_cleartomark(ctx); |
1167 | | |
1168 | 10.8k | if (code < 0) |
1169 | 0 | return code; |
1170 | | |
1171 | 10.8k | return 0; |
1172 | | |
1173 | 83.6k | repair: |
1174 | 83.6k | (void)pdfi_loop_detector_cleartomark(ctx); |
1175 | 83.6k | if (!ctx->repaired && !ctx->args.pdfstoponerror) |
1176 | 83.5k | return(pdfi_repair_file(ctx)); |
1177 | 63 | return 0; |
1178 | 83.6k | } |