/src/ghostpdl/pdf/pdf_xref.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2018-2022 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | /* xref parsing */ |
17 | | |
18 | | #include "pdf_int.h" |
19 | | #include "pdf_stack.h" |
20 | | #include "pdf_xref.h" |
21 | | #include "pdf_file.h" |
22 | | #include "pdf_loop_detect.h" |
23 | | #include "pdf_dict.h" |
24 | | #include "pdf_array.h" |
25 | | #include "pdf_repair.h" |
26 | | |
27 | | static int resize_xref(pdf_context *ctx, uint64_t new_size) |
28 | 9.21k | { |
29 | 9.21k | xref_entry *new_xrefs; |
30 | | |
31 | | /* Although we can technically handle object numbers larger than this, on some systems (32-bit Windows) |
32 | | * memset is limited to a (signed!) integer for the size of memory to clear. We could deal |
33 | | * with this by clearing the memory in blocks, but really, this is almost certainly a |
34 | | * corrupted file or something. |
35 | | */ |
36 | 9.21k | if (new_size >= (0x7ffffff / sizeof(xref_entry))) |
37 | 0 | return_error(gs_error_rangecheck); |
38 | | |
39 | 9.21k | new_xrefs = (xref_entry *)gs_alloc_bytes(ctx->memory, (new_size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
40 | 9.21k | if (new_xrefs == NULL){ |
41 | 0 | pdfi_countdown(ctx->xref_table); |
42 | 0 | ctx->xref_table = NULL; |
43 | 0 | return_error(gs_error_VMerror); |
44 | 0 | } |
45 | 9.21k | memset(new_xrefs, 0x00, (new_size) * sizeof(xref_entry)); |
46 | 9.21k | memcpy(new_xrefs, ctx->xref_table->xref, ctx->xref_table->xref_size * sizeof(xref_entry)); |
47 | 9.21k | gs_free_object(ctx->memory, ctx->xref_table->xref, "reallocated xref entries"); |
48 | 9.21k | ctx->xref_table->xref = new_xrefs; |
49 | 9.21k | ctx->xref_table->xref_size = new_size; |
50 | 9.21k | return 0; |
51 | 9.21k | } |
52 | | |
53 | | static int read_xref_stream_entries(pdf_context *ctx, pdf_c_stream *s, uint64_t first, uint64_t last, uint64_t *W) |
54 | 7.49k | { |
55 | 7.49k | uint i, j; |
56 | 7.49k | uint field_width = 0; |
57 | 7.49k | uint32_t type = 0; |
58 | 7.49k | uint64_t objnum = 0, gen = 0; |
59 | 7.49k | byte *Buffer; |
60 | 7.49k | int64_t bytes = 0; |
61 | 7.49k | xref_entry *entry; |
62 | | |
63 | | /* Find max number of bytes to be read */ |
64 | 7.49k | field_width = W[0]; |
65 | 7.49k | if (W[1] > field_width) |
66 | 7.44k | field_width = W[1]; |
67 | 7.49k | if (W[2] > field_width) |
68 | 6 | field_width = W[2]; |
69 | | |
70 | 7.49k | Buffer = gs_alloc_bytes(ctx->memory, field_width, "read_xref_stream_entry working buffer"); |
71 | 562k | for (i=first;i<=last; i++){ |
72 | | /* Defaults if W[n] = 0 */ |
73 | 555k | type = 1; |
74 | 555k | objnum = gen = 0; |
75 | | |
76 | 555k | if (W[0] != 0) { |
77 | 555k | type = 0; |
78 | 555k | bytes = pdfi_read_bytes(ctx, Buffer, 1, W[0], s); |
79 | 555k | if (bytes < W[0]){ |
80 | 7 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)"); |
81 | 7 | return_error(gs_error_ioerror); |
82 | 7 | } |
83 | 1.11M | for (j=0;j<W[0];j++) |
84 | 555k | type = (type << 8) + Buffer[j]; |
85 | 555k | } |
86 | | |
87 | 555k | if (W[1] != 0) { |
88 | 555k | bytes = pdfi_read_bytes(ctx, Buffer, 1, W[1], s); |
89 | 555k | if (bytes < W[1]){ |
90 | 1 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry free working buffer (error)"); |
91 | 1 | return_error(gs_error_ioerror); |
92 | 1 | } |
93 | 1.91M | for (j=0;j<W[1];j++) |
94 | 1.35M | objnum = (objnum << 8) + Buffer[j]; |
95 | 555k | } |
96 | | |
97 | 555k | if (W[2] != 0) { |
98 | 549k | bytes = pdfi_read_bytes(ctx, Buffer, 1, W[2], s); |
99 | 549k | if (bytes < W[2]){ |
100 | 0 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer (error)"); |
101 | 0 | return_error(gs_error_ioerror); |
102 | 0 | } |
103 | 1.10M | for (j=0;j<W[2];j++) |
104 | 553k | gen = (gen << 8) + Buffer[j]; |
105 | 549k | } |
106 | | |
107 | 555k | entry = &ctx->xref_table->xref[i]; |
108 | 555k | if (entry->object_num != 0) |
109 | 1.00k | continue; |
110 | | |
111 | 554k | entry->compressed = false; |
112 | 554k | entry->free = false; |
113 | 554k | entry->object_num = i; |
114 | 554k | entry->cache = NULL; |
115 | | |
116 | 554k | switch(type) { |
117 | 3.21k | case 0: |
118 | 3.21k | entry->free = true; |
119 | 3.21k | entry->u.uncompressed.offset = objnum; /* For free objects we use the offset to store the object number of the next free object */ |
120 | 3.21k | entry->u.uncompressed.generation_num = gen; /* And the generation number is the numebr to use if this object is used again */ |
121 | 3.21k | break; |
122 | 152k | case 1: |
123 | 152k | entry->u.uncompressed.offset = objnum; |
124 | 152k | entry->u.uncompressed.generation_num = gen; |
125 | 152k | break; |
126 | 398k | case 2: |
127 | 398k | entry->compressed = true; |
128 | 398k | entry->u.compressed.compressed_stream_num = objnum; /* The object number of the compressed stream */ |
129 | 398k | entry->u.compressed.object_index = gen; /* And the index of the object within the stream */ |
130 | 398k | break; |
131 | 64 | default: |
132 | 64 | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer"); |
133 | 64 | return_error(gs_error_rangecheck); |
134 | 0 | break; |
135 | 554k | } |
136 | 554k | } |
137 | 7.42k | gs_free_object(ctx->memory, Buffer, "read_xref_stream_entry, free working buffer"); |
138 | 7.42k | return 0; |
139 | 7.49k | } |
140 | | |
141 | | /* Forward definition */ |
142 | | static int read_xref(pdf_context *ctx, pdf_c_stream *s); |
143 | | /* These two routines are recursive.... */ |
144 | | static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num); |
145 | | |
146 | | static int pdfi_process_xref_stream(pdf_context *ctx, pdf_stream *stream_obj, pdf_c_stream *s) |
147 | 6.24k | { |
148 | 6.24k | pdf_c_stream *XRefStrm; |
149 | 6.24k | int code, i; |
150 | 6.24k | pdf_dict *sdict = NULL; |
151 | 6.24k | pdf_name *n; |
152 | 6.24k | pdf_array *a; |
153 | 6.24k | int64_t size; |
154 | 6.24k | int64_t num; |
155 | 6.24k | int64_t W[3]; |
156 | 6.24k | int objnum; |
157 | 6.24k | bool known = false; |
158 | | |
159 | 6.24k | if (pdfi_type_of(stream_obj) != PDF_STREAM) |
160 | 0 | return_error(gs_error_typecheck); |
161 | | |
162 | 6.24k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)stream_obj, &sdict); |
163 | 6.24k | if (code < 0) |
164 | 0 | return code; |
165 | | |
166 | 6.24k | code = pdfi_dict_get_type(ctx, sdict, "Type", PDF_NAME, (pdf_obj **)&n); |
167 | 6.24k | if (code < 0) |
168 | 14 | return code; |
169 | | |
170 | 6.23k | if (n->length != 4 || memcmp(n->data, "XRef", 4) != 0) { |
171 | 4 | pdfi_countdown(n); |
172 | 4 | return_error(gs_error_syntaxerror); |
173 | 4 | } |
174 | 6.23k | pdfi_countdown(n); |
175 | | |
176 | 6.23k | code = pdfi_dict_get_int(ctx, sdict, "Size", &size); |
177 | 6.23k | if (code < 0) |
178 | 1 | return code; |
179 | 6.23k | if (size < 1) |
180 | 2 | return 0; |
181 | | |
182 | 6.22k | if (size < 0 || size > floor((double)ARCH_MAX_SIZE_T / (double)sizeof(xref_entry))) |
183 | 0 | return_error(gs_error_rangecheck); |
184 | | |
185 | | /* If this is the first xref stream then allocate the xref table and store the trailer */ |
186 | 6.22k | if (ctx->xref_table == NULL) { |
187 | 4.37k | ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table"); |
188 | 4.37k | if (ctx->xref_table == NULL) { |
189 | 0 | return_error(gs_error_VMerror); |
190 | 0 | } |
191 | 4.37k | memset(ctx->xref_table, 0x00, sizeof(xref_table_t)); |
192 | 4.37k | ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
193 | 4.37k | if (ctx->xref_table->xref == NULL){ |
194 | 0 | gs_free_object(ctx->memory, ctx->xref_table, "failed to allocate xref table entries"); |
195 | 0 | ctx->xref_table = NULL; |
196 | 0 | return_error(gs_error_VMerror); |
197 | 0 | } |
198 | 4.37k | memset(ctx->xref_table->xref, 0x00, size * sizeof(xref_entry)); |
199 | 4.37k | ctx->xref_table->ctx = ctx; |
200 | 4.37k | ctx->xref_table->type = PDF_XREF_TABLE; |
201 | 4.37k | ctx->xref_table->xref_size = size; |
202 | | #if REFCNT_DEBUG |
203 | | ctx->xref_table->UID = ctx->ref_UID++; |
204 | | dmprintf1(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID); |
205 | | #endif |
206 | 4.37k | pdfi_countup(ctx->xref_table); |
207 | | |
208 | 4.37k | ctx->Trailer = sdict; |
209 | 4.37k | pdfi_countup(sdict); |
210 | 4.37k | } else { |
211 | 1.85k | if (size > ctx->xref_table->xref_size) |
212 | 2 | return_error(gs_error_rangecheck); |
213 | | |
214 | 1.85k | code = pdfi_merge_dicts(ctx, ctx->Trailer, sdict); |
215 | 1.85k | if (code < 0) { |
216 | 0 | if (code == gs_error_VMerror || ctx->args.pdfstoponerror) |
217 | 0 | return code; |
218 | 0 | } |
219 | 1.85k | } |
220 | | |
221 | 6.22k | pdfi_seek(ctx, ctx->main_stream, pdfi_stream_offset(ctx, stream_obj), SEEK_SET); |
222 | | |
223 | | /* Bug #691220 has a PDF file with a compressed XRef, the stream dictionary has |
224 | | * a /DecodeParms entry for the stream, which has a /Colors value of 5, which makes |
225 | | * *no* sense whatever. If we try to apply a Predictor then we end up in a loop trying |
226 | | * to read 5 colour samples. Rather than meddles with more parameters to the filter |
227 | | * code, we'll just remove the Colors entry from the DecodeParms dictionary, |
228 | | * because it is nonsense. This means we'll get the (sensible) default value of 1. |
229 | | */ |
230 | 6.22k | code = pdfi_dict_known(ctx, sdict, "DecodeParms", &known); |
231 | 6.22k | if (code < 0) |
232 | 0 | return code; |
233 | | |
234 | 6.22k | if (known) { |
235 | 5.97k | pdf_dict *DP; |
236 | 5.97k | double f; |
237 | 5.97k | pdf_obj *name; |
238 | | |
239 | 5.97k | code = pdfi_dict_get_type(ctx, sdict, "DecodeParms", PDF_DICT, (pdf_obj **)&DP); |
240 | 5.97k | if (code < 0) |
241 | 0 | return code; |
242 | | |
243 | 5.97k | code = pdfi_dict_knownget_number(ctx, DP, "Colors", &f); |
244 | 5.97k | if (code < 0) { |
245 | 0 | pdfi_countdown(DP); |
246 | 0 | return code; |
247 | 0 | } |
248 | 5.97k | if (code > 0 && f != (double)1) |
249 | 0 | { |
250 | 0 | code = pdfi_name_alloc(ctx, (byte *)"Colors", 6, &name); |
251 | 0 | if (code < 0) { |
252 | 0 | pdfi_countdown(DP); |
253 | 0 | return code; |
254 | 0 | } |
255 | 0 | pdfi_countup(name); |
256 | |
|
257 | 0 | code = pdfi_dict_delete_pair(ctx, DP, (pdf_name *)name); |
258 | 0 | pdfi_countdown(name); |
259 | 0 | if (code < 0) { |
260 | 0 | pdfi_countdown(DP); |
261 | 0 | return code; |
262 | 0 | } |
263 | 0 | } |
264 | 5.97k | pdfi_countdown(DP); |
265 | 5.97k | } |
266 | | |
267 | 6.22k | code = pdfi_filter_no_decryption(ctx, stream_obj, s, &XRefStrm, false); |
268 | 6.22k | if (code < 0) { |
269 | 7 | pdfi_countdown(ctx->xref_table); |
270 | 7 | ctx->xref_table = NULL; |
271 | 7 | return code; |
272 | 7 | } |
273 | | |
274 | 6.21k | code = pdfi_dict_get_type(ctx, sdict, "W", PDF_ARRAY, (pdf_obj **)&a); |
275 | 6.21k | if (code < 0) { |
276 | 0 | pdfi_close_file(ctx, XRefStrm); |
277 | 0 | pdfi_countdown(ctx->xref_table); |
278 | 0 | ctx->xref_table = NULL; |
279 | 0 | return code; |
280 | 0 | } |
281 | | |
282 | 6.21k | if (pdfi_array_size(a) != 3) { |
283 | 3 | pdfi_countdown(a); |
284 | 3 | pdfi_close_file(ctx, XRefStrm); |
285 | 3 | pdfi_countdown(ctx->xref_table); |
286 | 3 | ctx->xref_table = NULL; |
287 | 3 | return_error(gs_error_rangecheck); |
288 | 3 | } |
289 | 24.8k | for (i=0;i<3;i++) { |
290 | 18.6k | code = pdfi_array_get_int(ctx, a, (uint64_t)i, (int64_t *)&W[i]); |
291 | 18.6k | if (code < 0) { |
292 | 9 | pdfi_countdown(a); |
293 | 9 | pdfi_close_file(ctx, XRefStrm); |
294 | 9 | pdfi_countdown(ctx->xref_table); |
295 | 9 | ctx->xref_table = NULL; |
296 | 9 | return code; |
297 | 9 | } |
298 | 18.6k | } |
299 | 6.20k | pdfi_countdown(a); |
300 | | |
301 | 6.20k | code = pdfi_dict_get_type(ctx, sdict, "Index", PDF_ARRAY, (pdf_obj **)&a); |
302 | 6.20k | if (code == gs_error_undefined) { |
303 | 1.66k | code = read_xref_stream_entries(ctx, XRefStrm, 0, size - 1, (uint64_t *)W); |
304 | 1.66k | if (code < 0) { |
305 | 17 | pdfi_close_file(ctx, XRefStrm); |
306 | 17 | pdfi_countdown(ctx->xref_table); |
307 | 17 | ctx->xref_table = NULL; |
308 | 17 | return code; |
309 | 17 | } |
310 | 4.54k | } else { |
311 | 4.54k | int64_t start, size; |
312 | | |
313 | 4.54k | if (code < 0) { |
314 | 0 | pdfi_close_file(ctx, XRefStrm); |
315 | 0 | pdfi_countdown(ctx->xref_table); |
316 | 0 | ctx->xref_table = NULL; |
317 | 0 | return code; |
318 | 0 | } |
319 | | |
320 | 4.54k | if (pdfi_array_size(a) & 1) { |
321 | 1 | pdfi_countdown(a); |
322 | 1 | pdfi_close_file(ctx, XRefStrm); |
323 | 1 | pdfi_countdown(ctx->xref_table); |
324 | 1 | ctx->xref_table = NULL; |
325 | 1 | return_error(gs_error_rangecheck); |
326 | 1 | } |
327 | | |
328 | 10.3k | for (i=0;i < pdfi_array_size(a);i+=2){ |
329 | 5.83k | code = pdfi_array_get_int(ctx, a, (uint64_t)i, &start); |
330 | 5.83k | if (code < 0 || start < 0) { |
331 | 5 | pdfi_countdown(a); |
332 | 5 | pdfi_close_file(ctx, XRefStrm); |
333 | 5 | pdfi_countdown(ctx->xref_table); |
334 | 5 | ctx->xref_table = NULL; |
335 | 5 | return code; |
336 | 5 | } |
337 | | |
338 | 5.83k | code = pdfi_array_get_int(ctx, a, (uint64_t)i+1, &size); |
339 | 5.83k | if (code < 0) { |
340 | 3 | pdfi_countdown(a); |
341 | 3 | pdfi_close_file(ctx, XRefStrm); |
342 | 3 | pdfi_countdown(ctx->xref_table); |
343 | 3 | ctx->xref_table = NULL; |
344 | 3 | return code; |
345 | 3 | } |
346 | | |
347 | 5.83k | if (size < 1) |
348 | 0 | continue; |
349 | | |
350 | 5.83k | if (start + size >= ctx->xref_table->xref_size) { |
351 | 4.24k | code = resize_xref(ctx, start + size); |
352 | 4.24k | if (code < 0) { |
353 | 0 | pdfi_countdown(a); |
354 | 0 | pdfi_close_file(ctx, XRefStrm); |
355 | 0 | pdfi_countdown(ctx->xref_table); |
356 | 0 | ctx->xref_table = NULL; |
357 | 0 | return code; |
358 | 0 | } |
359 | 4.24k | } |
360 | | |
361 | 5.83k | code = read_xref_stream_entries(ctx, XRefStrm, start, start + size - 1, (uint64_t *)W); |
362 | 5.83k | if (code < 0) { |
363 | 55 | pdfi_countdown(a); |
364 | 55 | pdfi_close_file(ctx, XRefStrm); |
365 | 55 | pdfi_countdown(ctx->xref_table); |
366 | 55 | ctx->xref_table = NULL; |
367 | 55 | return code; |
368 | 55 | } |
369 | 5.83k | } |
370 | 4.54k | } |
371 | 6.12k | pdfi_countdown(a); |
372 | | |
373 | 6.12k | pdfi_close_file(ctx, XRefStrm); |
374 | | |
375 | 6.12k | code = pdfi_dict_get_int(ctx, sdict, "Prev", &num); |
376 | 6.12k | if (code == gs_error_undefined) |
377 | 1.86k | return 0; |
378 | | |
379 | 4.25k | if (code < 0) |
380 | 0 | return code; |
381 | | |
382 | 4.25k | if (num < 0 || num > ctx->main_stream_length) |
383 | 2.32k | return_error(gs_error_rangecheck); |
384 | | |
385 | 1.93k | if (pdfi_loop_detector_check_object(ctx, num) == true) |
386 | 0 | return_error(gs_error_circular_reference); |
387 | 1.93k | else { |
388 | 1.93k | code = pdfi_loop_detector_add_object(ctx, num); |
389 | 1.93k | if (code < 0) |
390 | 0 | return code; |
391 | 1.93k | } |
392 | | |
393 | 1.93k | if(ctx->args.pdfdebug) |
394 | 1.93k | dmprintf(ctx->memory, "%% Reading /Prev xref\n"); |
395 | | |
396 | 1.93k | pdfi_seek(ctx, s, num, SEEK_SET); |
397 | | |
398 | 1.93k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &objnum); |
399 | 1.93k | if (code == 1) |
400 | 1.82k | return pdfi_read_xref_stream_dict(ctx, s, objnum); |
401 | | |
402 | 111 | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
403 | 111 | if (code < 0) |
404 | 0 | return code; |
405 | 111 | if (code == TOKEN_XREF) { |
406 | 20 | pdfi_set_error(ctx, 0, NULL, E_PDF_PREV_NOT_XREF_STREAM, "pdfi_process_xref_stream", NULL); |
407 | 20 | if (!ctx->args.pdfstoponerror) |
408 | | /* Read old-style xref table */ |
409 | 20 | return(read_xref(ctx, ctx->main_stream)); |
410 | 20 | } |
411 | 111 | return_error(gs_error_syntaxerror); |
412 | 111 | } |
413 | | |
414 | | static int pdfi_read_xref_stream_dict(pdf_context *ctx, pdf_c_stream *s, int obj_num) |
415 | 7.46k | { |
416 | 7.46k | int code; |
417 | 7.46k | int gen_num; |
418 | | |
419 | 7.46k | if (ctx->args.pdfdebug) |
420 | 7.46k | dmprintf(ctx->memory, "\n%% Reading PDF 1.5+ xref stream\n"); |
421 | | |
422 | | /* We have the obj_num. Lets try for obj_num gen obj as a XRef stream */ |
423 | 7.46k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &gen_num); |
424 | 7.46k | if (code <= 0) { |
425 | 394 | if (ctx->args.pdfstoponerror) |
426 | 0 | return code; |
427 | 394 | return(pdfi_repair_file(ctx)); |
428 | 394 | } |
429 | | |
430 | | /* Try to read 'obj' */ |
431 | 7.07k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
432 | 7.07k | if (code < 0) |
433 | 0 | return code; |
434 | 7.07k | if (code == 0) |
435 | 0 | return_error(gs_error_syntaxerror); |
436 | | |
437 | | /* Third element must be obj, or it's not a valid xref */ |
438 | 7.07k | if (code != TOKEN_OBJ) { |
439 | 571 | if (ctx->args.pdfstoponerror) |
440 | 0 | return code; |
441 | 571 | return(pdfi_repair_file(ctx)); |
442 | 571 | } |
443 | | |
444 | 267k | do { |
445 | 267k | code = pdfi_read_token(ctx, ctx->main_stream, obj_num, gen_num); |
446 | 267k | if (code <= 0) { |
447 | 199 | if (ctx->args.pdfstoponerror) |
448 | 0 | return code; |
449 | 199 | return pdfi_repair_file(ctx); |
450 | 199 | } |
451 | | |
452 | 267k | if (pdfi_count_stack(ctx) >= 2 && pdfi_type_of(ctx->stack_top[-1]) == PDF_FAST_KEYWORD) { |
453 | 6.42k | uintptr_t keyword = (uintptr_t)ctx->stack_top[-1]; |
454 | 6.42k | if (keyword == TOKEN_STREAM) { |
455 | 6.25k | pdf_dict *dict; |
456 | 6.25k | pdf_stream *sdict = NULL; |
457 | 6.25k | int64_t Length; |
458 | | |
459 | | /* Remove the 'stream' token from the stack, should leave a dictionary object on the stack */ |
460 | 6.25k | pdfi_pop(ctx, 1); |
461 | 6.25k | if (pdfi_type_of(ctx->stack_top[-1]) != PDF_DICT) { |
462 | 5 | if (ctx->args.pdfstoponerror) |
463 | 0 | return code; |
464 | 5 | return pdfi_repair_file(ctx); |
465 | 5 | } |
466 | 6.24k | dict = (pdf_dict *)ctx->stack_top[-1]; |
467 | | |
468 | | /* Convert the dict into a stream (sdict comes back with at least one ref) */ |
469 | 6.24k | code = pdfi_obj_dict_to_stream(ctx, dict, &sdict, true); |
470 | | /* Pop off the dict */ |
471 | 6.24k | pdfi_pop(ctx, 1); |
472 | 6.24k | if (code < 0) { |
473 | 0 | if (ctx->args.pdfstoponerror) |
474 | 0 | return code; |
475 | | /* TODO: should I return code instead of trying to repair? |
476 | | * Normally the above routine should not fail so something is |
477 | | * probably seriously fubar. |
478 | | */ |
479 | 0 | return pdfi_repair_file(ctx); |
480 | 0 | } |
481 | 6.24k | dict = NULL; |
482 | | |
483 | | /* Init the stuff for the stream */ |
484 | 6.24k | sdict->stream_offset = pdfi_unread_tell(ctx); |
485 | 6.24k | sdict->object_num = obj_num; |
486 | 6.24k | sdict->generation_num = gen_num; |
487 | | |
488 | 6.24k | code = pdfi_dict_get_int(ctx, sdict->stream_dict, "Length", &Length); |
489 | 6.24k | if (code < 0) { |
490 | | /* TODO: Not positive this will actually have a length -- just use 0 */ |
491 | 15 | pdfi_set_error_var(ctx, 0, NULL, E_PDF_BADSTREAM, "pdfi_read_xref_stream_dict", "Xref Stream object %u missing mandatory keyword /Length\n", obj_num); |
492 | 15 | code = 0; |
493 | 15 | Length = 0; |
494 | 15 | } |
495 | 6.24k | sdict->Length = Length; |
496 | 6.24k | sdict->length_valid = true; |
497 | | |
498 | 6.24k | code = pdfi_process_xref_stream(ctx, sdict, ctx->main_stream); |
499 | 6.24k | if (code < 0) { |
500 | 2.53k | pdfi_countdown(sdict); |
501 | 2.53k | if (ctx->args.pdfstoponerror) |
502 | 0 | return code; |
503 | 2.53k | return (pdfi_repair_file(ctx)); |
504 | 2.53k | } |
505 | 3.71k | pdfi_countdown(sdict); |
506 | 3.71k | break; |
507 | 6.24k | } else if (keyword == TOKEN_ENDOBJ) { |
508 | | /* Something went wrong, this is not a stream dictionary */ |
509 | 51 | if (ctx->args.pdfstoponerror) |
510 | 0 | return code; |
511 | 51 | return(pdfi_repair_file(ctx)); |
512 | 51 | } |
513 | 6.42k | } |
514 | 267k | } while(1); |
515 | 3.71k | return 0; |
516 | 6.50k | } |
517 | | |
518 | | static int skip_to_digit(pdf_context *ctx, pdf_c_stream *s, unsigned int limit) |
519 | 985 | { |
520 | 985 | int c, read = 0; |
521 | | |
522 | 3.62k | do { |
523 | 3.62k | c = pdfi_read_byte(ctx, s); |
524 | 3.62k | if (c < 0) |
525 | 0 | return_error(gs_error_ioerror); |
526 | 3.62k | if (c >= '0' && c <= '9') { |
527 | 898 | pdfi_unread_byte(ctx, s, (byte)c); |
528 | 898 | return read; |
529 | 898 | } |
530 | 2.72k | read++; |
531 | 2.72k | } while (read < limit); |
532 | | |
533 | 87 | return read; |
534 | 985 | } |
535 | | |
536 | | static int read_digits(pdf_context *ctx, pdf_c_stream *s, byte *Buffer, int limit) |
537 | 985 | { |
538 | 985 | int c, read = 0; |
539 | | |
540 | | /* Since the "limit" is a value calculated by the caller, |
541 | | it's easier to check it in one place (here) than before |
542 | | every call. |
543 | | */ |
544 | 985 | if (limit <= 0) |
545 | 88 | return_error(gs_error_syntaxerror); |
546 | | |
547 | | /* We assume that Buffer always has limit+1 bytes available, so we can |
548 | | * safely terminate it. */ |
549 | | |
550 | 5.40k | do { |
551 | 5.40k | c = pdfi_read_byte(ctx, s); |
552 | 5.40k | if (c < 0) |
553 | 0 | return_error(gs_error_ioerror); |
554 | 5.40k | if (c < '0' || c > '9') { |
555 | 431 | pdfi_unread_byte(ctx, s, c); |
556 | 431 | break; |
557 | 431 | } |
558 | 4.97k | *Buffer++ = (byte)c; |
559 | 4.97k | read++; |
560 | 4.97k | } while (read < limit); |
561 | 897 | *Buffer = 0; |
562 | | |
563 | 897 | return read; |
564 | 897 | } |
565 | | |
566 | | |
567 | | static int read_xref_entry_slow(pdf_context *ctx, pdf_c_stream *s, gs_offset_t *offset, uint32_t *generation_num, unsigned char *free) |
568 | 501 | { |
569 | 501 | byte Buffer[20]; |
570 | 501 | int c, code, read = 0; |
571 | | |
572 | | /* First off, find a number. If we don't find one, and read 20 bytes, throw an error */ |
573 | 501 | code = skip_to_digit(ctx, s, 20); |
574 | 501 | if (code < 0) |
575 | 0 | return code; |
576 | 501 | read += code; |
577 | | |
578 | | /* Now read a number */ |
579 | 501 | code = read_digits(ctx, s, (byte *)&Buffer, (read > 10 ? 20 - read : 10)); |
580 | 501 | if (code < 0) |
581 | 17 | return code; |
582 | 484 | read += code; |
583 | | |
584 | 484 | *offset = atol((const char *)Buffer); |
585 | | |
586 | | /* find next number */ |
587 | 484 | code = skip_to_digit(ctx, s, 20 - read); |
588 | 484 | if (code < 0) |
589 | 0 | return code; |
590 | 484 | read += code; |
591 | | |
592 | | /* and read it */ |
593 | 484 | code = read_digits(ctx, s, (byte *)&Buffer, (read > 15 ? 20 - read : 5)); |
594 | 484 | if (code < 0) |
595 | 71 | return code; |
596 | 413 | read += code; |
597 | | |
598 | 413 | *generation_num = atol((const char *)Buffer); |
599 | | |
600 | 668 | do { |
601 | 668 | c = pdfi_read_byte(ctx, s); |
602 | 668 | if (c < 0) |
603 | 0 | return_error(gs_error_ioerror); |
604 | 668 | read ++; |
605 | 668 | if (c == 0x09 || c == 0x20) |
606 | 260 | continue; |
607 | 408 | if (c == 'n' || c == 'f') { |
608 | 169 | *free = (unsigned char)c; |
609 | 169 | break; |
610 | 239 | } else { |
611 | 239 | return_error(gs_error_syntaxerror); |
612 | 239 | } |
613 | 408 | } while (read < 20); |
614 | 174 | if (read >= 20) |
615 | 10 | return_error(gs_error_syntaxerror); |
616 | | |
617 | 352 | do { |
618 | 352 | c = pdfi_read_byte(ctx, s); |
619 | 352 | if (c < 0) |
620 | 0 | return_error(gs_error_syntaxerror); |
621 | 352 | read++; |
622 | 352 | if (c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a) |
623 | 145 | continue; |
624 | 352 | } while (read < 20); |
625 | 164 | return 0; |
626 | 164 | } |
627 | | |
628 | | static int write_offset(byte *B, gs_offset_t o, unsigned int g, unsigned char free) |
629 | 164 | { |
630 | 164 | byte b[20], *ptr = B; |
631 | 164 | int index = 0; |
632 | | |
633 | 164 | gs_snprintf((char *)b, sizeof(b), "%"PRIdOFFSET"", o); |
634 | 164 | if (strlen((const char *)b) > 10) |
635 | 0 | return_error(gs_error_rangecheck); |
636 | 1.32k | for(index=0;index < 10 - strlen((const char *)b); index++) { |
637 | 1.15k | *ptr++ = 0x30; |
638 | 1.15k | } |
639 | 164 | memcpy(ptr, b, strlen((const char *)b)); |
640 | 164 | ptr += strlen((const char *)b); |
641 | 164 | *ptr++ = 0x20; |
642 | | |
643 | 164 | gs_snprintf((char *)b, sizeof(b), "%d", g); |
644 | 164 | if (strlen((const char *)b) > 5) |
645 | 0 | return_error(gs_error_rangecheck); |
646 | 737 | for(index=0;index < 5 - strlen((const char *)b);index++) { |
647 | 573 | *ptr++ = 0x30; |
648 | 573 | } |
649 | 164 | memcpy(ptr, b, strlen((const char *)b)); |
650 | 164 | ptr += strlen((const char *)b); |
651 | 164 | *ptr++ = 0x20; |
652 | 164 | *ptr++ = free; |
653 | 164 | *ptr++ = 0x20; |
654 | 164 | *ptr++ = 0x0d; |
655 | 164 | return 0; |
656 | 164 | } |
657 | | |
658 | | static int read_xref_section(pdf_context *ctx, pdf_c_stream *s, uint64_t *section_start, uint64_t *section_size) |
659 | 14.5k | { |
660 | 14.5k | int code = 0, i, j; |
661 | 14.5k | int start = 0; |
662 | 14.5k | int size = 0; |
663 | 14.5k | int64_t bytes = 0; |
664 | 14.5k | char Buffer[21]; |
665 | | |
666 | 14.5k | *section_start = *section_size = 0; |
667 | | |
668 | 14.5k | if (ctx->args.pdfdebug) |
669 | 14.5k | dmprintf(ctx->memory, "\n%% Reading xref section\n"); |
670 | | |
671 | 14.5k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &start); |
672 | 14.5k | if (code < 0) { |
673 | | /* Not an int, might be a keyword */ |
674 | 3.93k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
675 | 3.93k | if (code < 0) |
676 | 0 | return code; |
677 | | |
678 | 3.93k | if (code != TOKEN_TRAILER) { |
679 | | /* element is not an integer, and not a keyword - not a valid xref */ |
680 | 59 | return_error(gs_error_typecheck); |
681 | 59 | } |
682 | 3.87k | return 1; |
683 | 3.93k | } |
684 | | |
685 | 10.6k | if (start < 0) |
686 | 7 | return_error(gs_error_rangecheck); |
687 | | |
688 | 10.6k | *section_start = start; |
689 | | |
690 | 10.6k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &size); |
691 | 10.6k | if (code < 0) |
692 | 8 | return code; |
693 | 10.6k | if (code == 0) |
694 | 14 | return_error(gs_error_syntaxerror); |
695 | | |
696 | | /* Zero sized xref sections are valid; see the file attached to |
697 | | * bug 704947 for an example. */ |
698 | 10.6k | if (size < 0) |
699 | 7 | return_error(gs_error_rangecheck); |
700 | | |
701 | 10.6k | *section_size = size; |
702 | | |
703 | 10.6k | if (ctx->args.pdfdebug) |
704 | 10.6k | dmprintf2(ctx->memory, "\n%% Section starts at %d and has %d entries\n", (unsigned int) start, (unsigned int)size); |
705 | | |
706 | 10.6k | if (size > 0) { |
707 | 10.4k | if (ctx->xref_table == NULL) { |
708 | 3.61k | ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table"); |
709 | 3.61k | if (ctx->xref_table == NULL) |
710 | 0 | return_error(gs_error_VMerror); |
711 | 3.61k | memset(ctx->xref_table, 0x00, sizeof(xref_table_t)); |
712 | | |
713 | 3.61k | ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, (start + size) * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
714 | 3.61k | if (ctx->xref_table->xref == NULL){ |
715 | 2 | gs_free_object(ctx->memory, ctx->xref_table, "free xref table on error allocating entries"); |
716 | 2 | ctx->xref_table = NULL; |
717 | 2 | return_error(gs_error_VMerror); |
718 | 2 | } |
719 | | #if REFCNT_DEBUG |
720 | | ctx->xref_table->UID = ctx->ref_UID++; |
721 | | dmprintf1(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID); |
722 | | #endif |
723 | | |
724 | 3.61k | memset(ctx->xref_table->xref, 0x00, (start + size) * sizeof(xref_entry)); |
725 | 3.61k | ctx->xref_table->ctx = ctx; |
726 | 3.61k | ctx->xref_table->type = PDF_XREF_TABLE; |
727 | 3.61k | ctx->xref_table->xref_size = start + size; |
728 | 3.61k | pdfi_countup(ctx->xref_table); |
729 | 6.85k | } else { |
730 | 6.85k | if (start + size > ctx->xref_table->xref_size) { |
731 | 4.97k | code = resize_xref(ctx, start + size); |
732 | 4.97k | if (code < 0) |
733 | 0 | return code; |
734 | 4.97k | } |
735 | 6.85k | } |
736 | 10.4k | } |
737 | | |
738 | 10.6k | pdfi_skip_white(ctx, s); |
739 | 235k | for (i=0;i< size;i++){ |
740 | 225k | xref_entry *entry = &ctx->xref_table->xref[i + start]; |
741 | 225k | unsigned char free; |
742 | 225k | gs_offset_t off; |
743 | 225k | unsigned int gen; |
744 | | |
745 | 225k | bytes = pdfi_read_bytes(ctx, (byte *)Buffer, 1, 20, s); |
746 | 225k | if (bytes < 20) |
747 | 0 | return_error(gs_error_ioerror); |
748 | 225k | j = 19; |
749 | 225k | if ((Buffer[19] != 0x0a && Buffer[19] != 0x0d) || (Buffer[18] != 0x0d && Buffer[18] != 0x0a && Buffer[18] != 0x20)) |
750 | 4.51k | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_SIZE, "read_xref_section", NULL); |
751 | 231k | while (Buffer[j] != 0x0D && Buffer[j] != 0x0A) { |
752 | 6.50k | pdfi_unread_byte(ctx, s, (byte)Buffer[j]); |
753 | 6.50k | if (--j < 0) { |
754 | 230 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_NO_EOL, "read_xref_section", NULL); |
755 | 230 | dmprintf(ctx->memory, "Invalid xref entry, line terminator missing.\n"); |
756 | 230 | code = read_xref_entry_slow(ctx, s, &off, &gen, &free); |
757 | 230 | if (code < 0) |
758 | 153 | return code; |
759 | 77 | code = write_offset((byte *)Buffer, off, gen, free); |
760 | 77 | if (code < 0) |
761 | 0 | return code; |
762 | 77 | j = 19; |
763 | 77 | break; |
764 | 77 | } |
765 | 6.50k | } |
766 | 225k | Buffer[j] = 0x00; |
767 | 225k | if (entry->object_num != 0) |
768 | 31.1k | continue; |
769 | | |
770 | 193k | if (sscanf(Buffer, "%"PRIdOFFSET" %d %c", &entry->u.uncompressed.offset, &entry->u.uncompressed.generation_num, &free) != 3) { |
771 | 271 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_ENTRY_FORMAT, "read_xref_section", NULL); |
772 | 271 | dmprintf(ctx->memory, "Invalid xref entry, incorrect format.\n"); |
773 | 271 | pdfi_unread(ctx, s, (byte *)Buffer, 20); |
774 | 271 | code = read_xref_entry_slow(ctx, s, &off, &gen, &free); |
775 | 271 | if (code < 0) |
776 | 184 | return code; |
777 | 87 | code = write_offset((byte *)Buffer, off, gen, free); |
778 | 87 | if (code < 0) |
779 | 0 | return code; |
780 | 87 | } |
781 | | |
782 | 193k | entry->compressed = false; |
783 | 193k | entry->object_num = i + start; |
784 | 193k | if (free == 'f') |
785 | 66.4k | entry->free = true; |
786 | 193k | if(free == 'n') |
787 | 127k | entry->free = false; |
788 | 193k | } |
789 | | |
790 | 10.2k | return 0; |
791 | 10.6k | } |
792 | | |
793 | | static int read_xref(pdf_context *ctx, pdf_c_stream *s) |
794 | 4.30k | { |
795 | 4.30k | int code = 0; |
796 | 4.30k | pdf_dict *d = NULL; |
797 | 4.30k | uint64_t size = 0, max_obj = 0; |
798 | 4.30k | int64_t num; |
799 | 4.30k | int obj_num; |
800 | | |
801 | 4.30k | if (ctx->repaired) |
802 | 1 | return 0; |
803 | | |
804 | 14.5k | do { |
805 | 14.5k | uint64_t section_start, section_size; |
806 | | |
807 | 14.5k | code = read_xref_section(ctx, s, §ion_start, §ion_size); |
808 | 14.5k | if (code < 0) |
809 | 434 | return code; |
810 | | |
811 | 14.1k | if (section_size > 0 && section_start + section_size - 1 > max_obj) |
812 | 9.61k | max_obj = section_start + section_size - 1; |
813 | | |
814 | | /* code == 1 => read_xref_section ended with a trailer. */ |
815 | 14.1k | } while (code != 1); |
816 | | |
817 | 3.87k | code = pdfi_read_dict(ctx, ctx->main_stream, 0, 0); |
818 | 3.87k | if (code < 0) |
819 | 66 | return code; |
820 | | |
821 | 3.80k | d = (pdf_dict *)ctx->stack_top[-1]; |
822 | 3.80k | if (pdfi_type_of(d) != PDF_DICT) { |
823 | 6 | pdfi_pop(ctx, 1); |
824 | 6 | return_error(gs_error_typecheck); |
825 | 6 | } |
826 | | |
827 | 3.79k | if (ctx->Trailer == NULL) { |
828 | 3.19k | ctx->Trailer = d; |
829 | 3.19k | pdfi_countup(d); |
830 | 3.19k | } else { |
831 | 606 | code = pdfi_merge_dicts(ctx, ctx->Trailer, d); |
832 | 606 | if (code < 0) { |
833 | 0 | if (code == gs_error_VMerror || ctx->args.pdfstoponerror) { |
834 | 0 | pdfi_pop(ctx, 1); |
835 | 0 | return code; |
836 | 0 | } |
837 | 0 | } |
838 | 606 | } |
839 | | |
840 | | /* We have the Trailer dictionary. First up check for hybrid files. These have the initial |
841 | | * xref starting at 0 and size of 0. In this case the /Size entry in the trailer dictionary |
842 | | * must tell us how large the xref is, and we need to allocate our xref table anyway. |
843 | | */ |
844 | 3.79k | if (ctx->xref_table == NULL && size == 0) { |
845 | 31 | int64_t size; |
846 | | |
847 | 31 | code = pdfi_dict_get_int(ctx, d, "Size", &size); |
848 | 31 | if (code < 0) { |
849 | 0 | pdfi_pop(ctx, 1); |
850 | 0 | return code; |
851 | 0 | } |
852 | 31 | if (size < 0 || size > floor((double)ARCH_MAX_SIZE_T / (double)sizeof(xref_entry))) { |
853 | 0 | pdfi_pop(ctx, 1); |
854 | 0 | return_error(gs_error_rangecheck); |
855 | 0 | } |
856 | | |
857 | 31 | ctx->xref_table = (xref_table_t *)gs_alloc_bytes(ctx->memory, sizeof(xref_table_t), "read_xref_stream allocate xref table"); |
858 | 31 | if (ctx->xref_table == NULL) { |
859 | 0 | pdfi_pop(ctx, 1); |
860 | 0 | return_error(gs_error_VMerror); |
861 | 0 | } |
862 | 31 | memset(ctx->xref_table, 0x00, sizeof(xref_table_t)); |
863 | | #if REFCNT_DEBUG |
864 | | ctx->xref_table->UID = ctx->ref_UID++; |
865 | | dmprintf1(ctx->memory, "Allocated xref table with UID %"PRIi64"\n", ctx->xref_table->UID); |
866 | | #endif |
867 | | |
868 | 31 | ctx->xref_table->xref = (xref_entry *)gs_alloc_bytes(ctx->memory, size * sizeof(xref_entry), "read_xref_stream allocate xref table entries"); |
869 | 31 | if (ctx->xref_table->xref == NULL){ |
870 | 0 | pdfi_pop(ctx, 1); |
871 | 0 | pdfi_countdown(ctx->xref_table); |
872 | 0 | ctx->xref_table = NULL; |
873 | 0 | return_error(gs_error_VMerror); |
874 | 0 | } |
875 | | |
876 | 31 | memset(ctx->xref_table->xref, 0x00, size * sizeof(xref_entry)); |
877 | 31 | ctx->xref_table->ctx = ctx; |
878 | 31 | ctx->xref_table->type = PDF_XREF_TABLE; |
879 | 31 | ctx->xref_table->xref_size = size; |
880 | 31 | pdfi_countup(ctx->xref_table); |
881 | 31 | } |
882 | | |
883 | | /* Now check if this is a hybrid file. */ |
884 | 3.79k | if (ctx->Trailer == d) { |
885 | 3.19k | code = pdfi_dict_get_int(ctx, d, "XRefStm", &num); |
886 | 3.19k | if (code < 0 && code != gs_error_undefined) { |
887 | 0 | pdfi_pop(ctx, 1); |
888 | 0 | return code; |
889 | 0 | } |
890 | 3.19k | if (code == 0) |
891 | 88 | ctx->is_hybrid = true; |
892 | 3.19k | } else |
893 | 606 | code = gs_error_undefined; |
894 | | |
895 | 3.79k | if (code == 0 && ctx->prefer_xrefstm) { |
896 | 88 | if (ctx->args.pdfdebug) |
897 | 88 | dmprintf(ctx->memory, "%% File is a hybrid, containing xref table and xref stream. Using the stream.\n"); |
898 | | |
899 | | |
900 | 88 | if (pdfi_loop_detector_check_object(ctx, num) == true) { |
901 | 0 | pdfi_pop(ctx, 1); |
902 | 0 | return_error(gs_error_circular_reference); |
903 | 0 | } |
904 | 88 | else { |
905 | 88 | code = pdfi_loop_detector_add_object(ctx, num); |
906 | 88 | if (code < 0) { |
907 | 0 | pdfi_pop(ctx, 1); |
908 | 0 | return code; |
909 | 0 | } |
910 | 88 | } |
911 | | |
912 | 88 | code = pdfi_loop_detector_mark(ctx); |
913 | 88 | if (code < 0) { |
914 | 0 | pdfi_pop(ctx, 1); |
915 | 0 | return code; |
916 | 0 | } |
917 | | /* Because of the way the code works when we read a file which is a pure |
918 | | * xref stream file, we need to read the first integer of 'x y obj' |
919 | | * because the xref stream decoding code expects that to be on the stack. |
920 | | */ |
921 | 88 | pdfi_seek(ctx, s, num, SEEK_SET); |
922 | | |
923 | 88 | code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num); |
924 | 88 | if (code < 0) { |
925 | 8 | pdfi_loop_detector_cleartomark(ctx); |
926 | 8 | pdfi_pop(ctx, 1); |
927 | 8 | return code; |
928 | 8 | } |
929 | | |
930 | 80 | code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num); |
931 | 80 | if (code < 0) { |
932 | 0 | pdfi_loop_detector_cleartomark(ctx); |
933 | 0 | pdfi_pop(ctx, 1); |
934 | 0 | return code; |
935 | 0 | } |
936 | | |
937 | | /* This can happen if pdfi_read_xref_stream tries to repair a broken PDF file */ |
938 | 80 | if (d != ctx->Trailer) |
939 | 6 | d = ctx->Trailer; |
940 | | |
941 | 80 | pdfi_loop_detector_cleartomark(ctx); |
942 | 80 | } |
943 | | |
944 | | /* Not a hybrid file, so now check if this is a modified file and has |
945 | | * previous xref entries. |
946 | | */ |
947 | | /* But first, check if the highest subsection + size exceeds the /Size in the |
948 | | * trailer dictionary and set a warning flag if it does |
949 | | */ |
950 | 3.79k | code = pdfi_dict_get_int(ctx, d, "Size", &num); |
951 | 3.79k | if (code < 0) { |
952 | 5 | pdfi_pop(ctx, 1); |
953 | 5 | return code; |
954 | 5 | } |
955 | 3.78k | if (max_obj > num) |
956 | 248 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_XREF_SIZE, "read_xref", NULL); |
957 | | |
958 | 3.78k | code = pdfi_dict_get_int(ctx, d, "Prev", &num); |
959 | 3.78k | if (code < 0) { |
960 | 2.58k | pdfi_pop(ctx, 1); |
961 | 2.58k | if (code == gs_error_undefined) |
962 | 2.57k | return 0; |
963 | 5 | else |
964 | 5 | return code; |
965 | 2.58k | } |
966 | 1.20k | pdfi_pop(ctx, 1); |
967 | | |
968 | 1.20k | if (num < 0 || num > ctx->main_stream_length) |
969 | 236 | return_error(gs_error_rangecheck); |
970 | | |
971 | 968 | if (pdfi_loop_detector_check_object(ctx, num) == true) |
972 | 0 | return_error(gs_error_circular_reference); |
973 | 968 | else { |
974 | 968 | code = pdfi_loop_detector_add_object(ctx, num); |
975 | 968 | if (code < 0) |
976 | 0 | return code; |
977 | 968 | } |
978 | | |
979 | 968 | code = pdfi_seek(ctx, s, num, SEEK_SET); |
980 | 968 | if (code < 0) |
981 | 0 | return code; |
982 | | |
983 | 968 | if (!ctx->repaired) { |
984 | 965 | code = pdfi_read_token(ctx, ctx->main_stream, 0, 0); |
985 | 965 | if (code < 0) |
986 | 21 | return(code); |
987 | 944 | if (code == 0) |
988 | 0 | return_error(gs_error_syntaxerror); |
989 | 944 | } else |
990 | 3 | return 0; |
991 | | |
992 | 944 | if ((intptr_t)(ctx->stack_top[-1]) == (intptr_t)TOKEN_XREF) { |
993 | | /* Read old-style xref table */ |
994 | 608 | pdfi_pop(ctx, 1); |
995 | 608 | return(read_xref(ctx, ctx->main_stream)); |
996 | 608 | } else { |
997 | 336 | pdfi_pop(ctx, 1); |
998 | 336 | return_error(gs_error_typecheck); |
999 | 336 | } |
1000 | 944 | } |
1001 | | |
1002 | | int pdfi_read_xref(pdf_context *ctx) |
1003 | 40.7k | { |
1004 | 40.7k | int code = 0; |
1005 | 40.7k | int obj_num; |
1006 | | |
1007 | 40.7k | code = pdfi_loop_detector_mark(ctx); |
1008 | 40.7k | if (code < 0) |
1009 | 0 | return code; |
1010 | | |
1011 | 40.7k | if (ctx->startxref == 0) |
1012 | 24.5k | goto repair; |
1013 | | |
1014 | 16.2k | code = pdfi_loop_detector_add_object(ctx, ctx->startxref); |
1015 | 16.2k | if (code < 0) |
1016 | 0 | goto exit; |
1017 | | |
1018 | 16.2k | if (ctx->args.pdfdebug) |
1019 | 16.2k | dmprintf(ctx->memory, "%% Trying to read 'xref' token for xref table, or 'int int obj' for an xref stream\n"); |
1020 | | |
1021 | 16.2k | if (ctx->startxref > ctx->main_stream_length - 5) { |
1022 | 3.93k | pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is beyond end of file"); |
1023 | 3.93k | goto repair; |
1024 | 3.93k | } |
1025 | 12.3k | if (ctx->startxref < 0) { |
1026 | 48 | pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"startxref offset is before start of file"); |
1027 | 48 | goto repair; |
1028 | 48 | } |
1029 | | |
1030 | | /* Read the xref(s) */ |
1031 | 12.2k | pdfi_seek(ctx, ctx->main_stream, ctx->startxref, SEEK_SET); |
1032 | | |
1033 | | /* If it starts with an int, it's an xref stream dict */ |
1034 | 12.2k | code = pdfi_read_bare_int(ctx, ctx->main_stream, &obj_num); |
1035 | 12.2k | if (code == 1) { |
1036 | 5.56k | code = pdfi_read_xref_stream_dict(ctx, ctx->main_stream, obj_num); |
1037 | 5.56k | if (code < 0) |
1038 | 63 | goto repair; |
1039 | 6.71k | } else { |
1040 | | /* If not, it had better start 'xref', and be an old-style xref table */ |
1041 | 6.71k | code = pdfi_read_bare_keyword(ctx, ctx->main_stream); |
1042 | 6.71k | if (code != TOKEN_XREF) { |
1043 | 3.03k | pdfi_set_error(ctx, 0, NULL, E_PDF_BADSTARTXREF, "pdfi_read_xref", (char *)"Failed to read any token at the startxref location"); |
1044 | 3.03k | goto repair; |
1045 | 3.03k | } |
1046 | | |
1047 | 3.67k | code = read_xref(ctx, ctx->main_stream); |
1048 | 3.67k | if (code < 0) |
1049 | 1.11k | goto repair; |
1050 | 3.67k | } |
1051 | | |
1052 | 8.06k | if(ctx->args.pdfdebug && ctx->xref_table) { |
1053 | 0 | int i, j; |
1054 | 0 | xref_entry *entry; |
1055 | 0 | char Buffer[32]; |
1056 | |
|
1057 | 0 | dmprintf(ctx->memory, "\n%% Dumping xref table\n"); |
1058 | 0 | for (i=0;i < ctx->xref_table->xref_size;i++) { |
1059 | 0 | entry = &ctx->xref_table->xref[i]; |
1060 | 0 | if(entry->compressed) { |
1061 | 0 | dmprintf(ctx->memory, "*"); |
1062 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%"PRId64"", entry->object_num); |
1063 | 0 | j = 10 - strlen(Buffer); |
1064 | 0 | while(j--) { |
1065 | 0 | dmprintf(ctx->memory, " "); |
1066 | 0 | } |
1067 | 0 | dmprintf1(ctx->memory, "%s ", Buffer); |
1068 | |
|
1069 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.compressed_stream_num); |
1070 | 0 | j = 10 - strlen(Buffer); |
1071 | 0 | while(j--) { |
1072 | 0 | dmprintf(ctx->memory, " "); |
1073 | 0 | } |
1074 | 0 | dmprintf1(ctx->memory, "%s ", Buffer); |
1075 | |
|
1076 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.compressed.object_index); |
1077 | 0 | j = 10 - strlen(Buffer); |
1078 | 0 | while(j--) { |
1079 | 0 | dmprintf(ctx->memory, " "); |
1080 | 0 | } |
1081 | 0 | dmprintf1(ctx->memory, "%s ", Buffer); |
1082 | 0 | } |
1083 | 0 | else { |
1084 | 0 | dmprintf(ctx->memory, " "); |
1085 | |
|
1086 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->object_num); |
1087 | 0 | j = 10 - strlen(Buffer); |
1088 | 0 | while(j--) { |
1089 | 0 | dmprintf(ctx->memory, " "); |
1090 | 0 | } |
1091 | 0 | dmprintf1(ctx->memory, "%s ", Buffer); |
1092 | |
|
1093 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%"PRIdOFFSET"", entry->u.uncompressed.offset); |
1094 | 0 | j = 10 - strlen(Buffer); |
1095 | 0 | while(j--) { |
1096 | 0 | dmprintf(ctx->memory, " "); |
1097 | 0 | } |
1098 | 0 | dmprintf1(ctx->memory, "%s ", Buffer); |
1099 | |
|
1100 | 0 | gs_snprintf(Buffer, sizeof(Buffer), "%ld", entry->u.uncompressed.generation_num); |
1101 | 0 | j = 10 - strlen(Buffer); |
1102 | 0 | while(j--) { |
1103 | 0 | dmprintf(ctx->memory, " "); |
1104 | 0 | } |
1105 | 0 | dmprintf1(ctx->memory, "%s ", Buffer); |
1106 | 0 | } |
1107 | 0 | if (entry->free) |
1108 | 0 | dmprintf(ctx->memory, "f\n"); |
1109 | 0 | else |
1110 | 0 | dmprintf(ctx->memory, "n\n"); |
1111 | 0 | } |
1112 | 0 | } |
1113 | 8.06k | if (ctx->args.pdfdebug) |
1114 | 8.06k | dmprintf(ctx->memory, "\n"); |
1115 | | |
1116 | 8.06k | exit: |
1117 | 8.06k | (void)pdfi_loop_detector_cleartomark(ctx); |
1118 | | |
1119 | 8.06k | if (code < 0) |
1120 | 0 | return code; |
1121 | | |
1122 | 8.06k | return 0; |
1123 | | |
1124 | 32.7k | repair: |
1125 | 32.7k | (void)pdfi_loop_detector_cleartomark(ctx); |
1126 | 32.7k | if (!ctx->repaired) |
1127 | 32.6k | return(pdfi_repair_file(ctx)); |
1128 | 68 | return 0; |
1129 | 32.7k | } |