/src/ghostpdl/pdf/pdf_doc.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2020-2022 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | /* Functions to deal with PDF structure, such as retrieving |
17 | | * the Info, Catalog, Root dictionaries, and finding resources |
18 | | * and page dictionaries. |
19 | | */ |
20 | | |
21 | | #include "ghostpdf.h" |
22 | | #include "pdf_stack.h" |
23 | | #include "pdf_deref.h" |
24 | | #include "pdf_array.h" |
25 | | #include "pdf_dict.h" |
26 | | #include "pdf_loop_detect.h" |
27 | | #include "pdf_misc.h" |
28 | | #include "pdf_repair.h" |
29 | | #include "pdf_doc.h" |
30 | | #include "pdf_mark.h" |
31 | | #include "pdf_colour.h" |
32 | | #include "pdf_device.h" |
33 | | |
/* Locate the document Catalog ("Root") through the Trailer dictionary and
 * store it in ctx->Root.
 *
 * The Trailer's /Root entry may be an indirect reference; in that case it is
 * dereferenced and the resolved dictionary is written back into the Trailer.
 * A candidate is accepted when it is a dictionary whose /Type is /Catalog, or
 * when /Type is missing or wrong but a /Pages entry is present.
 *
 * Returns 0 on success or a negative error code. On several failure paths the
 * function returns 0 when ctx->Root is already non-NULL (per the comments
 * below, a Root found while repairing the file), leaving that Root in place.
 *
 * On success the reference held in o1 is transferred to ctx->Root; any
 * previous ctx->Root is counted down first.
 */
int pdfi_read_Root(pdf_context *ctx)
{
    pdf_obj *o, *o1;
    pdf_dict *d;
    int code;

    if (ctx->args.pdfdebug)
        dmprintf(ctx->memory, "%% Reading Root dictionary\n");

    /* Unusual code. This is because if the entry in the trailer dictionary causes
     * us to repair the file, the Trailer dictionary in the context can be replaced.
     * This counts it down and frees it, potentially while pdfi_dict_get is still
     * using it! Rather than countup and down in the dict_get routine, which is
     * normally unnecessary, count it up and down round the access here.
     */
    d = ctx->Trailer;
    pdfi_countup(d);
    code = pdfi_dict_get(ctx, d, "Root", &o1);
    if (code < 0) {
        pdfi_countdown(d);
        return code;
    }
    pdfi_countdown(d);

    if (pdfi_type_of(o1) == PDF_INDIRECT) {
        /* Resolve the reference ourselves; the reference object itself is
         * released whether or not the dereference succeeds.
         */
        code = pdfi_dereference(ctx, ((pdf_indirect_ref *)o1)->ref_object_num, ((pdf_indirect_ref *)o1)->ref_generation_num, &o);
        pdfi_countdown(o1);
        if (code < 0)
            return code;

        if (pdfi_type_of(o) != PDF_DICT) {
            pdfi_countdown(o);
            return_error(gs_error_typecheck);
        }

        /* Replace the indirect reference in the Trailer with the resolved dictionary */
        code = pdfi_dict_put(ctx, ctx->Trailer, "Root", o);
        if (code < 0) {
            pdfi_countdown(o);
            return code;
        }
        o1 = o;
    } else {
        if (pdfi_type_of(o1) != PDF_DICT) {
            pdfi_countdown(o1);
            /* Not a usable Root; only an error if we have no Root already */
            if (ctx->Root == NULL)
                return_error(gs_error_typecheck);
            return 0;
        }
    }

    code = pdfi_dict_get_type(ctx, (pdf_dict *)o1, "Type", PDF_NAME, &o);
    if (code < 0) {
        bool known = false;

        pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGTYPE, "pdfi_read_Root", NULL);

        /* Missing the *required* /Type key! See if it has /Pages at least, if it does carry on */
        code = pdfi_dict_known(ctx, (pdf_dict *)o1, "Pages", &known);
        /* NOTE(review): when the lookup itself succeeds but /Pages is absent,
         * this returns the non-negative code from pdfi_dict_known without
         * installing o1 as ctx->Root and without checking whether ctx->Root
         * is NULL, unlike the other rejection paths in this function.
         * Confirm that callers check ctx->Root.
         */
        if (code < 0 || known == false) {
            pdfi_countdown(o1);
            return code;
        }
    }
    else {
        if (pdfi_name_strcmp((pdf_name *)o, "Catalog") != 0){
            pdf_obj *pages = NULL;

            /* Bug #706038 has a Root dictionary with /Type /Calalog which (of course) Acrobat
             * happily opens :-( So if we find a /Type and it's not /Catalog, try and see if
             * we have a /Pages key (the only other required entry). If we do assume that the
             * Type is wrong and use this dictionary, otherwise fall back to seeing if we
             * have a repaired Root. See above for handling a missing /Type.....
             */
            code = pdfi_dict_get_type(ctx, (pdf_dict *)o1, "Pages", PDF_DICT, &pages);
            if (code < 0) {
                pdfi_countdown(o);
                pdfi_countdown(o1);
                /* If we repaired the file, we may already have spotted a potential Root dictionary
                 * so if the one we found here isn't valid, try the one we found when scanning
                 */
                if (ctx->Root == NULL) {
                    pdfi_set_error(ctx, 0, NULL, E_PDF_NO_ROOT, "pdfi_read_Root", NULL);
                    return_error(gs_error_syntaxerror);
                }
                return 0;
            }
            /* /Pages was only fetched as a validity probe; drop it again */
            pdfi_countdown(pages);
            pdfi_set_error(ctx, 0, NULL, E_PDF_BAD_ROOT_TYPE, "pdfi_read_Root", NULL);
        }
        /* Release the /Type name */
        pdfi_countdown(o);
    }

    if (ctx->args.pdfdebug)
        dmprintf(ctx->memory, "\n");
    /* We don't pdfi_countdown(o1) now, because we've transferred our
     * reference to the pointer in the pdf_context structure.
     */
    pdfi_countdown(ctx->Root); /* If file was repaired it might be set already */
    ctx->Root = (pdf_dict *)o1;
    return 0;
}
135 | | |
136 | | static int Info_check_dict(pdf_context *ctx, pdf_dict *d); |
137 | | |
138 | | static int Info_check_array(pdf_context *ctx, pdf_array *a) |
139 | 1.02k | { |
140 | 1.02k | int code = 0, i = 0; |
141 | 1.02k | pdf_obj *array_obj = NULL; |
142 | | |
143 | 1.02k | code = pdfi_loop_detector_mark(ctx); |
144 | 1.02k | if (code < 0) |
145 | 0 | return code; |
146 | | |
147 | 5.54k | for (i = 0;i < pdfi_array_size(a); i++) { |
148 | 4.57k | code = pdfi_array_fetch_recursing(ctx, a, i, &array_obj, true, true); |
149 | 4.57k | if (code < 0) |
150 | 25 | goto error; |
151 | | |
152 | 4.54k | switch(pdfi_type_of(array_obj)) { |
153 | 46 | case PDF_DICT: |
154 | 46 | if (array_obj->object_num != 0) { |
155 | 46 | code = pdfi_loop_detector_add_object(ctx, array_obj->object_num); |
156 | 46 | if (code < 0) |
157 | 0 | goto error; |
158 | 46 | } |
159 | 46 | code = Info_check_dict(ctx, (pdf_dict *)array_obj); |
160 | 46 | if (code < 0) |
161 | 26 | goto error; |
162 | 20 | break; |
163 | 169 | case PDF_ARRAY: |
164 | 169 | if (array_obj->object_num != 0) { |
165 | 0 | code = pdfi_loop_detector_add_object(ctx, array_obj->object_num); |
166 | 0 | if (code < 0) |
167 | 0 | goto error; |
168 | 0 | } |
169 | 169 | code = Info_check_array(ctx, (pdf_array *)array_obj); |
170 | 169 | if (code < 0) |
171 | 0 | goto error; |
172 | 169 | break; |
173 | 4.33k | default: |
174 | 4.33k | break; |
175 | 4.54k | } |
176 | | |
177 | 4.52k | pdfi_countdown(array_obj); |
178 | 4.52k | array_obj = NULL; |
179 | 4.52k | } |
180 | 1.02k | error: |
181 | 1.02k | pdfi_countdown(array_obj); |
182 | 1.02k | pdfi_loop_detector_cleartomark(ctx); |
183 | 1.02k | return code; |
184 | 1.02k | } |
185 | | |
186 | | static int Info_check_dict(pdf_context *ctx, pdf_dict *d) |
187 | 312 | { |
188 | 312 | int code = 0; |
189 | 312 | uint64_t index = 0; |
190 | 312 | pdf_name *Key = NULL; |
191 | 312 | pdf_obj *Value = NULL; |
192 | | |
193 | 312 | code = pdfi_loop_detector_mark(ctx); |
194 | 312 | if (code < 0) |
195 | 0 | return code; |
196 | | |
197 | 312 | code = pdfi_dict_first(ctx, d, (pdf_obj **)&Key, &Value, &index); |
198 | 312 | if (code < 0) { |
199 | 19 | if (code == gs_error_undefined) |
200 | 8 | code = 0; |
201 | 19 | goto error; |
202 | 19 | } |
203 | | |
204 | 1.26k | while (code >= 0) { |
205 | 1.24k | switch(pdfi_type_of(Value)) { |
206 | 168 | case PDF_DICT: |
207 | 168 | if (Value->object_num != 0) { |
208 | 98 | code = pdfi_loop_detector_add_object(ctx, Value->object_num); |
209 | 98 | if (code < 0) |
210 | 0 | goto error; |
211 | 98 | } |
212 | 168 | code = Info_check_dict(ctx, (pdf_dict *)Value); |
213 | 168 | if (code < 0) |
214 | 42 | goto error; |
215 | 126 | break; |
216 | 181 | case PDF_ARRAY: |
217 | 181 | if (Value->object_num != 0) { |
218 | 19 | code = pdfi_loop_detector_add_object(ctx, Value->object_num); |
219 | 19 | if (code < 0) |
220 | 0 | goto error; |
221 | 19 | } |
222 | 181 | code = Info_check_array(ctx, (pdf_array *)Value); |
223 | 181 | if (code < 0) |
224 | 37 | goto error; |
225 | 144 | break; |
226 | 898 | default: |
227 | 898 | break; |
228 | 1.24k | } |
229 | 1.16k | pdfi_countdown(Key); |
230 | 1.16k | Key = NULL; |
231 | 1.16k | pdfi_countdown(Value); |
232 | 1.16k | Value = NULL; |
233 | | |
234 | 1.16k | code = pdfi_dict_next(ctx, d, (pdf_obj **)&Key, &Value, &index); |
235 | 1.16k | if (code == gs_error_undefined) { |
236 | 192 | code = 0; |
237 | 192 | break; |
238 | 192 | } |
239 | 1.16k | } |
240 | 312 | error: |
241 | 312 | pdfi_countdown(Key); |
242 | 312 | pdfi_countdown(Value); |
243 | 312 | pdfi_loop_detector_cleartomark(ctx); |
244 | 312 | return code; |
245 | 293 | } |
246 | | |
247 | | static int pdfi_sanitize_Info_references(pdf_context *ctx, pdf_dict *Info) |
248 | 10.0k | { |
249 | 10.0k | int code = 0; |
250 | 10.0k | uint64_t index = 0; |
251 | 10.0k | pdf_name *Key = NULL; |
252 | 10.0k | pdf_obj *Value = NULL; |
253 | | |
254 | 10.0k | restart_scan: |
255 | 10.0k | code = pdfi_loop_detector_mark(ctx); |
256 | 10.0k | if (code < 0) |
257 | 0 | return code; |
258 | | |
259 | 10.0k | code = pdfi_dict_first(ctx, Info, (pdf_obj **)&Key, &Value, &index); |
260 | 10.0k | if (code == gs_error_undefined) { |
261 | 72 | code = 0; |
262 | 72 | goto error; |
263 | 72 | } |
264 | | |
265 | 52.6k | while (code >= 0) { |
266 | 52.6k | switch(pdfi_type_of(Value)) { |
267 | 98 | case PDF_DICT: |
268 | 98 | code = Info_check_dict(ctx, (pdf_dict *)Value); |
269 | 98 | break; |
270 | 670 | case PDF_ARRAY: |
271 | 670 | code = Info_check_array(ctx, (pdf_array *)Value); |
272 | 670 | break; |
273 | 51.8k | default: |
274 | 51.8k | code = 0; |
275 | 51.8k | break; |
276 | 52.6k | } |
277 | 52.6k | pdfi_countdown(Value); |
278 | 52.6k | Value = NULL; |
279 | 52.6k | if (code < 0) { |
280 | 58 | code = pdfi_dict_delete_pair(ctx, Info, Key); |
281 | 58 | if (code < 0) |
282 | 0 | goto error; |
283 | 58 | pdfi_countdown(Key); |
284 | 58 | Key = NULL; |
285 | | |
286 | 58 | pdfi_loop_detector_cleartomark(ctx); |
287 | 58 | goto restart_scan; |
288 | 58 | } |
289 | 52.5k | pdfi_countdown(Key); |
290 | 52.5k | Key = NULL; |
291 | | |
292 | 52.5k | pdfi_loop_detector_cleartomark(ctx); |
293 | 52.5k | code = pdfi_loop_detector_mark(ctx); |
294 | 52.5k | if (code < 0) { |
295 | 0 | pdfi_countdown(Key); |
296 | 0 | pdfi_countdown(Value); |
297 | 0 | return code; |
298 | 0 | } |
299 | | |
300 | 52.5k | code = pdfi_dict_next(ctx, Info, (pdf_obj **)&Key, &Value, &index); |
301 | 52.5k | if (code == gs_error_undefined) { |
302 | 9.90k | code = 0; |
303 | 9.90k | break; |
304 | 9.90k | } |
305 | 52.5k | } |
306 | 10.0k | error: |
307 | 10.0k | pdfi_countdown(Key); |
308 | 10.0k | pdfi_countdown(Value); |
309 | 10.0k | pdfi_loop_detector_cleartomark(ctx); |
310 | 10.0k | return code; |
311 | 9.98k | } |
312 | | |
313 | | int pdfi_read_Info(pdf_context *ctx) |
314 | 13.7k | { |
315 | 13.7k | pdf_dict *Info; |
316 | 13.7k | int code; |
317 | 13.7k | pdf_dict *d; |
318 | | |
319 | 13.7k | if (ctx->args.pdfdebug) |
320 | 13.7k | dmprintf(ctx->memory, "%% Reading Info dictionary\n"); |
321 | | |
322 | | /* See comment in pdfi_read_Root() for details */ |
323 | 13.7k | d = ctx->Trailer; |
324 | 13.7k | pdfi_countup(d); |
325 | 13.7k | code = pdfi_dict_get_type(ctx, ctx->Trailer, "Info", PDF_DICT, (pdf_obj **)&Info); |
326 | 13.7k | pdfi_countdown(d); |
327 | 13.7k | if (code < 0) |
328 | 3.79k | return code; |
329 | | |
330 | 10.0k | if (ctx->args.pdfdebug) |
331 | 10.0k | dmprintf(ctx->memory, "\n"); |
332 | | |
333 | 10.0k | code = pdfi_loop_detector_mark(ctx); |
334 | 10.0k | if (code < 0) |
335 | 0 | goto error; |
336 | 10.0k | code = pdfi_loop_detector_add_object(ctx, Info->object_num); |
337 | 10.0k | if (code < 0) |
338 | 0 | goto error1; |
339 | | |
340 | | /* sanitize Info for circular references */ |
341 | 10.0k | code = pdfi_sanitize_Info_references(ctx, Info); |
342 | 10.0k | if (code < 0) |
343 | 31 | goto error1; |
344 | | |
345 | 9.97k | (void)pdfi_loop_detector_cleartomark(ctx); |
346 | | |
347 | 9.97k | pdfi_pdfmark_write_docinfo(ctx, Info); |
348 | | |
349 | | /* We don't pdfi_countdown(Info) now, because we've transferred our |
350 | | * reference to the pointer in the pdf_context structure. |
351 | | */ |
352 | 9.97k | ctx->Info = Info; |
353 | 9.97k | return 0; |
354 | | |
355 | 31 | error1: |
356 | 31 | pdfi_loop_detector_cleartomark(ctx); |
357 | 31 | error: |
358 | 31 | pdfi_countdown(Info); |
359 | 31 | return code; |
360 | 31 | } |
361 | | |
362 | | int pdfi_read_Pages(pdf_context *ctx) |
363 | 27.3k | { |
364 | 27.3k | pdf_obj *o, *o1; |
365 | 27.3k | pdf_array *a = NULL; |
366 | 27.3k | int code, pagecount = 0; |
367 | 27.3k | double d; |
368 | | |
369 | 27.3k | if (ctx->args.pdfdebug) |
370 | 27.3k | dmprintf(ctx->memory, "%% Reading Pages dictionary\n"); |
371 | | |
372 | 27.3k | code = pdfi_dict_get(ctx, ctx->Root, "Pages", &o1); |
373 | 27.3k | if (code < 0) |
374 | 1.58k | return code; |
375 | | |
376 | 25.7k | if (pdfi_type_of(o1) == PDF_INDIRECT) { |
377 | 46 | code = pdfi_dereference(ctx, ((pdf_indirect_ref *)o1)->ref_object_num, ((pdf_indirect_ref *)o1)->ref_generation_num, &o); |
378 | 46 | pdfi_countdown(o1); |
379 | 46 | if (code < 0) |
380 | 17 | return code; |
381 | | |
382 | 29 | if (pdfi_type_of(o) != PDF_DICT) { |
383 | 10 | pdfi_countdown(o); |
384 | 10 | if (pdfi_type_of(o) == PDF_INDIRECT) |
385 | 1 | pdfi_set_error(ctx, 0, NULL, E_PDF_BADPAGEDICT, "pdfi_read_Pages", (char *)"*** Error: Something is wrong with the Pages dictionary. Giving up."); |
386 | 9 | else |
387 | 9 | pdfi_set_error(ctx, 0, NULL, E_PDF_BADPAGEDICT, "pdfi_read_Pages", (char *)"*** Error: Something is wrong with the Pages dictionary. Giving up.\n Double indirect reference. Loop in Pages tree?"); |
388 | 10 | return_error(gs_error_typecheck); |
389 | 10 | } |
390 | | |
391 | 19 | code = pdfi_dict_put(ctx, ctx->Root, "Pages", o); |
392 | 19 | if (code < 0) { |
393 | 0 | pdfi_countdown(o); |
394 | 0 | return code; |
395 | 0 | } |
396 | 19 | o1 = o; |
397 | 25.6k | } else { |
398 | 25.6k | if (pdfi_type_of(o1) != PDF_DICT) { |
399 | 251 | pdfi_countdown(o1); |
400 | 251 | return_error(gs_error_typecheck); |
401 | 251 | } |
402 | 25.6k | } |
403 | | |
404 | 25.4k | if (ctx->args.pdfdebug) |
405 | 25.4k | dmprintf(ctx->memory, "\n"); |
406 | | |
407 | | /* Acrobat allows the Pages Count to be a floating point number (!) */ |
408 | | /* sample w_a.PDF from Bug688419 (not on the cluster, maybe it should be?) has no /Count entry because |
409 | | * The Root dictionary Pages key points directly to a single dictionary of type /Page. This is plainly |
410 | | * illegal but Acrobat can deal with it. We do so by ignoring the error her, and adding logic in |
411 | | * pdfi_get_page_dict() which notes that ctx->PagesTree is NULL and tries to get the single Page |
412 | | * dictionary from the Root instead of using the PagesTree. |
413 | | */ |
414 | 25.4k | code = pdfi_dict_get_number(ctx, (pdf_dict *)o1, "Count", &d); |
415 | 25.4k | if (code < 0) { |
416 | 221 | if (code == gs_error_undefined) { |
417 | 201 | pdf_name *n = NULL; |
418 | | /* It may be that the Root dictionary Pages entry points directly to a sinlge Page dictionary |
419 | | * See if the dictionary has a Type of /Page, if so don't throw an error and the pdf_page.c |
420 | | * logic in pdfi_get_page_dict() logic will take care of it. |
421 | | */ |
422 | 201 | code = pdfi_dict_get_type(ctx, (pdf_dict *)o1, "Type", PDF_NAME, (pdf_obj **)&n); |
423 | 201 | if (code == 0) { |
424 | 186 | if(pdfi_name_is(n, "Page")) { |
425 | 122 | ctx->num_pages = 1; |
426 | 122 | code = 0; |
427 | 122 | } |
428 | 64 | else |
429 | 64 | code = gs_error_undefined; |
430 | 186 | pdfi_countdown(n); |
431 | 186 | } |
432 | 201 | } |
433 | 221 | pdfi_countdown(o1); |
434 | 221 | return code; |
435 | 221 | } |
436 | | |
437 | 25.2k | if (floor(d) != d) { |
438 | 0 | pdfi_countdown(o1); |
439 | 0 | return_error(gs_error_rangecheck); |
440 | 25.2k | } else { |
441 | 25.2k | ctx->num_pages = (int)floor(d); |
442 | 25.2k | } |
443 | | |
444 | | /* A simple confidence check in the value of Count. We only do this because |
445 | | * the OSS-fuzz tool keeps on coming up with files that time out because the |
446 | | * initial Count is insanely huge, and we spend much time trying to find |
447 | | * millions of pages which don't exist. |
448 | | */ |
449 | 25.2k | code = pdfi_dict_knownget_type(ctx, (pdf_dict *)o1, "Kids", PDF_ARRAY, (pdf_obj **)&a); |
450 | 25.2k | if (code == 0) |
451 | 17 | code = gs_note_error(gs_error_undefined); |
452 | 25.2k | if (code < 0) { |
453 | 17 | pdfi_countdown(o1); |
454 | 17 | return code; |
455 | 17 | } |
456 | | |
457 | | /* Firstly check if the Kids array has enough nodes, in which case it's |
458 | | * probably flat (the common case) |
459 | | */ |
460 | 25.2k | if (a->size != ctx->num_pages) { |
461 | 510 | int i = 0; |
462 | 510 | pdf_obj *p = NULL, *p1 = NULL; |
463 | 510 | pdf_num *c = NULL; |
464 | | |
465 | | /* Either its not a flat tree, or the top node /Count is incorrect. |
466 | | * Get each entry in the Kids array in turn and total the /Count of |
467 | | * each node and add any leaf nodes. |
468 | | */ |
469 | 1.60k | for (i=0;i < a->size; i++) { |
470 | 1.09k | code = pdfi_array_get(ctx, a, i, &p); |
471 | 1.09k | if (code < 0) |
472 | 73 | continue; |
473 | 1.02k | if (pdfi_type_of(p) != PDF_DICT) { |
474 | 29 | pdfi_countdown(p); |
475 | 29 | p = NULL; |
476 | 29 | continue; |
477 | 29 | } |
478 | | /* Explicit check that the root node Kids array entry is not a self-reference |
479 | | * back to the root node. We only check one level of the Kids array. so we don't |
480 | | * need a full loop detection setup here. |
481 | | */ |
482 | 993 | if (p->object_num != 0 && p->object_num == o1->object_num) { |
483 | 3 | pdfi_countdown(p); |
484 | 3 | p = NULL; |
485 | 3 | ctx->num_pages = 0; |
486 | 3 | return_error(gs_error_circular_reference); |
487 | 3 | } |
488 | 990 | code = pdfi_dict_knownget_type(ctx, (pdf_dict *)p, "Type", PDF_NAME, (pdf_obj **)&p1); |
489 | 990 | if (code <= 0) { |
490 | 4 | pdfi_countdown(p); |
491 | 4 | p = NULL; |
492 | 4 | continue; |
493 | 4 | } |
494 | 986 | if (pdfi_name_is((pdf_name *)p1, "Page")) { |
495 | 123 | pagecount++; |
496 | 863 | } else { |
497 | 863 | if (pdfi_name_is((pdf_name *)p1, "Pages")) { |
498 | 861 | code = pdfi_dict_knownget(ctx, (pdf_dict *)p, "Count", (pdf_obj **)&c); |
499 | 861 | if (code >= 0) { |
500 | 861 | if (pdfi_type_of(c) == PDF_INT) |
501 | 861 | pagecount += c->value.i; |
502 | 861 | if (pdfi_type_of(c) == PDF_REAL) |
503 | 0 | pagecount += (int)c->value.d; |
504 | 861 | pdfi_countdown(c); |
505 | 861 | c = NULL; |
506 | 861 | } |
507 | 861 | } |
508 | 863 | } |
509 | 986 | pdfi_countdown(p1); |
510 | 986 | p1 = NULL; |
511 | 986 | pdfi_countdown(p); |
512 | 986 | p = NULL; |
513 | 986 | } |
514 | 510 | } else |
515 | 24.7k | pagecount = a->size; |
516 | | |
517 | 25.2k | pdfi_countdown(a); |
518 | | |
519 | | /* If the count of the top level of the tree doesn't match the /Count |
520 | | * of the root node then something is wrong. We could abort right now |
521 | | * and will if this continues to be a problem, but initially let's assume |
522 | | * the count of the top level is correct and the root node /Count is wrong. |
523 | | * This will allow us to recover if only the root /Count gets corrupted. |
524 | | * In future we could also try validating the entire tree at this point, |
525 | | * though I suspect that's pointless; if the tree is corrupted we aren't |
526 | | * likely to get much that's usable from it. |
527 | | */ |
528 | 25.2k | if (pagecount != ctx->num_pages) { |
529 | 212 | ctx->num_pages = pagecount; |
530 | 212 | code = pdfi_dict_put_int(ctx, (pdf_dict *)o1, "Count", ctx->num_pages); |
531 | 212 | pdfi_set_error(ctx, 0, NULL, E_PDF_BADPAGECOUNT, "pdfi_read_Pages", NULL); |
532 | 212 | } |
533 | | |
534 | | /* We don't pdfi_countdown(o1) now, because we've transferred our |
535 | | * reference to the pointer in the pdf_context structure. |
536 | | */ |
537 | 25.2k | ctx->PagesTree = (pdf_dict *)o1; |
538 | 25.2k | return 0; |
539 | 25.2k | } |
540 | | |
541 | | /* Read optional things in from Root */ |
542 | | void pdfi_read_OptionalRoot(pdf_context *ctx) |
543 | 25.3k | { |
544 | 25.3k | pdf_obj *obj = NULL; |
545 | 25.3k | int code; |
546 | 25.3k | bool known; |
547 | | |
548 | 25.3k | if (ctx->args.pdfdebug) |
549 | 25.3k | dmprintf(ctx->memory, "%% Reading other Root contents\n"); |
550 | | |
551 | 25.3k | if (ctx->args.pdfdebug) |
552 | 25.3k | dmprintf(ctx->memory, "%% OCProperties\n"); |
553 | 25.3k | code = pdfi_dict_get_type(ctx, ctx->Root, "OCProperties", PDF_DICT, &obj); |
554 | 25.3k | if (code == 0) { |
555 | 775 | ctx->OCProperties = (pdf_dict *)obj; |
556 | 24.5k | } else { |
557 | 24.5k | ctx->OCProperties = NULL; |
558 | 24.5k | if (ctx->args.pdfdebug) |
559 | 24.5k | dmprintf(ctx->memory, "%% (None)\n"); |
560 | 24.5k | } |
561 | | |
562 | 25.3k | (void)pdfi_dict_known(ctx, ctx->Root, "Collection", &known); |
563 | | |
564 | 25.3k | if (known) { |
565 | 0 | if (ctx->args.pdfdebug) |
566 | 0 | dmprintf(ctx->memory, "%% Collection\n"); |
567 | 0 | code = pdfi_dict_get(ctx, ctx->Root, "Collection", (pdf_obj **)&ctx->Collection); |
568 | 0 | if (code < 0) |
569 | 0 | dmprintf(ctx->memory, "\n **** Warning: Failed to read Collection information.\n"); |
570 | 0 | } |
571 | | |
572 | 25.3k | } |
573 | | |
574 | | void pdfi_free_OptionalRoot(pdf_context *ctx) |
575 | 40.8k | { |
576 | 40.8k | if (ctx->OCProperties) { |
577 | 775 | pdfi_countdown(ctx->OCProperties); |
578 | 775 | ctx->OCProperties = NULL; |
579 | 775 | } |
580 | 40.8k | if (ctx->Collection) { |
581 | 0 | pdfi_countdown(ctx->Collection); |
582 | 0 | ctx->Collection = NULL; |
583 | 0 | } |
584 | 40.8k | } |
585 | | |
/* Handle child node processing for page_dict.
 *
 * Fetches entry 'i' of the Kids array without dereferencing it and hands the
 * corresponding dictionary back through *pchild with a reference the caller
 * must release.
 *
 * - Indirect entry: the target is dereferenced (with one file-repair retry on
 *   failure). If it is a /Pages node it replaces the reference in Kids; any
 *   other type gets a small /PageRef stand-in dictionary stored in Kids
 *   instead, while the real dictionary is still returned to the caller.
 * - Direct dictionary entry: returned as is, after being checked against and
 *   added to the loop detector when one is active.
 *
 * Returns a negative error code on failure, in which case *pchild is not
 * written; otherwise the code from the last successful call.
 */
static int pdfi_get_child(pdf_context *ctx, pdf_array *Kids, int i, pdf_dict **pchild)
{
    pdf_indirect_ref *node = NULL;
    pdf_dict *child = NULL;
    pdf_name *Type = NULL;
    pdf_dict *leaf_dict = NULL;
    pdf_name *Key = NULL;
    int code = 0;

    code = pdfi_array_get_no_deref(ctx, Kids, i, (pdf_obj **)&node);
    if (code < 0)
        goto errorExit;

    /* Despite its declared type, 'node' may be either an indirect reference or a dictionary */
    if (pdfi_type_of(node) != PDF_INDIRECT && pdfi_type_of(node) != PDF_DICT) {
        code = gs_note_error(gs_error_typecheck);
        goto errorExit;
    }

    if (pdfi_type_of(node) == PDF_INDIRECT) {
        code = pdfi_dereference(ctx, node->ref_object_num, node->ref_generation_num,
                                (pdf_obj **)&child);
        if (code < 0) {
            /* Try repairing the file once; if the repair itself fails we
             * report the original dereference error held in 'code'.
             */
            int code1 = pdfi_repair_file(ctx);
            if (code1 < 0)
                goto errorExit;
            code = pdfi_dereference(ctx, node->ref_object_num,
                                    node->ref_generation_num, (pdf_obj **)&child);
            if (code < 0)
                goto errorExit;
        }
        if (pdfi_type_of(child) != PDF_DICT) {
            code = gs_note_error(gs_error_typecheck);
            goto errorExit;
        }
        /* If its an intermediate node, store it in the page_table, if its a leaf node
         * then don't store it. Instead we create a special dictionary of our own which
         * has a /Type of /PageRef and a /PageRef key which is the indirect reference
         * to the page. However in this case we pass on the actual page dictionary to
         * the Kids processing below. If we didn't then we'd fall foul of the loop
         * detection by dereferencing the same object twice.
         * This is tedious, but it means we don't store all the page dictionaries in
         * the Pages tree, because page dictionaries can be large and we generally
         * only use them once. If processed in order we only dereference each page
         * dictionary once, any other order will dereference each page twice. (or more
         * if we render the same page multiple times).
         */
        code = pdfi_dict_get_type(ctx, child, "Type", PDF_NAME, (pdf_obj **)&Type);
        if (code < 0)
            goto errorExit;
        if (pdfi_name_is(Type, "Pages")) {
            code = pdfi_array_put(ctx, Kids, i, (pdf_obj *)child);
            if (code < 0)
                goto errorExit;
        } else {
            /* Bizarrely, one of the QL FTS files (FTS_07_0704.pdf) has a page dictionary with a /Type of /Template */
            if (!pdfi_name_is(Type, "Page"))
                pdfi_set_error(ctx, 0, NULL, E_PDF_BADPAGETYPE, "pdfi_get_child", NULL);
            /* Make a 'PageRef' entry (just stores an indirect reference to the actual page)
             * and store that in the Kids array for future reference. But pass on the
             * dereferenced Page dictionary, in case this is the target page.
             */

            code = pdfi_dict_alloc(ctx, 0, &leaf_dict);
            if (code < 0)
                goto errorExit;
            code = pdfi_name_alloc(ctx, (byte *)"PageRef", 7, (pdf_obj **)&Key);
            if (code < 0)
                goto errorExit;
            pdfi_countup(Key);

            /* The same name object serves as the key for the reference and as the /Type value */
            code = pdfi_dict_put_obj(ctx, leaf_dict, (pdf_obj *)Key, (pdf_obj *)node, true);
            if (code < 0)
                goto errorExit;
            code = pdfi_dict_put(ctx, leaf_dict, "Type", (pdf_obj *)Key);
            if (code < 0)
                goto errorExit;
            code = pdfi_array_put(ctx, Kids, i, (pdf_obj *)leaf_dict);
            if (code < 0)
                goto errorExit;
            /* Now stored in Kids; clear the local so the cleanup below does not free it */
            leaf_dict = NULL;
        }
    } else {
        /* Direct dictionary: guard against revisiting it if loop detection is active */
        if (ctx->loop_detection != NULL) {
            if (node->object_num != 0 && pdfi_loop_detector_check_object(ctx, node->object_num)) {
                code = gs_note_error(gs_error_circular_reference);
                goto errorExit;
            }
            if (node->object_num > 0) {
                code = pdfi_loop_detector_add_object(ctx, node->object_num);
                if (code < 0)
                    goto errorExit;
            }
        }
        /* Take an extra reference for the caller; 'node' is released below */
        child = (pdf_dict *)node;
        pdfi_countup(child);
    }

    /* Hand our reference on 'child' to the caller */
    *pchild = child;
    child = NULL;

 errorExit:
    /* Shared cleanup for success and failure; pointers already handed off are NULL here */
    pdfi_free_object((pdf_obj *)leaf_dict);
    pdfi_countdown(child);
    pdfi_countdown(node);
    pdfi_countdown(Type);
    pdfi_countdown(Key);
    return code;
}
695 | | |
696 | | /* Check if key is in the dictionary, and if so, copy it into the inheritable dict. |
697 | | */ |
698 | | static int pdfi_check_inherited_key(pdf_context *ctx, pdf_dict *d, const char *keyname, pdf_dict *inheritable) |
699 | 414k | { |
700 | 414k | int code = 0; |
701 | 414k | pdf_obj *object = NULL; |
702 | 414k | bool known; |
703 | | |
704 | | /* Check for inheritable keys, if we find any copy them to the 'inheritable' dictionary at this level */ |
705 | 414k | code = pdfi_dict_known(ctx, d, keyname, &known); |
706 | 414k | if (code < 0) |
707 | 0 | goto exit; |
708 | 414k | if (known) { |
709 | 22.7k | code = pdfi_loop_detector_mark(ctx); |
710 | 22.7k | if (code < 0){ |
711 | 0 | goto exit; |
712 | 0 | } |
713 | 22.7k | code = pdfi_dict_get(ctx, d, keyname, &object); |
714 | 22.7k | if (code < 0) { |
715 | 4 | (void)pdfi_loop_detector_cleartomark(ctx); |
716 | 4 | goto exit; |
717 | 4 | } |
718 | 22.7k | code = pdfi_loop_detector_cleartomark(ctx); |
719 | 22.7k | if (code < 0) { |
720 | 0 | goto exit; |
721 | 0 | } |
722 | 22.7k | code = pdfi_dict_put(ctx, inheritable, keyname, object); |
723 | 22.7k | } |
724 | | |
725 | 414k | exit: |
726 | 414k | pdfi_countdown(object); |
727 | 414k | return code; |
728 | 414k | } |
729 | | |
730 | | int pdfi_get_page_dict(pdf_context *ctx, pdf_dict *d, uint64_t page_num, uint64_t *page_offset, |
731 | | pdf_dict **target, pdf_dict *inherited) |
732 | 103k | { |
733 | 103k | int i, code = 0; |
734 | 103k | pdf_array *Kids = NULL; |
735 | 103k | pdf_dict *child = NULL; |
736 | 103k | pdf_name *Type = NULL; |
737 | 103k | pdf_dict *inheritable = NULL; |
738 | 103k | int64_t num; |
739 | 103k | double dbl; |
740 | | |
741 | 103k | if (ctx->args.pdfdebug) |
742 | 103k | dmprintf1(ctx->memory, "%% Finding page dictionary for page %"PRIi64"\n", page_num + 1); |
743 | | |
744 | | /* Allocated inheritable dict (it might stay empty) */ |
745 | 103k | code = pdfi_dict_alloc(ctx, 0, &inheritable); |
746 | 103k | if (code < 0) |
747 | 0 | return code; |
748 | 103k | pdfi_countup(inheritable); |
749 | | |
750 | 103k | code = pdfi_loop_detector_mark(ctx); |
751 | 103k | if (code < 0) |
752 | 0 | return code; |
753 | | |
754 | | /* if we are being passed any inherited values from our parent, copy them now */ |
755 | 103k | if (inherited != NULL) { |
756 | 6.24k | code = pdfi_dict_copy(ctx, inheritable, inherited); |
757 | 6.24k | if (code < 0) |
758 | 0 | goto exit; |
759 | 6.24k | } |
760 | | |
761 | 103k | code = pdfi_dict_get_number(ctx, d, "Count", &dbl); |
762 | 103k | if (code < 0) |
763 | 0 | goto exit; |
764 | 103k | if (dbl != floor(dbl)) { |
765 | 0 | code = gs_note_error(gs_error_rangecheck); |
766 | 0 | goto exit; |
767 | 0 | } |
768 | 103k | num = (int)dbl; |
769 | | |
770 | 103k | if (num < 0 || (num + *page_offset) > ctx->num_pages) { |
771 | 0 | code = gs_note_error(gs_error_rangecheck); |
772 | 0 | goto exit; |
773 | 0 | } |
774 | 103k | if (num + *page_offset < page_num) { |
775 | 0 | *page_offset += num; |
776 | 0 | code = 1; |
777 | 0 | goto exit; |
778 | 0 | } |
779 | | /* The requested page is a descendant of this node */ |
780 | | |
781 | | /* Check for inheritable keys, if we find any copy them to the 'inheritable' dictionary at this level */ |
782 | 103k | code = pdfi_check_inherited_key(ctx, d, "Resources", inheritable); |
783 | 103k | if (code < 0) |
784 | 4 | goto exit; |
785 | 103k | code = pdfi_check_inherited_key(ctx, d, "MediaBox", inheritable); |
786 | 103k | if (code < 0) |
787 | 0 | goto exit; |
788 | 103k | code = pdfi_check_inherited_key(ctx, d, "CropBox", inheritable); |
789 | 103k | if (code < 0) |
790 | 0 | goto exit; |
791 | 103k | code = pdfi_check_inherited_key(ctx, d, "Rotate", inheritable); |
792 | 103k | if (code < 0) { |
793 | 0 | goto exit; |
794 | 0 | } |
795 | | |
796 | | /* Get the Kids array */ |
797 | 103k | code = pdfi_dict_get_type(ctx, d, "Kids", PDF_ARRAY, (pdf_obj **)&Kids); |
798 | 103k | if (code < 0) { |
799 | 0 | goto exit; |
800 | 0 | } |
801 | | |
802 | | /* Check each entry in the Kids array */ |
803 | 270k | for (i = 0;i < pdfi_array_size(Kids);i++) { |
804 | 270k | pdfi_countdown(child); |
805 | 270k | child = NULL; |
806 | 270k | pdfi_countdown(Type); |
807 | 270k | Type = NULL; |
808 | | |
809 | 270k | code = pdfi_get_child(ctx, Kids, i, &child); |
810 | 270k | if (code < 0) { |
811 | 10.5k | goto exit; |
812 | 10.5k | } |
813 | | |
814 | | /* Check the type, if its a Pages entry, then recurse. If its a Page entry, is it the one we want */ |
815 | 259k | code = pdfi_dict_get_type(ctx, child, "Type", PDF_NAME, (pdf_obj **)&Type); |
816 | 259k | if (code == 0) { |
817 | 259k | if (pdfi_name_is(Type, "Pages")) { |
818 | 10.2k | code = pdfi_dict_get_number(ctx, child, "Count", &dbl); |
819 | 10.2k | if (code == 0) { |
820 | 10.2k | if (dbl != floor(dbl)) { |
821 | 0 | code = gs_note_error(gs_error_rangecheck); |
822 | 0 | goto exit; |
823 | 0 | } |
824 | 10.2k | num = (int)dbl; |
825 | 10.2k | if (num < 0 || (num + *page_offset) > ctx->num_pages) { |
826 | 1 | code = gs_note_error(gs_error_rangecheck); |
827 | 1 | goto exit; |
828 | 10.2k | } else { |
829 | 10.2k | if (num + *page_offset <= page_num) { |
830 | 4.04k | *page_offset += num; |
831 | 6.24k | } else { |
832 | 6.24k | code = pdfi_get_page_dict(ctx, child, page_num, page_offset, target, inheritable); |
833 | 6.24k | goto exit; |
834 | 6.24k | } |
835 | 10.2k | } |
836 | 10.2k | } |
837 | 249k | } else { |
838 | 249k | if (pdfi_name_is(Type, "PageRef")) { |
839 | 205k | if ((*page_offset) == page_num) { |
840 | 43.2k | pdf_dict *page_dict = NULL; |
841 | | |
842 | 43.2k | code = pdfi_dict_get(ctx, child, "PageRef", (pdf_obj **)&page_dict); |
843 | 43.2k | if (code < 0) |
844 | 0 | goto exit; |
845 | 43.2k | code = pdfi_merge_dicts(ctx, page_dict, inheritable); |
846 | 43.2k | *target = page_dict; |
847 | 43.2k | pdfi_countup(*target); |
848 | 43.2k | pdfi_countdown(page_dict); |
849 | 43.2k | goto exit; |
850 | 162k | } else { |
851 | 162k | *page_offset += 1; |
852 | 162k | } |
853 | 205k | } else { |
854 | 43.6k | if (!pdfi_name_is(Type, "Page")) |
855 | 102 | pdfi_set_error(ctx, 0, NULL, E_PDF_BADPAGETYPE, "pdfi_get_page_dict", NULL); |
856 | 43.6k | if ((*page_offset) == page_num) { |
857 | 43.5k | code = pdfi_merge_dicts(ctx, child, inheritable); |
858 | 43.5k | *target = child; |
859 | 43.5k | pdfi_countup(*target); |
860 | 43.5k | goto exit; |
861 | 43.5k | } else { |
862 | 80 | *page_offset += 1; |
863 | 80 | } |
864 | 43.6k | } |
865 | 249k | } |
866 | 259k | } |
867 | 166k | if (code < 0) |
868 | 1 | goto exit; |
869 | 166k | } |
870 | | /* Positive return value indicates we did not find the target below this node, try the next one */ |
871 | 0 | code = 1; |
872 | |
|
873 | 103k | exit: |
874 | 103k | pdfi_loop_detector_cleartomark(ctx); |
875 | 103k | pdfi_countdown(inheritable); |
876 | 103k | pdfi_countdown(Kids); |
877 | 103k | pdfi_countdown(child); |
878 | 103k | pdfi_countdown(Type); |
879 | 103k | return code; |
880 | 0 | } |
881 | | |
882 | | int pdfi_doc_page_array_init(pdf_context *ctx) |
883 | 25.3k | { |
884 | 25.3k | size_t size = ctx->num_pages*sizeof(uint32_t); |
885 | | |
886 | 25.3k | ctx->page_array = (uint32_t *)gs_alloc_bytes(ctx->memory, size, |
887 | 25.3k | "pdfi_doc_page_array_init(page_array)"); |
888 | 25.3k | if (ctx->page_array == NULL) |
889 | 0 | return_error(gs_error_VMerror); |
890 | | |
891 | 25.3k | memset(ctx->page_array, 0, size); |
892 | 25.3k | return 0; |
893 | 25.3k | } |
894 | | |
895 | | void pdfi_doc_page_array_free(pdf_context *ctx) |
896 | 40.8k | { |
897 | 40.8k | if (!ctx->page_array) |
898 | 15.5k | return; |
899 | 25.3k | gs_free_object(ctx->memory, ctx->page_array, "pdfi_doc_page_array_free(page_array)"); |
900 | 25.3k | ctx->page_array = NULL; |
901 | 25.3k | } |
902 | | |
903 | | /* |
904 | | * Checks for both "Resource" and "RD" in the specified dict. |
905 | | * And then gets the typedict of Type (e.g. Font or XObject). |
906 | | * Returns 0 if undefined, >0 if found, <0 if error |
907 | | */ |
908 | | static int pdfi_resource_knownget_typedict(pdf_context *ctx, unsigned char *Type, |
909 | | pdf_dict *dict, pdf_dict **typedict) |
910 | 1.97M | { |
911 | 1.97M | int code; |
912 | 1.97M | pdf_dict *Resources = NULL; |
913 | | |
914 | 1.97M | code = pdfi_dict_knownget_type(ctx, dict, "Resources", PDF_DICT, (pdf_obj **)&Resources); |
915 | 1.97M | if (code == 0) |
916 | 738k | code = pdfi_dict_knownget_type(ctx, dict, "DR", PDF_DICT, (pdf_obj **)&Resources); |
917 | 1.97M | if (code < 0) |
918 | 61.3k | goto exit; |
919 | 1.91M | if (code > 0) |
920 | 1.17M | code = pdfi_dict_knownget_type(ctx, Resources, (const char *)Type, PDF_DICT, (pdf_obj **)typedict); |
921 | 1.97M | exit: |
922 | 1.97M | pdfi_countdown(Resources); |
923 | 1.97M | return code; |
924 | 1.91M | } |
925 | | |
926 | | int pdfi_find_resource(pdf_context *ctx, unsigned char *Type, pdf_name *name, |
927 | | pdf_dict *dict, pdf_dict *page_dict, pdf_obj **o) |
928 | 1.02M | { |
929 | 1.02M | pdf_dict *typedict = NULL; |
930 | 1.02M | pdf_dict *Parent = NULL; |
931 | 1.02M | pdf_name *n = NULL; |
932 | 1.02M | int code; |
933 | 1.02M | bool known = false; |
934 | | |
935 | 1.02M | *o = NULL; |
936 | | |
937 | | /* Check the provided dict, stream_dict can be NULL if we are trying to find a Default* ColorSpace */ |
938 | 1.02M | if (dict != NULL) { |
939 | 1.02M | bool deref_parent = true; |
940 | | |
941 | 1.02M | code = pdfi_resource_knownget_typedict(ctx, Type, dict, &typedict); |
942 | 1.02M | if (code < 0) |
943 | 402 | goto exit; |
944 | 1.02M | if (code > 0) { |
945 | 288k | code = pdfi_dict_get_no_store_R_key(ctx, typedict, name, o); |
946 | 288k | if (code != gs_error_undefined) |
947 | 235k | goto exit; |
948 | 288k | } |
949 | | |
950 | | /* Check the Parents, if any */ |
951 | | /* If the current dictionary is a Page dictionary, do NOT dereference it's Parent, as that |
952 | | * will be the Pages tree, and we will end up with circular references, causing a memory leak. |
953 | | */ |
954 | 785k | if (pdfi_dict_knownget_type(ctx, dict, "Type", PDF_NAME, (pdf_obj **)&n) > 0) { |
955 | 63.8k | if (pdfi_name_is(n, "Page")) |
956 | 6 | deref_parent = false; |
957 | 63.8k | pdfi_countdown(n); |
958 | 63.8k | } |
959 | | |
960 | 785k | if (deref_parent) { |
961 | 785k | code = pdfi_dict_known(ctx, dict, "Parent", &known); |
962 | 785k | if (code >= 0 && known == true) { |
963 | 63.4k | code = pdfi_dict_get_no_store_R(ctx, dict, "Parent", (pdf_obj **)&Parent); |
964 | | |
965 | 63.4k | if (code >= 0) { |
966 | 62.2k | if (pdfi_type_of(Parent) != PDF_DICT) { |
967 | 698 | if (pdfi_type_of(Parent) == PDF_INDIRECT) { |
968 | 33 | pdf_indirect_ref *o = (pdf_indirect_ref *)Parent; |
969 | | |
970 | 33 | Parent = NULL; |
971 | 33 | code = pdfi_dereference(ctx, o->ref_object_num, o->ref_generation_num, (pdf_obj **)&Parent); |
972 | 33 | pdfi_countdown(o); |
973 | 33 | if (code >= 0 && pdfi_type_of(Parent) != PDF_DICT) { |
974 | 0 | pdfi_countdown(Parent); |
975 | 0 | Parent = NULL; |
976 | 0 | } |
977 | 665 | } else { |
978 | 665 | pdfi_countdown(Parent); |
979 | 665 | Parent = NULL; |
980 | 665 | } |
981 | 698 | } |
982 | 62.2k | } else |
983 | 1.20k | Parent = NULL; |
984 | 63.4k | } |
985 | | |
986 | 785k | if (Parent != NULL) { |
987 | 61.5k | if (ctx->page.CurrentPageDict != NULL && Parent->object_num != ctx->page.CurrentPageDict->object_num) { |
988 | 61.5k | if (pdfi_loop_detector_check_object(ctx, Parent->object_num) == true) { |
989 | 12.2k | code = gs_note_error(gs_error_circular_reference); |
990 | 12.2k | goto exit; |
991 | 12.2k | } |
992 | | |
993 | 49.2k | code = pdfi_loop_detector_mark(ctx); |
994 | 49.2k | if (code < 0) |
995 | 0 | goto exit; |
996 | | |
997 | 49.2k | code = pdfi_loop_detector_add_object(ctx, dict->object_num); |
998 | 49.2k | if (code < 0) { |
999 | 0 | (void)pdfi_loop_detector_cleartomark(ctx); |
1000 | 0 | goto exit; |
1001 | 0 | } |
1002 | 49.2k | code = pdfi_find_resource(ctx, Type, name, Parent, page_dict, o); |
1003 | 49.2k | (void)pdfi_loop_detector_cleartomark(ctx); |
1004 | 49.2k | if (code != gs_error_undefined) |
1005 | 178 | goto exit; |
1006 | 49.2k | } |
1007 | 61.5k | } |
1008 | 772k | code = 0; |
1009 | 772k | } |
1010 | 772k | pdfi_countdown(typedict); |
1011 | 772k | typedict = NULL; |
1012 | 772k | } |
1013 | | |
1014 | | /* Normally page_dict can't be (or shouldn't be) NULL. However, if we are processing |
1015 | | * a TYpe 3 font, then the 'page dict' is the Resources dictionary of that font. If |
1016 | | * the font inherits Resources from its page (which it should not) then its possible |
1017 | | * that the 'page dict' could be NULL here. We need to guard against that. Its possible |
1018 | | * there may be other, similar, cases (eg Patterns within Patterns). In addition we |
1019 | | * do need to be able to check the real page dictionary for inhereited resources, and |
1020 | | * in the case of a type 3 font BuildChar at least there is no easy way to do that. |
1021 | | * So we'll store the page dictionary for the current page in the context as a |
1022 | | * last-ditch resource to check. |
1023 | | */ |
1024 | 772k | if (page_dict != NULL) { |
1025 | 772k | code = pdfi_resource_knownget_typedict(ctx, Type, page_dict, &typedict); |
1026 | 772k | if (code < 0) |
1027 | 65.2k | goto exit; |
1028 | | |
1029 | 707k | if (code > 0) { |
1030 | 613k | code = pdfi_dict_get_no_store_R_key(ctx, typedict, name, o); |
1031 | 613k | goto exit; |
1032 | 613k | } |
1033 | 707k | } |
1034 | | |
1035 | 93.9k | pdfi_countdown(typedict); |
1036 | 93.9k | typedict = NULL; |
1037 | | |
1038 | 93.9k | if (ctx->page.CurrentPageDict != NULL) { |
1039 | 93.9k | code = pdfi_resource_knownget_typedict(ctx, Type, ctx->page.CurrentPageDict, &typedict); |
1040 | 93.9k | if (code < 0) |
1041 | 0 | goto exit; |
1042 | | |
1043 | 93.9k | if (code > 0) { |
1044 | 2.29k | code = pdfi_dict_get_no_store_R_key(ctx, typedict, name, o); |
1045 | 2.29k | goto exit; |
1046 | 2.29k | } |
1047 | 93.9k | } |
1048 | | |
1049 | 91.6k | if (ctx->current_stream != NULL) { |
1050 | 90.2k | pdf_dict *stream_dict = NULL; |
1051 | 90.2k | pdf_stream *stream = ctx->current_stream; |
1052 | | |
1053 | 91.5k | do { |
1054 | 91.5k | code = pdfi_dict_from_obj(ctx, (pdf_obj *)stream, &stream_dict); |
1055 | 91.5k | if (code < 0) |
1056 | 0 | goto exit; |
1057 | 91.5k | code = pdfi_resource_knownget_typedict(ctx, Type, stream_dict, &typedict); |
1058 | 91.5k | if (code < 0) |
1059 | 0 | goto exit; |
1060 | 91.5k | if (code > 0) { |
1061 | 87.7k | code = pdfi_dict_get_no_store_R_key(ctx, typedict, name, o); |
1062 | | /* Triggered by: tests_private/comparefiles/Bug692783.pdf */ |
1063 | 87.7k | pdfi_set_error(ctx, 0, NULL, E_PDF_INHERITED_STREAM_RESOURCE, "pdfi_find_resource", (char *)"Couldn't find named resource in supplied dictionary, or Parents, or Pages, matching name located in earlier stream Resource"); |
1064 | 87.7k | goto exit; |
1065 | 87.7k | } |
1066 | 3.74k | pdfi_countdown(typedict); |
1067 | 3.74k | typedict = NULL; |
1068 | 3.74k | stream = pdfi_stream_parent(ctx, stream); |
1069 | 3.74k | }while(stream != NULL); |
1070 | 90.2k | } |
1071 | | |
1072 | | /* If we got all the way down there, we didn't find it */ |
1073 | 3.90k | pdfi_set_warning(ctx, 0, NULL, W_PDF_MISSING_NAMED_RESOURCE, "pdfi_find_resource", NULL); |
1074 | 3.90k | code = gs_error_undefined; |
1075 | | |
1076 | 1.02M | exit: |
1077 | 1.02M | pdfi_countdown(typedict); |
1078 | 1.02M | pdfi_countdown(Parent); |
1079 | 1.02M | return code; |
1080 | 3.90k | } |
1081 | | |
1082 | | /* Mark the actual outline */ |
1083 | | static int pdfi_doc_mark_the_outline(pdf_context *ctx, pdf_dict *outline) |
1084 | 92 | { |
1085 | 92 | int code = 0; |
1086 | 92 | pdf_dict *tempdict = NULL; |
1087 | 92 | uint64_t dictsize; |
1088 | 92 | uint64_t index; |
1089 | 92 | pdf_name *Key = NULL; |
1090 | 92 | double num; |
1091 | | |
1092 | | /* Basically we only do /Count, /Title, /A, /C, /F |
1093 | | * The /First, /Last, /Next, /Parent get written magically by pdfwrite |
1094 | | */ |
1095 | | |
1096 | | /* Make a temporary copy of the outline dict */ |
1097 | 92 | dictsize = pdfi_dict_entries(outline); |
1098 | 92 | code = pdfi_dict_alloc(ctx, dictsize, &tempdict); |
1099 | 92 | if (code < 0) goto exit; |
1100 | 92 | pdfi_countup(tempdict); |
1101 | 92 | code = pdfi_dict_copy(ctx, tempdict, outline); |
1102 | 92 | if (code < 0) goto exit; |
1103 | | |
1104 | | /* Due to some craziness on the part of Adobe, the /Count in an Outline entry |
1105 | | * in a PDF file, and the /Count value in an /OUT pdfmark mean different things(!) |
1106 | | * In a PDF file it is the number of outline entries beneath the current entry |
1107 | | * (all child descsndants) whereas in a pdfmark it is the number of entries |
1108 | | * in just the next level. So we cannot use the /Count from the PDF file, we |
1109 | | * need to go to the /First entry of the next level, and then count all |
1110 | | * the entries at that level by following each /Next. |
1111 | | */ |
1112 | 92 | code = pdfi_dict_knownget_number(ctx, outline, "Count", &num); |
1113 | 92 | if (code < 0) |
1114 | 0 | goto exit; |
1115 | | |
1116 | 92 | if (code > 0) { |
1117 | 30 | pdf_dict *current = NULL, *next = NULL; |
1118 | 30 | int count = 0; |
1119 | | |
1120 | 30 | code = pdfi_dict_knownget_type(ctx, outline, "First", PDF_DICT, (pdf_obj **)¤t); |
1121 | 30 | if (code > 0) { |
1122 | 3 | count++; |
1123 | 5 | do { |
1124 | 5 | code = pdfi_dict_knownget_type(ctx, current, "Next", PDF_DICT, (pdf_obj **)&next); |
1125 | 5 | if (code > 0) { |
1126 | 2 | pdfi_countdown(current); |
1127 | 2 | current = next; |
1128 | 2 | next = NULL; |
1129 | 2 | count++; |
1130 | 2 | } else |
1131 | 3 | break; |
1132 | 5 | } while (1); |
1133 | 3 | pdfi_countdown(current); |
1134 | 3 | } |
1135 | 30 | if (num < 0) |
1136 | 0 | count *= -1; |
1137 | 30 | pdfi_dict_put_int(ctx, tempdict, "Count", count); |
1138 | 30 | } |
1139 | | |
1140 | | /* Go through the dict, removing some keys and doing special handling for others. |
1141 | | */ |
1142 | 0 | code = pdfi_dict_key_first(ctx, outline, (pdf_obj **)&Key, &index); |
1143 | 392 | while (code >= 0) { |
1144 | 392 | if (pdfi_name_is(Key, "Last") || pdfi_name_is(Key, "Next") || pdfi_name_is(Key, "First") || |
1145 | 392 | pdfi_name_is(Key, "Prev") || pdfi_name_is(Key, "Parent")) { |
1146 | | /* Delete some keys |
1147 | | * These are handled in pdfwrite and can lead to circular refs |
1148 | | */ |
1149 | 170 | code = pdfi_dict_delete_pair(ctx, tempdict, Key); |
1150 | 222 | } else if (pdfi_name_is(Key, "SE")) { |
1151 | | /* TODO: Not sure what to do with SE, delete for now */ |
1152 | | /* Seems to be okay to just delete it, since there should also be a /Dest |
1153 | | * See sample fts_29_2903.pdf |
1154 | | * Currently we are same as gs |
1155 | | */ |
1156 | 0 | code = pdfi_dict_delete_pair(ctx, tempdict, Key); |
1157 | 222 | } else if (pdfi_name_is(Key, "A")) { |
1158 | 59 | code = pdfi_pdfmark_modA(ctx, tempdict); |
1159 | 163 | } else if (pdfi_name_is(Key, "Dest")) { |
1160 | 33 | code = pdfi_pdfmark_modDest(ctx, tempdict); |
1161 | 33 | } |
1162 | 392 | if (code < 0) |
1163 | 5 | goto exit; |
1164 | | |
1165 | 387 | pdfi_countdown(Key); |
1166 | 387 | Key = NULL; |
1167 | | |
1168 | 387 | code = pdfi_dict_key_next(ctx, outline, (pdf_obj **)&Key, &index); |
1169 | 387 | if (code == gs_error_undefined) { |
1170 | 87 | code = 0; |
1171 | 87 | break; |
1172 | 87 | } |
1173 | 387 | } |
1174 | 87 | if (code < 0) goto exit; |
1175 | | |
1176 | | /* Write the pdfmark */ |
1177 | 87 | code = pdfi_pdfmark_from_dict(ctx, tempdict, NULL, "OUT"); |
1178 | 87 | if (code < 0) |
1179 | 0 | goto exit; |
1180 | | |
1181 | 92 | exit: |
1182 | 92 | pdfi_countdown(tempdict); |
1183 | 92 | pdfi_countdown(Key); |
1184 | 92 | return code; |
1185 | 87 | } |
1186 | | |
1187 | | /* Do pdfmark on an outline entry (recursive) |
1188 | | * Note: the logic here is wonky. It is relying on the behavior of the pdfwrite driver. |
1189 | | * See pdf_main.ps/writeoutline() |
1190 | | */ |
1191 | | static int pdfi_doc_mark_outline(pdf_context *ctx, pdf_dict *outline) |
1192 | 92 | { |
1193 | 92 | int code = 0; |
1194 | 92 | pdf_dict *child = NULL; |
1195 | 92 | pdf_dict *Next = NULL; |
1196 | | |
1197 | 92 | if (outline == (pdf_dict *)PDF_NULL_OBJ) |
1198 | 0 | return 0; |
1199 | | |
1200 | | /* Mark the outline */ |
1201 | | /* NOTE: I think the pdfmark for this needs to be written before the children |
1202 | | * because I think pdfwrite relies on the order of things. |
1203 | | */ |
1204 | 92 | code = pdfi_doc_mark_the_outline(ctx, outline); |
1205 | 92 | if (code < 0) |
1206 | 5 | goto exit1; |
1207 | | |
1208 | | /* Handle the children */ |
1209 | 87 | code = pdfi_loop_detector_mark(ctx); |
1210 | 87 | if (code < 0) |
1211 | 0 | goto exit1; |
1212 | | |
1213 | | /* Handle any children (don't deref them, we don't want to leave them hanging around) */ |
1214 | 87 | code = pdfi_dict_get_no_store_R(ctx, outline, "First", (pdf_obj **)&child); |
1215 | 87 | if (code < 0 || pdfi_type_of(child) != PDF_DICT) { |
1216 | | /* TODO: flag a warning? */ |
1217 | 84 | code = 0; |
1218 | 84 | goto exit; |
1219 | 84 | } |
1220 | | |
1221 | 3 | if (child->object_num != 0) { |
1222 | 3 | code = pdfi_loop_detector_add_object(ctx, child->object_num); |
1223 | 3 | if (code < 0) |
1224 | 0 | goto exit; |
1225 | 3 | } |
1226 | | |
1227 | 5 | do { |
1228 | 5 | code = pdfi_doc_mark_outline(ctx, child); |
1229 | 5 | if (code < 0) goto exit; |
1230 | | |
1231 | | |
1232 | 5 | code = pdfi_dict_get_no_store_R(ctx, child, "Next", (pdf_obj **)&Next); |
1233 | 5 | if (code == gs_error_undefined) { |
1234 | 3 | code = 0; |
1235 | 3 | break; |
1236 | 3 | } |
1237 | 2 | if (code == gs_error_circular_reference) { |
1238 | 0 | code = 0; |
1239 | 0 | goto exit; |
1240 | 0 | } |
1241 | 2 | if (code < 0 || pdfi_type_of(Next) != PDF_DICT) |
1242 | 0 | goto exit; |
1243 | | |
1244 | 2 | pdfi_countdown(child); |
1245 | 2 | child = Next; |
1246 | 2 | Next = NULL; |
1247 | 2 | } while (true); |
1248 | | |
1249 | 87 | exit: |
1250 | 87 | (void)pdfi_loop_detector_cleartomark(ctx); |
1251 | 92 | exit1: |
1252 | 92 | pdfi_countdown(child); |
1253 | 92 | pdfi_countdown(Next); |
1254 | 92 | return code; |
1255 | 87 | } |
1256 | | |
1257 | | /* Do pdfmark for Outlines */ |
1258 | | static int pdfi_doc_Outlines(pdf_context *ctx) |
1259 | 1.68k | { |
1260 | 1.68k | int code = 0; |
1261 | 1.68k | pdf_dict *Outlines = NULL; |
1262 | 1.68k | pdf_dict *outline = NULL; |
1263 | 1.68k | pdf_dict *Next = NULL; |
1264 | | |
1265 | 1.68k | if (ctx->args.no_pdfmark_outlines) |
1266 | 0 | goto exit1; |
1267 | | |
1268 | 1.68k | code = pdfi_dict_knownget_type(ctx, ctx->Root, "Outlines", PDF_DICT, (pdf_obj **)&Outlines); |
1269 | 1.68k | if (code <= 0) { |
1270 | | /* TODO: flag a warning */ |
1271 | 1.61k | code = 0; |
1272 | 1.61k | goto exit1; |
1273 | 1.61k | } |
1274 | | |
1275 | 70 | code = pdfi_loop_detector_mark(ctx); |
1276 | 70 | if (code < 0) |
1277 | 0 | goto exit1; |
1278 | | |
1279 | | /* Handle any children (don't deref them, we don't want to leave them hanging around) */ |
1280 | 70 | code = pdfi_dict_get_no_store_R(ctx, Outlines, "First", (pdf_obj **)&outline); |
1281 | 70 | if (code < 0 || pdfi_type_of(outline) != PDF_DICT) { |
1282 | | /* TODO: flag a warning? */ |
1283 | 18 | code = 0; |
1284 | 18 | goto exit; |
1285 | 18 | } |
1286 | | |
1287 | 52 | if (pdfi_type_of(outline) != PDF_DICT) |
1288 | 0 | goto exit; /* Exit with no error? */ |
1289 | | |
1290 | 52 | if (outline->object_num != 0) { |
1291 | 52 | code = pdfi_loop_detector_add_object(ctx, outline->object_num); |
1292 | 52 | if (code < 0) |
1293 | 0 | goto exit; |
1294 | 52 | } |
1295 | | |
1296 | | /* Loop through all the top-level outline entries |
1297 | | * First one is in Outlines, and if there are more, they are the Next of the |
1298 | | * current outline item. (see spec) |
1299 | | * (basically we are walking a linked list) |
1300 | | */ |
1301 | 87 | do { |
1302 | 87 | code = pdfi_doc_mark_outline(ctx, outline); |
1303 | 87 | if (code < 0) goto exit; |
1304 | | |
1305 | | |
1306 | 82 | code = pdfi_dict_get_no_store_R(ctx, outline, "Next", (pdf_obj **)&Next); |
1307 | 82 | if (code == gs_error_undefined) { |
1308 | 47 | code = 0; |
1309 | 47 | break; |
1310 | 47 | } |
1311 | 35 | if (code == gs_error_circular_reference) { |
1312 | 0 | code = 0; |
1313 | 0 | goto exit; |
1314 | 0 | } |
1315 | 35 | if (code < 0 || pdfi_type_of(Next) != PDF_DICT) |
1316 | 0 | goto exit; |
1317 | | |
1318 | 35 | pdfi_countdown(outline); |
1319 | 35 | outline = Next; |
1320 | 35 | Next = NULL; |
1321 | 35 | } while (true); |
1322 | | |
1323 | 70 | exit: |
1324 | 70 | (void)pdfi_loop_detector_cleartomark(ctx); |
1325 | 1.68k | exit1: |
1326 | 1.68k | pdfi_countdown(Outlines); |
1327 | 1.68k | pdfi_countdown(outline); |
1328 | 1.68k | pdfi_countdown(Next); |
1329 | 1.68k | return code; |
1330 | 70 | } |
1331 | | |
1332 | | /* Do pdfmark for Info */ |
1333 | | static int pdfi_doc_Info(pdf_context *ctx) |
1334 | 1.68k | { |
1335 | 1.68k | int code = 0; |
1336 | 1.68k | pdf_dict *Info = NULL, *d = NULL; |
1337 | 1.68k | pdf_dict *tempdict = NULL; |
1338 | 1.68k | uint64_t dictsize; |
1339 | 1.68k | uint64_t index; |
1340 | 1.68k | pdf_name *Key = NULL; |
1341 | 1.68k | pdf_obj *Value = NULL; |
1342 | | |
1343 | | /* See comment in pdfi_read_Root() for details */ |
1344 | 1.68k | d = ctx->Trailer; |
1345 | 1.68k | pdfi_countup(d); |
1346 | 1.68k | code = pdfi_dict_knownget_type(ctx, d, "Info", PDF_DICT, (pdf_obj **)&Info); |
1347 | 1.68k | pdfi_countdown(d); |
1348 | 1.68k | if (code <= 0) { |
1349 | | /* TODO: flag a warning */ |
1350 | 291 | goto exit; |
1351 | 291 | } |
1352 | | |
1353 | | /* Make a temporary copy of the Info dict */ |
1354 | 1.39k | dictsize = pdfi_dict_entries(Info); |
1355 | 1.39k | code = pdfi_dict_alloc(ctx, dictsize, &tempdict); |
1356 | 1.39k | if (code < 0) goto exit; |
1357 | 1.39k | pdfi_countup(tempdict); |
1358 | | |
1359 | | /* Copy only certain keys from Info to tempdict |
1360 | | * NOTE: pdfwrite will set /Producer, /CreationDate and /ModDate |
1361 | | */ |
1362 | 1.39k | code = pdfi_dict_first(ctx, Info, (pdf_obj **)&Key, &Value, &index); |
1363 | 8.21k | while (code >= 0) { |
1364 | 8.18k | if (pdfi_name_is(Key, "Author") || pdfi_name_is(Key, "Creator") || |
1365 | 8.18k | pdfi_name_is(Key, "Title") || pdfi_name_is(Key, "Subject") || |
1366 | 8.18k | pdfi_name_is(Key, "Keywords")) { |
1367 | 3.54k | code = pdfi_dict_put_obj(ctx, tempdict, (pdf_obj *)Key, Value, true); |
1368 | 3.54k | if (code < 0) |
1369 | 0 | goto exit; |
1370 | 3.54k | } |
1371 | 8.18k | pdfi_countdown(Key); |
1372 | 8.18k | Key = NULL; |
1373 | 8.18k | pdfi_countdown(Value); |
1374 | 8.18k | Value = NULL; |
1375 | | |
1376 | 8.18k | code = pdfi_dict_next(ctx, Info, (pdf_obj **)&Key, &Value, &index); |
1377 | 8.18k | if (code == gs_error_undefined) { |
1378 | 1.36k | code = 0; |
1379 | 1.36k | break; |
1380 | 1.36k | } |
1381 | 8.18k | } |
1382 | 1.39k | if (code < 0) goto exit; |
1383 | | |
1384 | | /* Write the pdfmark */ |
1385 | 1.36k | code = pdfi_pdfmark_from_dict(ctx, tempdict, NULL, "DOCINFO"); |
1386 | | |
1387 | 1.68k | exit: |
1388 | 1.68k | pdfi_countdown(Key); |
1389 | 1.68k | pdfi_countdown(Value); |
1390 | 1.68k | pdfi_countdown(Info); |
1391 | 1.68k | pdfi_countdown(tempdict); |
1392 | 1.68k | return code; |
1393 | 1.36k | } |
1394 | | |
1395 | | /* Handle PageLabels for pdfwrite device */ |
1396 | | static int pdfi_doc_PageLabels(pdf_context *ctx) |
1397 | 13.3k | { |
1398 | 13.3k | int code; |
1399 | 13.3k | pdf_dict *PageLabels = NULL; |
1400 | | |
1401 | 13.3k | if (ctx->loop_detection) { |
1402 | 0 | code = pdfi_loop_detector_mark(ctx); |
1403 | 0 | if (code < 0) |
1404 | 0 | return code; |
1405 | 0 | } |
1406 | | |
1407 | 13.3k | code = pdfi_dict_knownget_type(ctx, ctx->Root, "PageLabels", PDF_DICT, (pdf_obj **)&PageLabels); |
1408 | 13.3k | if (code <= 0) { |
1409 | 12.6k | if (ctx->loop_detection) |
1410 | 0 | (void)pdfi_loop_detector_cleartomark(ctx); |
1411 | | /* TODO: flag a warning */ |
1412 | 12.6k | goto exit; |
1413 | 12.6k | } |
1414 | | |
1415 | 691 | if (ctx->loop_detection) { |
1416 | 0 | code = pdfi_loop_detector_cleartomark(ctx); |
1417 | 0 | if (code < 0) |
1418 | 0 | goto exit; |
1419 | 0 | } |
1420 | | |
1421 | 691 | if (ctx->device_state.WantsPageLabels) { |
1422 | | /* This will send the PageLabels object as a 'pdfpagelabels' setdeviceparams */ |
1423 | 44 | code = pdfi_pdfmark_object(ctx, (pdf_obj *)PageLabels, "pdfpagelabels"); |
1424 | 44 | if (code < 0) |
1425 | 0 | goto exit; |
1426 | 44 | } |
1427 | | |
1428 | 13.3k | exit: |
1429 | 13.3k | pdfi_countdown(PageLabels); |
1430 | 13.3k | return code; |
1431 | 691 | } |
1432 | | |
1433 | | /* Handle OutputIntents stuff |
1434 | | * (bottom of pdf_main.ps/process_trailer_attrs) |
1435 | | */ |
1436 | | static int pdfi_doc_OutputIntents(pdf_context *ctx) |
1437 | 13.3k | { |
1438 | 13.3k | int code; |
1439 | 13.3k | pdf_array *OutputIntents = NULL; |
1440 | 13.3k | pdf_dict *intent = NULL; |
1441 | 13.3k | pdf_string *name = NULL; |
1442 | 13.3k | pdf_stream *DestOutputProfile = NULL; |
1443 | 13.3k | uint64_t index; |
1444 | | |
1445 | | /* NOTE: subtle difference in error handling -- we are checking for OutputIntents first, |
1446 | | * so this will just ignore UsePDFX3Profile or UseOutputIntent params without warning, |
1447 | | * if OutputIntents doesn't exist. Seems fine to me. |
1448 | | */ |
1449 | 13.3k | code = pdfi_dict_knownget_type(ctx, ctx->Root, "OutputIntents", PDF_ARRAY, |
1450 | 13.3k | (pdf_obj **)&OutputIntents); |
1451 | 13.3k | if (code <= 0) { |
1452 | 13.3k | goto exit; |
1453 | 13.3k | } |
1454 | | |
1455 | | /* TODO: Implement writeoutputintents if somebody ever complains... |
1456 | | * See pdf_main.ps/writeoutputintents |
1457 | | * I am not aware of a device that supports "/OutputIntent" so |
1458 | | * couldn't figure out what to do for this. |
1459 | | */ |
1460 | | |
1461 | | /* Handle UsePDFX3Profile and UseOutputIntent command line options */ |
1462 | 52 | if (ctx->args.UsePDFX3Profile) { |
1463 | | /* This is an index into the array */ |
1464 | 0 | code = pdfi_array_get_type(ctx, OutputIntents, ctx->args.PDFX3Profile_num, |
1465 | 0 | PDF_DICT, (pdf_obj **)&intent); |
1466 | 0 | if (code < 0) { |
1467 | 0 | dmprintf1(ctx->memory, |
1468 | 0 | "*** WARNING UsePDFX3Profile specified invalid index %d for OutputIntents\n", |
1469 | 0 | ctx->args.PDFX3Profile_num); |
1470 | 0 | goto exit; |
1471 | 0 | } |
1472 | 52 | } else if (ctx->args.UseOutputIntent != NULL) { |
1473 | | /* This is a name to look up in the array */ |
1474 | 0 | for (index=0; index<pdfi_array_size(OutputIntents); index ++) { |
1475 | 0 | code = pdfi_array_get_type(ctx, OutputIntents, index, PDF_DICT, (pdf_obj **)&intent); |
1476 | 0 | if (code < 0) goto exit; |
1477 | | |
1478 | 0 | code = pdfi_dict_knownget_type(ctx, intent, "OutputConditionIdentifier", PDF_STRING, |
1479 | 0 | (pdf_obj **)&name); |
1480 | 0 | if (code < 0) goto exit; |
1481 | 0 | if (code == 0) |
1482 | 0 | continue; |
1483 | | |
1484 | | /* If the ID is "Custom" then check "Info" instead */ |
1485 | 0 | if (pdfi_string_is(name, "Custom")) { |
1486 | 0 | pdfi_countdown(name); |
1487 | 0 | name = NULL; |
1488 | 0 | code = pdfi_dict_knownget_type(ctx, intent, "Info", PDF_STRING, (pdf_obj **)&name); |
1489 | 0 | if (code < 0) goto exit; |
1490 | 0 | if (code == 0) |
1491 | 0 | continue; |
1492 | 0 | } |
1493 | | |
1494 | | /* Check for a match */ |
1495 | 0 | if (pdfi_string_is(name, ctx->args.UseOutputIntent)) |
1496 | 0 | break; |
1497 | | |
1498 | 0 | pdfi_countdown(intent); |
1499 | 0 | intent = NULL; |
1500 | 0 | pdfi_countdown(name); |
1501 | 0 | name = NULL; |
1502 | 0 | } |
1503 | 0 | code = 0; |
1504 | 52 | } else { |
1505 | | /* No command line arg was specified, so nothing to do */ |
1506 | 52 | code = 0; |
1507 | 52 | goto exit; |
1508 | 52 | } |
1509 | | |
1510 | | /* Now if intent is non-null, we found the selected intent dictionary */ |
1511 | 0 | if (intent == NULL) |
1512 | 0 | goto exit; |
1513 | | |
1514 | | /* Load the profile, if it exists */ |
1515 | 0 | code = pdfi_dict_knownget_type(ctx, intent, "DestOutputProfile", PDF_STREAM, (pdf_obj **)&DestOutputProfile); |
1516 | | /* TODO: Flag an error if it doesn't exist? Only required in some cases */ |
1517 | 0 | if (code <= 0) goto exit; |
1518 | | |
1519 | | /* Set the intent to the profile */ |
1520 | 0 | code = pdfi_color_setoutputintent(ctx, intent, DestOutputProfile); |
1521 | |
|
1522 | 13.3k | exit: |
1523 | 13.3k | pdfi_countdown(OutputIntents); |
1524 | 13.3k | pdfi_countdown(intent); |
1525 | 13.3k | pdfi_countdown(name); |
1526 | 13.3k | pdfi_countdown(DestOutputProfile); |
1527 | 13.3k | return code; |
1528 | 0 | } |
1529 | | |
1530 | | /* Handled an embedded files Names array for pdfwrite device */ |
1531 | | static int pdfi_doc_EmbeddedFiles_Names(pdf_context *ctx, pdf_array *names) |
1532 | 0 | { |
1533 | 0 | int code; |
1534 | 0 | uint64_t arraysize; |
1535 | 0 | uint64_t index; |
1536 | 0 | pdf_string *name = NULL; |
1537 | 0 | pdf_dict *filespec = NULL; |
1538 | |
|
1539 | 0 | arraysize = pdfi_array_size(names); |
1540 | 0 | if ((arraysize % 2) != 0) { |
1541 | 0 | code = gs_note_error(gs_error_syntaxerror); |
1542 | 0 | goto exit; |
1543 | 0 | } |
1544 | | |
1545 | | /* This is supposed to be an array of |
1546 | | * [ (filename1) (filespec1) (filename2) (filespec2) ... ] |
1547 | | */ |
1548 | 0 | for (index = 0; index < arraysize; index += 2) { |
1549 | 0 | code = pdfi_array_get_type(ctx, names, index, PDF_STRING, (pdf_obj **)&name); |
1550 | 0 | if (code < 0) goto exit; |
1551 | | |
1552 | 0 | code = pdfi_array_get_type(ctx, names, index+1, PDF_DICT, (pdf_obj **)&filespec); |
1553 | 0 | if (code < 0) goto exit; |
1554 | | |
1555 | 0 | code = pdfi_pdfmark_embed_filespec(ctx, name, filespec); |
1556 | 0 | if (code < 0) goto exit; |
1557 | | |
1558 | 0 | pdfi_countdown(name); |
1559 | 0 | name = NULL; |
1560 | 0 | pdfi_countdown(filespec); |
1561 | 0 | filespec = NULL; |
1562 | 0 | } |
1563 | | |
1564 | | |
1565 | 0 | exit: |
1566 | 0 | pdfi_countdown(name); |
1567 | 0 | pdfi_countdown(filespec); |
1568 | 0 | return code; |
1569 | 0 | } |
1570 | | |
1571 | | /* Handle PageLabels for pdfwrite device */ |
1572 | | static int pdfi_doc_EmbeddedFiles(pdf_context *ctx) |
1573 | 1.68k | { |
1574 | 1.68k | int code; |
1575 | 1.68k | pdf_dict *Names = NULL; |
1576 | 1.68k | pdf_dict *EmbeddedFiles = NULL; |
1577 | 1.68k | pdf_array *Names_array = NULL; |
1578 | 1.68k | pdf_array *Kids = NULL; |
1579 | | |
1580 | 1.68k | code = pdfi_dict_knownget_type(ctx, ctx->Root, "Names", PDF_DICT, (pdf_obj **)&Names); |
1581 | 1.68k | if (code <= 0) goto exit; |
1582 | | |
1583 | 451 | code = pdfi_dict_knownget_type(ctx, Names, "EmbeddedFiles", PDF_DICT, (pdf_obj **)&EmbeddedFiles); |
1584 | 451 | if (code <= 0) goto exit; |
1585 | | |
1586 | 0 | code = pdfi_dict_knownget_type(ctx, Names, "Kids", PDF_ARRAY, (pdf_obj **)&Kids); |
1587 | 0 | if (code < 0) goto exit; |
1588 | 0 | if (code > 0) { |
1589 | | /* TODO: Need to implement */ |
1590 | 0 | dmprintf(ctx->memory, "*** WARNING Kids array in EmbeddedFiles not implemented\n"); |
1591 | 0 | } |
1592 | | |
1593 | | /* TODO: This is a name tree. |
1594 | | * Can contain a Names array, or some complicated Kids. |
1595 | | * Just handling Names array for now |
1596 | | */ |
1597 | 0 | code = pdfi_dict_knownget_type(ctx, EmbeddedFiles, "Names", PDF_ARRAY, (pdf_obj **)&Names_array); |
1598 | 0 | if (code <= 0) goto exit; |
1599 | | |
1600 | 0 | code = pdfi_doc_EmbeddedFiles_Names(ctx, Names_array); |
1601 | 0 | if (code <= 0) goto exit; |
1602 | | |
1603 | 1.68k | exit: |
1604 | 1.68k | pdfi_countdown(Kids); |
1605 | 1.68k | pdfi_countdown(Names); |
1606 | 1.68k | pdfi_countdown(EmbeddedFiles); |
1607 | 1.68k | pdfi_countdown(Names_array); |
1608 | 1.68k | return code; |
1609 | 0 | } |
1610 | | |
1611 | | /* Handle some bookkeeping related to AcroForm (and annotations) |
1612 | | * See pdf_main.ps/process_trailer_attrs/AcroForm |
1613 | | * |
1614 | | * Mainly we preload AcroForm and NeedAppearances in the context |
1615 | | * |
1616 | | * TODO: gs code also seems to do something to link up parents in fields/annotations (ParentField) |
1617 | | * We are going to avoid doing that for now. |
1618 | | */ |
1619 | | static int pdfi_doc_AcroForm(pdf_context *ctx) |
1620 | 13.3k | { |
1621 | 13.3k | int code = 0; |
1622 | 13.3k | pdf_dict *AcroForm = NULL; |
1623 | 13.3k | bool boolval = false; |
1624 | | |
1625 | 13.3k | code = pdfi_dict_knownget_type(ctx, ctx->Root, "AcroForm", PDF_DICT, (pdf_obj **)&AcroForm); |
1626 | 13.3k | if (code <= 0) goto exit; |
1627 | | |
1628 | 3.64k | code = pdfi_dict_get_bool(ctx, AcroForm, "NeedAppearances", &boolval); |
1629 | 3.64k | if (code < 0) { |
1630 | 3.56k | if (code == gs_error_undefined) { |
1631 | 3.56k | boolval = true; |
1632 | 3.56k | code = 0; |
1633 | 3.56k | } |
1634 | 0 | else |
1635 | 0 | goto exit; |
1636 | 3.56k | } |
1637 | 3.64k | ctx->NeedAppearances = boolval; |
1638 | | |
1639 | | /* Save this for efficiency later */ |
1640 | 3.64k | ctx->AcroForm = AcroForm; |
1641 | 3.64k | pdfi_countup(AcroForm); |
1642 | | |
1643 | | /* TODO: Link up ParentField (but hopefully we can avoid doing this hacky mess). |
1644 | | * Also: Something to do with Bug692447.pdf? |
1645 | | */ |
1646 | | |
1647 | | |
1648 | 13.3k | exit: |
1649 | 13.3k | pdfi_countdown(AcroForm); |
1650 | 13.3k | return code; |
1651 | 3.64k | } |
1652 | | |
1653 | | |
1654 | | /* See pdf_main.ps/process_trailer_attrs() |
1655 | | * Some of this stuff is about pdfmarks, and some of it is just handling |
1656 | | * random things in the trailer. |
1657 | | */ |
1658 | | int pdfi_doc_trailer(pdf_context *ctx) |
1659 | 25.3k | { |
1660 | 25.3k | int code = 0; |
1661 | | |
1662 | | /* Can't do this stuff with no Trailer */ |
1663 | 25.3k | if (!ctx->Trailer) { |
1664 | 11.9k | pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_TRAILER, "pdfi_doc_trailer", NULL); |
1665 | 11.9k | goto exit; |
1666 | 11.9k | } |
1667 | | |
1668 | 13.3k | if (ctx->device_state.writepdfmarks) { |
1669 | | /* Handle Outlines */ |
1670 | 1.68k | code = pdfi_doc_Outlines(ctx); |
1671 | 1.68k | if (code < 0) { |
1672 | 5 | pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_OUTLINES, "pdfi_doc_trailer", NULL); |
1673 | 5 | if (ctx->args.pdfstoponerror) |
1674 | 0 | goto exit; |
1675 | 5 | } |
1676 | | |
1677 | | /* Handle Info */ |
1678 | 1.68k | code = pdfi_doc_Info(ctx); |
1679 | 1.68k | if (code < 0) { |
1680 | 88 | pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_INFO, "pdfi_doc_trailer", NULL); |
1681 | 88 | if (ctx->args.pdfstoponerror) |
1682 | 0 | goto exit; |
1683 | 88 | } |
1684 | | |
1685 | | /* Handle EmbeddedFiles */ |
1686 | | /* TODO: add a configuration option to embed or omit */ |
1687 | 1.68k | code = pdfi_doc_EmbeddedFiles(ctx); |
1688 | 1.68k | if (code < 0) { |
1689 | 9 | pdfi_set_warning(ctx, 0, NULL, W_PDF_BAD_EMBEDDEDFILES, "pdfi_doc_trailer", NULL); |
1690 | 9 | if (ctx->args.pdfstoponerror) |
1691 | 0 | goto exit; |
1692 | 9 | } |
1693 | 1.68k | } |
1694 | | |
1695 | | /* Handle OCProperties */ |
1696 | | /* NOTE: Apparently already handled by pdfi_read_OptionalRoot() */ |
1697 | | |
1698 | | /* Handle AcroForm -- this is some bookkeeping once per doc, not rendering them yet */ |
1699 | 13.3k | code = pdfi_doc_AcroForm(ctx); |
1700 | 13.3k | if (code < 0) { |
1701 | 199 | pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_ACROFORM, "pdfi_doc_trailer", NULL); |
1702 | 199 | if (ctx->args.pdfstoponerror) |
1703 | 0 | goto exit; |
1704 | 199 | } |
1705 | | |
1706 | | /* Handle OutputIntent ICC Profile */ |
1707 | 13.3k | code = pdfi_doc_OutputIntents(ctx); |
1708 | 13.3k | if (code < 0) { |
1709 | 0 | pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_OUTPUTINTENTS, "pdfi_doc_trailer", NULL); |
1710 | 0 | if (ctx->args.pdfstoponerror) |
1711 | 0 | goto exit; |
1712 | 0 | } |
1713 | | |
1714 | | /* Handle PageLabels */ |
1715 | 13.3k | code = pdfi_doc_PageLabels(ctx); |
1716 | 13.3k | if (code < 0) { |
1717 | 11 | pdfi_set_warning(ctx, code, NULL, W_PDF_BAD_PAGELABELS, "pdfi_doc_trailer", NULL); |
1718 | 11 | if (ctx->args.pdfstoponerror) |
1719 | 0 | goto exit; |
1720 | 11 | } |
1721 | | |
1722 | 25.3k | exit: |
1723 | 25.3k | return code; |
1724 | 13.3k | } |