/src/mupdf/source/pdf/pdf-run.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2024 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | #include "pdf-annot-imp.h" |
25 | | |
26 | | static void |
27 | | pdf_run_annot_with_usage(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
28 | 10.1k | { |
29 | 10.1k | fz_matrix page_ctm; |
30 | 10.1k | fz_rect mediabox; |
31 | 10.1k | pdf_processor *proc = NULL; |
32 | 10.1k | fz_default_colorspaces *default_cs = NULL; |
33 | 10.1k | int flags; |
34 | 10.1k | int resources_pushed = 0; |
35 | 10.1k | int struct_parent_num; |
36 | 10.1k | pdf_obj *struct_parent; |
37 | | |
38 | 10.1k | fz_var(proc); |
39 | 10.1k | fz_var(default_cs); |
40 | 10.1k | fz_var(resources_pushed); |
41 | | |
42 | 10.1k | if (cookie && page->super.incomplete) |
43 | 0 | cookie->incomplete = 1; |
44 | | |
45 | 10.1k | pdf_annot_push_local_xref(ctx, annot); |
46 | | |
47 | | /* Widgets only get displayed if they have both a T and a TF flag, |
48 | | * apparently */ |
49 | 10.1k | if (pdf_name_eq(ctx, pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)), PDF_NAME(Widget))) |
50 | 7.76k | { |
51 | 7.76k | pdf_obj *ft = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(FT)); |
52 | 7.76k | pdf_obj *t = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(T)); |
53 | | |
54 | 7.76k | if (ft == NULL || t == NULL) |
55 | 402 | { |
56 | 402 | pdf_annot_pop_local_xref(ctx, annot); |
57 | 402 | return; |
58 | 402 | } |
59 | 7.76k | } |
60 | | |
61 | 19.4k | fz_try(ctx) |
62 | 19.4k | { |
63 | 9.74k | default_cs = pdf_load_default_colorspaces(ctx, doc, page); |
64 | 9.74k | if (default_cs) |
65 | 9.74k | fz_set_default_colorspaces(ctx, dev, default_cs); |
66 | | |
67 | 9.74k | pdf_page_transform(ctx, page, &mediabox, &page_ctm); |
68 | | |
69 | 9.74k | flags = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(F)); |
70 | 9.74k | if (flags & PDF_ANNOT_IS_NO_ROTATE) |
71 | 34 | { |
72 | 34 | int rotate = pdf_dict_get_inheritable_int(ctx, page->obj, PDF_NAME(Rotate)); |
73 | 34 | fz_rect rect = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); |
74 | 34 | fz_point tp = fz_transform_point_xy(rect.x0, rect.y1, page_ctm); |
75 | 34 | page_ctm = fz_concat(page_ctm, fz_translate(-tp.x, -tp.y)); |
76 | 34 | page_ctm = fz_concat(page_ctm, fz_rotate(-rotate)); |
77 | 34 | page_ctm = fz_concat(page_ctm, fz_translate(tp.x, tp.y)); |
78 | 34 | } |
79 | | |
80 | 9.74k | ctm = fz_concat(page_ctm, ctm); |
81 | | |
82 | 9.74k | struct_parent = pdf_dict_getl(ctx, page->obj, PDF_NAME(StructParent), NULL); |
83 | 9.74k | struct_parent_num = pdf_to_int_default(ctx, struct_parent, -1); |
84 | | |
85 | 9.74k | proc = pdf_new_run_processor(ctx, page->doc, dev, ctm, struct_parent_num, usage, NULL, default_cs, cookie, NULL, NULL); |
86 | 9.74k | pdf_processor_push_resources(ctx, proc, pdf_page_resources(ctx, annot->page)); |
87 | 9.74k | resources_pushed = 1; |
88 | 9.74k | pdf_process_annot(ctx, proc, annot, cookie); |
89 | 9.74k | pdf_close_processor(ctx, proc); |
90 | 9.74k | } |
91 | 19.4k | fz_always(ctx) |
92 | 9.74k | { |
93 | 9.74k | if (resources_pushed) |
94 | 9.74k | pdf_processor_pop_resources(ctx, proc); |
95 | 9.74k | pdf_drop_processor(ctx, proc); |
96 | 9.74k | fz_drop_default_colorspaces(ctx, default_cs); |
97 | 9.74k | pdf_annot_pop_local_xref(ctx, annot); |
98 | 9.74k | } |
99 | 9.74k | fz_catch(ctx) |
100 | 0 | fz_rethrow(ctx); |
101 | 9.74k | } |
102 | | |
103 | | static fz_rect pdf_page_cropbox(fz_context *ctx, pdf_page *page) |
104 | 11.3k | { |
105 | 11.3k | pdf_obj *obj = pdf_dict_get_inheritable(ctx, page->obj, PDF_NAME(CropBox)); |
106 | 11.3k | if (!obj) |
107 | 8.88k | obj = pdf_dict_get_inheritable(ctx, page->obj, PDF_NAME(MediaBox)); |
108 | 11.3k | return pdf_to_rect(ctx, obj); |
109 | 11.3k | } |
110 | | |
111 | | static fz_rect pdf_page_mediabox(fz_context *ctx, pdf_page *page) |
112 | 11.3k | { |
113 | 11.3k | return pdf_dict_get_inheritable_rect(ctx, page->obj, PDF_NAME(MediaBox)); |
114 | 11.3k | } |
115 | | |
116 | | static void |
117 | | pdf_run_page_contents_with_usage_imp(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
118 | 11.3k | { |
119 | 11.3k | fz_matrix page_ctm; |
120 | 11.3k | pdf_obj *resources; |
121 | 11.3k | pdf_obj *contents; |
122 | 11.3k | fz_rect fitzbox; |
123 | 11.3k | fz_rect mediabox, cropbox; |
124 | 11.3k | pdf_processor *proc = NULL; |
125 | 11.3k | fz_default_colorspaces *default_cs = NULL; |
126 | 11.3k | fz_colorspace *colorspace = NULL; |
127 | 11.3k | fz_path *path = NULL; |
128 | 11.3k | int struct_parent_num; |
129 | 11.3k | pdf_obj *struct_parent; |
130 | | |
131 | 11.3k | fz_var(proc); |
132 | 11.3k | fz_var(colorspace); |
133 | 11.3k | fz_var(default_cs); |
134 | 11.3k | fz_var(path); |
135 | | |
136 | 11.3k | if (cookie && page->super.incomplete) |
137 | 0 | cookie->incomplete = 1; |
138 | | |
139 | 22.6k | fz_try(ctx) |
140 | 22.6k | { |
141 | 11.3k | default_cs = pdf_load_default_colorspaces(ctx, doc, page); |
142 | 11.3k | if (default_cs) |
143 | 11.3k | fz_set_default_colorspaces(ctx, dev, default_cs); |
144 | | |
145 | 11.3k | pdf_page_transform(ctx, page, &fitzbox, &page_ctm); |
146 | 11.3k | ctm = fz_concat(page_ctm, ctm); |
147 | 11.3k | fitzbox = fz_transform_rect(fitzbox, ctm); |
148 | | |
149 | 11.3k | resources = pdf_page_resources(ctx, page); |
150 | 11.3k | contents = pdf_page_contents(ctx, page); |
151 | | |
152 | 11.3k | mediabox = pdf_page_mediabox(ctx, page); |
153 | 11.3k | cropbox = pdf_page_cropbox(ctx, page); |
154 | | |
155 | 11.3k | if (page->transparency) |
156 | 2.07k | { |
157 | 2.07k | pdf_obj *group = pdf_page_group(ctx, page); |
158 | | |
159 | 2.07k | if (group) |
160 | 1.56k | { |
161 | 1.56k | pdf_obj *cs = pdf_dict_get(ctx, group, PDF_NAME(CS)); |
162 | 1.56k | if (cs) |
163 | 1.54k | { |
164 | 3.08k | fz_try(ctx) |
165 | 3.08k | colorspace = pdf_load_colorspace(ctx, cs); |
166 | 3.08k | fz_catch(ctx) |
167 | 20 | { |
168 | 20 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
169 | 20 | fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); |
170 | 20 | fz_report_error(ctx); |
171 | 20 | fz_warn(ctx, "Ignoring Page blending colorspace."); |
172 | 20 | } |
173 | 1.54k | if (!fz_is_valid_blend_colorspace(ctx, colorspace)) |
174 | 0 | { |
175 | 0 | fz_warn(ctx, "Ignoring invalid Page blending colorspace: %s.", colorspace->name); |
176 | 0 | fz_drop_colorspace(ctx, colorspace); |
177 | 0 | colorspace = NULL; |
178 | 0 | } |
179 | 1.54k | } |
180 | 1.56k | } |
181 | 507 | else |
182 | 507 | colorspace = fz_keep_colorspace(ctx, fz_default_output_intent(ctx, default_cs)); |
183 | | |
184 | 2.07k | fz_begin_group(ctx, dev, fitzbox, colorspace, 1, 0, 0, 1); |
185 | 2.07k | } |
186 | | |
187 | 11.3k | struct_parent = pdf_dict_get(ctx, page->obj, PDF_NAME(StructParents)); |
188 | 11.3k | struct_parent_num = pdf_to_int_default(ctx, struct_parent, -1); |
189 | | |
190 | | /* Clip content to CropBox if it is smaller than the MediaBox */ |
191 | 11.3k | if (cropbox.x0 > mediabox.x0 || cropbox.x1 < mediabox.x1 || cropbox.y0 > mediabox.y0 || cropbox.y1 < mediabox.y1) |
192 | 295 | { |
193 | 295 | path = fz_new_path(ctx); |
194 | 295 | fz_rectto(ctx, path, cropbox.x0, cropbox.y0, cropbox.x1, cropbox.y1); |
195 | 295 | fz_clip_path(ctx, dev, path, 1, ctm, fz_infinite_rect); |
196 | 295 | } |
197 | | |
198 | 11.3k | proc = pdf_new_run_processor(ctx, page->doc, dev, ctm, struct_parent_num, usage, NULL, default_cs, cookie, NULL, NULL); |
199 | 11.3k | pdf_process_contents(ctx, proc, doc, resources, contents, cookie, NULL); |
200 | 11.3k | pdf_close_processor(ctx, proc); |
201 | | |
202 | 11.3k | if (cropbox.x0 > mediabox.x0 || cropbox.x1 < mediabox.x1 || cropbox.y0 > mediabox.y0 || cropbox.y1 < mediabox.y1) |
203 | 295 | { |
204 | 295 | fz_pop_clip(ctx, dev); |
205 | 295 | } |
206 | | |
207 | 11.3k | if (page->transparency) |
208 | 2.05k | { |
209 | 2.05k | fz_end_group(ctx, dev); |
210 | 2.05k | } |
211 | 11.3k | } |
212 | 22.6k | fz_always(ctx) |
213 | 11.3k | { |
214 | 11.3k | fz_drop_path(ctx, path); |
215 | 11.3k | pdf_drop_processor(ctx, proc); |
216 | 11.3k | fz_drop_colorspace(ctx, colorspace); |
217 | 11.3k | fz_drop_default_colorspaces(ctx, default_cs); |
218 | 11.3k | } |
219 | 11.3k | fz_catch(ctx) |
220 | 60 | { |
221 | 60 | fz_rethrow(ctx); |
222 | 60 | } |
223 | 11.3k | } |
224 | | |
225 | | void pdf_run_page_contents_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
226 | 11.3k | { |
227 | 11.3k | pdf_document *doc = page->doc; |
228 | 11.3k | int nocache; |
229 | | |
230 | 11.3k | nocache = !!(dev->hints & FZ_NO_CACHE); |
231 | 11.3k | if (nocache) |
232 | 0 | pdf_mark_xref(ctx, doc); |
233 | | |
234 | 22.6k | fz_try(ctx) |
235 | 22.6k | { |
236 | 11.3k | pdf_run_page_contents_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie); |
237 | 11.3k | } |
238 | 22.6k | fz_always(ctx) |
239 | 11.3k | { |
240 | 11.3k | if (nocache) |
241 | 0 | pdf_clear_xref_to_mark(ctx, doc); |
242 | 11.3k | } |
243 | 11.3k | fz_catch(ctx) |
244 | 60 | { |
245 | 60 | fz_rethrow(ctx); |
246 | 60 | } |
247 | 11.3k | } |
248 | | |
249 | | void pdf_run_page_contents(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
250 | 11.3k | { |
251 | 11.3k | pdf_run_page_contents_with_usage(ctx, page, dev, ctm, "View", cookie); |
252 | 11.3k | } |
253 | | |
254 | | void pdf_run_annot(fz_context *ctx, pdf_annot *annot, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
255 | 0 | { |
256 | 0 | pdf_page *page = annot->page; |
257 | 0 | pdf_document *doc; |
258 | 0 | int nocache; |
259 | |
|
260 | 0 | if (!page) |
261 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "annotation not bound to any page"); |
262 | | |
263 | 0 | doc = page->doc; |
264 | |
|
265 | 0 | nocache = !!(dev->hints & FZ_NO_CACHE); |
266 | 0 | if (nocache) |
267 | 0 | pdf_mark_xref(ctx, doc); |
268 | 0 | fz_try(ctx) |
269 | 0 | { |
270 | 0 | pdf_run_annot_with_usage(ctx, doc, page, annot, dev, ctm, "View", cookie); |
271 | 0 | } |
272 | 0 | fz_always(ctx) |
273 | 0 | { |
274 | 0 | if (nocache) |
275 | 0 | pdf_clear_xref_to_mark(ctx, doc); |
276 | 0 | } |
277 | 0 | fz_catch(ctx) |
278 | 0 | { |
279 | 0 | fz_rethrow(ctx); |
280 | 0 | } |
281 | 0 | } |
282 | | |
283 | | static void |
284 | | pdf_run_page_widgets_with_usage_imp(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
285 | 11.2k | { |
286 | 11.2k | pdf_annot *widget; |
287 | | |
288 | 11.2k | if (cookie && cookie->progress_max != (size_t)-1) |
289 | 0 | { |
290 | 0 | int count = 1; |
291 | 0 | for (widget = page->widgets; widget; widget = widget->next) |
292 | 0 | count++; |
293 | 0 | cookie->progress_max += count; |
294 | 0 | } |
295 | | |
296 | 19.2k | for (widget = page->widgets; widget; widget = widget->next) |
297 | 7.94k | { |
298 | | /* Check the cookie for aborting */ |
299 | 7.94k | if (cookie) |
300 | 0 | { |
301 | 0 | if (cookie->abort) |
302 | 0 | break; |
303 | 0 | cookie->progress++; |
304 | 0 | } |
305 | | |
306 | 7.94k | pdf_run_annot_with_usage(ctx, doc, page, widget, dev, ctm, usage, cookie); |
307 | 7.94k | } |
308 | 11.2k | } |
309 | | |
310 | | static void |
311 | | pdf_run_page_annots_with_usage_imp(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
312 | 11.2k | { |
313 | 11.2k | pdf_annot *annot; |
314 | | |
315 | 11.2k | if (cookie && cookie->progress_max != (size_t)-1) |
316 | 0 | { |
317 | 0 | int count = 1; |
318 | 0 | for (annot = page->annots; annot; annot = annot->next) |
319 | 0 | count++; |
320 | 0 | cookie->progress_max += count; |
321 | 0 | } |
322 | | |
323 | 13.4k | for (annot = page->annots; annot; annot = annot->next) |
324 | 2.20k | { |
325 | | /* Check the cookie for aborting */ |
326 | 2.20k | if (cookie) |
327 | 0 | { |
328 | 0 | if (cookie->abort) |
329 | 0 | break; |
330 | 0 | cookie->progress++; |
331 | 0 | } |
332 | | |
333 | 2.20k | pdf_run_annot_with_usage(ctx, doc, page, annot, dev, ctm, usage, cookie); |
334 | 2.20k | } |
335 | 11.2k | } |
336 | | |
337 | | void pdf_run_page_annots_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
338 | 11.2k | { |
339 | 11.2k | pdf_document *doc = page->doc; |
340 | 11.2k | int nocache; |
341 | | |
342 | 11.2k | nocache = !!(dev->hints & FZ_NO_CACHE); |
343 | 11.2k | if (nocache) |
344 | 0 | pdf_mark_xref(ctx, doc); |
345 | | |
346 | 22.5k | fz_try(ctx) |
347 | 22.5k | { |
348 | 11.2k | pdf_run_page_annots_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie); |
349 | 11.2k | } |
350 | 22.5k | fz_always(ctx) |
351 | 11.2k | { |
352 | 11.2k | if (nocache) |
353 | 0 | pdf_clear_xref_to_mark(ctx, doc); |
354 | 11.2k | } |
355 | 11.2k | fz_catch(ctx) |
356 | 0 | { |
357 | 0 | fz_rethrow(ctx); |
358 | 0 | } |
359 | 11.2k | } |
360 | | |
361 | | void pdf_run_page_annots(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
362 | 11.2k | { |
363 | 11.2k | pdf_run_page_annots_with_usage(ctx, page, dev, ctm, "View", cookie); |
364 | 11.2k | } |
365 | | |
366 | | void pdf_run_page_widgets_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
367 | 11.2k | { |
368 | 11.2k | pdf_document *doc = page->doc; |
369 | 11.2k | int nocache; |
370 | | |
371 | 11.2k | nocache = !!(dev->hints & FZ_NO_CACHE); |
372 | 11.2k | if (nocache) |
373 | 0 | pdf_mark_xref(ctx, doc); |
374 | | |
375 | 22.5k | fz_try(ctx) |
376 | 22.5k | { |
377 | 11.2k | pdf_run_page_widgets_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie); |
378 | 11.2k | } |
379 | 22.5k | fz_always(ctx) |
380 | 11.2k | { |
381 | 11.2k | if (nocache) |
382 | 0 | pdf_clear_xref_to_mark(ctx, doc); |
383 | 11.2k | } |
384 | 11.2k | fz_catch(ctx) |
385 | 0 | { |
386 | 0 | fz_rethrow(ctx); |
387 | 0 | } |
388 | 11.2k | } |
389 | | |
390 | | void pdf_run_page_widgets(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
391 | 11.2k | { |
392 | 11.2k | pdf_run_page_widgets_with_usage(ctx, page, dev, ctm, "View", cookie); |
393 | 11.2k | } |
394 | | |
395 | | void |
396 | | pdf_run_page_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie) |
397 | 0 | { |
398 | 0 | pdf_document *doc = page->doc; |
399 | 0 | int nocache = !!(dev->hints & FZ_NO_CACHE); |
400 | |
|
401 | 0 | if (nocache) |
402 | 0 | pdf_mark_xref(ctx, doc); |
403 | 0 | fz_try(ctx) |
404 | 0 | { |
405 | 0 | pdf_run_page_contents_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie); |
406 | 0 | pdf_run_page_annots_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie); |
407 | 0 | pdf_run_page_widgets_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie); |
408 | 0 | } |
409 | 0 | fz_always(ctx) |
410 | 0 | { |
411 | 0 | if (nocache) |
412 | 0 | pdf_clear_xref_to_mark(ctx, doc); |
413 | 0 | } |
414 | 0 | fz_catch(ctx) |
415 | 0 | { |
416 | 0 | fz_rethrow(ctx); |
417 | 0 | } |
418 | 0 | } |
419 | | |
420 | | void |
421 | | pdf_run_page(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
422 | 0 | { |
423 | 0 | pdf_run_page_with_usage(ctx, page, dev, ctm, "View", cookie); |
424 | 0 | } |
425 | | |
426 | | void |
427 | | pdf_run_glyph(fz_context *ctx, pdf_document *doc, pdf_obj *resources, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate, fz_default_colorspaces *default_cs, void *fill_gstate, void *stroke_gstate) |
428 | 9.26k | { |
429 | 9.26k | pdf_processor *proc; |
430 | | |
431 | 9.26k | proc = pdf_new_run_processor(ctx, doc, dev, ctm, -1, "View", gstate, default_cs, NULL, fill_gstate, stroke_gstate); |
432 | 18.5k | fz_try(ctx) |
433 | 18.5k | { |
434 | 9.26k | pdf_process_glyph(ctx, proc, doc, resources, contents); |
435 | 9.26k | pdf_close_processor(ctx, proc); |
436 | 9.26k | } |
437 | 18.5k | fz_always(ctx) |
438 | 9.26k | pdf_drop_processor(ctx, proc); |
439 | 9.26k | fz_catch(ctx) |
440 | 182 | fz_rethrow(ctx); |
441 | 9.26k | } |
442 | | |
443 | | fz_structure |
444 | | pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag) |
445 | 22.0k | { |
446 | | /* Perform Structure mapping to go from tag to standard. */ |
447 | 22.0k | if (role_map) |
448 | 19.3k | { |
449 | 19.3k | pdf_obj *o = pdf_dict_get(ctx, role_map, tag); |
450 | 19.3k | if (o) |
451 | 1.16k | tag = o; |
452 | 19.3k | } |
453 | | |
454 | 22.0k | if (pdf_name_eq(ctx, tag, PDF_NAME(Document))) |
455 | 197 | return FZ_STRUCTURE_DOCUMENT; |
456 | 21.8k | if (pdf_name_eq(ctx, tag, PDF_NAME(Part))) |
457 | 174 | return FZ_STRUCTURE_PART; |
458 | 21.6k | if (pdf_name_eq(ctx, tag, PDF_NAME(Art))) |
459 | 25 | return FZ_STRUCTURE_ART; |
460 | 21.6k | if (pdf_name_eq(ctx, tag, PDF_NAME(Sect))) |
461 | 269 | return FZ_STRUCTURE_SECT; |
462 | 21.3k | if (pdf_name_eq(ctx, tag, PDF_NAME(Div))) |
463 | 4.41k | return FZ_STRUCTURE_DIV; |
464 | 16.9k | if (pdf_name_eq(ctx, tag, PDF_NAME(BlockQuote))) |
465 | 0 | return FZ_STRUCTURE_BLOCKQUOTE; |
466 | 16.9k | if (pdf_name_eq(ctx, tag, PDF_NAME(Caption))) |
467 | 0 | return FZ_STRUCTURE_CAPTION; |
468 | 16.9k | if (pdf_name_eq(ctx, tag, PDF_NAME(TOC))) |
469 | 0 | return FZ_STRUCTURE_TOC; |
470 | 16.9k | if (pdf_name_eq(ctx, tag, PDF_NAME(TOCI))) |
471 | 0 | return FZ_STRUCTURE_TOCI; |
472 | 16.9k | if (pdf_name_eq(ctx, tag, PDF_NAME(Index))) |
473 | 0 | return FZ_STRUCTURE_INDEX; |
474 | 16.9k | if (pdf_name_eq(ctx, tag, PDF_NAME(NonStruct))) |
475 | 774 | return FZ_STRUCTURE_NONSTRUCT; |
476 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(Private))) |
477 | 0 | return FZ_STRUCTURE_PRIVATE; |
478 | | /* Grouping elements (PDF 2.0 - Table 364) */ |
479 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(DocumentFragment))) |
480 | 0 | return FZ_STRUCTURE_DOCUMENTFRAGMENT; |
481 | | /* Grouping elements (PDF 2.0 - Table 365) */ |
482 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(Aside))) |
483 | 0 | return FZ_STRUCTURE_ASIDE; |
484 | | /* Grouping elements (PDF 2.0 - Table 366) */ |
485 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(Title))) |
486 | 0 | return FZ_STRUCTURE_TITLE; |
487 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(FENote))) |
488 | 0 | return FZ_STRUCTURE_FENOTE; |
489 | | /* Grouping elements (PDF 2.0 - Table 367) */ |
490 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(Sub))) |
491 | 0 | return FZ_STRUCTURE_SUB; |
492 | | |
493 | | /* Paragraphlike elements (PDF 1.7 - Table 10.21) */ |
494 | 16.1k | if (pdf_name_eq(ctx, tag, PDF_NAME(P))) |
495 | 8.04k | return FZ_STRUCTURE_P; |
496 | 8.11k | if (pdf_name_eq(ctx, tag, PDF_NAME(H))) |
497 | 40 | return FZ_STRUCTURE_H; |
498 | 8.07k | if (pdf_name_eq(ctx, tag, PDF_NAME(H1))) |
499 | 128 | return FZ_STRUCTURE_H1; |
500 | 7.94k | if (pdf_name_eq(ctx, tag, PDF_NAME(H2))) |
501 | 130 | return FZ_STRUCTURE_H2; |
502 | 7.81k | if (pdf_name_eq(ctx, tag, PDF_NAME(H3))) |
503 | 28 | return FZ_STRUCTURE_H3; |
504 | 7.78k | if (pdf_name_eq(ctx, tag, PDF_NAME(H4))) |
505 | 0 | return FZ_STRUCTURE_H4; |
506 | 7.78k | if (pdf_name_eq(ctx, tag, PDF_NAME(H5))) |
507 | 0 | return FZ_STRUCTURE_H5; |
508 | 7.78k | if (pdf_name_eq(ctx, tag, PDF_NAME(H6))) |
509 | 0 | return FZ_STRUCTURE_H6; |
510 | | |
511 | | /* List elements (PDF 1.7 - Table 10.23) */ |
512 | 7.78k | if (pdf_name_eq(ctx, tag, PDF_NAME(L))) |
513 | 136 | return FZ_STRUCTURE_LIST; |
514 | 7.65k | if (pdf_name_eq(ctx, tag, PDF_NAME(LI))) |
515 | 478 | return FZ_STRUCTURE_LISTITEM; |
516 | 7.17k | if (pdf_name_eq(ctx, tag, PDF_NAME(Lbl))) |
517 | 416 | return FZ_STRUCTURE_LABEL; |
518 | 6.75k | if (pdf_name_eq(ctx, tag, PDF_NAME(LBody))) |
519 | 474 | return FZ_STRUCTURE_LISTBODY; |
520 | | |
521 | | /* Table elements (PDF 1.7 - Table 10.24) */ |
522 | 6.28k | if (pdf_name_eq(ctx, tag, PDF_NAME(Table))) |
523 | 208 | return FZ_STRUCTURE_TABLE; |
524 | 6.07k | if (pdf_name_eq(ctx, tag, PDF_NAME(TR))) |
525 | 1.04k | return FZ_STRUCTURE_TR; |
526 | 5.03k | if (pdf_name_eq(ctx, tag, PDF_NAME(TH))) |
527 | 80 | return FZ_STRUCTURE_TH; |
528 | 4.95k | if (pdf_name_eq(ctx, tag, PDF_NAME(TD))) |
529 | 2.27k | return FZ_STRUCTURE_TD; |
530 | 2.67k | if (pdf_name_eq(ctx, tag, PDF_NAME(THead))) |
531 | 0 | return FZ_STRUCTURE_THEAD; |
532 | 2.67k | if (pdf_name_eq(ctx, tag, PDF_NAME(TBody))) |
533 | 0 | return FZ_STRUCTURE_TBODY; |
534 | 2.67k | if (pdf_name_eq(ctx, tag, PDF_NAME(TFoot))) |
535 | 0 | return FZ_STRUCTURE_TFOOT; |
536 | | |
537 | | /* Inline elements (PDF 1.7 - Table 10.25) */ |
538 | 2.67k | if (pdf_name_eq(ctx, tag, PDF_NAME(Span))) |
539 | 1.18k | return FZ_STRUCTURE_SPAN; |
540 | 1.49k | if (pdf_name_eq(ctx, tag, PDF_NAME(Quote))) |
541 | 0 | return FZ_STRUCTURE_QUOTE; |
542 | 1.49k | if (pdf_name_eq(ctx, tag, PDF_NAME(Note))) |
543 | 0 | return FZ_STRUCTURE_NOTE; |
544 | 1.49k | if (pdf_name_eq(ctx, tag, PDF_NAME(Reference))) |
545 | 0 | return FZ_STRUCTURE_REFERENCE; |
546 | 1.49k | if (pdf_name_eq(ctx, tag, PDF_NAME(BibEntry))) |
547 | 0 | return FZ_STRUCTURE_BIBENTRY; |
548 | 1.49k | if (pdf_name_eq(ctx, tag, PDF_NAME(Code))) |
549 | 0 | return FZ_STRUCTURE_CODE; |
550 | 1.49k | if (pdf_name_eq(ctx, tag, PDF_NAME(Link))) |
551 | 784 | return FZ_STRUCTURE_LINK; |
552 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(Annot))) |
553 | 0 | return FZ_STRUCTURE_ANNOT; |
554 | | /* Inline elements (PDF 2.0 - Table 368) */ |
555 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(Em))) |
556 | 0 | return FZ_STRUCTURE_EM; |
557 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(Strong))) |
558 | 0 | return FZ_STRUCTURE_STRONG; |
559 | | |
560 | | /* Ruby inline element (PDF 1.7 - Table 10.26) */ |
561 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(Ruby))) |
562 | 0 | return FZ_STRUCTURE_RUBY; |
563 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(RB))) |
564 | 0 | return FZ_STRUCTURE_RB; |
565 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(RT))) |
566 | 0 | return FZ_STRUCTURE_RT; |
567 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(RP))) |
568 | 0 | return FZ_STRUCTURE_RP; |
569 | | |
570 | | /* Warichu inline element (PDF 1.7 - Table 10.26) */ |
571 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(Warichu))) |
572 | 0 | return FZ_STRUCTURE_WARICHU; |
573 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(WT))) |
574 | 0 | return FZ_STRUCTURE_WT; |
575 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(WP))) |
576 | 0 | return FZ_STRUCTURE_WP; |
577 | | |
578 | | /* Illustration elements (PDF 1.7 - Table 10.27) */ |
579 | 712 | if (pdf_name_eq(ctx, tag, PDF_NAME(Figure))) |
580 | 221 | return FZ_STRUCTURE_FIGURE; |
581 | 491 | if (pdf_name_eq(ctx, tag, PDF_NAME(Formula))) |
582 | 0 | return FZ_STRUCTURE_FORMULA; |
583 | 491 | if (pdf_name_eq(ctx, tag, PDF_NAME(Form))) |
584 | 0 | return FZ_STRUCTURE_FORM; |
585 | | |
586 | | /* Artifact structure type (PDF 2.0 - Table 375) */ |
587 | 491 | if (pdf_name_eq(ctx, tag, PDF_NAME(Artifact))) |
588 | 0 | return FZ_STRUCTURE_ARTIFACT; |
589 | | |
590 | 491 | return FZ_STRUCTURE_INVALID; |
591 | 491 | } |
592 | | |
593 | | /* The recursive descent of the structure tree uses an fz_try at each level. |
594 | | * At the risk of creating a foot cannon... "no one will need more than ~64 |
595 | | * levels of structure tree". */ |
596 | | static void |
597 | | run_ds(fz_context *ctx, fz_device *dev, pdf_obj *role_map, pdf_obj *obj, int idx, fz_cookie *cookie) |
598 | 0 | { |
599 | 0 | pdf_obj *k; |
600 | 0 | int i, n; |
601 | | |
602 | | /* Check the cookie for aborting */ |
603 | 0 | if (cookie) |
604 | 0 | { |
605 | 0 | if (cookie->abort) |
606 | 0 | return; |
607 | 0 | cookie->progress++; |
608 | 0 | } |
609 | | |
610 | 0 | if (pdf_is_number(ctx, obj)) |
611 | 0 | { |
612 | | /* A marked-content identifier denoting a marked content sequence. WHAT? */ |
613 | 0 | return; |
614 | 0 | } |
615 | | |
616 | 0 | if (pdf_mark_obj(ctx, obj)) |
617 | 0 | return; |
618 | | |
619 | 0 | fz_try(ctx) |
620 | 0 | { |
621 | 0 | fz_structure standard; |
622 | 0 | pdf_obj *tag = pdf_dict_get(ctx, obj, PDF_NAME(S)); |
623 | 0 | if (!tag) |
624 | 0 | break; |
625 | | |
626 | 0 | standard = pdf_structure_type(ctx, role_map, tag); |
627 | 0 | if (standard == FZ_STRUCTURE_INVALID) |
628 | 0 | break; |
629 | 0 | fz_begin_structure(ctx, dev, standard, pdf_to_name(ctx, tag), idx); |
630 | 0 | k = pdf_dict_get(ctx, obj, PDF_NAME(K)); |
631 | 0 | if (k) |
632 | 0 | { |
633 | 0 | n = pdf_array_len(ctx, k); |
634 | 0 | if (n == 0) |
635 | 0 | run_ds(ctx, dev, role_map, k, 0, cookie); |
636 | 0 | else |
637 | 0 | { |
638 | 0 | for (i = 0; i < n; i++) |
639 | 0 | run_ds(ctx, dev, role_map, pdf_array_get(ctx, k, i), i, cookie); |
640 | 0 | } |
641 | 0 | } |
642 | 0 | fz_end_structure(ctx, dev); |
643 | 0 | } |
644 | 0 | fz_always(ctx) |
645 | 0 | pdf_unmark_obj(ctx, obj); |
646 | 0 | fz_catch(ctx) |
647 | 0 | fz_rethrow(ctx); |
648 | 0 | } |
649 | | |
650 | | void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie) |
651 | 0 | { |
652 | 0 | int nocache; |
653 | 0 | int marked = 0; |
654 | 0 | pdf_obj *st, *rm, *k; |
655 | |
|
656 | 0 | fz_var(marked); |
657 | |
|
658 | 0 | nocache = !!(dev->hints & FZ_NO_CACHE); |
659 | 0 | if (nocache) |
660 | 0 | pdf_mark_xref(ctx, doc); |
661 | |
|
662 | 0 | fz_try(ctx) |
663 | 0 | { |
664 | 0 | st = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)), PDF_NAME(StructTreeRoot)); |
665 | 0 | rm = pdf_dict_get(ctx, st, PDF_NAME(RoleMap)); |
666 | |
|
667 | 0 | if (pdf_mark_obj(ctx, st)) |
668 | 0 | break; |
669 | 0 | marked = 1; |
670 | |
|
671 | 0 | k = pdf_dict_get(ctx, st, PDF_NAME(K)); |
672 | 0 | if (k) |
673 | 0 | { |
674 | 0 | int n = pdf_array_len(ctx, k); |
675 | 0 | if (n == 0) |
676 | 0 | run_ds(ctx, dev, rm, k, 0, cookie); |
677 | 0 | else |
678 | 0 | { |
679 | 0 | int i; |
680 | 0 | for (i = 0; i < n; i++) |
681 | 0 | run_ds(ctx, dev, rm, pdf_array_get(ctx, k, i), i, cookie); |
682 | 0 | } |
683 | 0 | } |
684 | 0 | } |
685 | 0 | fz_always(ctx) |
686 | 0 | { |
687 | 0 | if (marked) |
688 | 0 | pdf_unmark_obj(ctx, st); |
689 | 0 | if (nocache) |
690 | 0 | pdf_clear_xref_to_mark(ctx, doc); |
691 | 0 | } |
692 | 0 | fz_catch(ctx) |
693 | 0 | fz_rethrow(ctx); |
694 | 0 | } |