/src/qpdf/libqpdf/QPDF_pages.cc
Line | Count | Source |
1 | | #include <qpdf/QPDFPageDocumentHelper.hh> |
2 | | #include <qpdf/QPDF_private.hh> |
3 | | |
4 | | #include <qpdf/QPDFAcroFormDocumentHelper.hh> |
5 | | #include <qpdf/QPDFExc.hh> |
6 | | #include <qpdf/QPDFObjectHandle_private.hh> |
7 | | #include <qpdf/QTC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | #include <qpdf/Util.hh> |
10 | | |
11 | | // In support of page manipulation APIs, these methods internally maintain state about pages in a |
12 | | // pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos, |
13 | | // which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages() |
14 | | // method returns a const reference to all_pages and has been in the public API long before the |
15 | | // introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of |
16 | | // calls to it in the library, so the efficiency of having it cached is probably worth keeping it. |
17 | | // At one point, I had partially implemented a helper class specifically for the pages tree, but |
18 | | // once you work in all the logic that handles repairing the /Type keys of page tree nodes (both |
19 | | // /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than |
20 | | // what's here. So, in spite of the fact that a const reference is returned, the current code is |
21 | | // fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in |
22 | | // attic in case there is ever a reason to resurrect it. There are additional notes in |
23 | | // README-maintainer, which also refers to this comment. |
24 | | |
25 | | // The goal of this code is to ensure that the all_pages vector, which users may have a reference |
26 | | // to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent |
27 | | // outside of any call to the library. As long as users only touch the /Pages structure through |
28 | | // page-specific API calls, they never have to worry about anything, and this will also stay |
29 | | // consistent. If a user touches anything about the /Pages structure outside of these calls (such |
30 | | // as by directly looking up and manipulating the underlying objects), they can call |
31 | | // updateAllPagesCache() to bring things back in sync. |
32 | | |
33 | | // If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure |
34 | | // alone. If the user does use the APIs, then we push all inheritable objects down and flatten the |
35 | | // /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos |
36 | | // internally consistent at all times. |
37 | | |
38 | | // Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent |
39 | | // should remain in as few places as possible. As of initial writing, only flattenPagesTree, |
40 | | // insertPage, and removePage, along with methods they call, are concerned with it. Everything else |
41 | | // goes through one of those methods. |
42 | | |
43 | | using Pages = QPDF::Doc::Pages; |
44 | | |
45 | | std::vector<QPDFObjectHandle> const& |
46 | | QPDF::getAllPages() |
47 | 0 | { |
48 | 0 | return m->pages.all(); |
49 | 0 | } |
50 | | |
51 | | std::vector<QPDFObjectHandle> const& |
52 | | Pages::cache() |
53 | 12.0k | { |
54 | | // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. |
55 | 12.0k | if (all_pages.empty() && !invalid_page_found) { |
56 | 9.30k | ever_called_get_all_pages_ = true; |
57 | 9.30k | auto root = qpdf.getRoot(); |
58 | 9.30k | QPDFObjGen::set visited; |
59 | 9.30k | QPDFObjGen::set seen; |
60 | 9.30k | QPDFObjectHandle pages = root.getKey("/Pages"); |
61 | 9.30k | bool warned = false; |
62 | 9.30k | bool changed_pages = false; |
63 | 9.46k | while (pages.isDictionary() && pages.hasKey("/Parent")) { |
64 | 183 | if (!seen.add(pages)) { |
65 | | // loop -- will be detected again and reported later |
66 | 29 | break; |
67 | 29 | } |
68 | | // Files have been found in the wild where /Pages in the catalog points to the first |
69 | | // page. Try to work around this and similar cases with this heuristic. |
70 | 154 | if (!warned) { |
71 | 71 | root.warn( |
72 | 71 | "document page tree root (root -> /Pages) doesn't point" |
73 | 71 | " to the root of the page tree; attempting to correct"); |
74 | 71 | warned = true; |
75 | 71 | } |
76 | 154 | changed_pages = true; |
77 | 154 | pages = pages.getKey("/Parent"); |
78 | 154 | } |
79 | 9.30k | if (changed_pages) { |
80 | 69 | root.replaceKey("/Pages", pages); |
81 | 69 | } |
82 | 9.30k | seen.clear(); |
83 | 9.30k | if (!pages.hasKey("/Kids")) { |
84 | | // Ensure we actually found a /Pages object. |
85 | 99 | throw QPDFExc( |
86 | 99 | qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); |
87 | 99 | } |
88 | 9.20k | try { |
89 | 9.20k | getAllPagesInternal(pages, visited, seen, false, false); |
90 | 9.20k | } catch (...) { |
91 | 71 | all_pages.clear(); |
92 | 71 | invalid_page_found = false; |
93 | 71 | throw; |
94 | 71 | } |
95 | 3.36k | if (invalid_page_found) { |
96 | 2.74k | flattenPagesTree(); |
97 | 2.74k | invalid_page_found = false; |
98 | 2.74k | } |
99 | 3.36k | } |
100 | 6.10k | return all_pages; |
101 | 12.0k | } |
102 | | |
103 | | void |
104 | | Pages::getAllPagesInternal( |
105 | | QPDFObjectHandle cur_node, |
106 | | QPDFObjGen::set& visited, |
107 | | QPDFObjGen::set& seen, |
108 | | bool media_box, |
109 | | bool resources) |
110 | 5.66k | { |
111 | 5.66k | if (!visited.add(cur_node)) { |
112 | 23 | throw QPDFExc( |
113 | 23 | qpdf_e_pages, |
114 | 23 | m->file->getName(), |
115 | 23 | "object " + cur_node.getObjGen().unparse(' '), |
116 | 23 | 0, |
117 | 23 | "Loop detected in /Pages structure (getAllPages)"); |
118 | 23 | } |
119 | 5.64k | if (!cur_node.isDictionaryOfType("/Pages")) { |
120 | | // During fuzzing files were encountered where the root object appeared in the pages tree. |
121 | | // Unconditionally setting the /Type to /Pages could cause problems, but trying to |
122 | | // accommodate the possibility may be excessive. |
123 | 4.32k | cur_node.warn("/Type key should be /Pages but is not; overriding"); |
124 | 4.32k | cur_node.replaceKey("/Type", Name("/Pages")); |
125 | 4.32k | } |
126 | 5.64k | if (!media_box) { |
127 | 5.55k | media_box = cur_node.getKey("/MediaBox").isRectangle(); |
128 | 5.55k | QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); |
129 | 5.55k | } |
130 | 5.64k | if (!resources) { |
131 | 5.55k | resources = cur_node.getKey("/Resources").isDictionary(); |
132 | 5.55k | } |
133 | 5.64k | auto kids = cur_node.getKey("/Kids"); |
134 | 5.64k | if (!visited.add(kids)) { |
135 | 11 | throw QPDFExc( |
136 | 11 | qpdf_e_pages, |
137 | 11 | m->file->getName(), |
138 | 11 | "object " + cur_node.getObjGen().unparse(' '), |
139 | 11 | 0, |
140 | 11 | "Loop detected in /Pages structure (getAllPages)"); |
141 | 11 | } |
142 | 5.63k | int i = -1; |
143 | 31.2k | for (auto& kid: kids.as_array()) { |
144 | 31.2k | ++i; |
145 | 31.2k | int errors = 0; |
146 | | |
147 | 31.2k | if (!kid.isDictionary()) { |
148 | 22.1k | kid.warn("Pages tree includes non-dictionary object; ignoring"); |
149 | 22.1k | invalid_page_found = true; |
150 | 22.1k | continue; |
151 | 22.1k | } |
152 | 9.02k | if (!kid.isIndirect()) { |
153 | 2.81k | cur_node.warn( |
154 | 2.81k | "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect"); |
155 | 2.81k | kid = qpdf.makeIndirectObject(kid); |
156 | 2.81k | ++errors; |
157 | 2.81k | } |
158 | 9.02k | if (kid.hasKey("/Kids")) { |
159 | 2.23k | getAllPagesInternal(kid, visited, seen, media_box, resources); |
160 | 6.79k | } else { |
161 | 6.79k | if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { |
162 | 3.35k | kid.warn( |
163 | 3.35k | "kid " + std::to_string(i) + |
164 | 3.35k | " (from 0) MediaBox is undefined; setting to letter / ANSI A"); |
165 | 3.35k | kid.replaceKey( |
166 | 3.35k | "/MediaBox", |
167 | 3.35k | QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); |
168 | 3.35k | ++errors; |
169 | 3.35k | } |
170 | 6.79k | if (!resources) { |
171 | 6.02k | auto res = kid.getKey("/Resources"); |
172 | | |
173 | 6.02k | if (!res.isDictionary()) { |
174 | 3.61k | ++errors; |
175 | 3.61k | kid.warn( |
176 | 3.61k | "kid " + std::to_string(i) + |
177 | 3.61k | " (from 0) Resources is missing or invalid; repairing"); |
178 | 3.61k | kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary()); |
179 | 3.61k | } |
180 | 6.02k | } |
181 | 6.79k | auto annots = kid.getKey("/Annots"); |
182 | 6.79k | if (!annots.null()) { |
183 | 1.21k | if (!annots.isArray()) { |
184 | 11 | kid.warn( |
185 | 11 | "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing"); |
186 | 11 | kid.removeKey("/Annots"); |
187 | 11 | ++errors; |
188 | 1.20k | } else { |
189 | 1.20k | QPDFObjGen::set seen_annots; |
190 | 10.8k | for (auto& annot: annots.as_array()) { |
191 | 10.8k | if (!seen_annots.add(annot)) { |
192 | 793 | kid.warn( |
193 | 793 | "kid " + std::to_string(i) + |
194 | 793 | " (from 0) Annots has duplicate entry for annotation " + |
195 | 793 | annot.id_gen().unparse(' ')); |
196 | 793 | ++errors; |
197 | 793 | } |
198 | 10.8k | } |
199 | 1.20k | } |
200 | 1.21k | } |
201 | | |
202 | 6.79k | if (!seen.add(kid)) { |
203 | | // Make a copy of the page. This does the same as shallowCopyPage in |
204 | | // QPDFPageObjectHelper. |
205 | 2.08k | if (!m->reconstructed_xref) { |
206 | 0 | cur_node.warn( |
207 | 0 | "kid " + std::to_string(i) + |
208 | 0 | " (from 0) appears more than once in the pages tree;" |
209 | 0 | " creating a new page object as a copy"); |
210 | | // This needs to be fixed. shallowCopy does not necessarily produce a valid |
211 | | // page. |
212 | 0 | kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); |
213 | 0 | seen.add(kid); |
214 | 2.08k | } else { |
215 | 2.08k | cur_node.warn( |
216 | 2.08k | "kid " + std::to_string(i) + |
217 | 2.08k | " (from 0) appears more than once in the pages tree; ignoring duplicate"); |
218 | 2.08k | invalid_page_found = true; |
219 | 2.08k | kid = QPDFObjectHandle::newNull(); |
220 | 2.08k | continue; |
221 | 2.08k | } |
222 | 0 | if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) { |
223 | | // Consider fixing and adding an information message. |
224 | 0 | ++errors; |
225 | 0 | } |
226 | 0 | } |
227 | 4.71k | if (!kid.isDictionaryOfType("/Page")) { |
228 | 3.18k | kid.warn("/Type key should be /Page but is not; overriding"); |
229 | 3.18k | kid.replaceKey("/Type", Name("/Page")); |
230 | 3.18k | ++errors; |
231 | 3.18k | } |
232 | 4.71k | if (m->reconstructed_xref && errors > 2) { |
233 | 1.83k | cur_node.warn( |
234 | 1.83k | "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); |
235 | 1.83k | invalid_page_found = true; |
236 | 1.83k | kid = QPDFObjectHandle::newNull(); |
237 | 1.83k | continue; |
238 | 1.83k | } |
239 | 2.87k | all_pages.emplace_back(kid); |
240 | 2.87k | } |
241 | 9.02k | } |
242 | 5.63k | } |
243 | | |
244 | | void |
245 | | QPDF::updateAllPagesCache() |
246 | 0 | { |
247 | 0 | m->pages.update_cache(); |
248 | 0 | } |
249 | | |
250 | | void |
251 | | Pages::update_cache() |
252 | 0 | { |
253 | | // Force regeneration of the pages cache. We force immediate recalculation of all_pages since |
254 | | // users may have references to it that they got from calls to getAllPages(). We can defer |
255 | | // recalculation of pageobj_to_pages_pos until needed. |
256 | 0 | all_pages.clear(); |
257 | 0 | pageobj_to_pages_pos.clear(); |
258 | 0 | pushed_inherited_attributes_to_pages = false; |
259 | 0 | cache(); |
260 | 0 | } |
261 | | |
262 | | void |
263 | | Pages::flattenPagesTree() |
264 | 2.74k | { |
265 | | // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. |
266 | | |
267 | 2.74k | if (!pageobj_to_pages_pos.empty()) { |
268 | 0 | return; |
269 | 0 | } |
270 | | |
271 | | // Push inherited objects down to the /Page level. As a side effect all_pages will also be |
272 | | // generated. |
273 | 2.74k | pushInheritedAttributesToPage(true, true); |
274 | | |
275 | 2.74k | QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); |
276 | | |
277 | 2.74k | size_t const len = all_pages.size(); |
278 | 4.77k | for (size_t pos = 0; pos < len; ++pos) { |
279 | | // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at |
280 | | // this point because pushInheritedAttributesToPage calls getAllPages which resolves |
281 | | // duplicates. |
282 | 2.03k | insertPageobjToPage(all_pages.at(pos), toI(pos), true); |
283 | 2.03k | all_pages.at(pos).replaceKey("/Parent", pages); |
284 | 2.03k | } |
285 | | |
286 | 2.74k | pages.replaceKey("/Kids", Array(all_pages)); |
287 | | // /Count has not changed |
288 | 2.74k | if (pages.getKey("/Count").getUIntValue() != len) { |
289 | 1.62k | if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) { |
290 | 1.55k | pages.replaceKey("/Count", Integer(len)); |
291 | 1.55k | } else { |
292 | 74 | throw std::runtime_error("/Count is wrong after flattening pages tree"); |
293 | 74 | } |
294 | 1.62k | } |
295 | 2.74k | } |
296 | | |
297 | | void |
298 | | QPDF::pushInheritedAttributesToPage() |
299 | 0 | { |
300 | | // Public API should not have access to allow_changes. |
301 | 0 | m->pages.pushInheritedAttributesToPage(true, false); |
302 | 0 | } |
303 | | |
304 | | void |
305 | | Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) |
306 | 2.74k | { |
307 | | // Traverse pages tree pushing all inherited resources down to the page level. |
308 | | |
309 | | // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning |
310 | | // for skipped keys, re-traverse unconditionally. |
311 | 2.74k | if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) { |
312 | 0 | return; |
313 | 0 | } |
314 | | |
315 | | // Calling cache() resolves any duplicated page objects, repairs broken nodes, and detects |
316 | | // loops, so we don't have to do those activities here. |
317 | 2.74k | (void)cache(); |
318 | | |
319 | | // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain |
320 | | // values for them. |
321 | 2.74k | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; |
322 | 2.74k | pushInheritedAttributesToPageInternal( |
323 | 2.74k | m->trailer.getKey("/Root").getKey("/Pages"), |
324 | 2.74k | key_ancestors, |
325 | 2.74k | allow_changes, |
326 | 2.74k | warn_skipped_keys); |
327 | 2.74k | util::assertion( |
328 | 2.74k | key_ancestors.empty(), |
329 | 2.74k | "key_ancestors not empty after pushing inherited attributes to pages"); |
330 | 2.74k | pushed_inherited_attributes_to_pages = true; |
331 | 2.74k | ever_pushed_inherited_attributes_to_pages_ = true; |
332 | 2.74k | } |
333 | | |
334 | | void |
335 | | Pages::pushInheritedAttributesToPageInternal( |
336 | | QPDFObjectHandle cur_pages, |
337 | | std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors, |
338 | | bool allow_changes, |
339 | | bool warn_skipped_keys) |
340 | 4.72k | { |
341 | | // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate |
342 | | // are inheritable attributes. Push this object onto the stack of pages nodes that have values |
343 | | // for this attribute. |
344 | | |
345 | 4.72k | std::set<std::string> inheritable_keys; |
346 | 17.4k | for (auto const& key: cur_pages.getKeys()) { |
347 | 17.4k | if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") { |
348 | 884 | if (!allow_changes) { |
349 | 0 | throw QPDFExc( |
350 | 0 | qpdf_e_internal, |
351 | 0 | m->file->getName(), |
352 | 0 | "/Pages object " + cur_pages.id_gen().unparse(' '), |
353 | 0 | cur_pages.offset(), |
354 | 0 | "pushInheritedAttributesToPage detected an inheritable attribute when called " |
355 | 0 | "in no-change mode"); |
356 | 0 | } |
357 | | |
358 | | // This is an inheritable resource |
359 | 884 | inheritable_keys.insert(key); |
360 | 884 | auto oh = cur_pages[key]; |
361 | 884 | QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1); |
362 | 884 | if (!oh.indirect()) { |
363 | 868 | if (!oh.isScalar()) { |
364 | | // Replace shared direct object non-scalar resources with indirect objects to |
365 | | // avoid copying large structures around. |
366 | 369 | cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh)); |
367 | 369 | oh = cur_pages[key]; |
368 | 499 | } else { |
369 | | // It's okay to copy scalars. |
370 | 499 | } |
371 | 868 | } |
372 | 884 | key_ancestors[key].emplace_back(oh); |
373 | 884 | if (key_ancestors[key].size() > 1) { |
374 | 91 | } |
375 | | // Remove this resource from this node. It will be reattached at the page level. |
376 | 884 | cur_pages.erase(key); |
377 | 16.5k | } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) { |
378 | | // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not |
379 | | // set), as we don't change these; but flattening removes intermediate /Pages nodes. |
380 | 4.41k | if (warn_skipped_keys && cur_pages.contains("/Parent")) { |
381 | 1.10k | warn( |
382 | 1.10k | qpdf_e_pages, |
383 | 1.10k | "Pages object: object " + cur_pages.id_gen().unparse(' '), |
384 | 1.10k | cur_pages.offset(), |
385 | 1.10k | ("Unknown key " + key + |
386 | 1.10k | " in /Pages object is being discarded as a result of flattening the /Pages " |
387 | 1.10k | "tree")); |
388 | 1.10k | } |
389 | 4.41k | } |
390 | 17.4k | } |
391 | | |
392 | | // Process descendant nodes. This method does not perform loop detection because all code paths |
393 | | // that lead here follow a call to getAllPages, which already throws an exception in the event |
394 | | // of a loop in the pages tree. |
395 | 26.9k | for (auto& kid: Array(cur_pages["/Kids"])) { |
396 | 26.9k | if (kid.isDictionaryOfType("/Pages")) { |
397 | 1.98k | pushInheritedAttributesToPageInternal( |
398 | 1.98k | kid, key_ancestors, allow_changes, warn_skipped_keys); |
399 | 24.9k | } else { |
400 | | // Add all available inheritable attributes not present in this object to this object. |
401 | 24.9k | for (auto const& [key, values]: key_ancestors) { |
402 | 2.23k | if (!kid.contains(key)) { |
403 | 2.15k | kid.replaceKey(key, values.back()); |
404 | 2.15k | } else { |
405 | 77 | QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); |
406 | 77 | } |
407 | 2.23k | } |
408 | 24.9k | } |
409 | 26.9k | } |
410 | | |
411 | | // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map. |
412 | | // That way, the invariant that the list of keys in key_ancestors is exactly those keys for |
413 | | // which inheritable attributes are available. |
414 | | |
415 | 4.72k | if (!inheritable_keys.empty()) { |
416 | 636 | for (auto const& key: inheritable_keys) { |
417 | 636 | key_ancestors[key].pop_back(); |
418 | 636 | if (key_ancestors[key].empty()) { |
419 | 560 | key_ancestors.erase(key); |
420 | 560 | } |
421 | 636 | } |
422 | 4.27k | } else { |
423 | 4.27k | QTC::TC("qpdf", "QPDF opt no inheritable keys"); |
424 | 4.27k | } |
425 | 4.72k | } |
426 | | |
427 | | void |
428 | | Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate) |
429 | 2.03k | { |
430 | 2.03k | QPDFObjGen og(obj.getObjGen()); |
431 | 2.03k | if (check_duplicate) { |
432 | 2.03k | if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { |
433 | | // The library never calls insertPageobjToPage in a way that causes this to happen. |
434 | 0 | throw QPDFExc( |
435 | 0 | qpdf_e_pages, |
436 | 0 | m->file->getName(), |
437 | 0 | "page " + std::to_string(pos) + " (numbered from zero): object " + og.unparse(' '), |
438 | 0 | 0, |
439 | 0 | "duplicate page reference found; this would cause loss of data"); |
440 | 0 | } |
441 | 2.03k | } else { |
442 | 0 | pageobj_to_pages_pos[og] = pos; |
443 | 0 | } |
444 | 2.03k | } |
445 | | |
446 | | void |
447 | | Pages::insert(QPDFObjectHandle newpage, int pos) |
448 | 0 | { |
449 | | // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end. |
450 | |
|
451 | 0 | flattenPagesTree(); |
452 | |
|
453 | 0 | if (!newpage.indirect()) { |
454 | 0 | newpage = qpdf.makeIndirectObject(newpage); |
455 | 0 | } else if (newpage.qpdf() != &qpdf) { |
456 | 0 | newpage.qpdf()->pushInheritedAttributesToPage(); |
457 | 0 | newpage = qpdf.copyForeignObject(newpage); |
458 | 0 | } else { |
459 | 0 | QTC::TC("qpdf", "QPDF insert indirect page"); |
460 | 0 | } |
461 | |
|
462 | 0 | if (pos < 0 || std::cmp_greater(pos, all_pages.size())) { |
463 | 0 | throw std::runtime_error("QPDF::insertPage called with pos out of range"); |
464 | 0 | } |
465 | | |
466 | 0 | QTC::TC( |
467 | 0 | "qpdf", |
468 | 0 | "QPDF insert page", |
469 | 0 | pos == 0 ? 0 : // insert at beginning |
470 | 0 | std::cmp_equal(pos, size()) ? 1 // at end |
471 | 0 | : 2); // insert in middle |
472 | |
|
473 | 0 | if (pageobj_to_pages_pos.contains(newpage)) { |
474 | 0 | newpage = qpdf.makeIndirectObject(newpage.copy()); |
475 | 0 | } |
476 | |
|
477 | 0 | auto pages = qpdf.getRoot()["/Pages"]; |
478 | 0 | Array kids = pages["/Kids"]; |
479 | |
|
480 | 0 | newpage.replaceKey("/Parent", pages); |
481 | 0 | kids.insert(pos, newpage); |
482 | 0 | size_t npages = kids.size(); |
483 | 0 | pages.replaceKey("/Count", Integer(npages)); |
484 | 0 | all_pages.insert(all_pages.begin() + pos, newpage); |
485 | 0 | for (size_t i = static_cast<size_t>(pos) + 1; i < npages; ++i) { |
486 | 0 | insertPageobjToPage(all_pages.at(i), static_cast<int>(i), false); |
487 | 0 | } |
488 | 0 | insertPageobjToPage(newpage, pos, true); |
489 | 0 | } |
490 | | |
491 | | void |
492 | | QPDF::removePage(QPDFObjectHandle page) |
493 | 0 | { |
494 | 0 | m->pages.erase(page); |
495 | 0 | } |
496 | | |
497 | | void |
498 | | Pages::erase(QPDFObjectHandle& page) |
499 | 0 | { |
500 | 0 | int pos = qpdf.findPage(page); // also ensures flat /Pages |
501 | 0 | QTC::TC( |
502 | 0 | "qpdf", |
503 | 0 | "QPDF remove page", |
504 | 0 | (pos == 0) ? 0 : // remove at beginning |
505 | 0 | (pos == toI(all_pages.size() - 1)) ? 1 // end |
506 | 0 | : 2); // remove in middle |
507 | |
|
508 | 0 | QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); |
509 | 0 | QPDFObjectHandle kids = pages.getKey("/Kids"); |
510 | |
|
511 | 0 | kids.eraseItem(pos); |
512 | 0 | int npages = static_cast<int>(kids.size()); |
513 | 0 | pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages)); |
514 | 0 | all_pages.erase(all_pages.begin() + pos); |
515 | 0 | pageobj_to_pages_pos.erase(page.getObjGen()); |
516 | 0 | for (int i = pos; i < npages; ++i) { |
517 | 0 | m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false); |
518 | 0 | } |
519 | 0 | } |
520 | | |
521 | | void |
522 | | QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage) |
523 | 0 | { |
524 | 0 | int refpos = findPage(refpage); |
525 | 0 | if (!before) { |
526 | 0 | ++refpos; |
527 | 0 | } |
528 | 0 | m->pages.insert(newpage, refpos); |
529 | 0 | } |
530 | | |
531 | | void |
532 | | QPDF::addPage(QPDFObjectHandle newpage, bool first) |
533 | 0 | { |
534 | 0 | if (first) { |
535 | 0 | m->pages.insert(newpage, 0); |
536 | 0 | } else { |
537 | 0 | m->pages.insert(newpage, getRoot()["/Pages"]["/Count"].getIntValueAsInt()); |
538 | 0 | } |
539 | 0 | } |
540 | | |
541 | | int |
542 | | QPDF::findPage(QPDFObjectHandle& page) |
543 | 0 | { |
544 | 0 | return findPage(page.getObjGen()); |
545 | 0 | } |
546 | | |
547 | | int |
548 | | QPDF::findPage(QPDFObjGen og) |
549 | 0 | { |
550 | 0 | return m->pages.find(og); |
551 | 0 | } |
552 | | |
553 | | int |
554 | | Pages::find(QPDFObjGen og) |
555 | 0 | { |
556 | 0 | flattenPagesTree(); |
557 | 0 | auto it = pageobj_to_pages_pos.find(og); |
558 | 0 | if (it == pageobj_to_pages_pos.end()) { |
559 | 0 | throw QPDFExc( |
560 | 0 | qpdf_e_pages, |
561 | 0 | m->file->getName(), |
562 | 0 | "page object: object " + og.unparse(' '), |
563 | 0 | 0, |
564 | 0 | "page object not referenced in /Pages tree"); |
565 | 0 | } |
566 | 0 | return (*it).second; |
567 | 0 | } |
568 | | |
569 | | class QPDFPageDocumentHelper::Members |
570 | | { |
571 | | }; |
572 | | |
573 | | QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) : |
574 | 0 | QPDFDocumentHelper(qpdf) |
575 | 0 | { |
576 | 0 | } |
577 | | |
578 | | QPDFPageDocumentHelper& |
579 | | QPDFPageDocumentHelper::get(QPDF& qpdf) |
580 | 0 | { |
581 | 0 | return qpdf.doc().page_dh(); |
582 | 0 | } |
583 | | |
584 | | void |
585 | | QPDFPageDocumentHelper::validate(bool repair) |
586 | 0 | { |
587 | 0 | } |
588 | | |
589 | | std::vector<QPDFPageObjectHelper> |
590 | | QPDFPageDocumentHelper::getAllPages() |
591 | 0 | { |
592 | 0 | auto& pp = qpdf.doc().pages(); |
593 | 0 | return {pp.begin(), pp.end()}; |
594 | 0 | } |
595 | | |
596 | | void |
597 | | QPDFPageDocumentHelper::pushInheritedAttributesToPage() |
598 | 0 | { |
599 | 0 | qpdf.pushInheritedAttributesToPage(); |
600 | 0 | } |
601 | | |
602 | | void |
603 | | QPDFPageDocumentHelper::removeUnreferencedResources() |
604 | 0 | { |
605 | 0 | for (auto& ph: getAllPages()) { |
606 | 0 | ph.removeUnreferencedResources(); |
607 | 0 | } |
608 | 0 | } |
609 | | |
610 | | void |
611 | | QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first) |
612 | 0 | { |
613 | 0 | qpdf.doc().pages().insert(newpage, first ? 0 : qpdf.doc().pages().size()); |
614 | 0 | } |
615 | | |
616 | | void |
617 | | QPDFPageDocumentHelper::addPageAt( |
618 | | QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage) |
619 | 0 | { |
620 | 0 | qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle()); |
621 | 0 | } |
622 | | |
623 | | void |
624 | | QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page) |
625 | 0 | { |
626 | 0 | qpdf.removePage(page.getObjectHandle()); |
627 | 0 | } |
628 | | |
629 | | void |
630 | | QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags) |
631 | 0 | { |
632 | 0 | qpdf.doc().pages().flatten_annotations(required_flags, forbidden_flags); |
633 | 0 | } |
634 | | |
635 | | void |
636 | | Pages::flatten_annotations(int required_flags, int forbidden_flags) |
637 | 0 | { |
638 | 0 | auto& afdh = qpdf.doc().acroform(); |
639 | 0 | if (afdh.getNeedAppearances()) { |
640 | 0 | qpdf.getRoot() |
641 | 0 | .getKey("/AcroForm") |
642 | 0 | .warn( |
643 | 0 | "document does not have updated appearance streams, so form fields " |
644 | 0 | "will not be flattened"); |
645 | 0 | } |
646 | 0 | for (QPDFPageObjectHelper ph: all()) { |
647 | 0 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); |
648 | 0 | if (!resources.isDictionary()) { |
649 | | // As of #1521, this should be impossible unless a user inserted an invalid page. |
650 | 0 | resources = ph.getObjectHandle().replaceKeyAndGetNew("/Resources", Dictionary::empty()); |
651 | 0 | } |
652 | 0 | flatten_annotations_for_page(ph, resources, afdh, required_flags, forbidden_flags); |
653 | 0 | } |
654 | 0 | if (!afdh.getNeedAppearances()) { |
655 | 0 | qpdf.getRoot().removeKey("/AcroForm"); |
656 | 0 | } |
657 | 0 | } |
658 | | |
659 | | void |
660 | | Pages::flatten_annotations_for_page( |
661 | | QPDFPageObjectHelper& page, |
662 | | QPDFObjectHandle& resources, |
663 | | QPDFAcroFormDocumentHelper& afdh, |
664 | | int required_flags, |
665 | | int forbidden_flags) |
666 | 0 | { |
667 | 0 | bool need_appearances = afdh.getNeedAppearances(); |
668 | 0 | std::vector<QPDFAnnotationObjectHelper> annots = page.getAnnotations(); |
669 | 0 | std::vector<QPDFObjectHandle> new_annots; |
670 | 0 | std::string new_content; |
671 | 0 | int rotate = 0; |
672 | 0 | QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate"); |
673 | 0 | if (rotate_obj.isInteger() && rotate_obj.getIntValue()) { |
674 | 0 | rotate = rotate_obj.getIntValueAsInt(); |
675 | 0 | } |
676 | 0 | int next_fx = 1; |
677 | 0 | for (auto& aoh: annots) { |
678 | 0 | QPDFObjectHandle as = aoh.getAppearanceStream("/N"); |
679 | 0 | bool is_widget = (aoh.getSubtype() == "/Widget"); |
680 | 0 | bool process = true; |
681 | 0 | if (need_appearances && is_widget) { |
682 | 0 | process = false; |
683 | 0 | } |
684 | 0 | if (process && as.isStream()) { |
685 | 0 | if (is_widget) { |
686 | 0 | QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh); |
687 | 0 | QPDFObjectHandle as_resources = as.getDict().getKey("/Resources"); |
688 | 0 | if (as_resources.isIndirect()) { |
689 | 0 | ; |
690 | 0 | as.getDict().replaceKey("/Resources", as_resources.shallowCopy()); |
691 | 0 | as_resources = as.getDict().getKey("/Resources"); |
692 | 0 | } |
693 | 0 | as_resources.mergeResources(ff.getDefaultResources()); |
694 | 0 | } else { |
695 | 0 | QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation"); |
696 | 0 | } |
697 | 0 | std::string name = resources.getUniqueResourceName("/Fxo", next_fx); |
698 | 0 | std::string content = |
699 | 0 | aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags); |
700 | 0 | if (!content.empty()) { |
701 | 0 | resources.mergeResources(Dictionary({{"/XObject", Dictionary({{name, as}})}})); |
702 | 0 | resources.getKey("/XObject").replaceKey(name, as); |
703 | 0 | ++next_fx; |
704 | 0 | } |
705 | 0 | new_content += content; |
706 | 0 | } else if (process && !aoh.getAppearanceDictionary().null()) { |
707 | | // If an annotation has no selected appearance stream, just drop the annotation when |
708 | | // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows |
709 | | // associated with comments that aren't visible, and other types of annotations that |
710 | | // aren't visible. Annotations that have no appearance streams at all, such as Link, |
711 | | // Popup, and Projection, should be preserved. |
712 | 0 | } else { |
713 | 0 | new_annots.push_back(aoh.getObjectHandle()); |
714 | 0 | } |
715 | 0 | } |
716 | 0 | if (new_annots.size() != annots.size()) { |
717 | 0 | QPDFObjectHandle page_oh = page.getObjectHandle(); |
718 | 0 | if (new_annots.empty()) { |
719 | 0 | page_oh.removeKey("/Annots"); |
720 | 0 | } else { |
721 | 0 | QPDFObjectHandle old_annots = page_oh.getKey("/Annots"); |
722 | 0 | QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots); |
723 | 0 | if (old_annots.isIndirect()) { |
724 | 0 | qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh); |
725 | 0 | } else { |
726 | 0 | page_oh.replaceKey("/Annots", new_annots_oh); |
727 | 0 | } |
728 | 0 | } |
729 | 0 | page.addPageContents(qpdf.newStream("q\n"), true); |
730 | 0 | page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false); |
731 | 0 | } |
732 | 0 | } |