/src/qpdf/libqpdf/QPDF_pages.cc
Line | Count | Source |
1 | | #include <qpdf/QPDFPageDocumentHelper.hh> |
2 | | #include <qpdf/QPDF_private.hh> |
3 | | |
4 | | #include <qpdf/AcroForm.hh> |
5 | | #include <qpdf/QPDFExc.hh> |
6 | | #include <qpdf/QPDFObjectHandle_private.hh> |
7 | | #include <qpdf/QTC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | #include <qpdf/Util.hh> |
10 | | |
11 | | // In support of page manipulation APIs, these methods internally maintain state about pages in a |
12 | | // pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos, |
13 | | // which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages() |
14 | | // method returns a const reference to all_pages and has been in the public API long before the |
15 | | // introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of |
16 | | // calls to it in the library, so the efficiency of having it cached is probably worth keeping it. |
17 | | // At one point, I had partially implemented a helper class specifically for the pages tree, but |
18 | | // once you work in all the logic that handles repairing the /Type keys of page tree nodes (both |
19 | | // /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than |
20 | | // what's here. So, in spite of the fact that a const reference is returned, the current code is |
21 | | // fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in |
22 | | // attic in case there is ever a reason to resurrect it. There are additional notes in |
23 | | // README-maintainer, which also refers to this comment. |
24 | | |
25 | | // The goal of this code is to ensure that the all_pages vector, which users may have a reference |
26 | | // to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent |
27 | | // outside of any call to the library. As long as users only touch the /Pages structure through |
28 | | // page-specific API calls, they never have to worry about anything, and this will also stay |
29 | | // consistent. If a user touches anything about the /Pages structure outside of these calls (such |
30 | | // as by directly looking up and manipulating the underlying objects), they can call |
31 | | // updateAllPagesCache() to bring things back in sync. |
32 | | |
33 | | // If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure |
34 | | // alone. If the user does use the APIs, then we push all inheritable objects down and flatten the |
35 | | // /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos |
36 | | // internally consistent at all times. |
37 | | |
38 | | // Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent |
39 | | // should remain in as few places as possible. As of initial writing, only flattenPagesTree, |
40 | | // insertPage, and removePage, along with methods they call, are concerned with it. Everything else |
41 | | // goes through one of those methods. |
42 | | |
43 | | using Pages = QPDF::Doc::Pages; |
44 | | |
45 | | std::vector<QPDFObjectHandle> const& |
46 | | QPDF::getAllPages() |
47 | 0 | { |
48 | 0 | return m->pages.all(); |
49 | 0 | } |
50 | | |
51 | | std::vector<QPDFObjectHandle> const& |
52 | | Pages::cache() |
53 | 12.0k | { |
54 | | // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. |
55 | 12.0k | if (all_pages.empty() && !invalid_page_found) { |
56 | 9.26k | ever_called_get_all_pages_ = true; |
57 | 9.26k | auto root = qpdf.getRoot(); |
58 | 9.26k | QPDFObjGen::set visited; |
59 | 9.26k | QPDFObjGen::set seen; |
60 | 9.26k | QPDFObjectHandle pages = root.getKey("/Pages"); |
61 | 9.26k | bool warned = false; |
62 | 9.26k | bool changed_pages = false; |
63 | 9.40k | while (pages.isDictionary() && pages.hasKey("/Parent")) { |
64 | 169 | if (!seen.add(pages)) { |
65 | | // loop -- will be detected again and reported later |
66 | 24 | break; |
67 | 24 | } |
68 | | // Files have been found in the wild where /Pages in the catalog points to the first |
69 | | // page. Try to work around this and similar cases with this heuristic. |
70 | 145 | if (!warned) { |
71 | 65 | root.warn( |
72 | 65 | "document page tree root (root -> /Pages) doesn't point" |
73 | 65 | " to the root of the page tree; attempting to correct"); |
74 | 65 | warned = true; |
75 | 65 | } |
76 | 145 | changed_pages = true; |
77 | 145 | pages = pages.getKey("/Parent"); |
78 | 145 | } |
79 | 9.26k | if (changed_pages) { |
80 | 64 | root.replaceKey("/Pages", pages); |
81 | 64 | } |
82 | 9.26k | seen.clear(); |
83 | 9.26k | if (!pages.hasKey("/Kids")) { |
84 | | // Ensure we actually found a /Pages object. |
85 | 135 | throw QPDFExc( |
86 | 135 | qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); |
87 | 135 | } |
88 | 9.12k | try { |
89 | 9.12k | getAllPagesInternal(pages, 0, visited, seen, false, false); |
90 | 9.12k | } catch (...) { |
91 | 87 | all_pages.clear(); |
92 | 87 | invalid_page_found = false; |
93 | 87 | throw; |
94 | 87 | } |
95 | 3.43k | if (invalid_page_found) { |
96 | 2.78k | flattenPagesTree(); |
97 | 2.78k | invalid_page_found = false; |
98 | 2.78k | } |
99 | 3.43k | } |
100 | 6.22k | return all_pages; |
101 | 12.0k | } |
102 | | |
103 | | void |
104 | | Pages::getAllPagesInternal( |
105 | | QPDFObjectHandle cur_node, |
106 | | uint32_t level, |
107 | | QPDFObjGen::set& visited, |
108 | | QPDFObjGen::set& seen, |
109 | | bool media_box, |
110 | | bool resources) |
111 | 6.16k | { |
112 | 6.16k | static uint32_t constexpr max_level = 100; |
113 | 6.16k | if (++level > max_level) { |
114 | 0 | throw QPDFExc( |
115 | 0 | qpdf_e_pages, |
116 | 0 | m->file->getName(), |
117 | 0 | "object " + cur_node.getObjGen().unparse(' '), |
118 | 0 | 0, |
119 | 0 | "/Pages structure too deeply nested (getAllPages)"); |
120 | 0 | } |
121 | 6.16k | if (!visited.add(cur_node)) { |
122 | 35 | throw QPDFExc( |
123 | 35 | qpdf_e_pages, |
124 | 35 | m->file->getName(), |
125 | 35 | "object " + cur_node.getObjGen().unparse(' '), |
126 | 35 | 0, |
127 | 35 | "Loop detected in /Pages structure (getAllPages)"); |
128 | 35 | } |
129 | 6.12k | if (!cur_node.isDictionaryOfType("/Pages")) { |
130 | | // During fuzzing files were encountered where the root object appeared in the pages tree. |
131 | | // Unconditionally setting the /Type to /Pages could cause problems, but trying to |
132 | | // accommodate the possibility may be excessive. |
133 | 5.29k | cur_node.warn("/Type key should be /Pages but is not; overriding"); |
134 | 5.29k | cur_node.replaceKey("/Type", Name("/Pages")); |
135 | 5.29k | } |
136 | 6.12k | if (!media_box) { |
137 | 5.92k | media_box = cur_node.getKey("/MediaBox").isRectangle(); |
138 | 5.92k | QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); |
139 | 5.92k | } |
140 | 6.12k | if (!resources) { |
141 | 5.80k | resources = cur_node.getKey("/Resources").isDictionary(); |
142 | 5.80k | } |
143 | 6.12k | auto kids = cur_node.getKey("/Kids"); |
144 | 6.12k | if (!visited.add(kids)) { |
145 | 11 | throw QPDFExc( |
146 | 11 | qpdf_e_pages, |
147 | 11 | m->file->getName(), |
148 | 11 | "object " + cur_node.getObjGen().unparse(' '), |
149 | 11 | 0, |
150 | 11 | "Loop detected in /Pages structure (getAllPages)"); |
151 | 11 | } |
152 | 6.11k | int i = -1; |
153 | 32.5k | for (auto& kid: kids.as_array()) { |
154 | 32.5k | ++i; |
155 | 32.5k | int errors = 0; |
156 | | |
157 | 32.5k | if (!kid.isDictionary()) { |
158 | 21.6k | kid.warn("Pages tree includes non-dictionary object; ignoring"); |
159 | 21.6k | invalid_page_found = true; |
160 | 21.6k | continue; |
161 | 21.6k | } |
162 | 10.9k | if (!kid.isIndirect()) { |
163 | 3.51k | cur_node.warn( |
164 | 3.51k | "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect"); |
165 | 3.51k | kid = qpdf.makeIndirectObject(kid); |
166 | 3.51k | ++errors; |
167 | 3.51k | } |
168 | 10.9k | if (kid.hasKey("/Kids")) { |
169 | 2.64k | getAllPagesInternal(kid, level, visited, seen, media_box, resources); |
170 | 8.26k | } else { |
171 | 8.26k | if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { |
172 | 3.40k | kid.warn( |
173 | 3.40k | "kid " + std::to_string(i) + |
174 | 3.40k | " (from 0) MediaBox is undefined; setting to letter / ANSI A"); |
175 | 3.40k | kid.replaceKey( |
176 | 3.40k | "/MediaBox", |
177 | 3.40k | QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); |
178 | 3.40k | ++errors; |
179 | 3.40k | } |
180 | 8.26k | if (!resources) { |
181 | 7.28k | auto res = kid.getKey("/Resources"); |
182 | | |
183 | 7.28k | if (!res.isDictionary()) { |
184 | 3.53k | ++errors; |
185 | 3.53k | kid.warn( |
186 | 3.53k | "kid " + std::to_string(i) + |
187 | 3.53k | " (from 0) Resources is missing or invalid; repairing"); |
188 | 3.53k | kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary()); |
189 | 3.53k | } |
190 | 7.28k | } |
191 | 8.26k | auto annots = kid.getKey("/Annots"); |
192 | 8.26k | if (!annots.null()) { |
193 | 1.67k | if (!annots.isArray()) { |
194 | 17 | kid.warn( |
195 | 17 | "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing"); |
196 | 17 | kid.removeKey("/Annots"); |
197 | 17 | ++errors; |
198 | 1.66k | } else { |
199 | 1.66k | QPDFObjGen::set seen_annots; |
200 | 73.5k | for (auto& annot: annots.as_array()) { |
201 | 73.5k | if (!seen_annots.add(annot)) { |
202 | 1.70k | kid.warn( |
203 | 1.70k | "kid " + std::to_string(i) + |
204 | 1.70k | " (from 0) Annots has duplicate entry for annotation " + |
205 | 1.70k | annot.id_gen().unparse(' ')); |
206 | 1.70k | ++errors; |
207 | 1.70k | } |
208 | 73.5k | } |
209 | 1.66k | } |
210 | 1.67k | } |
211 | | |
212 | 8.26k | if (!seen.add(kid)) { |
213 | | // Make a copy of the page. This does the same as shallowCopyPage in |
214 | | // QPDFPageObjectHelper. |
215 | 2.37k | if (!m->reconstructed_xref) { |
216 | 202 | cur_node.warn( |
217 | 202 | "kid " + std::to_string(i) + |
218 | 202 | " (from 0) appears more than once in the pages tree;" |
219 | 202 | " creating a new page object as a copy"); |
220 | | // This needs to be fixed. shallowCopy does not necessarily produce a valid |
221 | | // page. |
222 | 202 | kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); |
223 | 202 | seen.add(kid); |
224 | 2.17k | } else { |
225 | 2.17k | cur_node.warn( |
226 | 2.17k | "kid " + std::to_string(i) + |
227 | 2.17k | " (from 0) appears more than once in the pages tree; ignoring duplicate"); |
228 | 2.17k | invalid_page_found = true; |
229 | 2.17k | kid = QPDFObjectHandle::newNull(); |
230 | 2.17k | continue; |
231 | 2.17k | } |
232 | 202 | if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) { |
233 | | // Consider fixing and adding an information message. |
234 | 150 | ++errors; |
235 | 150 | } |
236 | 202 | } |
237 | 6.09k | if (!kid.isDictionaryOfType("/Page")) { |
238 | 3.51k | kid.warn("/Type key should be /Page but is not; overriding"); |
239 | 3.51k | kid.replaceKey("/Type", Name("/Page")); |
240 | 3.51k | ++errors; |
241 | 3.51k | } |
242 | 6.09k | if (m->reconstructed_xref && errors > 2) { |
243 | 1.83k | cur_node.warn( |
244 | 1.83k | "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); |
245 | 1.83k | invalid_page_found = true; |
246 | 1.83k | kid = QPDFObjectHandle::newNull(); |
247 | 1.83k | continue; |
248 | 1.83k | } |
249 | 4.26k | all_pages.emplace_back(kid); |
250 | 4.26k | } |
251 | 10.9k | } |
252 | 6.11k | } |
253 | | |
254 | | void |
255 | | QPDF::updateAllPagesCache() |
256 | 0 | { |
257 | 0 | m->pages.update_cache(); |
258 | 0 | } |
259 | | |
260 | | void |
261 | | Pages::update_cache() |
262 | 0 | { |
263 | | // Force regeneration of the pages cache. We force immediate recalculation of all_pages since |
264 | | // users may have references to it that they got from calls to getAllPages(). We can defer |
265 | | // recalculation of pageobj_to_pages_pos until needed. |
266 | 0 | all_pages.clear(); |
267 | 0 | pageobj_to_pages_pos.clear(); |
268 | 0 | pushed_inherited_attributes_to_pages = false; |
269 | 0 | cache(); |
270 | 0 | } |
271 | | |
272 | | void |
273 | | Pages::flattenPagesTree() |
274 | 2.78k | { |
275 | | // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. |
276 | | |
277 | 2.78k | if (!pageobj_to_pages_pos.empty()) { |
278 | 0 | return; |
279 | 0 | } |
280 | | |
281 | | // Push inherited objects down to the /Page level. As a side effect all_pages will also be |
282 | | // generated. |
283 | 2.78k | pushInheritedAttributesToPage(true, true); |
284 | | |
285 | 2.78k | QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); |
286 | | |
287 | 2.78k | size_t const len = all_pages.size(); |
288 | 6.03k | for (size_t pos = 0; pos < len; ++pos) { |
289 | | // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at |
290 | | // this point because pushInheritedAttributesToPage calls getAllPages which resolves |
291 | | // duplicates. |
292 | 3.24k | insertPageobjToPage(all_pages.at(pos), toI(pos), true); |
293 | 3.24k | all_pages.at(pos).replaceKey("/Parent", pages); |
294 | 3.24k | } |
295 | | |
296 | 2.78k | pages.replaceKey("/Kids", Array(all_pages)); |
297 | | // /Count has not changed |
298 | 2.78k | if (pages.getKey("/Count").getUIntValue() != len) { |
299 | 1.64k | if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) { |
300 | 1.59k | pages.replaceKey("/Count", Integer(len)); |
301 | 1.59k | } else { |
302 | 46 | throw std::runtime_error("/Count is wrong after flattening pages tree"); |
303 | 46 | } |
304 | 1.64k | } |
305 | 2.78k | } |
306 | | |
307 | | void |
308 | | QPDF::pushInheritedAttributesToPage() |
309 | 0 | { |
310 | | // Public API should not have access to allow_changes. |
311 | 0 | m->pages.pushInheritedAttributesToPage(true, false); |
312 | 0 | } |
313 | | |
314 | | void |
315 | | Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) |
316 | 2.78k | { |
317 | | // Traverse pages tree pushing all inherited resources down to the page level. |
318 | | |
319 | | // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning |
320 | | // for skipped keys, re-traverse unconditionally. |
321 | 2.78k | if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) { |
322 | 0 | return; |
323 | 0 | } |
324 | | |
325 | | // Calling cache() resolves any duplicated page objects, repairs broken nodes, and detects |
326 | | // loops, so we don't have to do those activities here. |
327 | 2.78k | (void)cache(); |
328 | | |
329 | | // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain |
330 | | // values for them. |
331 | 2.78k | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; |
332 | 2.78k | pushInheritedAttributesToPageInternal( |
333 | 2.78k | m->trailer.getKey("/Root").getKey("/Pages"), |
334 | 2.78k | key_ancestors, |
335 | 2.78k | allow_changes, |
336 | 2.78k | warn_skipped_keys); |
337 | 2.78k | util::assertion( |
338 | 2.78k | key_ancestors.empty(), |
339 | 2.78k | "key_ancestors not empty after pushing inherited attributes to pages"); |
340 | 2.78k | pushed_inherited_attributes_to_pages = true; |
341 | 2.78k | ever_pushed_inherited_attributes_to_pages_ = true; |
342 | 2.78k | } |
343 | | |
344 | | void |
345 | | Pages::pushInheritedAttributesToPageInternal( |
346 | | QPDFObjectHandle cur_pages, |
347 | | std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors, |
348 | | bool allow_changes, |
349 | | bool warn_skipped_keys) |
350 | 5.15k | { |
351 | | // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate |
352 | | // are inheritable attributes. Push this object onto the stack of pages nodes that have values |
353 | | // for this attribute. |
354 | | |
355 | 5.15k | std::set<std::string> inheritable_keys; |
356 | 17.5k | for (auto const& key: cur_pages.getKeys()) { |
357 | 17.5k | if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") { |
358 | 1.18k | if (!allow_changes) { |
359 | 0 | throw QPDFExc( |
360 | 0 | qpdf_e_internal, |
361 | 0 | m->file->getName(), |
362 | 0 | "/Pages object " + cur_pages.id_gen().unparse(' '), |
363 | 0 | cur_pages.offset(), |
364 | 0 | "pushInheritedAttributesToPage detected an inheritable attribute when called " |
365 | 0 | "in no-change mode"); |
366 | 0 | } |
367 | | |
368 | | // This is an inheritable resource |
369 | 1.18k | inheritable_keys.insert(key); |
370 | 1.18k | auto oh = cur_pages[key]; |
371 | 1.18k | QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1); |
372 | 1.18k | if (!oh.indirect()) { |
373 | 1.16k | if (!oh.isScalar()) { |
374 | | // Replace shared direct object non-scalar resources with indirect objects to |
375 | | // avoid copying large structures around. |
376 | 377 | cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh)); |
377 | 377 | oh = cur_pages[key]; |
378 | 790 | } else { |
379 | | // It's okay to copy scalars. |
380 | 790 | } |
381 | 1.16k | } |
382 | 1.18k | key_ancestors[key].emplace_back(oh); |
383 | 1.18k | if (key_ancestors[key].size() > 1) { |
384 | 192 | } |
385 | | // Remove this resource from this node. It will be reattached at the page level. |
386 | 1.18k | cur_pages.erase(key); |
387 | 16.3k | } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) { |
388 | | // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not |
389 | | // set), as we don't change these; but flattening removes intermediate /Pages nodes. |
390 | 3.81k | if (warn_skipped_keys && cur_pages.contains("/Parent")) { |
391 | 1.18k | warn( |
392 | 1.18k | qpdf_e_pages, |
393 | 1.18k | "Pages object: object " + cur_pages.id_gen().unparse(' '), |
394 | 1.18k | cur_pages.offset(), |
395 | 1.18k | ("Unknown key " + key + |
396 | 1.18k | " in /Pages object is being discarded as a result of flattening the /Pages " |
397 | 1.18k | "tree")); |
398 | 1.18k | } |
399 | 3.81k | } |
400 | 17.5k | } |
401 | | |
402 | | // Process descendant nodes. This method does not perform loop detection because all code paths |
403 | | // that lead here follow a call to getAllPages, which already throws an exception in the event |
404 | | // of a loop in the pages tree. |
405 | 27.9k | for (auto& kid: Array(cur_pages["/Kids"])) { |
406 | 27.9k | if (kid.isDictionaryOfType("/Pages")) { |
407 | 2.37k | pushInheritedAttributesToPageInternal( |
408 | 2.37k | kid, key_ancestors, allow_changes, warn_skipped_keys); |
409 | 25.6k | } else { |
410 | | // Add all available inheritable attributes not present in this object to this object. |
411 | 25.6k | for (auto const& [key, values]: key_ancestors) { |
412 | 3.04k | if (!kid.contains(key)) { |
413 | 2.94k | kid.replaceKey(key, values.back()); |
414 | 2.94k | } else { |
415 | 97 | QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); |
416 | 97 | } |
417 | 3.04k | } |
418 | 25.6k | } |
419 | 27.9k | } |
420 | | |
421 | | // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map. |
422 | | // That way, the invariant that the list of keys in key_ancestors is exactly those keys for |
423 | | // which inheritable attributes are available. |
424 | | |
425 | 5.15k | if (!inheritable_keys.empty()) { |
426 | 919 | for (auto const& key: inheritable_keys) { |
427 | 919 | key_ancestors[key].pop_back(); |
428 | 919 | if (key_ancestors[key].empty()) { |
429 | 742 | key_ancestors.erase(key); |
430 | 742 | } |
431 | 919 | } |
432 | 4.41k | } else { |
433 | 4.41k | QTC::TC("qpdf", "QPDF opt no inheritable keys"); |
434 | 4.41k | } |
435 | 5.15k | } |
436 | | |
437 | | void |
438 | | Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate) |
439 | 3.24k | { |
440 | 3.24k | QPDFObjGen og(obj.getObjGen()); |
441 | 3.24k | if (check_duplicate) { |
442 | 3.24k | if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { |
443 | | // The library never calls insertPageobjToPage in a way that causes this to happen. |
444 | 0 | throw QPDFExc( |
445 | 0 | qpdf_e_pages, |
446 | 0 | m->file->getName(), |
447 | 0 | "page " + std::to_string(pos) + " (numbered from zero): object " + og.unparse(' '), |
448 | 0 | 0, |
449 | 0 | "duplicate page reference found; this would cause loss of data"); |
450 | 0 | } |
451 | 3.24k | } else { |
452 | 0 | pageobj_to_pages_pos[og] = pos; |
453 | 0 | } |
454 | 3.24k | } |
455 | | |
456 | | void |
457 | | Pages::insert(QPDFObjectHandle newpage, int pos) |
458 | 0 | { |
459 | | // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end. |
460 | |
|
461 | 0 | flattenPagesTree(); |
462 | |
|
463 | 0 | if (!newpage.indirect()) { |
464 | 0 | newpage = qpdf.makeIndirectObject(newpage); |
465 | 0 | } else if (newpage.qpdf() != &qpdf) { |
466 | 0 | newpage.qpdf()->pushInheritedAttributesToPage(); |
467 | 0 | newpage = qpdf.copyForeignObject(newpage); |
468 | 0 | } else { |
469 | 0 | QTC::TC("qpdf", "QPDF insert indirect page"); |
470 | 0 | } |
471 | |
|
472 | 0 | if (pos < 0 || std::cmp_greater(pos, all_pages.size())) { |
473 | 0 | throw std::runtime_error("QPDF::insertPage called with pos out of range"); |
474 | 0 | } |
475 | | |
476 | 0 | QTC::TC( |
477 | 0 | "qpdf", |
478 | 0 | "QPDF insert page", |
479 | 0 | pos == 0 ? 0 : // insert at beginning |
480 | 0 | std::cmp_equal(pos, size()) ? 1 // at end |
481 | 0 | : 2); // insert in middle |
482 | |
|
483 | 0 | if (pageobj_to_pages_pos.contains(newpage)) { |
484 | 0 | newpage = qpdf.makeIndirectObject(newpage.copy()); |
485 | 0 | } |
486 | |
|
487 | 0 | auto pages = qpdf.getRoot()["/Pages"]; |
488 | 0 | Array kids = pages["/Kids"]; |
489 | |
|
490 | 0 | newpage.replaceKey("/Parent", pages); |
491 | 0 | kids.insert(pos, newpage); |
492 | 0 | size_t npages = kids.size(); |
493 | 0 | pages.replaceKey("/Count", Integer(npages)); |
494 | 0 | all_pages.insert(all_pages.begin() + pos, newpage); |
495 | 0 | for (size_t i = static_cast<size_t>(pos) + 1; i < npages; ++i) { |
496 | 0 | insertPageobjToPage(all_pages.at(i), static_cast<int>(i), false); |
497 | 0 | } |
498 | 0 | insertPageobjToPage(newpage, pos, true); |
499 | 0 | } |
500 | | |
501 | | void |
502 | | QPDF::removePage(QPDFObjectHandle page) |
503 | 0 | { |
504 | 0 | m->pages.erase(page); |
505 | 0 | } |
506 | | |
507 | | void |
508 | | Pages::erase(QPDFObjectHandle& page) |
509 | 0 | { |
510 | 0 | int pos = qpdf.findPage(page); // also ensures flat /Pages |
511 | 0 | QTC::TC( |
512 | 0 | "qpdf", |
513 | 0 | "QPDF remove page", |
514 | 0 | (pos == 0) ? 0 : // remove at beginning |
515 | 0 | (pos == toI(all_pages.size() - 1)) ? 1 // end |
516 | 0 | : 2); // remove in middle |
517 | |
|
518 | 0 | QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); |
519 | 0 | QPDFObjectHandle kids = pages.getKey("/Kids"); |
520 | |
|
521 | 0 | kids.eraseItem(pos); |
522 | 0 | int npages = static_cast<int>(kids.size()); |
523 | 0 | pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages)); |
524 | 0 | all_pages.erase(all_pages.begin() + pos); |
525 | 0 | pageobj_to_pages_pos.erase(page.getObjGen()); |
526 | 0 | for (int i = pos; i < npages; ++i) { |
527 | 0 | m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false); |
528 | 0 | } |
529 | 0 | } |
530 | | |
531 | | void |
532 | | QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage) |
533 | 0 | { |
534 | 0 | int refpos = findPage(refpage); |
535 | 0 | if (!before) { |
536 | 0 | ++refpos; |
537 | 0 | } |
538 | 0 | m->pages.insert(newpage, refpos); |
539 | 0 | } |
540 | | |
541 | | void |
542 | | QPDF::addPage(QPDFObjectHandle newpage, bool first) |
543 | 0 | { |
544 | 0 | if (first) { |
545 | 0 | m->pages.insert(newpage, 0); |
546 | 0 | } else { |
547 | 0 | m->pages.insert(newpage, getRoot()["/Pages"]["/Count"].getIntValueAsInt()); |
548 | 0 | } |
549 | 0 | } |
550 | | |
551 | | int |
552 | | QPDF::findPage(QPDFObjectHandle& page) |
553 | 0 | { |
554 | 0 | return findPage(page.getObjGen()); |
555 | 0 | } |
556 | | |
557 | | int |
558 | | QPDF::findPage(QPDFObjGen og) |
559 | 0 | { |
560 | 0 | return m->pages.find(og); |
561 | 0 | } |
562 | | |
563 | | int |
564 | | Pages::find(QPDFObjGen og) |
565 | 0 | { |
566 | 0 | flattenPagesTree(); |
567 | 0 | auto it = pageobj_to_pages_pos.find(og); |
568 | 0 | if (it == pageobj_to_pages_pos.end()) { |
569 | 0 | throw QPDFExc( |
570 | 0 | qpdf_e_pages, |
571 | 0 | m->file->getName(), |
572 | 0 | "page object: object " + og.unparse(' '), |
573 | 0 | 0, |
574 | 0 | "page object not referenced in /Pages tree"); |
575 | 0 | } |
576 | 0 | return (*it).second; |
577 | 0 | } |
578 | | |
579 | | class QPDFPageDocumentHelper::Members |
580 | | { |
581 | | }; |
582 | | |
583 | | QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) : |
584 | 0 | QPDFDocumentHelper(qpdf) |
585 | 0 | { |
586 | 0 | } |
587 | | |
588 | | QPDFPageDocumentHelper& |
589 | | QPDFPageDocumentHelper::get(QPDF& qpdf) |
590 | 0 | { |
591 | 0 | return qpdf.doc().page_dh(); |
592 | 0 | } |
593 | | |
594 | | void |
595 | | QPDFPageDocumentHelper::validate(bool repair) |
596 | 0 | { |
597 | 0 | } |
598 | | |
599 | | std::vector<QPDFPageObjectHelper> |
600 | | QPDFPageDocumentHelper::getAllPages() |
601 | 0 | { |
602 | 0 | auto& pp = qpdf.doc().pages(); |
603 | 0 | return {pp.begin(), pp.end()}; |
604 | 0 | } |
605 | | |
606 | | void |
607 | | QPDFPageDocumentHelper::pushInheritedAttributesToPage() |
608 | 0 | { |
609 | 0 | qpdf.pushInheritedAttributesToPage(); |
610 | 0 | } |
611 | | |
612 | | void |
613 | | QPDFPageDocumentHelper::removeUnreferencedResources() |
614 | 0 | { |
615 | 0 | for (auto& ph: getAllPages()) { |
616 | 0 | ph.removeUnreferencedResources(); |
617 | 0 | } |
618 | 0 | } |
619 | | |
620 | | void |
621 | | QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first) |
622 | 0 | { |
623 | 0 | qpdf.doc().pages().insert(newpage, first ? 0 : qpdf.doc().pages().size()); |
624 | 0 | } |
625 | | |
626 | | void |
627 | | QPDFPageDocumentHelper::addPageAt( |
628 | | QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage) |
629 | 0 | { |
630 | 0 | qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle()); |
631 | 0 | } |
632 | | |
633 | | void |
634 | | QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page) |
635 | 0 | { |
636 | 0 | qpdf.removePage(page.getObjectHandle()); |
637 | 0 | } |
638 | | |
639 | | void |
640 | | QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags) |
641 | 0 | { |
642 | 0 | qpdf.doc().pages().flatten_annotations(required_flags, forbidden_flags); |
643 | 0 | } |
644 | | |
645 | | void |
646 | | Pages::flatten_annotations(int required_flags, int forbidden_flags) |
647 | 0 | { |
648 | 0 | auto& afdh = qpdf.doc().acroform(); |
649 | 0 | if (afdh.getNeedAppearances()) { |
650 | 0 | qpdf.getRoot() |
651 | 0 | .getKey("/AcroForm") |
652 | 0 | .warn( |
653 | 0 | "document does not have updated appearance streams, so form fields " |
654 | 0 | "will not be flattened"); |
655 | 0 | } |
656 | 0 | for (QPDFPageObjectHelper ph: all()) { |
657 | 0 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); |
658 | 0 | if (!resources.isDictionary()) { |
659 | | // As of #1521, this should be impossible unless a user inserted an invalid page. |
660 | 0 | resources = ph.getObjectHandle().replaceKeyAndGetNew("/Resources", Dictionary::empty()); |
661 | 0 | } |
662 | 0 | flatten_annotations_for_page(ph, resources, afdh, required_flags, forbidden_flags); |
663 | 0 | } |
664 | 0 | if (!afdh.getNeedAppearances()) { |
665 | 0 | qpdf.getRoot().removeKey("/AcroForm"); |
666 | 0 | } |
667 | 0 | } |
668 | | |
669 | | void |
670 | | Pages::flatten_annotations_for_page( |
671 | | QPDFPageObjectHelper& page, |
672 | | QPDFObjectHandle& resources, |
673 | | impl::AcroForm& afdh, |
674 | | int required_flags, |
675 | | int forbidden_flags) |
676 | 0 | { |
677 | 0 | bool need_appearances = afdh.getNeedAppearances(); |
678 | 0 | std::vector<QPDFAnnotationObjectHelper> annots = page.getAnnotations(); |
679 | 0 | std::vector<QPDFObjectHandle> new_annots; |
680 | 0 | std::string new_content; |
681 | 0 | int rotate = 0; |
682 | 0 | QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate"); |
683 | 0 | if (rotate_obj.isInteger() && rotate_obj.getIntValue()) { |
684 | 0 | rotate = rotate_obj.getIntValueAsInt(); |
685 | 0 | } |
686 | 0 | int next_fx = 1; |
687 | 0 | for (auto& aoh: annots) { |
688 | 0 | QPDFObjectHandle as = aoh.getAppearanceStream("/N"); |
689 | 0 | bool is_widget = (aoh.getSubtype() == "/Widget"); |
690 | 0 | bool process = true; |
691 | 0 | if (need_appearances && is_widget) { |
692 | 0 | process = false; |
693 | 0 | } |
694 | 0 | if (process && as.isStream()) { |
695 | 0 | if (is_widget) { |
696 | 0 | QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh); |
697 | 0 | QPDFObjectHandle as_resources = as.getDict().getKey("/Resources"); |
698 | 0 | if (as_resources.isIndirect()) { |
699 | 0 | ; |
700 | 0 | as.getDict().replaceKey("/Resources", as_resources.shallowCopy()); |
701 | 0 | as_resources = as.getDict().getKey("/Resources"); |
702 | 0 | } |
703 | 0 | as_resources.mergeResources(ff.getDefaultResources()); |
704 | 0 | } else { |
705 | 0 | QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation"); |
706 | 0 | } |
707 | 0 | std::string name = resources.getUniqueResourceName("/Fxo", next_fx); |
708 | 0 | std::string content = |
709 | 0 | aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags); |
710 | 0 | if (!content.empty()) { |
711 | 0 | resources.mergeResources(Dictionary({{"/XObject", Dictionary({{name, as}})}})); |
712 | 0 | resources.getKey("/XObject").replaceKey(name, as); |
713 | 0 | ++next_fx; |
714 | 0 | } |
715 | 0 | new_content += content; |
716 | 0 | } else if (process && !aoh.getAppearanceDictionary().null()) { |
717 | | // If an annotation has no selected appearance stream, just drop the annotation when |
718 | | // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows |
719 | | // associated with comments that aren't visible, and other types of annotations that |
720 | | // aren't visible. Annotations that have no appearance streams at all, such as Link, |
721 | | // Popup, and Projection, should be preserved. |
722 | 0 | } else { |
723 | 0 | new_annots.push_back(aoh.getObjectHandle()); |
724 | 0 | } |
725 | 0 | } |
726 | 0 | if (new_annots.size() != annots.size()) { |
727 | 0 | QPDFObjectHandle page_oh = page.getObjectHandle(); |
728 | 0 | if (new_annots.empty()) { |
729 | 0 | page_oh.removeKey("/Annots"); |
730 | 0 | } else { |
731 | 0 | QPDFObjectHandle old_annots = page_oh.getKey("/Annots"); |
732 | 0 | QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots); |
733 | 0 | if (old_annots.isIndirect()) { |
734 | 0 | qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh); |
735 | 0 | } else { |
736 | 0 | page_oh.replaceKey("/Annots", new_annots_oh); |
737 | 0 | } |
738 | 0 | } |
739 | 0 | page.addPageContents(qpdf.newStream("q\n"), true); |
740 | 0 | page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false); |
741 | 0 | } |
742 | 0 | } |