Coverage Report

Created: 2025-11-11 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_pages.cc
Line
Count
Source
1
#include <qpdf/QPDFPageDocumentHelper.hh>
2
#include <qpdf/QPDF_private.hh>
3
4
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
5
#include <qpdf/QPDFExc.hh>
6
#include <qpdf/QPDFObjectHandle_private.hh>
7
#include <qpdf/QTC.hh>
8
#include <qpdf/QUtil.hh>
9
#include <qpdf/Util.hh>
10
11
// In support of page manipulation APIs, these methods internally maintain state about pages in a
12
// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos,
13
// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages()
14
// method returns a const reference to all_pages and has been in the public API long before the
15
// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of
16
// calls to it in the library, so the efficiency of having it cached is probably worth keeping it.
17
// At one point, I had partially implemented a helper class specifically for the pages tree, but
18
// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both
19
// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than
20
// what's here. So, in spite of the fact that a const reference is returned, the current code is
21
// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in
22
// attic in case there is ever a reason to resurrect it. There are additional notes in
23
// README-maintainer, which also refers to this comment.
24
25
// The goal of this code is to ensure that the all_pages vector, which users may have a reference
26
// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent
27
// outside of any call to the library.  As long as users only touch the /Pages structure through
28
// page-specific API calls, they never have to worry about anything, and this will also stay
29
// consistent.  If a user touches anything about the /Pages structure outside of these calls (such
30
// as by directly looking up and manipulating the underlying objects), they can call
31
// updatePagesCache() to bring things back in sync.
32
33
// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure
34
// alone.  If the user does use the APIs, then we push all inheritable objects down and flatten the
35
// /Pages tree.  This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos
36
// internally consistent at all times.
37
38
// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent
39
// should remain in as few places as possible.  As of initial writing, only flattenPagesTree,
40
// insertPage, and removePage, along with methods they call, are concerned with it.  Everything else
41
// goes through one of those methods.
42
43
using Pages = QPDF::Doc::Pages;
44
45
std::vector<QPDFObjectHandle> const&
46
QPDF::getAllPages()
47
0
{
48
0
    return m->pages.all();
49
0
}
50
51
std::vector<QPDFObjectHandle> const&
52
Pages::cache()
53
12.0k
{
54
    // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
55
12.0k
    if (all_pages.empty() && !invalid_page_found) {
56
9.30k
        ever_called_get_all_pages_ = true;
57
9.30k
        auto root = qpdf.getRoot();
58
9.30k
        QPDFObjGen::set visited;
59
9.30k
        QPDFObjGen::set seen;
60
9.30k
        QPDFObjectHandle pages = root.getKey("/Pages");
61
9.30k
        bool warned = false;
62
9.30k
        bool changed_pages = false;
63
9.46k
        while (pages.isDictionary() && pages.hasKey("/Parent")) {
64
183
            if (!seen.add(pages)) {
65
                // loop -- will be detected again and reported later
66
29
                break;
67
29
            }
68
            // Files have been found in the wild where /Pages in the catalog points to the first
69
            // page. Try to work around this and similar cases with this heuristic.
70
154
            if (!warned) {
71
71
                root.warn(
72
71
                    "document page tree root (root -> /Pages) doesn't point"
73
71
                    " to the root of the page tree; attempting to correct");
74
71
                warned = true;
75
71
            }
76
154
            changed_pages = true;
77
154
            pages = pages.getKey("/Parent");
78
154
        }
79
9.30k
        if (changed_pages) {
80
69
            root.replaceKey("/Pages", pages);
81
69
        }
82
9.30k
        seen.clear();
83
9.30k
        if (!pages.hasKey("/Kids")) {
84
            // Ensure we actually found a /Pages object.
85
99
            throw QPDFExc(
86
99
                qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array");
87
99
        }
88
9.20k
        try {
89
9.20k
            getAllPagesInternal(pages, visited, seen, false, false);
90
9.20k
        } catch (...) {
91
71
            all_pages.clear();
92
71
            invalid_page_found = false;
93
71
            throw;
94
71
        }
95
3.36k
        if (invalid_page_found) {
96
2.74k
            flattenPagesTree();
97
2.74k
            invalid_page_found = false;
98
2.74k
        }
99
3.36k
    }
100
6.10k
    return all_pages;
101
12.0k
}
102
103
void
104
Pages::getAllPagesInternal(
105
    QPDFObjectHandle cur_node,
106
    QPDFObjGen::set& visited,
107
    QPDFObjGen::set& seen,
108
    bool media_box,
109
    bool resources)
110
5.66k
{
111
5.66k
    if (!visited.add(cur_node)) {
112
23
        throw QPDFExc(
113
23
            qpdf_e_pages,
114
23
            m->file->getName(),
115
23
            "object " + cur_node.getObjGen().unparse(' '),
116
23
            0,
117
23
            "Loop detected in /Pages structure (getAllPages)");
118
23
    }
119
5.64k
    if (!cur_node.isDictionaryOfType("/Pages")) {
120
        // During fuzzing files were encountered where the root object appeared in the pages tree.
121
        // Unconditionally setting the /Type to /Pages could cause problems, but trying to
122
        // accommodate the possibility may be excessive.
123
4.32k
        cur_node.warn("/Type key should be /Pages but is not; overriding");
124
4.32k
        cur_node.replaceKey("/Type", Name("/Pages"));
125
4.32k
    }
126
5.64k
    if (!media_box) {
127
5.55k
        media_box = cur_node.getKey("/MediaBox").isRectangle();
128
5.55k
        QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
129
5.55k
    }
130
5.64k
    if (!resources) {
131
5.55k
        resources = cur_node.getKey("/Resources").isDictionary();
132
5.55k
    }
133
5.64k
    auto kids = cur_node.getKey("/Kids");
134
5.64k
    if (!visited.add(kids)) {
135
11
        throw QPDFExc(
136
11
            qpdf_e_pages,
137
11
            m->file->getName(),
138
11
            "object " + cur_node.getObjGen().unparse(' '),
139
11
            0,
140
11
            "Loop detected in /Pages structure (getAllPages)");
141
11
    }
142
5.63k
    int i = -1;
143
31.2k
    for (auto& kid: kids.as_array()) {
144
31.2k
        ++i;
145
31.2k
        int errors = 0;
146
147
31.2k
        if (!kid.isDictionary()) {
148
22.1k
            kid.warn("Pages tree includes non-dictionary object; ignoring");
149
22.1k
            invalid_page_found = true;
150
22.1k
            continue;
151
22.1k
        }
152
9.02k
        if (!kid.isIndirect()) {
153
2.81k
            cur_node.warn(
154
2.81k
                "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect");
155
2.81k
            kid = qpdf.makeIndirectObject(kid);
156
2.81k
            ++errors;
157
2.81k
        }
158
9.02k
        if (kid.hasKey("/Kids")) {
159
2.23k
            getAllPagesInternal(kid, visited, seen, media_box, resources);
160
6.79k
        } else {
161
6.79k
            if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
162
3.35k
                kid.warn(
163
3.35k
                    "kid " + std::to_string(i) +
164
3.35k
                    " (from 0) MediaBox is undefined; setting to letter / ANSI A");
165
3.35k
                kid.replaceKey(
166
3.35k
                    "/MediaBox",
167
3.35k
                    QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
168
3.35k
                ++errors;
169
3.35k
            }
170
6.79k
            if (!resources) {
171
6.02k
                auto res = kid.getKey("/Resources");
172
173
6.02k
                if (!res.isDictionary()) {
174
3.61k
                    ++errors;
175
3.61k
                    kid.warn(
176
3.61k
                        "kid " + std::to_string(i) +
177
3.61k
                        " (from 0) Resources is missing or invalid; repairing");
178
3.61k
                    kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary());
179
3.61k
                }
180
6.02k
            }
181
6.79k
            auto annots = kid.getKey("/Annots");
182
6.79k
            if (!annots.null()) {
183
1.21k
                if (!annots.isArray()) {
184
11
                    kid.warn(
185
11
                        "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing");
186
11
                    kid.removeKey("/Annots");
187
11
                    ++errors;
188
1.20k
                } else {
189
1.20k
                    QPDFObjGen::set seen_annots;
190
10.8k
                    for (auto& annot: annots.as_array()) {
191
10.8k
                        if (!seen_annots.add(annot)) {
192
793
                            kid.warn(
193
793
                                "kid " + std::to_string(i) +
194
793
                                " (from 0) Annots has duplicate entry for annotation " +
195
793
                                annot.id_gen().unparse(' '));
196
793
                            ++errors;
197
793
                        }
198
10.8k
                    }
199
1.20k
                }
200
1.21k
            }
201
202
6.79k
            if (!seen.add(kid)) {
203
                // Make a copy of the page. This does the same as shallowCopyPage in
204
                // QPDFPageObjectHelper.
205
2.08k
                if (!m->reconstructed_xref) {
206
0
                    cur_node.warn(
207
0
                        "kid " + std::to_string(i) +
208
0
                        " (from 0) appears more than once in the pages tree;"
209
0
                        " creating a new page object as a copy");
210
                    // This needs to be fixed. shallowCopy does not necessarily produce a valid
211
                    // page.
212
0
                    kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
213
0
                    seen.add(kid);
214
2.08k
                } else {
215
2.08k
                    cur_node.warn(
216
2.08k
                        "kid " + std::to_string(i) +
217
2.08k
                        " (from 0) appears more than once in the pages tree; ignoring duplicate");
218
2.08k
                    invalid_page_found = true;
219
2.08k
                    kid = QPDFObjectHandle::newNull();
220
2.08k
                    continue;
221
2.08k
                }
222
0
                if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) {
223
                    // Consider fixing and adding an information message.
224
0
                    ++errors;
225
0
                }
226
0
            }
227
4.71k
            if (!kid.isDictionaryOfType("/Page")) {
228
3.18k
                kid.warn("/Type key should be /Page but is not; overriding");
229
3.18k
                kid.replaceKey("/Type", Name("/Page"));
230
3.18k
                ++errors;
231
3.18k
            }
232
4.71k
            if (m->reconstructed_xref && errors > 2) {
233
1.83k
                cur_node.warn(
234
1.83k
                    "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
235
1.83k
                invalid_page_found = true;
236
1.83k
                kid = QPDFObjectHandle::newNull();
237
1.83k
                continue;
238
1.83k
            }
239
2.87k
            all_pages.emplace_back(kid);
240
2.87k
        }
241
9.02k
    }
242
5.63k
}
243
244
void
245
QPDF::updateAllPagesCache()
246
0
{
247
0
    m->pages.update_cache();
248
0
}
249
250
void
251
Pages::update_cache()
252
0
{
253
    // Force regeneration of the pages cache.  We force immediate recalculation of all_pages since
254
    // users may have references to it that they got from calls to getAllPages().  We can defer
255
    // recalculation of pageobj_to_pages_pos until needed.
256
0
    all_pages.clear();
257
0
    pageobj_to_pages_pos.clear();
258
0
    pushed_inherited_attributes_to_pages = false;
259
0
    cache();
260
0
}
261
262
void
263
Pages::flattenPagesTree()
264
2.74k
{
265
    // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
266
267
2.74k
    if (!pageobj_to_pages_pos.empty()) {
268
0
        return;
269
0
    }
270
271
    // Push inherited objects down to the /Page level.  As a side effect all_pages will also be
272
    // generated.
273
2.74k
    pushInheritedAttributesToPage(true, true);
274
275
2.74k
    QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
276
277
2.74k
    size_t const len = all_pages.size();
278
4.77k
    for (size_t pos = 0; pos < len; ++pos) {
279
        // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
280
        // this point because pushInheritedAttributesToPage calls getAllPages which resolves
281
        // duplicates.
282
2.03k
        insertPageobjToPage(all_pages.at(pos), toI(pos), true);
283
2.03k
        all_pages.at(pos).replaceKey("/Parent", pages);
284
2.03k
    }
285
286
2.74k
    pages.replaceKey("/Kids", Array(all_pages));
287
    // /Count has not changed
288
2.74k
    if (pages.getKey("/Count").getUIntValue() != len) {
289
1.62k
        if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) {
290
1.55k
            pages.replaceKey("/Count", Integer(len));
291
1.55k
        } else {
292
74
            throw std::runtime_error("/Count is wrong after flattening pages tree");
293
74
        }
294
1.62k
    }
295
2.74k
}
296
297
void
298
QPDF::pushInheritedAttributesToPage()
299
0
{
300
    // Public API should not have access to allow_changes.
301
0
    m->pages.pushInheritedAttributesToPage(true, false);
302
0
}
303
304
void
305
Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
306
2.74k
{
307
    // Traverse pages tree pushing all inherited resources down to the page level.
308
309
    // The record of whether we've done this is cleared by updateAllPagesCache().  If we're warning
310
    // for skipped keys, re-traverse unconditionally.
311
2.74k
    if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) {
312
0
        return;
313
0
    }
314
315
    // Calling cache() resolves any duplicated page objects, repairs broken nodes, and detects
316
    // loops, so we don't have to do those activities here.
317
2.74k
    (void)cache();
318
319
    // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
320
    // values for them.
321
2.74k
    std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
322
2.74k
    pushInheritedAttributesToPageInternal(
323
2.74k
        m->trailer.getKey("/Root").getKey("/Pages"),
324
2.74k
        key_ancestors,
325
2.74k
        allow_changes,
326
2.74k
        warn_skipped_keys);
327
2.74k
    util::assertion(
328
2.74k
        key_ancestors.empty(),
329
2.74k
        "key_ancestors not empty after pushing inherited attributes to pages");
330
2.74k
    pushed_inherited_attributes_to_pages = true;
331
2.74k
    ever_pushed_inherited_attributes_to_pages_ = true;
332
2.74k
}
333
334
void
335
Pages::pushInheritedAttributesToPageInternal(
336
    QPDFObjectHandle cur_pages,
337
    std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
338
    bool allow_changes,
339
    bool warn_skipped_keys)
340
4.72k
{
341
    // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate
342
    // are inheritable attributes. Push this object onto the stack of pages nodes that have values
343
    // for this attribute.
344
345
4.72k
    std::set<std::string> inheritable_keys;
346
17.4k
    for (auto const& key: cur_pages.getKeys()) {
347
17.4k
        if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") {
348
884
            if (!allow_changes) {
349
0
                throw QPDFExc(
350
0
                    qpdf_e_internal,
351
0
                    m->file->getName(),
352
0
                    "/Pages object " + cur_pages.id_gen().unparse(' '),
353
0
                    cur_pages.offset(),
354
0
                    "pushInheritedAttributesToPage detected an inheritable attribute when called "
355
0
                    "in no-change mode");
356
0
            }
357
358
            // This is an inheritable resource
359
884
            inheritable_keys.insert(key);
360
884
            auto oh = cur_pages[key];
361
884
            QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1);
362
884
            if (!oh.indirect()) {
363
868
                if (!oh.isScalar()) {
364
                    // Replace shared direct object non-scalar resources with indirect objects to
365
                    // avoid copying large structures around.
366
369
                    cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh));
367
369
                    oh = cur_pages[key];
368
499
                } else {
369
                    // It's okay to copy scalars.
370
499
                }
371
868
            }
372
884
            key_ancestors[key].emplace_back(oh);
373
884
            if (key_ancestors[key].size() > 1) {
374
91
            }
375
            // Remove this resource from this node.  It will be reattached at the page level.
376
884
            cur_pages.erase(key);
377
16.5k
        } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) {
378
            // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
379
            // set), as we don't change these; but flattening removes intermediate /Pages nodes.
380
4.41k
            if (warn_skipped_keys && cur_pages.contains("/Parent")) {
381
1.10k
                warn(
382
1.10k
                    qpdf_e_pages,
383
1.10k
                    "Pages object: object " + cur_pages.id_gen().unparse(' '),
384
1.10k
                    cur_pages.offset(),
385
1.10k
                    ("Unknown key " + key +
386
1.10k
                     " in /Pages object is being discarded as a result of flattening the /Pages "
387
1.10k
                     "tree"));
388
1.10k
            }
389
4.41k
        }
390
17.4k
    }
391
392
    // Process descendant nodes. This method does not perform loop detection because all code paths
393
    // that lead here follow a call to getAllPages, which already throws an exception in the event
394
    // of a loop in the pages tree.
395
26.9k
    for (auto& kid: Array(cur_pages["/Kids"])) {
396
26.9k
        if (kid.isDictionaryOfType("/Pages")) {
397
1.98k
            pushInheritedAttributesToPageInternal(
398
1.98k
                kid, key_ancestors, allow_changes, warn_skipped_keys);
399
24.9k
        } else {
400
            // Add all available inheritable attributes not present in this object to this object.
401
24.9k
            for (auto const& [key, values]: key_ancestors) {
402
2.23k
                if (!kid.contains(key)) {
403
2.15k
                    kid.replaceKey(key, values.back());
404
2.15k
                } else {
405
77
                    QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
406
77
                }
407
2.23k
            }
408
24.9k
        }
409
26.9k
    }
410
411
    // For each inheritable key, pop the stack.  If the stack becomes empty, remove it from the map.
412
    // That way, the invariant that the list of keys in key_ancestors is exactly those keys for
413
    // which inheritable attributes are available.
414
415
4.72k
    if (!inheritable_keys.empty()) {
416
636
        for (auto const& key: inheritable_keys) {
417
636
            key_ancestors[key].pop_back();
418
636
            if (key_ancestors[key].empty()) {
419
560
                key_ancestors.erase(key);
420
560
            }
421
636
        }
422
4.27k
    } else {
423
4.27k
        QTC::TC("qpdf", "QPDF opt no inheritable keys");
424
4.27k
    }
425
4.72k
}
426
427
void
428
Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate)
429
2.03k
{
430
2.03k
    QPDFObjGen og(obj.getObjGen());
431
2.03k
    if (check_duplicate) {
432
2.03k
        if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
433
            // The library never calls insertPageobjToPage in a way that causes this to happen.
434
0
            throw QPDFExc(
435
0
                qpdf_e_pages,
436
0
                m->file->getName(),
437
0
                "page " + std::to_string(pos) + " (numbered from zero): object " + og.unparse(' '),
438
0
                0,
439
0
                "duplicate page reference found; this would cause loss of data");
440
0
        }
441
2.03k
    } else {
442
0
        pageobj_to_pages_pos[og] = pos;
443
0
    }
444
2.03k
}
445
446
void
447
Pages::insert(QPDFObjectHandle newpage, int pos)
448
0
{
449
    // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
450
451
0
    flattenPagesTree();
452
453
0
    if (!newpage.indirect()) {
454
0
        newpage = qpdf.makeIndirectObject(newpage);
455
0
    } else if (newpage.qpdf() != &qpdf) {
456
0
        newpage.qpdf()->pushInheritedAttributesToPage();
457
0
        newpage = qpdf.copyForeignObject(newpage);
458
0
    } else {
459
0
        QTC::TC("qpdf", "QPDF insert indirect page");
460
0
    }
461
462
0
    if (pos < 0 || std::cmp_greater(pos, all_pages.size())) {
463
0
        throw std::runtime_error("QPDF::insertPage called with pos out of range");
464
0
    }
465
466
0
    QTC::TC(
467
0
        "qpdf",
468
0
        "QPDF insert page",
469
0
        pos == 0 ? 0 :                        // insert at beginning
470
0
            std::cmp_equal(pos, size()) ? 1   // at end
471
0
                                        : 2); // insert in middle
472
473
0
    if (pageobj_to_pages_pos.contains(newpage)) {
474
0
        newpage = qpdf.makeIndirectObject(newpage.copy());
475
0
    }
476
477
0
    auto pages = qpdf.getRoot()["/Pages"];
478
0
    Array kids = pages["/Kids"];
479
480
0
    newpage.replaceKey("/Parent", pages);
481
0
    kids.insert(pos, newpage);
482
0
    size_t npages = kids.size();
483
0
    pages.replaceKey("/Count", Integer(npages));
484
0
    all_pages.insert(all_pages.begin() + pos, newpage);
485
0
    for (size_t i = static_cast<size_t>(pos) + 1; i < npages; ++i) {
486
0
        insertPageobjToPage(all_pages.at(i), static_cast<int>(i), false);
487
0
    }
488
0
    insertPageobjToPage(newpage, pos, true);
489
0
}
490
491
void
492
QPDF::removePage(QPDFObjectHandle page)
493
0
{
494
0
    m->pages.erase(page);
495
0
}
496
497
void
498
Pages::erase(QPDFObjectHandle& page)
499
0
{
500
0
    int pos = qpdf.findPage(page); // also ensures flat /Pages
501
0
    QTC::TC(
502
0
        "qpdf",
503
0
        "QPDF remove page",
504
0
        (pos == 0) ? 0 :                             // remove at beginning
505
0
            (pos == toI(all_pages.size() - 1)) ? 1   // end
506
0
                                               : 2); // remove in middle
507
508
0
    QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
509
0
    QPDFObjectHandle kids = pages.getKey("/Kids");
510
511
0
    kids.eraseItem(pos);
512
0
    int npages = static_cast<int>(kids.size());
513
0
    pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
514
0
    all_pages.erase(all_pages.begin() + pos);
515
0
    pageobj_to_pages_pos.erase(page.getObjGen());
516
0
    for (int i = pos; i < npages; ++i) {
517
0
        m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false);
518
0
    }
519
0
}
520
521
void
522
QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage)
523
0
{
524
0
    int refpos = findPage(refpage);
525
0
    if (!before) {
526
0
        ++refpos;
527
0
    }
528
0
    m->pages.insert(newpage, refpos);
529
0
}
530
531
void
532
QPDF::addPage(QPDFObjectHandle newpage, bool first)
533
0
{
534
0
    if (first) {
535
0
        m->pages.insert(newpage, 0);
536
0
    } else {
537
0
        m->pages.insert(newpage, getRoot()["/Pages"]["/Count"].getIntValueAsInt());
538
0
    }
539
0
}
540
541
int
542
QPDF::findPage(QPDFObjectHandle& page)
543
0
{
544
0
    return findPage(page.getObjGen());
545
0
}
546
547
int
548
QPDF::findPage(QPDFObjGen og)
549
0
{
550
0
    return m->pages.find(og);
551
0
}
552
553
int
554
Pages::find(QPDFObjGen og)
555
0
{
556
0
    flattenPagesTree();
557
0
    auto it = pageobj_to_pages_pos.find(og);
558
0
    if (it == pageobj_to_pages_pos.end()) {
559
0
        throw QPDFExc(
560
0
            qpdf_e_pages,
561
0
            m->file->getName(),
562
0
            "page object: object " + og.unparse(' '),
563
0
            0,
564
0
            "page object not referenced in /Pages tree");
565
0
    }
566
0
    return (*it).second;
567
0
}
568
569
class QPDFPageDocumentHelper::Members
570
{
571
};
572
573
// Construct a helper bound to qpdf. All the constructor does is initialise the QPDFDocumentHelper
// base with the document reference.
QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) :
    QPDFDocumentHelper(qpdf)
{
}
577
578
// Return the page document helper that qpdf's document object exposes through page_dh().
QPDFPageDocumentHelper&
QPDFPageDocumentHelper::get(QPDF& qpdf)
{
    auto& helper = qpdf.doc().page_dh();
    return helper;
}
583
584
void
585
QPDFPageDocumentHelper::validate(bool repair)
586
0
{
587
0
}
588
589
std::vector<QPDFPageObjectHelper>
590
QPDFPageDocumentHelper::getAllPages()
591
0
{
592
0
    auto& pp = qpdf.doc().pages();
593
0
    return {pp.begin(), pp.end()};
594
0
}
595
596
void
597
QPDFPageDocumentHelper::pushInheritedAttributesToPage()
598
0
{
599
0
    qpdf.pushInheritedAttributesToPage();
600
0
}
601
602
void
603
QPDFPageDocumentHelper::removeUnreferencedResources()
604
0
{
605
0
    for (auto& ph: getAllPages()) {
606
0
        ph.removeUnreferencedResources();
607
0
    }
608
0
}
609
610
void
611
QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first)
612
0
{
613
0
    qpdf.doc().pages().insert(newpage, first ? 0 : qpdf.doc().pages().size());
614
0
}
615
616
void
617
QPDFPageDocumentHelper::addPageAt(
618
    QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage)
619
0
{
620
0
    qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle());
621
0
}
622
623
void
624
QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page)
625
0
{
626
0
    qpdf.removePage(page.getObjectHandle());
627
0
}
628
629
void
630
QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags)
631
0
{
632
0
    qpdf.doc().pages().flatten_annotations(required_flags, forbidden_flags);
633
0
}
634
635
void
636
Pages::flatten_annotations(int required_flags, int forbidden_flags)
637
0
{
638
0
    auto& afdh = qpdf.doc().acroform();
639
0
    if (afdh.getNeedAppearances()) {
640
0
        qpdf.getRoot()
641
0
            .getKey("/AcroForm")
642
0
            .warn(
643
0
                "document does not have updated appearance streams, so form fields "
644
0
                "will not be flattened");
645
0
    }
646
0
    for (QPDFPageObjectHelper ph: all()) {
647
0
        QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
648
0
        if (!resources.isDictionary()) {
649
            // As of #1521, this should be impossible unless a user inserted an invalid page.
650
0
            resources = ph.getObjectHandle().replaceKeyAndGetNew("/Resources", Dictionary::empty());
651
0
        }
652
0
        flatten_annotations_for_page(ph, resources, afdh, required_flags, forbidden_flags);
653
0
    }
654
0
    if (!afdh.getNeedAppearances()) {
655
0
        qpdf.getRoot().removeKey("/AcroForm");
656
0
    }
657
0
}
658
659
void
660
Pages::flatten_annotations_for_page(
661
    QPDFPageObjectHelper& page,
662
    QPDFObjectHandle& resources,
663
    QPDFAcroFormDocumentHelper& afdh,
664
    int required_flags,
665
    int forbidden_flags)
666
0
{
667
0
    bool need_appearances = afdh.getNeedAppearances();
668
0
    std::vector<QPDFAnnotationObjectHelper> annots = page.getAnnotations();
669
0
    std::vector<QPDFObjectHandle> new_annots;
670
0
    std::string new_content;
671
0
    int rotate = 0;
672
0
    QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate");
673
0
    if (rotate_obj.isInteger() && rotate_obj.getIntValue()) {
674
0
        rotate = rotate_obj.getIntValueAsInt();
675
0
    }
676
0
    int next_fx = 1;
677
0
    for (auto& aoh: annots) {
678
0
        QPDFObjectHandle as = aoh.getAppearanceStream("/N");
679
0
        bool is_widget = (aoh.getSubtype() == "/Widget");
680
0
        bool process = true;
681
0
        if (need_appearances && is_widget) {
682
0
            process = false;
683
0
        }
684
0
        if (process && as.isStream()) {
685
0
            if (is_widget) {
686
0
                QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh);
687
0
                QPDFObjectHandle as_resources = as.getDict().getKey("/Resources");
688
0
                if (as_resources.isIndirect()) {
689
0
                    ;
690
0
                    as.getDict().replaceKey("/Resources", as_resources.shallowCopy());
691
0
                    as_resources = as.getDict().getKey("/Resources");
692
0
                }
693
0
                as_resources.mergeResources(ff.getDefaultResources());
694
0
            } else {
695
0
                QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation");
696
0
            }
697
0
            std::string name = resources.getUniqueResourceName("/Fxo", next_fx);
698
0
            std::string content =
699
0
                aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags);
700
0
            if (!content.empty()) {
701
0
                resources.mergeResources(Dictionary({{"/XObject", Dictionary({{name, as}})}}));
702
0
                resources.getKey("/XObject").replaceKey(name, as);
703
0
                ++next_fx;
704
0
            }
705
0
            new_content += content;
706
0
        } else if (process && !aoh.getAppearanceDictionary().null()) {
707
            // If an annotation has no selected appearance stream, just drop the annotation when
708
            // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows
709
            // associated with comments that aren't visible, and other types of annotations that
710
            // aren't visible. Annotations that have no appearance streams at all, such as Link,
711
            // Popup, and Projection, should be preserved.
712
0
        } else {
713
0
            new_annots.push_back(aoh.getObjectHandle());
714
0
        }
715
0
    }
716
0
    if (new_annots.size() != annots.size()) {
717
0
        QPDFObjectHandle page_oh = page.getObjectHandle();
718
0
        if (new_annots.empty()) {
719
0
            page_oh.removeKey("/Annots");
720
0
        } else {
721
0
            QPDFObjectHandle old_annots = page_oh.getKey("/Annots");
722
0
            QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots);
723
0
            if (old_annots.isIndirect()) {
724
0
                qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh);
725
0
            } else {
726
0
                page_oh.replaceKey("/Annots", new_annots_oh);
727
0
            }
728
0
        }
729
0
        page.addPageContents(qpdf.newStream("q\n"), true);
730
0
        page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false);
731
0
    }
732
0
}