Coverage Report

Created: 2026-05-30 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_pages.cc
Line
Count
Source
1
#include <qpdf/QPDFPageDocumentHelper.hh>
2
#include <qpdf/QPDF_private.hh>
3
4
#include <qpdf/AcroForm.hh>
5
#include <qpdf/QPDFExc.hh>
6
#include <qpdf/QPDFObjectHandle_private.hh>
7
#include <qpdf/QTC.hh>
8
#include <qpdf/QUtil.hh>
9
#include <qpdf/Util.hh>
10
11
// In support of page manipulation APIs, these methods internally maintain state about pages in a
12
// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos,
13
// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages()
14
// method returns a const reference to all_pages and has been in the public API long before the
15
// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of
16
// calls to it in the library, so the efficiency of having it cached is probably worth keeping it.
17
// At one point, I had partially implemented a helper class specifically for the pages tree, but
18
// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both
19
// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than
20
// what's here. So, in spite of the fact that a const reference is returned, the current code is
21
// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in
22
// attic in case there is ever a reason to resurrect it. There are additional notes in
23
// README-maintainer, which also refers to this comment.
24
25
// The goal of this code is to ensure that the all_pages vector, which users may have a reference
26
// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent
27
// outside of any call to the library.  As long as users only touch the /Pages structure through
28
// page-specific API calls, they never have to worry about anything, and this will also stay
29
// consistent.  If a user touches anything about the /Pages structure outside of these calls (such
30
// as by directly looking up and manipulating the underlying objects), they can call
31
// updateAllPagesCache() to bring things back in sync.
32
33
// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure
34
// alone.  If the user does use the APIs, then we push all inheritable objects down and flatten the
35
// /Pages tree.  This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos
36
// internally consistent at all times.
37
38
// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent
39
// should remain in as few places as possible.  As of initial writing, only flattenPagesTree,
40
// insertPage, and removePage, along with methods they call, are concerned with it.  Everything else
41
// goes through one of those methods.
42
43
using Pages = QPDF::Doc::Pages;
44
45
std::vector<QPDFObjectHandle> const&
46
QPDF::getAllPages()
47
0
{
48
0
    return m->pages.all();
49
0
}
50
51
std::vector<QPDFObjectHandle> const&
52
Pages::cache()
53
12.0k
{
54
    // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
55
12.0k
    if (all_pages.empty() && !invalid_page_found) {
56
9.26k
        ever_called_get_all_pages_ = true;
57
9.26k
        auto root = qpdf.getRoot();
58
9.26k
        QPDFObjGen::set visited;
59
9.26k
        QPDFObjGen::set seen;
60
9.26k
        QPDFObjectHandle pages = root.getKey("/Pages");
61
9.26k
        bool warned = false;
62
9.26k
        bool changed_pages = false;
63
9.40k
        while (pages.isDictionary() && pages.hasKey("/Parent")) {
64
169
            if (!seen.add(pages)) {
65
                // loop -- will be detected again and reported later
66
24
                break;
67
24
            }
68
            // Files have been found in the wild where /Pages in the catalog points to the first
69
            // page. Try to work around this and similar cases with this heuristic.
70
145
            if (!warned) {
71
65
                root.warn(
72
65
                    "document page tree root (root -> /Pages) doesn't point"
73
65
                    " to the root of the page tree; attempting to correct");
74
65
                warned = true;
75
65
            }
76
145
            changed_pages = true;
77
145
            pages = pages.getKey("/Parent");
78
145
        }
79
9.26k
        if (changed_pages) {
80
64
            root.replaceKey("/Pages", pages);
81
64
        }
82
9.26k
        seen.clear();
83
9.26k
        if (!pages.hasKey("/Kids")) {
84
            // Ensure we actually found a /Pages object.
85
135
            throw QPDFExc(
86
135
                qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array");
87
135
        }
88
9.12k
        try {
89
9.12k
            getAllPagesInternal(pages, 0, visited, seen, false, false);
90
9.12k
        } catch (...) {
91
87
            all_pages.clear();
92
87
            invalid_page_found = false;
93
87
            throw;
94
87
        }
95
3.43k
        if (invalid_page_found) {
96
2.78k
            flattenPagesTree();
97
2.78k
            invalid_page_found = false;
98
2.78k
        }
99
3.43k
    }
100
6.22k
    return all_pages;
101
12.0k
}
102
103
void
104
Pages::getAllPagesInternal(
105
    QPDFObjectHandle cur_node,
106
    uint32_t level,
107
    QPDFObjGen::set& visited,
108
    QPDFObjGen::set& seen,
109
    bool media_box,
110
    bool resources)
111
6.16k
{
112
6.16k
    static uint32_t constexpr max_level = 100;
113
6.16k
    if (++level > max_level) {
114
0
        throw QPDFExc(
115
0
            qpdf_e_pages,
116
0
            m->file->getName(),
117
0
            "object " + cur_node.getObjGen().unparse(' '),
118
0
            0,
119
0
            "/Pages structure too deeply nested (getAllPages)");
120
0
    }
121
6.16k
    if (!visited.add(cur_node)) {
122
35
        throw QPDFExc(
123
35
            qpdf_e_pages,
124
35
            m->file->getName(),
125
35
            "object " + cur_node.getObjGen().unparse(' '),
126
35
            0,
127
35
            "Loop detected in /Pages structure (getAllPages)");
128
35
    }
129
6.12k
    if (!cur_node.isDictionaryOfType("/Pages")) {
130
        // During fuzzing files were encountered where the root object appeared in the pages tree.
131
        // Unconditionally setting the /Type to /Pages could cause problems, but trying to
132
        // accommodate the possibility may be excessive.
133
5.29k
        cur_node.warn("/Type key should be /Pages but is not; overriding");
134
5.29k
        cur_node.replaceKey("/Type", Name("/Pages"));
135
5.29k
    }
136
6.12k
    if (!media_box) {
137
5.92k
        media_box = cur_node.getKey("/MediaBox").isRectangle();
138
5.92k
        QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
139
5.92k
    }
140
6.12k
    if (!resources) {
141
5.80k
        resources = cur_node.getKey("/Resources").isDictionary();
142
5.80k
    }
143
6.12k
    auto kids = cur_node.getKey("/Kids");
144
6.12k
    if (!visited.add(kids)) {
145
11
        throw QPDFExc(
146
11
            qpdf_e_pages,
147
11
            m->file->getName(),
148
11
            "object " + cur_node.getObjGen().unparse(' '),
149
11
            0,
150
11
            "Loop detected in /Pages structure (getAllPages)");
151
11
    }
152
6.11k
    int i = -1;
153
32.5k
    for (auto& kid: kids.as_array()) {
154
32.5k
        ++i;
155
32.5k
        int errors = 0;
156
157
32.5k
        if (!kid.isDictionary()) {
158
21.6k
            kid.warn("Pages tree includes non-dictionary object; ignoring");
159
21.6k
            invalid_page_found = true;
160
21.6k
            continue;
161
21.6k
        }
162
10.9k
        if (!kid.isIndirect()) {
163
3.51k
            cur_node.warn(
164
3.51k
                "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect");
165
3.51k
            kid = qpdf.makeIndirectObject(kid);
166
3.51k
            ++errors;
167
3.51k
        }
168
10.9k
        if (kid.hasKey("/Kids")) {
169
2.64k
            getAllPagesInternal(kid, level, visited, seen, media_box, resources);
170
8.26k
        } else {
171
8.26k
            if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
172
3.40k
                kid.warn(
173
3.40k
                    "kid " + std::to_string(i) +
174
3.40k
                    " (from 0) MediaBox is undefined; setting to letter / ANSI A");
175
3.40k
                kid.replaceKey(
176
3.40k
                    "/MediaBox",
177
3.40k
                    QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
178
3.40k
                ++errors;
179
3.40k
            }
180
8.26k
            if (!resources) {
181
7.28k
                auto res = kid.getKey("/Resources");
182
183
7.28k
                if (!res.isDictionary()) {
184
3.53k
                    ++errors;
185
3.53k
                    kid.warn(
186
3.53k
                        "kid " + std::to_string(i) +
187
3.53k
                        " (from 0) Resources is missing or invalid; repairing");
188
3.53k
                    kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary());
189
3.53k
                }
190
7.28k
            }
191
8.26k
            auto annots = kid.getKey("/Annots");
192
8.26k
            if (!annots.null()) {
193
1.67k
                if (!annots.isArray()) {
194
17
                    kid.warn(
195
17
                        "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing");
196
17
                    kid.removeKey("/Annots");
197
17
                    ++errors;
198
1.66k
                } else {
199
1.66k
                    QPDFObjGen::set seen_annots;
200
73.5k
                    for (auto& annot: annots.as_array()) {
201
73.5k
                        if (!seen_annots.add(annot)) {
202
1.70k
                            kid.warn(
203
1.70k
                                "kid " + std::to_string(i) +
204
1.70k
                                " (from 0) Annots has duplicate entry for annotation " +
205
1.70k
                                annot.id_gen().unparse(' '));
206
1.70k
                            ++errors;
207
1.70k
                        }
208
73.5k
                    }
209
1.66k
                }
210
1.67k
            }
211
212
8.26k
            if (!seen.add(kid)) {
213
                // Make a copy of the page. This does the same as shallowCopyPage in
214
                // QPDFPageObjectHelper.
215
2.37k
                if (!m->reconstructed_xref) {
216
202
                    cur_node.warn(
217
202
                        "kid " + std::to_string(i) +
218
202
                        " (from 0) appears more than once in the pages tree;"
219
202
                        " creating a new page object as a copy");
220
                    // This needs to be fixed. shallowCopy does not necessarily produce a valid
221
                    // page.
222
202
                    kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
223
202
                    seen.add(kid);
224
2.17k
                } else {
225
2.17k
                    cur_node.warn(
226
2.17k
                        "kid " + std::to_string(i) +
227
2.17k
                        " (from 0) appears more than once in the pages tree; ignoring duplicate");
228
2.17k
                    invalid_page_found = true;
229
2.17k
                    kid = QPDFObjectHandle::newNull();
230
2.17k
                    continue;
231
2.17k
                }
232
202
                if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) {
233
                    // Consider fixing and adding an information message.
234
150
                    ++errors;
235
150
                }
236
202
            }
237
6.09k
            if (!kid.isDictionaryOfType("/Page")) {
238
3.51k
                kid.warn("/Type key should be /Page but is not; overriding");
239
3.51k
                kid.replaceKey("/Type", Name("/Page"));
240
3.51k
                ++errors;
241
3.51k
            }
242
6.09k
            if (m->reconstructed_xref && errors > 2) {
243
1.83k
                cur_node.warn(
244
1.83k
                    "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
245
1.83k
                invalid_page_found = true;
246
1.83k
                kid = QPDFObjectHandle::newNull();
247
1.83k
                continue;
248
1.83k
            }
249
4.26k
            all_pages.emplace_back(kid);
250
4.26k
        }
251
10.9k
    }
252
6.11k
}
253
254
void
255
QPDF::updateAllPagesCache()
256
0
{
257
0
    m->pages.update_cache();
258
0
}
259
260
void
261
Pages::update_cache()
262
0
{
263
    // Force regeneration of the pages cache.  We force immediate recalculation of all_pages since
264
    // users may have references to it that they got from calls to getAllPages().  We can defer
265
    // recalculation of pageobj_to_pages_pos until needed.
266
0
    all_pages.clear();
267
0
    pageobj_to_pages_pos.clear();
268
0
    pushed_inherited_attributes_to_pages = false;
269
0
    cache();
270
0
}
271
272
void
273
Pages::flattenPagesTree()
274
2.78k
{
275
    // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
276
277
2.78k
    if (!pageobj_to_pages_pos.empty()) {
278
0
        return;
279
0
    }
280
281
    // Push inherited objects down to the /Page level.  As a side effect all_pages will also be
282
    // generated.
283
2.78k
    pushInheritedAttributesToPage(true, true);
284
285
2.78k
    QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
286
287
2.78k
    size_t const len = all_pages.size();
288
6.03k
    for (size_t pos = 0; pos < len; ++pos) {
289
        // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
290
        // this point because pushInheritedAttributesToPage calls getAllPages which resolves
291
        // duplicates.
292
3.24k
        insertPageobjToPage(all_pages.at(pos), toI(pos), true);
293
3.24k
        all_pages.at(pos).replaceKey("/Parent", pages);
294
3.24k
    }
295
296
2.78k
    pages.replaceKey("/Kids", Array(all_pages));
297
    // /Count has not changed
298
2.78k
    if (pages.getKey("/Count").getUIntValue() != len) {
299
1.64k
        if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) {
300
1.59k
            pages.replaceKey("/Count", Integer(len));
301
1.59k
        } else {
302
46
            throw std::runtime_error("/Count is wrong after flattening pages tree");
303
46
        }
304
1.64k
    }
305
2.78k
}
306
307
void
308
QPDF::pushInheritedAttributesToPage()
309
0
{
310
    // Public API should not have access to allow_changes.
311
0
    m->pages.pushInheritedAttributesToPage(true, false);
312
0
}
313
314
void
315
Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
316
2.78k
{
317
    // Traverse pages tree pushing all inherited resources down to the page level.
318
319
    // The record of whether we've done this is cleared by updateAllPagesCache().  If we're warning
320
    // for skipped keys, re-traverse unconditionally.
321
2.78k
    if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) {
322
0
        return;
323
0
    }
324
325
    // Calling cache() resolves any duplicated page objects, repairs broken nodes, and detects
326
    // loops, so we don't have to do those activities here.
327
2.78k
    (void)cache();
328
329
    // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
330
    // values for them.
331
2.78k
    std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
332
2.78k
    pushInheritedAttributesToPageInternal(
333
2.78k
        m->trailer.getKey("/Root").getKey("/Pages"),
334
2.78k
        key_ancestors,
335
2.78k
        allow_changes,
336
2.78k
        warn_skipped_keys);
337
2.78k
    util::assertion(
338
2.78k
        key_ancestors.empty(),
339
2.78k
        "key_ancestors not empty after pushing inherited attributes to pages");
340
2.78k
    pushed_inherited_attributes_to_pages = true;
341
2.78k
    ever_pushed_inherited_attributes_to_pages_ = true;
342
2.78k
}
343
344
void
345
Pages::pushInheritedAttributesToPageInternal(
346
    QPDFObjectHandle cur_pages,
347
    std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
348
    bool allow_changes,
349
    bool warn_skipped_keys)
350
5.15k
{
351
    // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate
352
    // are inheritable attributes. Push this object onto the stack of pages nodes that have values
353
    // for this attribute.
354
355
5.15k
    std::set<std::string> inheritable_keys;
356
17.5k
    for (auto const& key: cur_pages.getKeys()) {
357
17.5k
        if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") {
358
1.18k
            if (!allow_changes) {
359
0
                throw QPDFExc(
360
0
                    qpdf_e_internal,
361
0
                    m->file->getName(),
362
0
                    "/Pages object " + cur_pages.id_gen().unparse(' '),
363
0
                    cur_pages.offset(),
364
0
                    "pushInheritedAttributesToPage detected an inheritable attribute when called "
365
0
                    "in no-change mode");
366
0
            }
367
368
            // This is an inheritable resource
369
1.18k
            inheritable_keys.insert(key);
370
1.18k
            auto oh = cur_pages[key];
371
1.18k
            QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1);
372
1.18k
            if (!oh.indirect()) {
373
1.16k
                if (!oh.isScalar()) {
374
                    // Replace shared direct object non-scalar resources with indirect objects to
375
                    // avoid copying large structures around.
376
377
                    cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh));
377
377
                    oh = cur_pages[key];
378
790
                } else {
379
                    // It's okay to copy scalars.
380
790
                }
381
1.16k
            }
382
1.18k
            key_ancestors[key].emplace_back(oh);
383
1.18k
            if (key_ancestors[key].size() > 1) {
384
192
            }
385
            // Remove this resource from this node.  It will be reattached at the page level.
386
1.18k
            cur_pages.erase(key);
387
16.3k
        } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) {
388
            // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
389
            // set), as we don't change these; but flattening removes intermediate /Pages nodes.
390
3.81k
            if (warn_skipped_keys && cur_pages.contains("/Parent")) {
391
1.18k
                warn(
392
1.18k
                    qpdf_e_pages,
393
1.18k
                    "Pages object: object " + cur_pages.id_gen().unparse(' '),
394
1.18k
                    cur_pages.offset(),
395
1.18k
                    ("Unknown key " + key +
396
1.18k
                     " in /Pages object is being discarded as a result of flattening the /Pages "
397
1.18k
                     "tree"));
398
1.18k
            }
399
3.81k
        }
400
17.5k
    }
401
402
    // Process descendant nodes. This method does not perform loop detection because all code paths
403
    // that lead here follow a call to getAllPages, which already throws an exception in the event
404
    // of a loop in the pages tree.
405
27.9k
    for (auto& kid: Array(cur_pages["/Kids"])) {
406
27.9k
        if (kid.isDictionaryOfType("/Pages")) {
407
2.37k
            pushInheritedAttributesToPageInternal(
408
2.37k
                kid, key_ancestors, allow_changes, warn_skipped_keys);
409
25.6k
        } else {
410
            // Add all available inheritable attributes not present in this object to this object.
411
25.6k
            for (auto const& [key, values]: key_ancestors) {
412
3.04k
                if (!kid.contains(key)) {
413
2.94k
                    kid.replaceKey(key, values.back());
414
2.94k
                } else {
415
97
                    QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
416
97
                }
417
3.04k
            }
418
25.6k
        }
419
27.9k
    }
420
421
    // For each inheritable key, pop the stack.  If the stack becomes empty, remove it from the map.
422
    // That way, the invariant that the list of keys in key_ancestors is exactly those keys for
423
    // which inheritable attributes are available.
424
425
5.15k
    if (!inheritable_keys.empty()) {
426
919
        for (auto const& key: inheritable_keys) {
427
919
            key_ancestors[key].pop_back();
428
919
            if (key_ancestors[key].empty()) {
429
742
                key_ancestors.erase(key);
430
742
            }
431
919
        }
432
4.41k
    } else {
433
4.41k
        QTC::TC("qpdf", "QPDF opt no inheritable keys");
434
4.41k
    }
435
5.15k
}
436
437
void
438
Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate)
439
3.24k
{
440
3.24k
    QPDFObjGen og(obj.getObjGen());
441
3.24k
    if (check_duplicate) {
442
3.24k
        if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
443
            // The library never calls insertPageobjToPage in a way that causes this to happen.
444
0
            throw QPDFExc(
445
0
                qpdf_e_pages,
446
0
                m->file->getName(),
447
0
                "page " + std::to_string(pos) + " (numbered from zero): object " + og.unparse(' '),
448
0
                0,
449
0
                "duplicate page reference found; this would cause loss of data");
450
0
        }
451
3.24k
    } else {
452
0
        pageobj_to_pages_pos[og] = pos;
453
0
    }
454
3.24k
}
455
456
void
457
Pages::insert(QPDFObjectHandle newpage, int pos)
458
0
{
459
    // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
460
461
0
    flattenPagesTree();
462
463
0
    if (!newpage.indirect()) {
464
0
        newpage = qpdf.makeIndirectObject(newpage);
465
0
    } else if (newpage.qpdf() != &qpdf) {
466
0
        newpage.qpdf()->pushInheritedAttributesToPage();
467
0
        newpage = qpdf.copyForeignObject(newpage);
468
0
    } else {
469
0
        QTC::TC("qpdf", "QPDF insert indirect page");
470
0
    }
471
472
0
    if (pos < 0 || std::cmp_greater(pos, all_pages.size())) {
473
0
        throw std::runtime_error("QPDF::insertPage called with pos out of range");
474
0
    }
475
476
0
    QTC::TC(
477
0
        "qpdf",
478
0
        "QPDF insert page",
479
0
        pos == 0 ? 0 :                        // insert at beginning
480
0
            std::cmp_equal(pos, size()) ? 1   // at end
481
0
                                        : 2); // insert in middle
482
483
0
    if (pageobj_to_pages_pos.contains(newpage)) {
484
0
        newpage = qpdf.makeIndirectObject(newpage.copy());
485
0
    }
486
487
0
    auto pages = qpdf.getRoot()["/Pages"];
488
0
    Array kids = pages["/Kids"];
489
490
0
    newpage.replaceKey("/Parent", pages);
491
0
    kids.insert(pos, newpage);
492
0
    size_t npages = kids.size();
493
0
    pages.replaceKey("/Count", Integer(npages));
494
0
    all_pages.insert(all_pages.begin() + pos, newpage);
495
0
    for (size_t i = static_cast<size_t>(pos) + 1; i < npages; ++i) {
496
0
        insertPageobjToPage(all_pages.at(i), static_cast<int>(i), false);
497
0
    }
498
0
    insertPageobjToPage(newpage, pos, true);
499
0
}
500
501
void
502
QPDF::removePage(QPDFObjectHandle page)
503
0
{
504
0
    m->pages.erase(page);
505
0
}
506
507
void
508
Pages::erase(QPDFObjectHandle& page)
509
0
{
510
0
    int pos = qpdf.findPage(page); // also ensures flat /Pages
511
0
    QTC::TC(
512
0
        "qpdf",
513
0
        "QPDF remove page",
514
0
        (pos == 0) ? 0 :                             // remove at beginning
515
0
            (pos == toI(all_pages.size() - 1)) ? 1   // end
516
0
                                               : 2); // remove in middle
517
518
0
    QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
519
0
    QPDFObjectHandle kids = pages.getKey("/Kids");
520
521
0
    kids.eraseItem(pos);
522
0
    int npages = static_cast<int>(kids.size());
523
0
    pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
524
0
    all_pages.erase(all_pages.begin() + pos);
525
0
    pageobj_to_pages_pos.erase(page.getObjGen());
526
0
    for (int i = pos; i < npages; ++i) {
527
0
        m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false);
528
0
    }
529
0
}
530
531
void
532
QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage)
533
0
{
534
0
    int refpos = findPage(refpage);
535
0
    if (!before) {
536
0
        ++refpos;
537
0
    }
538
0
    m->pages.insert(newpage, refpos);
539
0
}
540
541
void
542
QPDF::addPage(QPDFObjectHandle newpage, bool first)
543
0
{
544
0
    if (first) {
545
0
        m->pages.insert(newpage, 0);
546
0
    } else {
547
0
        m->pages.insert(newpage, getRoot()["/Pages"]["/Count"].getIntValueAsInt());
548
0
    }
549
0
}
550
551
int
552
QPDF::findPage(QPDFObjectHandle& page)
553
0
{
554
0
    return findPage(page.getObjGen());
555
0
}
556
557
int
558
QPDF::findPage(QPDFObjGen og)
559
0
{
560
0
    return m->pages.find(og);
561
0
}
562
563
int
564
Pages::find(QPDFObjGen og)
565
0
{
566
0
    flattenPagesTree();
567
0
    auto it = pageobj_to_pages_pos.find(og);
568
0
    if (it == pageobj_to_pages_pos.end()) {
569
0
        throw QPDFExc(
570
0
            qpdf_e_pages,
571
0
            m->file->getName(),
572
0
            "page object: object " + og.unparse(' '),
573
0
            0,
574
0
            "page object not referenced in /Pages tree");
575
0
    }
576
0
    return (*it).second;
577
0
}
578
579
// Definition of QPDFPageDocumentHelper's nested Members class; it currently declares no members.
class QPDFPageDocumentHelper::Members
580
{
581
};
582
583
// Construct a helper for qpdf; the only work is initializing the QPDFDocumentHelper base.
QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf) {}
587
588
// Return the page document helper obtained from qpdf.doc().page_dh().
QPDFPageDocumentHelper&
QPDFPageDocumentHelper::get(QPDF& qpdf)
{
    auto&& document = qpdf.doc();
    return document.page_dh();
}
593
594
// Currently a no-op: the body is empty and the repair argument is not used.
void
595
QPDFPageDocumentHelper::validate(bool repair)
596
0
{
597
0
}
598
599
std::vector<QPDFPageObjectHelper>
600
QPDFPageDocumentHelper::getAllPages()
601
0
{
602
0
    auto& pp = qpdf.doc().pages();
603
0
    return {pp.begin(), pp.end()};
604
0
}
605
606
void
607
QPDFPageDocumentHelper::pushInheritedAttributesToPage()
608
0
{
609
0
    qpdf.pushInheritedAttributesToPage();
610
0
}
611
612
void
613
QPDFPageDocumentHelper::removeUnreferencedResources()
614
0
{
615
0
    for (auto& ph: getAllPages()) {
616
0
        ph.removeUnreferencedResources();
617
0
    }
618
0
}
619
620
void
621
QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first)
622
0
{
623
0
    qpdf.doc().pages().insert(newpage, first ? 0 : qpdf.doc().pages().size());
624
0
}
625
626
void
627
QPDFPageDocumentHelper::addPageAt(
628
    QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage)
629
0
{
630
0
    qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle());
631
0
}
632
633
void
634
QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page)
635
0
{
636
0
    qpdf.removePage(page.getObjectHandle());
637
0
}
638
639
void
640
QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags)
641
0
{
642
0
    qpdf.doc().pages().flatten_annotations(required_flags, forbidden_flags);
643
0
}
644
645
void
646
Pages::flatten_annotations(int required_flags, int forbidden_flags)
647
0
{
648
0
    auto& afdh = qpdf.doc().acroform();
649
0
    if (afdh.getNeedAppearances()) {
650
0
        qpdf.getRoot()
651
0
            .getKey("/AcroForm")
652
0
            .warn(
653
0
                "document does not have updated appearance streams, so form fields "
654
0
                "will not be flattened");
655
0
    }
656
0
    for (QPDFPageObjectHelper ph: all()) {
657
0
        QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
658
0
        if (!resources.isDictionary()) {
659
            // As of #1521, this should be impossible unless a user inserted an invalid page.
660
0
            resources = ph.getObjectHandle().replaceKeyAndGetNew("/Resources", Dictionary::empty());
661
0
        }
662
0
        flatten_annotations_for_page(ph, resources, afdh, required_flags, forbidden_flags);
663
0
    }
664
0
    if (!afdh.getNeedAppearances()) {
665
0
        qpdf.getRoot().removeKey("/AcroForm");
666
0
    }
667
0
}
668
669
void
670
Pages::flatten_annotations_for_page(
671
    QPDFPageObjectHelper& page,
672
    QPDFObjectHandle& resources,
673
    impl::AcroForm& afdh,
674
    int required_flags,
675
    int forbidden_flags)
676
0
{
677
0
    bool need_appearances = afdh.getNeedAppearances();
678
0
    std::vector<QPDFAnnotationObjectHelper> annots = page.getAnnotations();
679
0
    std::vector<QPDFObjectHandle> new_annots;
680
0
    std::string new_content;
681
0
    int rotate = 0;
682
0
    QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate");
683
0
    if (rotate_obj.isInteger() && rotate_obj.getIntValue()) {
684
0
        rotate = rotate_obj.getIntValueAsInt();
685
0
    }
686
0
    int next_fx = 1;
687
0
    for (auto& aoh: annots) {
688
0
        QPDFObjectHandle as = aoh.getAppearanceStream("/N");
689
0
        bool is_widget = (aoh.getSubtype() == "/Widget");
690
0
        bool process = true;
691
0
        if (need_appearances && is_widget) {
692
0
            process = false;
693
0
        }
694
0
        if (process && as.isStream()) {
695
0
            if (is_widget) {
696
0
                QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh);
697
0
                QPDFObjectHandle as_resources = as.getDict().getKey("/Resources");
698
0
                if (as_resources.isIndirect()) {
699
0
                    ;
700
0
                    as.getDict().replaceKey("/Resources", as_resources.shallowCopy());
701
0
                    as_resources = as.getDict().getKey("/Resources");
702
0
                }
703
0
                as_resources.mergeResources(ff.getDefaultResources());
704
0
            } else {
705
0
                QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation");
706
0
            }
707
0
            std::string name = resources.getUniqueResourceName("/Fxo", next_fx);
708
0
            std::string content =
709
0
                aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags);
710
0
            if (!content.empty()) {
711
0
                resources.mergeResources(Dictionary({{"/XObject", Dictionary({{name, as}})}}));
712
0
                resources.getKey("/XObject").replaceKey(name, as);
713
0
                ++next_fx;
714
0
            }
715
0
            new_content += content;
716
0
        } else if (process && !aoh.getAppearanceDictionary().null()) {
717
            // If an annotation has no selected appearance stream, just drop the annotation when
718
            // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows
719
            // associated with comments that aren't visible, and other types of annotations that
720
            // aren't visible. Annotations that have no appearance streams at all, such as Link,
721
            // Popup, and Projection, should be preserved.
722
0
        } else {
723
0
            new_annots.push_back(aoh.getObjectHandle());
724
0
        }
725
0
    }
726
0
    if (new_annots.size() != annots.size()) {
727
0
        QPDFObjectHandle page_oh = page.getObjectHandle();
728
0
        if (new_annots.empty()) {
729
0
            page_oh.removeKey("/Annots");
730
0
        } else {
731
0
            QPDFObjectHandle old_annots = page_oh.getKey("/Annots");
732
0
            QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots);
733
0
            if (old_annots.isIndirect()) {
734
0
                qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh);
735
0
            } else {
736
0
                page_oh.replaceKey("/Annots", new_annots_oh);
737
0
            }
738
0
        }
739
0
        page.addPageContents(qpdf.newStream("q\n"), true);
740
0
        page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false);
741
0
    }
742
0
}