Coverage Report

Created: 2025-12-05 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_pages.cc
Line
Count
Source
1
#include <qpdf/QPDFPageDocumentHelper.hh>
2
#include <qpdf/QPDF_private.hh>
3
4
#include <qpdf/AcroForm.hh>
5
#include <qpdf/QPDFExc.hh>
6
#include <qpdf/QPDFObjectHandle_private.hh>
7
#include <qpdf/QTC.hh>
8
#include <qpdf/QUtil.hh>
9
#include <qpdf/Util.hh>
10
11
// In support of page manipulation APIs, these methods internally maintain state about pages in a
12
// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos,
13
// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages()
14
// method returns a const reference to all_pages and has been in the public API long before the
15
// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of
16
// calls to it in the library, so the efficiency of having it cached is probably worth keeping it.
17
// At one point, I had partially implemented a helper class specifically for the pages tree, but
18
// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both
19
// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than
20
// what's here. So, in spite of the fact that a const reference is returned, the current code is
21
// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in
22
// attic in case there is ever a reason to resurrect it. There are additional notes in
23
// README-maintainer, which also refers to this comment.
24
25
// The goal of this code is to ensure that the all_pages vector, which users may have a reference
26
// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent
27
// outside of any call to the library.  As long as users only touch the /Pages structure through
28
// page-specific API calls, they never have to worry about anything, and this will also stay
29
// consistent.  If a user touches anything about the /Pages structure outside of these calls (such
30
// as by directly looking up and manipulating the underlying objects), they can call
31
// updatePagesCache() to bring things back in sync.
32
33
// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure
34
// alone.  If the user does use the APIs, then we push all inheritable objects down and flatten the
35
// /Pages tree.  This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos
36
// internally consistent at all times.
37
38
// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent
39
// should remain in as few places as possible.  As of initial writing, only flattenPagesTree,
40
// insertPage, and removePage, along with methods they call, are concerned with it.  Everything else
41
// goes through one of those methods.
42
43
using Pages = QPDF::Doc::Pages;
44
45
std::vector<QPDFObjectHandle> const&
46
QPDF::getAllPages()
47
0
{
48
0
    return m->pages.all();
49
0
}
50
51
std::vector<QPDFObjectHandle> const&
52
Pages::cache()
53
25.0k
{
54
    // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
55
25.0k
    if (all_pages.empty() && !invalid_page_found) {
56
14.8k
        ever_called_get_all_pages_ = true;
57
14.8k
        auto root = qpdf.getRoot();
58
14.8k
        QPDFObjGen::set visited;
59
14.8k
        QPDFObjGen::set seen;
60
14.8k
        QPDFObjectHandle pages = root.getKey("/Pages");
61
14.8k
        bool warned = false;
62
14.8k
        bool changed_pages = false;
63
15.5k
        while (pages.isDictionary() && pages.hasKey("/Parent")) {
64
867
            if (!seen.add(pages)) {
65
                // loop -- will be detected again and reported later
66
150
                break;
67
150
            }
68
            // Files have been found in the wild where /Pages in the catalog points to the first
69
            // page. Try to work around this and similar cases with this heuristic.
70
717
            if (!warned) {
71
641
                root.warn(
72
641
                    "document page tree root (root -> /Pages) doesn't point"
73
641
                    " to the root of the page tree; attempting to correct");
74
641
                warned = true;
75
641
            }
76
717
            changed_pages = true;
77
717
            pages = pages.getKey("/Parent");
78
717
        }
79
14.8k
        if (changed_pages) {
80
640
            root.replaceKey("/Pages", pages);
81
640
        }
82
14.8k
        seen.clear();
83
14.8k
        if (!pages.hasKey("/Kids")) {
84
            // Ensure we actually found a /Pages object.
85
159
            throw QPDFExc(
86
159
                qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array");
87
159
        }
88
14.7k
        try {
89
14.7k
            getAllPagesInternal(pages, visited, seen, false, false);
90
14.7k
        } catch (...) {
91
75
            all_pages.clear();
92
75
            invalid_page_found = false;
93
75
            throw;
94
75
        }
95
10.9k
        if (invalid_page_found) {
96
3.11k
            flattenPagesTree();
97
3.11k
            invalid_page_found = false;
98
3.11k
        }
99
10.9k
    }
100
21.1k
    return all_pages;
101
25.0k
}
102
103
void
104
Pages::getAllPagesInternal(
105
    QPDFObjectHandle cur_node,
106
    QPDFObjGen::set& visited,
107
    QPDFObjGen::set& seen,
108
    bool media_box,
109
    bool resources)
110
11.6k
{
111
11.6k
    if (!visited.add(cur_node)) {
112
33
        throw QPDFExc(
113
33
            qpdf_e_pages,
114
33
            m->file->getName(),
115
33
            "object " + cur_node.getObjGen().unparse(' '),
116
33
            0,
117
33
            "Loop detected in /Pages structure (getAllPages)");
118
33
    }
119
11.6k
    if (!cur_node.isDictionaryOfType("/Pages")) {
120
        // During fuzzing files were encountered where the root object appeared in the pages tree.
121
        // Unconditionally setting the /Type to /Pages could cause problems, but trying to
122
        // accommodate the possibility may be excessive.
123
6.67k
        cur_node.warn("/Type key should be /Pages but is not; overriding");
124
6.67k
        cur_node.replaceKey("/Type", Name("/Pages"));
125
6.67k
    }
126
11.6k
    if (!media_box) {
127
11.5k
        media_box = cur_node.getKey("/MediaBox").isRectangle();
128
11.5k
        QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
129
11.5k
    }
130
11.6k
    if (!resources) {
131
11.6k
        resources = cur_node.getKey("/Resources").isDictionary();
132
11.6k
    }
133
11.6k
    auto kids = cur_node.getKey("/Kids");
134
11.6k
    if (!visited.add(kids)) {
135
7
        throw QPDFExc(
136
7
            qpdf_e_pages,
137
7
            m->file->getName(),
138
7
            "object " + cur_node.getObjGen().unparse(' '),
139
7
            0,
140
7
            "Loop detected in /Pages structure (getAllPages)");
141
7
    }
142
11.6k
    int i = -1;
143
50.2k
    for (auto& kid: kids.as_array()) {
144
50.2k
        ++i;
145
50.2k
        int errors = 0;
146
147
50.2k
        if (!kid.isDictionary()) {
148
33.0k
            kid.warn("Pages tree includes non-dictionary object; ignoring");
149
33.0k
            invalid_page_found = true;
150
33.0k
            continue;
151
33.0k
        }
152
17.2k
        if (!kid.isIndirect()) {
153
706
            cur_node.warn(
154
706
                "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect");
155
706
            kid = qpdf.makeIndirectObject(kid);
156
706
            ++errors;
157
706
        }
158
17.2k
        if (kid.hasKey("/Kids")) {
159
623
            getAllPagesInternal(kid, visited, seen, media_box, resources);
160
16.5k
        } else {
161
16.5k
            if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
162
7.39k
                kid.warn(
163
7.39k
                    "kid " + std::to_string(i) +
164
7.39k
                    " (from 0) MediaBox is undefined; setting to letter / ANSI A");
165
7.39k
                kid.replaceKey(
166
7.39k
                    "/MediaBox",
167
7.39k
                    QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
168
7.39k
                ++errors;
169
7.39k
            }
170
16.5k
            if (!resources) {
171
14.7k
                auto res = kid.getKey("/Resources");
172
173
14.7k
                if (!res.isDictionary()) {
174
6.84k
                    ++errors;
175
6.84k
                    kid.warn(
176
6.84k
                        "kid " + std::to_string(i) +
177
6.84k
                        " (from 0) Resources is missing or invalid; repairing");
178
6.84k
                    kid.replaceKey("/Resources", QPDFObjectHandle::newDictionary());
179
6.84k
                }
180
14.7k
            }
181
16.5k
            auto annots = kid.getKey("/Annots");
182
16.5k
            if (!annots.null()) {
183
1.38k
                if (!annots.isArray()) {
184
32
                    kid.warn(
185
32
                        "kid " + std::to_string(i) + " (from 0) Annots is not an array; removing");
186
32
                    kid.removeKey("/Annots");
187
32
                    ++errors;
188
1.35k
                } else {
189
1.35k
                    QPDFObjGen::set seen_annots;
190
25.5k
                    for (auto& annot: annots.as_array()) {
191
25.5k
                        if (!seen_annots.add(annot)) {
192
592
                            kid.warn(
193
592
                                "kid " + std::to_string(i) +
194
592
                                " (from 0) Annots has duplicate entry for annotation " +
195
592
                                annot.id_gen().unparse(' '));
196
592
                            ++errors;
197
592
                        }
198
25.5k
                    }
199
1.35k
                }
200
1.38k
            }
201
202
16.5k
            if (!seen.add(kid)) {
203
                // Make a copy of the page. This does the same as shallowCopyPage in
204
                // QPDFPageObjectHelper.
205
1.16k
                if (!m->reconstructed_xref) {
206
22
                    cur_node.warn(
207
22
                        "kid " + std::to_string(i) +
208
22
                        " (from 0) appears more than once in the pages tree;"
209
22
                        " creating a new page object as a copy");
210
                    // This needs to be fixed. shallowCopy does not necessarily produce a valid
211
                    // page.
212
22
                    kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
213
22
                    seen.add(kid);
214
1.14k
                } else {
215
1.14k
                    cur_node.warn(
216
1.14k
                        "kid " + std::to_string(i) +
217
1.14k
                        " (from 0) appears more than once in the pages tree; ignoring duplicate");
218
1.14k
                    invalid_page_found = true;
219
1.14k
                    kid = QPDFObjectHandle::newNull();
220
1.14k
                    continue;
221
1.14k
                }
222
22
                if (!kid.getKey("/Parent").isSameObjectAs(cur_node)) {
223
                    // Consider fixing and adding an information message.
224
10
                    ++errors;
225
10
                }
226
22
            }
227
15.4k
            if (!kid.isDictionaryOfType("/Page")) {
228
6.79k
                kid.warn("/Type key should be /Page but is not; overriding");
229
6.79k
                kid.replaceKey("/Type", Name("/Page"));
230
6.79k
                ++errors;
231
6.79k
            }
232
15.4k
            if (m->reconstructed_xref && errors > 2) {
233
1.04k
                cur_node.warn(
234
1.04k
                    "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
235
1.04k
                invalid_page_found = true;
236
1.04k
                kid = QPDFObjectHandle::newNull();
237
1.04k
                continue;
238
1.04k
            }
239
14.3k
            all_pages.emplace_back(kid);
240
14.3k
        }
241
17.2k
    }
242
11.6k
}
243
244
void
245
QPDF::updateAllPagesCache()
246
0
{
247
0
    m->pages.update_cache();
248
0
}
249
250
void
251
Pages::update_cache()
252
0
{
253
    // Force regeneration of the pages cache.  We force immediate recalculation of all_pages since
254
    // users may have references to it that they got from calls to getAllPages().  We can defer
255
    // recalculation of pageobj_to_pages_pos until needed.
256
0
    all_pages.clear();
257
0
    pageobj_to_pages_pos.clear();
258
0
    pushed_inherited_attributes_to_pages = false;
259
0
    cache();
260
0
}
261
262
void
263
Pages::flattenPagesTree()
264
3.11k
{
265
    // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
266
267
3.11k
    if (!pageobj_to_pages_pos.empty()) {
268
0
        return;
269
0
    }
270
271
    // Push inherited objects down to the /Page level.  As a side effect all_pages will also be
272
    // generated.
273
3.11k
    pushInheritedAttributesToPage(true, true);
274
275
3.11k
    QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
276
277
3.11k
    size_t const len = all_pages.size();
278
8.51k
    for (size_t pos = 0; pos < len; ++pos) {
279
        // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
280
        // this point because pushInheritedAttributesToPage calls getAllPages which resolves
281
        // duplicates.
282
5.40k
        insertPageobjToPage(all_pages.at(pos), toI(pos), true);
283
5.40k
        all_pages.at(pos).replaceKey("/Parent", pages);
284
5.40k
    }
285
286
3.11k
    pages.replaceKey("/Kids", Array(all_pages));
287
    // /Count has not changed
288
3.11k
    if (pages.getKey("/Count").getUIntValue() != len) {
289
2.56k
        if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) {
290
2.53k
            pages.replaceKey("/Count", Integer(len));
291
2.53k
        } else {
292
27
            throw std::runtime_error("/Count is wrong after flattening pages tree");
293
27
        }
294
2.56k
    }
295
3.11k
}
296
297
void
298
QPDF::pushInheritedAttributesToPage()
299
0
{
300
    // Public API should not have access to allow_changes.
301
0
    m->pages.pushInheritedAttributesToPage(true, false);
302
0
}
303
304
void
305
Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
306
12.5k
{
307
    // Traverse pages tree pushing all inherited resources down to the page level.
308
309
    // The record of whether we've done this is cleared by updateAllPagesCache().  If we're warning
310
    // for skipped keys, re-traverse unconditionally.
311
12.5k
    if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) {
312
2.29k
        return;
313
2.29k
    }
314
315
    // Calling cache() resolves any duplicated page objects, repairs broken nodes, and detects
316
    // loops, so we don't have to do those activities here.
317
10.2k
    (void)cache();
318
319
    // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
320
    // values for them.
321
10.2k
    std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
322
10.2k
    pushInheritedAttributesToPageInternal(
323
10.2k
        m->trailer.getKey("/Root").getKey("/Pages"),
324
10.2k
        key_ancestors,
325
10.2k
        allow_changes,
326
10.2k
        warn_skipped_keys);
327
10.2k
    util::assertion(
328
10.2k
        key_ancestors.empty(),
329
10.2k
        "key_ancestors not empty after pushing inherited attributes to pages");
330
10.2k
    pushed_inherited_attributes_to_pages = true;
331
10.2k
    ever_pushed_inherited_attributes_to_pages_ = true;
332
10.2k
}
333
334
void
335
Pages::pushInheritedAttributesToPageInternal(
336
    QPDFObjectHandle cur_pages,
337
    std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
338
    bool allow_changes,
339
    bool warn_skipped_keys)
340
10.7k
{
341
    // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate
342
    // are inheritable attributes. Push this object onto the stack of pages nodes that have values
343
    // for this attribute.
344
345
10.7k
    std::set<std::string> inheritable_keys;
346
38.2k
    for (auto const& key: cur_pages.getKeys()) {
347
38.2k
        if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") {
348
2.20k
            if (!allow_changes) {
349
0
                throw QPDFExc(
350
0
                    qpdf_e_internal,
351
0
                    m->file->getName(),
352
0
                    "/Pages object " + cur_pages.id_gen().unparse(' '),
353
0
                    cur_pages.offset(),
354
0
                    "pushInheritedAttributesToPage detected an inheritable attribute when called "
355
0
                    "in no-change mode");
356
0
            }
357
358
            // This is an inheritable resource
359
2.20k
            inheritable_keys.insert(key);
360
2.20k
            auto oh = cur_pages[key];
361
2.20k
            QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1);
362
2.20k
            if (!oh.indirect()) {
363
2.11k
                if (!oh.isScalar()) {
364
                    // Replace shared direct object non-scalar resources with indirect objects to
365
                    // avoid copying large structures around.
366
1.82k
                    cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh));
367
1.82k
                    oh = cur_pages[key];
368
1.82k
                } else {
369
                    // It's okay to copy scalars.
370
297
                }
371
2.11k
            }
372
2.20k
            key_ancestors[key].emplace_back(oh);
373
2.20k
            if (key_ancestors[key].size() > 1) {
374
72
            }
375
            // Remove this resource from this node.  It will be reattached at the page level.
376
2.20k
            cur_pages.erase(key);
377
36.0k
        } else if (!(key == "/Type" || key == "/Parent" || key == "/Kids" || key == "/Count")) {
378
            // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
379
            // set), as we don't change these; but flattening removes intermediate /Pages nodes.
380
8.34k
            if (warn_skipped_keys && cur_pages.contains("/Parent")) {
381
843
                warn(
382
843
                    qpdf_e_pages,
383
843
                    "Pages object: object " + cur_pages.id_gen().unparse(' '),
384
843
                    cur_pages.offset(),
385
843
                    ("Unknown key " + key +
386
843
                     " in /Pages object is being discarded as a result of flattening the /Pages "
387
843
                     "tree"));
388
843
            }
389
8.34k
        }
390
38.2k
    }
391
392
    // Process descendant nodes. This method does not perform loop detection because all code paths
393
    // that lead here follow a call to getAllPages, which already throws an exception in the event
394
    // of a loop in the pages tree.
395
45.2k
    for (auto& kid: Array(cur_pages["/Kids"])) {
396
45.2k
        if (kid.isDictionaryOfType("/Pages")) {
397
458
            pushInheritedAttributesToPageInternal(
398
458
                kid, key_ancestors, allow_changes, warn_skipped_keys);
399
44.7k
        } else {
400
            // Add all available inheritable attributes not present in this object to this object.
401
44.7k
            for (auto const& [key, values]: key_ancestors) {
402
9.59k
                if (!kid.contains(key)) {
403
5.48k
                    kid.replaceKey(key, values.back());
404
5.48k
                } else {
405
4.10k
                    QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
406
4.10k
                }
407
9.59k
            }
408
44.7k
        }
409
45.2k
    }
410
411
    // For each inheritable key, pop the stack.  If the stack becomes empty, remove it from the map.
412
    // That way, the invariant that the list of keys in key_ancestors is exactly those keys for
413
    // which inheritable attributes are available.
414
415
10.7k
    if (!inheritable_keys.empty()) {
416
1.96k
        for (auto const& key: inheritable_keys) {
417
1.96k
            key_ancestors[key].pop_back();
418
1.96k
            if (key_ancestors[key].empty()) {
419
1.89k
                key_ancestors.erase(key);
420
1.89k
            }
421
1.96k
        }
422
9.04k
    } else {
423
9.04k
        QTC::TC("qpdf", "QPDF opt no inheritable keys");
424
9.04k
    }
425
10.7k
}
426
427
void
428
Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate)
429
5.40k
{
430
5.40k
    QPDFObjGen og(obj.getObjGen());
431
5.40k
    if (check_duplicate) {
432
5.40k
        if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
433
            // The library never calls insertPageobjToPage in a way that causes this to happen.
434
0
            throw QPDFExc(
435
0
                qpdf_e_pages,
436
0
                m->file->getName(),
437
0
                "page " + std::to_string(pos) + " (numbered from zero): object " + og.unparse(' '),
438
0
                0,
439
0
                "duplicate page reference found; this would cause loss of data");
440
0
        }
441
5.40k
    } else {
442
0
        pageobj_to_pages_pos[og] = pos;
443
0
    }
444
5.40k
}
445
446
void
447
Pages::insert(QPDFObjectHandle newpage, int pos)
448
0
{
449
    // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
450
451
0
    flattenPagesTree();
452
453
0
    if (!newpage.indirect()) {
454
0
        newpage = qpdf.makeIndirectObject(newpage);
455
0
    } else if (newpage.qpdf() != &qpdf) {
456
0
        newpage.qpdf()->pushInheritedAttributesToPage();
457
0
        newpage = qpdf.copyForeignObject(newpage);
458
0
    } else {
459
0
        QTC::TC("qpdf", "QPDF insert indirect page");
460
0
    }
461
462
0
    if (pos < 0 || std::cmp_greater(pos, all_pages.size())) {
463
0
        throw std::runtime_error("QPDF::insertPage called with pos out of range");
464
0
    }
465
466
0
    QTC::TC(
467
0
        "qpdf",
468
0
        "QPDF insert page",
469
0
        pos == 0 ? 0 :                        // insert at beginning
470
0
            std::cmp_equal(pos, size()) ? 1   // at end
471
0
                                        : 2); // insert in middle
472
473
0
    if (pageobj_to_pages_pos.contains(newpage)) {
474
0
        newpage = qpdf.makeIndirectObject(newpage.copy());
475
0
    }
476
477
0
    auto pages = qpdf.getRoot()["/Pages"];
478
0
    Array kids = pages["/Kids"];
479
480
0
    newpage.replaceKey("/Parent", pages);
481
0
    kids.insert(pos, newpage);
482
0
    size_t npages = kids.size();
483
0
    pages.replaceKey("/Count", Integer(npages));
484
0
    all_pages.insert(all_pages.begin() + pos, newpage);
485
0
    for (size_t i = static_cast<size_t>(pos) + 1; i < npages; ++i) {
486
0
        insertPageobjToPage(all_pages.at(i), static_cast<int>(i), false);
487
0
    }
488
0
    insertPageobjToPage(newpage, pos, true);
489
0
}
490
491
void
492
QPDF::removePage(QPDFObjectHandle page)
493
0
{
494
0
    m->pages.erase(page);
495
0
}
496
497
void
498
Pages::erase(QPDFObjectHandle& page)
499
0
{
500
0
    int pos = qpdf.findPage(page); // also ensures flat /Pages
501
0
    QTC::TC(
502
0
        "qpdf",
503
0
        "QPDF remove page",
504
0
        (pos == 0) ? 0 :                             // remove at beginning
505
0
            (pos == toI(all_pages.size() - 1)) ? 1   // end
506
0
                                               : 2); // remove in middle
507
508
0
    QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
509
0
    QPDFObjectHandle kids = pages.getKey("/Kids");
510
511
0
    kids.eraseItem(pos);
512
0
    int npages = static_cast<int>(kids.size());
513
0
    pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
514
0
    all_pages.erase(all_pages.begin() + pos);
515
0
    pageobj_to_pages_pos.erase(page.getObjGen());
516
0
    for (int i = pos; i < npages; ++i) {
517
0
        m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false);
518
0
    }
519
0
}
520
521
void
522
QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage)
523
0
{
524
0
    int refpos = findPage(refpage);
525
0
    if (!before) {
526
0
        ++refpos;
527
0
    }
528
0
    m->pages.insert(newpage, refpos);
529
0
}
530
531
void
532
QPDF::addPage(QPDFObjectHandle newpage, bool first)
533
0
{
534
0
    if (first) {
535
0
        m->pages.insert(newpage, 0);
536
0
    } else {
537
0
        m->pages.insert(newpage, getRoot()["/Pages"]["/Count"].getIntValueAsInt());
538
0
    }
539
0
}
540
541
int
542
QPDF::findPage(QPDFObjectHandle& page)
543
0
{
544
0
    return findPage(page.getObjGen());
545
0
}
546
547
int
548
QPDF::findPage(QPDFObjGen og)
549
0
{
550
0
    return m->pages.find(og);
551
0
}
552
553
int
554
Pages::find(QPDFObjGen og)
555
0
{
556
0
    flattenPagesTree();
557
0
    auto it = pageobj_to_pages_pos.find(og);
558
0
    if (it == pageobj_to_pages_pos.end()) {
559
0
        throw QPDFExc(
560
0
            qpdf_e_pages,
561
0
            m->file->getName(),
562
0
            "page object: object " + og.unparse(' '),
563
0
            0,
564
0
            "page object not referenced in /Pages tree");
565
0
    }
566
0
    return (*it).second;
567
0
}
568
569
class QPDFPageDocumentHelper::Members
570
{
571
};
572
573
QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) :
574
0
    QPDFDocumentHelper(qpdf)
575
0
{
576
0
}
577
578
QPDFPageDocumentHelper&
579
QPDFPageDocumentHelper::get(QPDF& qpdf)
580
0
{
581
0
    return qpdf.doc().page_dh();
582
0
}
583
584
void
585
QPDFPageDocumentHelper::validate(bool repair)
586
0
{
587
0
}
588
589
std::vector<QPDFPageObjectHelper>
590
QPDFPageDocumentHelper::getAllPages()
591
0
{
592
0
    auto& pp = qpdf.doc().pages();
593
0
    return {pp.begin(), pp.end()};
594
0
}
595
596
void
597
QPDFPageDocumentHelper::pushInheritedAttributesToPage()
598
0
{
599
0
    qpdf.pushInheritedAttributesToPage();
600
0
}
601
602
void
603
QPDFPageDocumentHelper::removeUnreferencedResources()
604
0
{
605
0
    for (auto& ph: getAllPages()) {
606
0
        ph.removeUnreferencedResources();
607
0
    }
608
0
}
609
610
void
611
QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first)
612
0
{
613
0
    qpdf.doc().pages().insert(newpage, first ? 0 : qpdf.doc().pages().size());
614
0
}
615
616
void
617
QPDFPageDocumentHelper::addPageAt(
618
    QPDFPageObjectHelper newpage, bool before, QPDFPageObjectHelper refpage)
619
0
{
620
0
    qpdf.addPageAt(newpage.getObjectHandle(), before, refpage.getObjectHandle());
621
0
}
622
623
void
624
QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page)
625
0
{
626
0
    qpdf.removePage(page.getObjectHandle());
627
0
}
628
629
void
630
QPDFPageDocumentHelper::flattenAnnotations(int required_flags, int forbidden_flags)
631
0
{
632
0
    qpdf.doc().pages().flatten_annotations(required_flags, forbidden_flags);
633
0
}
634
635
void
636
Pages::flatten_annotations(int required_flags, int forbidden_flags)
637
0
{
638
0
    auto& afdh = qpdf.doc().acroform();
639
0
    if (afdh.getNeedAppearances()) {
640
0
        qpdf.getRoot()
641
0
            .getKey("/AcroForm")
642
0
            .warn(
643
0
                "document does not have updated appearance streams, so form fields "
644
0
                "will not be flattened");
645
0
    }
646
0
    for (QPDFPageObjectHelper ph: all()) {
647
0
        QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
648
0
        if (!resources.isDictionary()) {
649
            // As of #1521, this should be impossible unless a user inserted an invalid page.
650
0
            resources = ph.getObjectHandle().replaceKeyAndGetNew("/Resources", Dictionary::empty());
651
0
        }
652
0
        flatten_annotations_for_page(ph, resources, afdh, required_flags, forbidden_flags);
653
0
    }
654
0
    if (!afdh.getNeedAppearances()) {
655
0
        qpdf.getRoot().removeKey("/AcroForm");
656
0
    }
657
0
}
658
659
void
660
Pages::flatten_annotations_for_page(
661
    QPDFPageObjectHelper& page,
662
    QPDFObjectHandle& resources,
663
    impl::AcroForm& afdh,
664
    int required_flags,
665
    int forbidden_flags)
666
0
{
667
0
    bool need_appearances = afdh.getNeedAppearances();
668
0
    std::vector<QPDFAnnotationObjectHelper> annots = page.getAnnotations();
669
0
    std::vector<QPDFObjectHandle> new_annots;
670
0
    std::string new_content;
671
0
    int rotate = 0;
672
0
    QPDFObjectHandle rotate_obj = page.getObjectHandle().getKey("/Rotate");
673
0
    if (rotate_obj.isInteger() && rotate_obj.getIntValue()) {
674
0
        rotate = rotate_obj.getIntValueAsInt();
675
0
    }
676
0
    int next_fx = 1;
677
0
    for (auto& aoh: annots) {
678
0
        QPDFObjectHandle as = aoh.getAppearanceStream("/N");
679
0
        bool is_widget = (aoh.getSubtype() == "/Widget");
680
0
        bool process = true;
681
0
        if (need_appearances && is_widget) {
682
0
            process = false;
683
0
        }
684
0
        if (process && as.isStream()) {
685
0
            if (is_widget) {
686
0
                QPDFFormFieldObjectHelper ff = afdh.getFieldForAnnotation(aoh);
687
0
                QPDFObjectHandle as_resources = as.getDict().getKey("/Resources");
688
0
                if (as_resources.isIndirect()) {
689
0
                    ;
690
0
                    as.getDict().replaceKey("/Resources", as_resources.shallowCopy());
691
0
                    as_resources = as.getDict().getKey("/Resources");
692
0
                }
693
0
                as_resources.mergeResources(ff.getDefaultResources());
694
0
            } else {
695
0
                QTC::TC("qpdf", "QPDFPageDocumentHelper non-widget annotation");
696
0
            }
697
0
            std::string name = resources.getUniqueResourceName("/Fxo", next_fx);
698
0
            std::string content =
699
0
                aoh.getPageContentForAppearance(name, rotate, required_flags, forbidden_flags);
700
0
            if (!content.empty()) {
701
0
                resources.mergeResources(Dictionary({{"/XObject", Dictionary({{name, as}})}}));
702
0
                resources.getKey("/XObject").replaceKey(name, as);
703
0
                ++next_fx;
704
0
            }
705
0
            new_content += content;
706
0
        } else if (process && !aoh.getAppearanceDictionary().null()) {
707
            // If an annotation has no selected appearance stream, just drop the annotation when
708
            // flattening. This can happen for unchecked checkboxes and radio buttons, popup windows
709
            // associated with comments that aren't visible, and other types of annotations that
710
            // aren't visible. Annotations that have no appearance streams at all, such as Link,
711
            // Popup, and Projection, should be preserved.
712
0
        } else {
713
0
            new_annots.push_back(aoh.getObjectHandle());
714
0
        }
715
0
    }
716
0
    if (new_annots.size() != annots.size()) {
717
0
        QPDFObjectHandle page_oh = page.getObjectHandle();
718
0
        if (new_annots.empty()) {
719
0
            page_oh.removeKey("/Annots");
720
0
        } else {
721
0
            QPDFObjectHandle old_annots = page_oh.getKey("/Annots");
722
0
            QPDFObjectHandle new_annots_oh = QPDFObjectHandle::newArray(new_annots);
723
0
            if (old_annots.isIndirect()) {
724
0
                qpdf.replaceObject(old_annots.getObjGen(), new_annots_oh);
725
0
            } else {
726
0
                page_oh.replaceKey("/Annots", new_annots_oh);
727
0
            }
728
0
        }
729
0
        page.addPageContents(qpdf.newStream("q\n"), true);
730
0
        page.addPageContents(qpdf.newStream("\nQ\n" + new_content), false);
731
0
    }
732
0
}