Coverage Report

Created: 2025-10-10 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFPageObjectHelper.cc
Line
Count
Source
1
#include <qpdf/QPDFPageObjectHelper.hh>
2
3
#include <qpdf/Pl_Buffer.hh>
4
#include <qpdf/Pl_Concatenate.hh>
5
#include <qpdf/QIntC.hh>
6
#include <qpdf/QPDF.hh>
7
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
8
#include <qpdf/QPDFExc.hh>
9
#include <qpdf/QPDFMatrix.hh>
10
#include <qpdf/QPDFObjectHandle_private.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/QUtil.hh>
13
#include <qpdf/ResourceFinder.hh>
14
15
using namespace std::literals;
16
17
namespace
18
{
19
    class ContentProvider: public QPDFObjectHandle::StreamDataProvider
20
    {
21
      public:
22
        ContentProvider(QPDFObjectHandle from_page) :
23
0
            from_page(from_page)
24
0
        {
25
0
        }
26
0
        ~ContentProvider() override = default;
27
        void provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override;
28
29
      private:
30
        QPDFObjectHandle from_page;
31
    };
32
} // namespace
33
34
void
35
ContentProvider::provideStreamData(QPDFObjGen const&, Pipeline* p)
36
0
{
37
0
    Pl_Concatenate concat("concatenate", p);
38
0
    std::string description = "contents from page object " + from_page.getObjGen().unparse(' ');
39
0
    std::string all_description;
40
0
    from_page.getKey("/Contents").pipeContentStreams(&concat, description, all_description);
41
0
    concat.manualFinish();
42
0
}
43
44
namespace
45
{
46
    class InlineImageTracker: public QPDFObjectHandle::TokenFilter
47
    {
48
      public:
49
        InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
50
0
        ~InlineImageTracker() override = default;
51
        void handleToken(QPDFTokenizer::Token const&) override;
52
        QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
53
54
        QPDF* qpdf;
55
        size_t min_size;
56
        QPDFObjectHandle resources;
57
        std::string dict_str;
58
        std::string bi_str;
59
        int min_suffix{1};
60
        bool any_images{false};
61
        enum { st_top, st_bi } state{st_top};
62
    };
63
} // namespace
64
65
// Store the owning QPDF, the size threshold and the target resource dictionary; all remaining
// members keep their in-class initializers.
InlineImageTracker::InlineImageTracker(QPDF* owner, size_t threshold, QPDFObjectHandle res) :
    qpdf(owner),
    min_size(threshold),
    resources(res)
{
}
71
72
// Convert an inline image dictionary into an image XObject dictionary.
//
// The result always carries /Type /XObject and /Subtype /Image. Every non-null entry of odict is
// copied over with abbreviated keys expanded. For /ColorSpace given as a name and for /Filter
// given as a name or array of names, abbreviated values are expanded as well. A color space name
// that is not one of the abbreviations is looked up in the /ColorSpace dictionary of resources.
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
    static std::map<std::string, std::string> const key_expansions{
        {"/BPC", "/BitsPerComponent"},
        {"/CS", "/ColorSpace"},
        {"/D", "/Decode"},
        {"/DP", "/DecodeParms"},
        {"/F", "/Filter"},
        {"/H", "/Height"},
        {"/IM", "/ImageMask"},
        {"/I", "/Interpolate"},
        {"/W", "/Width"},
    };
    static std::map<std::string, std::string> const colorspace_expansions{
        {"/G", "/DeviceGray"},
        {"/RGB", "/DeviceRGB"},
        {"/CMYK", "/DeviceCMYK"},
        {"/I", "/Indexed"},
    };
    static std::map<std::string, std::string> const filter_expansions{
        {"/AHx", "/ASCIIHexDecode"},
        {"/A85", "/ASCII85Decode"},
        {"/LZW", "/LZWDecode"},
        {"/Fl", "/FlateDecode"},
        {"/RL", "/RunLengthDecode"},
        {"/CCF", "/CCITTFaxDecode"},
        {"/DCT", "/DCTDecode"},
    };

    QPDFObjectHandle image_dict = QPDFObjectHandle::newDictionary();
    image_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    image_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));

    for (auto const& [orig_key, orig_value]: odict.as_dictionary()) {
        if (orig_value.null()) {
            continue;
        }
        auto key = orig_key;
        auto value = orig_value;
        if (auto found = key_expansions.find(key); found != key_expansions.end()) {
            key = found->second;
        }

        if (key == "/ColorSpace") {
            if (value.isName()) {
                std::string const cs_name = value.getName();
                auto found = colorspace_expansions.find(cs_name);
                if (found != colorspace_expansions.end()) {
                    value = QPDFObjectHandle::newName(found->second);
                } else {
                    // Not an abbreviation: treat the name as a key in the /ColorSpace dictionary
                    // of the resources and use the entry found there as the color space. If it
                    // cannot be resolved, warn and keep the original value.
                    // NOTE(review): unabbreviated names such as /DeviceGray also take this path
                    // and produce the warning unless listed in the resources -- confirm intended.
                    QPDFObjectHandle cs_resources = resources.getKey("/ColorSpace");
                    if (cs_resources.isDictionary() && cs_resources.hasKey(cs_name)) {
                        QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
                        value = cs_resources.getKey(cs_name);
                    } else {
                        resources.warn("unable to resolve colorspace " + cs_name);
                    }
                }
            }
        } else if (key == "/Filter") {
            // Normalize to a list so a single name and an array of names share one code path.
            std::vector<QPDFObjectHandle> filters;
            if (value.isName()) {
                filters.push_back(value);
            } else if (value.isArray()) {
                filters = value.getArrayAsVector();
            }
            for (auto& filter: filters) {
                if (!filter.isName()) {
                    continue;
                }
                auto found = filter_expansions.find(filter.getName());
                if (found != filter_expansions.end()) {
                    filter = QPDFObjectHandle::newName(found->second);
                }
            }
            // Write the result back in the same shape (name or array) as the input.
            if (value.isName() && (filters.size() == 1)) {
                value = filters.at(0);
            } else if (value.isArray()) {
                value = QPDFObjectHandle::newArray(filters);
            }
        }
        image_dict.replaceKey(key, value);
    }
    return image_dict;
}
174
175
void
176
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
177
0
{
178
0
    if (state == st_bi) {
179
0
        if (token.getType() == QPDFTokenizer::tt_inline_image) {
180
0
            std::string image_data(token.getValue());
181
0
            size_t len = image_data.length();
182
0
            if (len >= min_size) {
183
0
                QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
184
0
                QPDFObjectHandle dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
185
0
                dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
186
0
                std::string name = resources.getUniqueResourceName("/IIm", min_suffix);
187
0
                QPDFObjectHandle image = QPDFObjectHandle::newStream(
188
0
                    qpdf, std::make_shared<Buffer>(std::move(image_data)));
189
0
                image.replaceDict(dict);
190
0
                resources.getKey("/XObject").replaceKey(name, image);
191
0
                write(name);
192
0
                write(" Do\n");
193
0
                any_images = true;
194
0
            } else {
195
0
                QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
196
0
                write(bi_str);
197
0
                writeToken(token);
198
0
                state = st_top;
199
0
            }
200
0
        } else if (token.isWord("ID")) {
201
0
            bi_str += token.getValue();
202
0
            dict_str += " >>";
203
0
        } else if (token.isWord("EI")) {
204
0
            state = st_top;
205
0
        } else {
206
0
            bi_str += token.getRawValue();
207
0
            dict_str += token.getRawValue();
208
0
        }
209
0
    } else if (token.isWord("BI")) {
210
0
        bi_str = token.getValue();
211
0
        dict_str = "<< ";
212
0
        state = st_bi;
213
0
    } else {
214
0
        writeToken(token);
215
0
    }
216
0
}
217
218
// Wrap the given object handle; all state lives in the QPDFObjectHelper base.
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle page_oh) :
    QPDFObjectHelper(page_oh)
{
}
222
223
// Convenience overload: look up an attribute without a fallback provider and without copying a
// fallback value.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(std::string const& name, bool copy_if_shared)
{
    auto attribute = getAttribute(name, copy_if_shared, nullptr, false);
    return attribute;
}
228
229
// Look up an attribute of this page or form XObject.
//
// For form XObjects the stream dictionary is consulted. For pages, /MediaBox, /CropBox,
// /Resources and /Rotate that are missing on the page itself are searched for along the /Parent
// chain. If copy_if_shared is set and the value was inherited or is indirect, a shallow copy is
// stored directly under name and returned. If the value is still null and get_fallback is
// provided, the fallback value is returned, stored as a shallow copy first when copy_if_fallback
// is set and the fallback is not null.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(
    std::string const& name,
    bool copy_if_shared,
    std::function<QPDFObjectHandle()> get_fallback,
    bool copy_if_fallback)
{
    bool const form_xobject = oh().isFormXObject();
    auto holder = form_xobject ? oh().getDict() : oh();
    auto value = holder.getKey(name);
    bool from_ancestor = false;

    auto is_inheritable = [&name]() {
        return name == "/MediaBox" || name == "/CropBox" || name == "/Resources" ||
            name == "/Rotate";
    };

    if (!form_xobject && value.null() && is_inheritable()) {
        QPDFObjectHandle ancestor = holder;
        QPDFObjGen::set visited{}; // guards against loops in the /Parent chain
        while (!from_ancestor && visited.add(ancestor) && ancestor.hasKey("/Parent")) {
            ancestor = ancestor.getKey("/Parent");
            value = ancestor.getKey(name);
            from_ancestor = !value.null();
        }
    }

    if (copy_if_shared && (from_ancestor || value.isIndirect())) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute", form_xobject ? 0 : 1);
        value = holder.replaceKeyAndGetNew(name, value.shallowCopy());
    }

    if (value.null() && get_fallback) {
        value = get_fallback();
        if (copy_if_fallback && !value.null()) {
            value = holder.replaceKeyAndGetNew(name, value.shallowCopy());
        } else {
            QTC::TC("qpdf", "QPDFPageObjectHelper used fallback without copying");
        }
    }
    return value;
}
268
269
// Return the /MediaBox attribute as resolved by getAttribute().
QPDFObjectHandle
QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
{
    auto media_box = getAttribute("/MediaBox", copy_if_shared);
    return media_box;
}
274
275
// Return the /CropBox attribute; when getAttribute() finds none, the media box is used as the
// fallback.
QPDFObjectHandle
QPDFPageObjectHelper::getCropBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto media_box_fallback = [this, copy_if_shared]() { return getMediaBox(copy_if_shared); };
    return getAttribute("/CropBox", copy_if_shared, media_box_fallback, copy_if_fallback);
}
284
285
// Return the /TrimBox attribute; when getAttribute() finds none, the crop box is used as the
// fallback.
QPDFObjectHandle
QPDFPageObjectHelper::getTrimBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/TrimBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
296
297
// Return the /ArtBox attribute; when getAttribute() finds none, the crop box is used as the
// fallback.
QPDFObjectHandle
QPDFPageObjectHelper::getArtBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/ArtBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
308
309
// Return the /BleedBox attribute; when getAttribute() finds none, the crop box is used as the
// fallback.
QPDFObjectHandle
QPDFPageObjectHelper::getBleedBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/BleedBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
320
321
void
322
QPDFPageObjectHelper::forEachXObject(
323
    bool recursive,
324
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
325
        action,
326
    std::function<bool(QPDFObjectHandle)> selector)
327
0
{
328
0
    QTC::TC(
329
0
        "qpdf",
330
0
        "QPDFPageObjectHelper::forEachXObject",
331
0
        recursive ? (oh().isFormXObject() ? 0 : 1) : (oh().isFormXObject() ? 2 : 3));
332
0
    QPDFObjGen::set seen;
333
0
    std::list<QPDFPageObjectHelper> queue;
334
0
    queue.emplace_back(*this);
335
0
    while (!queue.empty()) {
336
0
        auto& ph = queue.front();
337
0
        if (seen.add(ph)) {
338
0
            auto xobj_dict = ph.getAttribute("/Resources", false).getKeyIfDict("/XObject");
339
0
            for (auto const& [key, value]: xobj_dict.as_dictionary()) {
340
0
                if (value.null()) {
341
0
                    continue;
342
0
                }
343
0
                auto obj = value;
344
0
                if ((!selector) || selector(obj)) {
345
0
                    action(obj, xobj_dict, key);
346
0
                }
347
0
                if (recursive && obj.isFormXObject()) {
348
0
                    queue.emplace_back(obj);
349
0
                }
350
0
            }
351
0
        }
352
0
        queue.pop_front();
353
0
    }
354
0
}
355
356
void
357
QPDFPageObjectHelper::forEachImage(
358
    bool recursive,
359
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
360
        action)
361
0
{
362
0
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isImage(); });
363
0
}
364
365
void
366
QPDFPageObjectHelper::forEachFormXObject(
367
    bool recursive,
368
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
369
        action)
370
0
{
371
0
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isFormXObject(); });
372
0
}
373
374
std::map<std::string, QPDFObjectHandle>
375
QPDFPageObjectHelper::getPageImages()
376
0
{
377
0
    return getImages();
378
0
}
379
380
std::map<std::string, QPDFObjectHandle>
381
QPDFPageObjectHelper::getImages()
382
0
{
383
0
    std::map<std::string, QPDFObjectHandle> result;
384
0
    forEachImage(
385
0
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
386
0
            result[key] = obj;
387
0
        });
388
0
    return result;
389
0
}
390
391
std::map<std::string, QPDFObjectHandle>
392
QPDFPageObjectHelper::getFormXObjects()
393
0
{
394
0
    std::map<std::string, QPDFObjectHandle> result;
395
0
    forEachFormXObject(
396
0
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
397
0
            result[key] = obj;
398
0
        });
399
0
    return result;
400
0
}
401
402
void
403
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
404
0
{
405
0
    if (shallow) {
406
0
        QPDFObjectHandle resources = getAttribute("/Resources", true);
407
        // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
408
        // other pages.
409
0
        resources.mergeResources("<< /XObject << >> >>"_qpdf);
410
0
        InlineImageTracker iit(oh().getOwningQPDF(), min_size, resources);
411
0
        Pl_Buffer b("new page content");
412
0
        bool filtered = false;
413
0
        try {
414
0
            filterContents(&iit, &b);
415
0
            filtered = true;
416
0
        } catch (std::exception& e) {
417
0
            warn(
418
0
                "Unable to filter content stream: "s + e.what() +
419
0
                "; not attempting to externalize inline images from this stream");
420
0
        }
421
0
        if (filtered && iit.any_images) {
422
0
            if (oh().isFormXObject()) {
423
0
                oh().replaceStreamData(
424
0
                    b.getBufferSharedPointer(),
425
0
                    QPDFObjectHandle::newNull(),
426
0
                    QPDFObjectHandle::newNull());
427
0
            } else {
428
0
                oh().replaceKey(
429
0
                    "/Contents",
430
0
                    QPDFObjectHandle::newStream(&oh().getQPDF(), b.getBufferSharedPointer()));
431
0
            }
432
0
        }
433
0
    } else {
434
0
        externalizeInlineImages(min_size, true);
435
0
        forEachFormXObject(
436
0
            true, [min_size](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
437
0
                QPDFPageObjectHelper(obj).externalizeInlineImages(min_size, true);
438
0
            });
439
0
    }
440
0
}
441
442
std::vector<QPDFAnnotationObjectHelper>
443
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
444
0
{
445
0
    std::vector<QPDFAnnotationObjectHelper> result;
446
0
    for (auto const& annot: oh().getKey("/Annots").as_array()) {
447
0
        if (annot.isDictionaryOfType("", only_subtype)) {
448
0
            result.emplace_back(annot);
449
0
        }
450
0
    }
451
0
    return result;
452
0
}
453
454
std::vector<QPDFObjectHandle>
455
QPDFPageObjectHelper::getPageContents()
456
0
{
457
0
    return oh().getPageContents();
458
0
}
459
460
void
461
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
462
0
{
463
0
    oh().addPageContents(contents, first);
464
0
}
465
466
void
467
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
468
0
{
469
0
    oh().rotatePage(angle, relative);
470
0
}
471
472
void
473
QPDFPageObjectHelper::coalesceContentStreams()
474
0
{
475
0
    oh().coalesceContentStreams();
476
0
}
477
478
void
479
QPDFPageObjectHelper::parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks)
480
0
{
481
0
    parseContents(callbacks);
482
0
}
483
484
void
485
QPDFPageObjectHelper::parseContents(QPDFObjectHandle::ParserCallbacks* callbacks)
486
0
{
487
0
    if (oh().isFormXObject()) {
488
0
        oh().parseAsContents(callbacks);
489
0
    } else {
490
0
        oh().parsePageContents(callbacks);
491
0
    }
492
0
}
493
494
void
495
QPDFPageObjectHelper::filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
496
0
{
497
0
    return filterContents(filter, next);
498
0
}
499
500
void
501
QPDFPageObjectHelper::filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
502
0
{
503
0
    if (oh().isFormXObject()) {
504
0
        oh().filterAsContents(filter, next);
505
0
    } else {
506
0
        oh().filterPageContents(filter, next);
507
0
    }
508
0
}
509
510
void
511
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
512
0
{
513
0
    pipeContents(p);
514
0
}
515
516
void
517
QPDFPageObjectHelper::pipeContents(Pipeline* p)
518
0
{
519
0
    if (oh().isFormXObject()) {
520
0
        oh().pipeStreamData(p, 0, qpdf_dl_specialized);
521
0
    } else {
522
0
        oh().pipePageContents(p);
523
0
    }
524
0
}
525
526
void
527
QPDFPageObjectHelper::addContentTokenFilter(
528
    std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter)
529
0
{
530
0
    if (oh().isFormXObject()) {
531
0
        oh().addTokenFilter(token_filter);
532
0
    } else {
533
0
        oh().addContentTokenFilter(token_filter);
534
0
    }
535
0
}
536
537
bool
538
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
539
    QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
540
0
{
541
0
    const bool is_page = !ph.oh().isFormXObject();
542
0
    if (!is_page) {
543
0
        QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
544
0
    }
545
546
0
    ResourceFinder rf;
547
0
    try {
548
0
        auto q = ph.qpdf();
549
0
        size_t before_nw = (q ? q->numWarnings() : 0);
550
0
        ph.parseContents(&rf);
551
0
        size_t after_nw = (q ? q->numWarnings() : 0);
552
0
        if (after_nw > before_nw) {
553
0
            ph.warn(
554
0
                "Bad token found while scanning content stream; "
555
0
                "not attempting to remove unreferenced objects from this object");
556
0
            return false;
557
0
        }
558
0
    } catch (std::exception& e) {
559
0
        QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
560
0
        ph.warn(
561
0
            "Unable to parse content stream: "s + e.what() +
562
0
            "; not attempting to remove unreferenced objects from this object");
563
0
        return false;
564
0
    }
565
566
    // We will walk through /Font and /XObject dictionaries, removing any resources that are not
567
    // referenced. We must make copies of resource dictionaries down into the dictionaries are
568
    // mutating to prevent mutating one dictionary from having the side effect of mutating the one
569
    // it was copied from.
570
0
    QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
571
0
    std::vector<QPDFObjectHandle> rdicts;
572
0
    std::set<std::string> known_names;
573
0
    if (resources.isDictionary()) {
574
0
        for (auto const& iter: {"/Font", "/XObject"}) {
575
0
            QPDFObjectHandle dict = resources.getKey(iter);
576
0
            if (dict.isDictionary()) {
577
0
                dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy());
578
0
                rdicts.push_back(dict);
579
0
                known_names.merge(dict.getKeys());
580
0
            }
581
0
        }
582
0
    }
583
584
0
    std::set<std::string> local_unresolved;
585
0
    auto const& names_by_rtype = rf.getNamesByResourceType();
586
0
    for (auto const& i1: {"/Font", "/XObject"}) {
587
0
        auto it = names_by_rtype.find(i1);
588
0
        if (it == names_by_rtype.end()) {
589
0
            continue;
590
0
        }
591
0
        for (auto const& n_iter: it->second) {
592
0
            std::string const& name = n_iter.first;
593
0
            if (!known_names.contains(name)) {
594
0
                unresolved.insert(name);
595
0
                local_unresolved.insert(name);
596
0
            }
597
0
        }
598
0
    }
599
    // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
600
    // which case names were resolved from the containing page. This behavior seems to be widely
601
    // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved
602
    // names, some viewers fail to resolve them, and others allow them to be inherited from the page
603
    // or from another form XObjects that contains them. Since this behavior is inconsistent across
604
    // viewers, we consider an unresolved name when a resources dictionary is present to be reason
605
    // not to remove unreferenced resources. An unresolved name in the absence of a resource
606
    // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
607
    // unresolved names, and for page objects, we avoid removing any such names found in nested form
608
    // XObjects.
609
610
0
    if (!local_unresolved.empty() && resources.isDictionary()) {
611
        // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
612
        // looking at names that are referencing fonts and XObjects, but until we're certain that we
613
        // know the meaning of every name in a content stream, we don't want to give warnings that
614
        // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
615
        // there's nothing to be done about it, so there's no good reason to issue a warning. The
616
        // only sad thing is that it was a false positive that alerted me to a logic error in the
617
        // code, and any future such errors would now be hidden.
618
0
        QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
619
0
        return false;
620
0
    }
621
622
0
    for (auto& dict: rdicts) {
623
0
        for (auto const& key: dict.getKeys()) {
624
0
            if (is_page && unresolved.contains(key)) {
625
                // This name is referenced by some nested form xobject, so don't remove it.
626
0
                QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
627
0
            } else if (!rf.getNames().contains(key)) {
628
0
                dict.removeKey(key);
629
0
            }
630
0
        }
631
0
    }
632
0
    return true;
633
0
}
634
635
void
636
QPDFPageObjectHelper::removeUnreferencedResources()
637
0
{
638
    // Accumulate a list of unresolved names across all nested form XObjects.
639
0
    std::set<std::string> unresolved;
640
0
    bool any_failures = false;
641
0
    forEachFormXObject(
642
0
        true,
643
0
        [&any_failures, &unresolved](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
644
0
            if (!removeUnreferencedResourcesHelper(QPDFPageObjectHelper(obj), unresolved)) {
645
0
                any_failures = true;
646
0
            }
647
0
        });
648
0
    if (oh().isFormXObject() || (!any_failures)) {
649
0
        removeUnreferencedResourcesHelper(*this, unresolved);
650
0
    }
651
0
}
652
653
// Make a shallow copy of this page and register it as a new indirect object in the owning QPDF.
// getQPDF() is given the error text to use when the page is a direct object.
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
    QPDF& owner = oh().getQPDF("QPDFPageObjectHelper::shallowCopyPage called with a direct object");
    QPDFObjectHandle page_copy = oh().shallowCopy();
    return QPDFPageObjectHelper(owner.makeIndirectObject(page_copy));
}
660
661
// Compute the matrix that applies this page's /Rotate and /UserUnit to its trim box.
//
// Returns the identity matrix when the trim box is not a rectangle, when neither /Rotate nor
// /UserUnit is present, or when invert is requested and the scale is zero. Only rotations of 90,
// 180 and 270 degrees are honoured; any other value (including a non-integer /Rotate) yields a
// scale-only matrix. With invert set, the reciprocal scale and the opposite rotation are used.
QPDFObjectHandle::Matrix
QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
{
    QPDFObjectHandle::Matrix matrix(1, 0, 0, 1, 0, 0);
    QPDFObjectHandle bbox = getTrimBox(false);
    if (!bbox.isRectangle()) {
        return matrix;
    }
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (!(rotate_obj.null() && scale_obj.null())) {
        QPDFObjectHandle::Rectangle rect = bbox.getArrayAsRectangle();
        double width = rect.urx - rect.llx;
        double height = rect.ury - rect.lly;
        double scale = (scale_obj.isNumber() ? scale_obj.getNumericValue() : 1.0);
        int rotate = (rotate_obj.isInteger() ? rotate_obj.getIntValueAsInt() : 0);
        // Ignore invalid rotation angles up front. Collapsing them to 0 before the inversion
        // below keeps "360 - rotate" from overflowing for extreme /Rotate values, and selects
        // the same switch case as before for every angle: an unsupported angle never inverts to
        // a supported one.
        if (rotate != 90 && rotate != 180 && rotate != 270) {
            rotate = 0;
        }
        if (invert) {
            if (scale == 0.0) {
                return matrix;
            }
            scale = 1.0 / scale;
            rotate = 360 - rotate; // 90 <-> 270, 180 stays, 0 becomes 360 (default case)
        }

        switch (rotate) {
        case 90:
            matrix = QPDFObjectHandle::Matrix(0, -scale, scale, 0, 0, width * scale);
            break;
        case 180:
            matrix = QPDFObjectHandle::Matrix(-scale, 0, 0, -scale, width * scale, height * scale);
            break;
        case 270:
            matrix = QPDFObjectHandle::Matrix(0, scale, -scale, 0, height * scale, 0);
            break;
        default:
            matrix = QPDFObjectHandle::Matrix(scale, 0, 0, scale, 0, 0);
            break;
        }
    }
    return matrix;
}
703
704
// Create a form XObject stream that draws this page.
//
// The new stream gets shallow copies of the page's /Resources and /Group, the trim box as /BBox,
// and a data provider that pipes the page's contents. When handle_transformations is set and the
// page has /Rotate or /UserUnit, the matching /Matrix is added. getQPDF() is given the error
// text to use when the page is a direct object.
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
    QPDF& owner =
        oh().getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object");
    auto form = owner.newStream();

    QPDFObjectHandle form_dict = form.getDict();
    form_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    form_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
    form_dict.replaceKey("/Resources", getAttribute("/Resources", false).shallowCopy());
    form_dict.replaceKey("/Group", getAttribute("/Group", false).shallowCopy());

    QPDFObjectHandle bbox = getTrimBox(false).shallowCopy();
    if (!bbox.isRectangle()) {
        warn("bounding box is invalid; form XObject created from page will not work");
    }
    // The box is stored even when invalid; the warning above is the only reaction.
    form_dict.replaceKey("/BBox", bbox);

    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider =
        std::make_shared<ContentProvider>(oh());
    form.replaceStreamData(provider, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());

    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    bool const transformed = !(rotate_obj.null() && scale_obj.null());
    if (handle_transformations && transformed) {
        form_dict.replaceKey("/Matrix", QPDFObjectHandle::newArray(getMatrixForTransformations()));
    }

    return form;
}
731
732
// Calculate the transformation matrix that places the form XObject fo fully inside rect,
// centered, shrinking or expanding it when allowed. A default-constructed QPDFMatrix is returned
// when fo has no valid /BBox or when its transformed bounding box has zero width or height.
QPDFMatrix
QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
    QPDFObjectHandle fo,
    QPDFObjectHandle::Rectangle rect,
    bool invert_transformations,
    bool allow_shrink,
    bool allow_expand)
{
    // When rendering a form XObject, the transformation in the graphics state (cm) is applied
    // first (when it is applied, the PDF interpreter doesn't even know a form XObject is about to
    // be drawn), and then the object's own matrix (M) is applied. The resulting matrix, applied
    // to the form XObject's bounding box, generates a new rectangle. We want a transformation
    // matrix that makes that bounding box land in exactly the right spot.

    QPDFObjectHandle form_dict = fo.getDict();
    QPDFObjectHandle bbox_obj = form_dict.getKey("/BBox");
    if (!bbox_obj.isRectangle()) {
        return QPDFMatrix();
    }

    QPDFMatrix work;        // scratch matrix used for the intermediate calculations
    QPDFMatrix page_matrix; // "to" matrix: undoes the destination page's scale/rotation
    QPDFMatrix form_matrix; // "from" matrix: the form XObject's own /Matrix
    if (invert_transformations) {
        // page_matrix inverts scaling and rotation of the destination page. Applying it lets the
        // overlaid form XObject be absolute rather than relative to properties of the destination
        // page. It is part of the computed transformation matrix.
        page_matrix = QPDFMatrix(getMatrixForTransformations(true));
        work.concat(page_matrix);
    }
    if (form_dict.getKey("/Matrix").isMatrix()) {
        // form_matrix is the transformation applied to the form XObject itself. It is needed for
        // the calculations but is not part of the final result, because the PDF rendering system
        // applies it automatically, last, before drawing the form XObject.
        form_matrix = QPDFMatrix(form_dict.getKey("/Matrix").getArrayAsMatrix());
        work.concat(form_matrix);
    }

    // work now handles the transformation from the form XObject and, if requested, the
    // destination page. Next it has to be adjusted for scale and position.

    // Step 1: find the scale factor that makes the transformed bounding box fit within the
    // destination rectangle.
    QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
    QPDFObjectHandle::Rectangle placed = work.transformRectangle(bbox);
    if ((placed.urx == placed.llx) || (placed.ury == placed.lly)) {
        // avoid division by zero
        return QPDFMatrix();
    }
    double const target_w = rect.urx - rect.llx;
    double const target_h = rect.ury - rect.lly;
    double const placed_w = placed.urx - placed.llx;
    double const placed_h = placed.ury - placed.lly;
    double const xscale = target_w / placed_w;
    double const yscale = target_h / placed_h;
    double scale = (xscale < yscale ? xscale : yscale);
    // Expanding and shrinking both need explicit permission.
    if ((scale > 1.0 && !allow_expand) || (scale < 1.0 && !allow_shrink)) {
        scale = 1.0;
    }

    // Step 2: find the translation that centers the scaled rectangle within the destination.
    work = QPDFMatrix();
    work.scale(scale, scale);
    work.concat(page_matrix);
    work.concat(form_matrix);

    placed = work.transformRectangle(bbox);
    double const placed_cx = (placed.llx + placed.urx) / 2.0;
    double const placed_cy = (placed.lly + placed.ury) / 2.0;
    double const target_cx = (rect.llx + rect.urx) / 2.0;
    double const target_cy = (rect.lly + rect.ury) / 2.0;
    double const tx = target_cx - placed_cx;
    double const ty = target_cy - placed_cy;

    // The final matrix does not include form_matrix because that is applied automatically by the
    // PDF interpreter.
    QPDFMatrix result;
    result.translate(tx, ty);
    result.scale(scale, scale);
    result.concat(page_matrix);
    return result;
}
830
831
std::string
832
QPDFPageObjectHelper::placeFormXObject(
833
    QPDFObjectHandle fo,
834
    std::string const& name,
835
    QPDFObjectHandle::Rectangle rect,
836
    bool invert_transformations,
837
    bool allow_shrink,
838
    bool allow_expand)
839
0
{
840
0
    QPDFMatrix cm;
841
0
    return placeFormXObject(fo, name, rect, cm, invert_transformations, allow_shrink, allow_expand);
842
0
}
843
844
std::string
845
QPDFPageObjectHelper::placeFormXObject(
846
    QPDFObjectHandle fo,
847
    std::string const& name,
848
    QPDFObjectHandle::Rectangle rect,
849
    QPDFMatrix& cm,
850
    bool invert_transformations,
851
    bool allow_shrink,
852
    bool allow_expand)
853
0
{
854
0
    cm = getMatrixForFormXObjectPlacement(
855
0
        fo, rect, invert_transformations, allow_shrink, allow_expand);
856
0
    return ("q\n" + cm.unparse() + " cm\n" + name + " Do\n" + "Q\n");
857
0
}
858
859
void
860
QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
861
0
{
862
0
    QPDF& qpdf = oh().getQPDF("QPDFPageObjectHelper::flattenRotation called with a direct object");
863
0
    auto rotate_oh = oh().getKey("/Rotate");
864
0
    int rotate = 0;
865
0
    if (rotate_oh.isInteger()) {
866
0
        rotate = rotate_oh.getIntValueAsInt();
867
0
    }
868
0
    if (!((rotate == 90) || (rotate == 180) || (rotate == 270))) {
869
0
        return;
870
0
    }
871
0
    auto mediabox = oh().getKey("/MediaBox");
872
0
    if (!mediabox.isRectangle()) {
873
0
        return;
874
0
    }
875
0
    auto media_rect = mediabox.getArrayAsRectangle();
876
877
0
    std::vector<std::string> boxes = {
878
0
        "/MediaBox",
879
0
        "/CropBox",
880
0
        "/BleedBox",
881
0
        "/TrimBox",
882
0
        "/ArtBox",
883
0
    };
884
0
    for (auto const& boxkey: boxes) {
885
0
        auto box = oh().getKey(boxkey);
886
0
        if (!box.isRectangle()) {
887
0
            continue;
888
0
        }
889
0
        auto rect = box.getArrayAsRectangle();
890
0
        decltype(rect) new_rect;
891
892
        // How far are the edges of our rectangle from the edges of the media box?
893
0
        auto left_x = rect.llx - media_rect.llx;
894
0
        auto right_x = media_rect.urx - rect.urx;
895
0
        auto bottom_y = rect.lly - media_rect.lly;
896
0
        auto top_y = media_rect.ury - rect.ury;
897
898
        // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
899
        // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the
900
        // corners to be the correct distance away from the corners of the mediabox.
901
0
        switch (rotate) {
902
0
        case 90:
903
0
            new_rect.llx = media_rect.lly + bottom_y;
904
0
            new_rect.urx = media_rect.ury - top_y;
905
0
            new_rect.lly = media_rect.llx + right_x;
906
0
            new_rect.ury = media_rect.urx - left_x;
907
0
            break;
908
909
0
        case 180:
910
0
            new_rect.llx = media_rect.llx + right_x;
911
0
            new_rect.urx = media_rect.urx - left_x;
912
0
            new_rect.lly = media_rect.lly + top_y;
913
0
            new_rect.ury = media_rect.ury - bottom_y;
914
0
            break;
915
916
0
        case 270:
917
0
            new_rect.llx = media_rect.lly + top_y;
918
0
            new_rect.urx = media_rect.ury - bottom_y;
919
0
            new_rect.lly = media_rect.llx + left_x;
920
0
            new_rect.ury = media_rect.urx - right_x;
921
0
            break;
922
923
0
        default:
924
            // ignore
925
0
            break;
926
0
        }
927
928
0
        oh().replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
929
0
    }
930
931
    // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
932
    // with the origin point being the same offset from the lower left corner of the media box.
933
    // These calculations have been verified empirically with various
934
    // PDF readers.
935
0
    QPDFMatrix cm(0, 0, 0, 0, 0, 0);
936
0
    switch (rotate) {
937
0
    case 90:
938
0
        cm.b = -1;
939
0
        cm.c = 1;
940
0
        cm.f = media_rect.urx + media_rect.llx;
941
0
        break;
942
943
0
    case 180:
944
0
        cm.a = -1;
945
0
        cm.d = -1;
946
0
        cm.e = media_rect.urx + media_rect.llx;
947
0
        cm.f = media_rect.ury + media_rect.lly;
948
0
        break;
949
950
0
    case 270:
951
0
        cm.b = 1;
952
0
        cm.c = -1;
953
0
        cm.e = media_rect.ury + media_rect.lly;
954
0
        break;
955
956
0
    default:
957
0
        break;
958
0
    }
959
0
    std::string cm_str = std::string("q\n") + cm.unparse() + " cm\n";
960
0
    oh().addPageContents(QPDFObjectHandle::newStream(&qpdf, cm_str), true);
961
0
    oh().addPageContents(qpdf.newStream("\nQ\n"), false);
962
0
    oh().removeKey("/Rotate");
963
0
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
964
0
    if (!rotate_obj.null()) {
965
0
        oh().replaceKey("/Rotate", QPDFObjectHandle::newInteger(0));
966
0
    }
967
968
0
    QPDFObjectHandle annots = oh().getKey("/Annots");
969
0
    if (annots.isArray()) {
970
0
        std::vector<QPDFObjectHandle> new_annots;
971
0
        std::vector<QPDFObjectHandle> new_fields;
972
0
        std::set<QPDFObjGen> old_fields;
973
0
        std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
974
0
        if (!afdh) {
975
0
            afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(qpdf);
976
0
            afdh = afdhph.get();
977
0
        }
978
0
        afdh->transformAnnotations(annots, new_annots, new_fields, old_fields, cm);
979
0
        afdh->removeFormFields(old_fields);
980
0
        for (auto const& f: new_fields) {
981
0
            afdh->addFormField(QPDFFormFieldObjectHelper(f));
982
0
        }
983
0
        oh().replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots));
984
0
    }
985
0
}
986
987
void
988
QPDFPageObjectHelper::copyAnnotations(
989
    QPDFPageObjectHelper from_page,
990
    QPDFMatrix const& cm,
991
    QPDFAcroFormDocumentHelper* afdh,
992
    QPDFAcroFormDocumentHelper* from_afdh)
993
0
{
994
0
    auto old_annots = from_page.getObjectHandle().getKey("/Annots");
995
0
    if (!old_annots.isArray()) {
996
0
        return;
997
0
    }
998
999
0
    QPDF& from_qpdf = from_page.getObjectHandle().getQPDF(
1000
0
        "QPDFPageObjectHelper::copyAnnotations: from page is a direct object");
1001
0
    QPDF& this_qpdf =
1002
0
        oh().getQPDF("QPDFPageObjectHelper::copyAnnotations: this page is a direct object");
1003
1004
0
    std::vector<QPDFObjectHandle> new_annots;
1005
0
    std::vector<QPDFObjectHandle> new_fields;
1006
0
    std::set<QPDFObjGen> old_fields;
1007
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
1008
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> from_afdhph;
1009
0
    if (!afdh) {
1010
0
        afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(this_qpdf);
1011
0
        afdh = afdhph.get();
1012
0
    }
1013
0
    if (&this_qpdf == &from_qpdf) {
1014
0
        from_afdh = afdh;
1015
0
    } else if (from_afdh) {
1016
0
        if (from_afdh->getQPDF().getUniqueId() != from_qpdf.getUniqueId()) {
1017
0
            throw std::logic_error(
1018
0
                "QPDFAcroFormDocumentHelper::copyAnnotations: from_afdh"
1019
0
                " is not from the same QPDF as from_page");
1020
0
        }
1021
0
    } else {
1022
0
        from_afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(from_qpdf);
1023
0
        from_afdh = from_afdhph.get();
1024
0
    }
1025
1026
0
    afdh->transformAnnotations(
1027
0
        old_annots, new_annots, new_fields, old_fields, cm, &from_qpdf, from_afdh);
1028
0
    afdh->addAndRenameFormFields(new_fields);
1029
0
    auto annots = oh().getKey("/Annots");
1030
0
    if (!annots.isArray()) {
1031
0
        annots = oh().replaceKeyAndGetNew("/Annots", QPDFObjectHandle::newArray());
1032
0
    }
1033
0
    for (auto const& annot: new_annots) {
1034
0
        annots.appendItem(annot);
1035
0
    }
1036
0
}