Coverage Report

Created: 2025-08-26 07:09

/src/qpdf/libqpdf/QPDFPageObjectHelper.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDFPageObjectHelper.hh>
2
3
#include <qpdf/Pl_Buffer.hh>
4
#include <qpdf/Pl_Concatenate.hh>
5
#include <qpdf/QIntC.hh>
6
#include <qpdf/QPDF.hh>
7
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
8
#include <qpdf/QPDFExc.hh>
9
#include <qpdf/QPDFMatrix.hh>
10
#include <qpdf/QPDFObjectHandle_private.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/QUtil.hh>
13
#include <qpdf/ResourceFinder.hh>
14
15
using namespace std::literals;
16
17
namespace
18
{
19
    class ContentProvider: public QPDFObjectHandle::StreamDataProvider
20
    {
21
      public:
22
        ContentProvider(QPDFObjectHandle from_page) :
23
0
            from_page(from_page)
24
0
        {
25
0
        }
26
0
        ~ContentProvider() override = default;
27
        void provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override;
28
29
      private:
30
        QPDFObjectHandle from_page;
31
    };
32
} // namespace
33
34
void
35
ContentProvider::provideStreamData(QPDFObjGen const&, Pipeline* p)
36
0
{
37
0
    Pl_Concatenate concat("concatenate", p);
38
0
    std::string description = "contents from page object " + from_page.getObjGen().unparse(' ');
39
0
    std::string all_description;
40
0
    from_page.getKey("/Contents").pipeContentStreams(&concat, description, all_description);
41
0
    concat.manualFinish();
42
0
}
43
44
namespace
45
{
46
    class InlineImageTracker: public QPDFObjectHandle::TokenFilter
47
    {
48
      public:
49
        InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
50
0
        ~InlineImageTracker() override = default;
51
        void handleToken(QPDFTokenizer::Token const&) override;
52
        QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
53
54
        QPDF* qpdf;
55
        size_t min_size;
56
        QPDFObjectHandle resources;
57
        std::string dict_str;
58
        std::string bi_str;
59
        int min_suffix{1};
60
        bool any_images{false};
61
        enum { st_top, st_bi } state{st_top};
62
    };
63
} // namespace
64
65
// Store the owning QPDF, the externalization size threshold, and the target resources dictionary.
// All other members keep their in-class default initializers.
InlineImageTracker::InlineImageTracker(QPDF* qpdf, size_t min_size, QPDFObjectHandle resources) :
    qpdf(qpdf),
    min_size(min_size),
    resources(resources)
{
}
71
72
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
    // Translate an inline image dictionary into an image XObject dictionary. Abbreviated keys,
    // color space names, and filter names are expanded to their full spellings; anything that is
    // not a known abbreviation is copied through unchanged. Null values are skipped.

    // Abbreviated dictionary keys -> full key names.
    static std::map<std::string, std::string> const full_keys{
        {"/BPC", "/BitsPerComponent"},
        {"/CS", "/ColorSpace"},
        {"/D", "/Decode"},
        {"/DP", "/DecodeParms"},
        {"/F", "/Filter"},
        {"/H", "/Height"},
        {"/IM", "/ImageMask"},
        {"/I", "/Interpolate"},
        {"/W", "/Width"},
    };
    // Abbreviated color space names -> full names.
    static std::map<std::string, std::string> const full_colorspaces{
        {"/G", "/DeviceGray"},
        {"/RGB", "/DeviceRGB"},
        {"/CMYK", "/DeviceCMYK"},
        {"/I", "/Indexed"},
    };
    // Abbreviated filter names -> full names.
    static std::map<std::string, std::string> const full_filters{
        {"/AHx", "/ASCIIHexDecode"},
        {"/A85", "/ASCII85Decode"},
        {"/LZW", "/LZWDecode"},
        {"/Fl", "/FlateDecode"},
        {"/RL", "/RunLengthDecode"},
        {"/CCF", "/CCITTFaxDecode"},
        {"/DCT", "/DCTDecode"},
    };

    QPDFObjectHandle dict = QPDFObjectHandle::newDictionary();
    dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));

    for (auto const& [k, v]: odict.as_dictionary()) {
        if (v.null()) {
            continue;
        }
        auto key = k;
        auto value = v;
        if (auto expanded = full_keys.find(key); expanded != full_keys.end()) {
            key = expanded->second;
        }

        if (key == "/ColorSpace") {
            if (value.isName()) {
                std::string const name = value.getName();
                auto known = full_colorspaces.find(name);
                if (known != full_colorspaces.end()) {
                    value = QPDFObjectHandle::newName(known->second);
                } else {
                    // This is a key in the page's /Resources -> /ColorSpace dictionary. Look it
                    // up and use its value as the color space for the image.
                    QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
                    if (colorspace.isDictionary() && colorspace.hasKey(name)) {
                        QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
                        value = colorspace.getKey(name);
                    } else {
                        // Leave the original name in place; it could not be resolved.
                        resources.warn("unable to resolve colorspace " + name);
                    }
                }
            }
        } else if (key == "/Filter") {
            // A filter is either a single name or an array of names; anything else is copied
            // through untouched.
            bool const single_filter = value.isName();
            bool const filter_array = !single_filter && value.isArray();
            std::vector<QPDFObjectHandle> filters;
            if (single_filter) {
                filters.push_back(value);
            } else if (filter_array) {
                filters = value.getArrayAsVector();
            }
            for (auto& filter: filters) {
                if (!filter.isName()) {
                    continue;
                }
                auto known = full_filters.find(filter.getName());
                if (known != full_filters.end()) {
                    filter = QPDFObjectHandle::newName(known->second);
                }
            }
            if (single_filter) {
                value = filters.front();
            } else if (filter_array) {
                value = QPDFObjectHandle::newArray(filters);
            }
        }
        dict.replaceKey(key, value);
    }
    return dict;
}
174
175
void
176
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
177
0
{
178
0
    if (state == st_bi) {
179
0
        if (token.getType() == QPDFTokenizer::tt_inline_image) {
180
0
            std::string image_data(token.getValue());
181
0
            size_t len = image_data.length();
182
0
            if (len >= min_size) {
183
0
                QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
184
0
                QPDFObjectHandle dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
185
0
                dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
186
0
                std::string name = resources.getUniqueResourceName("/IIm", min_suffix);
187
0
                QPDFObjectHandle image = QPDFObjectHandle::newStream(
188
0
                    qpdf, std::make_shared<Buffer>(std::move(image_data)));
189
0
                image.replaceDict(dict);
190
0
                resources.getKey("/XObject").replaceKey(name, image);
191
0
                write(name);
192
0
                write(" Do\n");
193
0
                any_images = true;
194
0
            } else {
195
0
                QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
196
0
                write(bi_str);
197
0
                writeToken(token);
198
0
                state = st_top;
199
0
            }
200
0
        } else if (token.isWord("ID")) {
201
0
            bi_str += token.getValue();
202
0
            dict_str += " >>";
203
0
        } else if (token.isWord("EI")) {
204
0
            state = st_top;
205
0
        } else {
206
0
            bi_str += token.getRawValue();
207
0
            dict_str += token.getRawValue();
208
0
        }
209
0
    } else if (token.isWord("BI")) {
210
0
        bi_str = token.getValue();
211
0
        dict_str = "<< ";
212
0
        state = st_bi;
213
0
    } else {
214
0
        writeToken(token);
215
0
    }
216
0
}
217
218
// Wrap the given object handle; all state lives in the QPDFObjectHelper base.
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle oh) :
    QPDFObjectHelper(oh)
{
}
222
223
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(std::string const& name, bool copy_if_shared)
{
    // Convenience overload: no fallback provider, so the fallback-copy flag is irrelevant.
    bool const copy_if_fallback = false;
    return getAttribute(name, copy_if_shared, nullptr, copy_if_fallback);
}
228
229
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(
    std::string const& name,
    bool copy_if_shared,
    std::function<QPDFObjectHandle()> get_fallback,
    bool copy_if_fallback)
{
    // Look up an attribute on this page or form XObject.
    //
    // - For a form XObject the stream dictionary is consulted; for a page, the page dictionary.
    // - For pages, /MediaBox, /CropBox, /Resources and /Rotate are searched for up the /Parent
    //   chain when absent from the page itself.
    // - If copy_if_shared is set and the value was inherited or is indirect, a shallow copy is
    //   stored directly in the dictionary and that copy is returned.
    // - If the value is still null and get_fallback is callable, its result is used;
    //   copy_if_fallback controls whether a non-null fallback is copied into the dictionary.
    bool const form_xobject = oh().isFormXObject();
    auto dict = form_xobject ? oh().getDict() : oh();
    auto result = dict.getKey(name);
    bool inherited = false;

    bool const inheritable =
        (name == "/MediaBox" || name == "/CropBox" || name == "/Resources" || name == "/Rotate");
    if (!form_xobject && result.isNull() && inheritable) {
        // The seen set guards against loops in the /Parent chain.
        QPDFObjGen::set seen{};
        for (QPDFObjectHandle node = dict; seen.add(node) && node.hasKey("/Parent");) {
            node = node.getKey("/Parent");
            result = node.getKey(name);
            if (result.isNull()) {
                continue;
            }
            QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
            inherited = true;
            break;
        }
    }

    if (copy_if_shared && (inherited || result.isIndirect())) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute", form_xobject ? 0 : 1);
        result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
    }

    if (result.isNull() && get_fallback) {
        result = get_fallback();
        if (copy_if_fallback && !result.isNull()) {
            QTC::TC("qpdf", "QPDFPageObjectHelper copied fallback");
            result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
        } else {
            QTC::TC("qpdf", "QPDFPageObjectHelper used fallback without copying");
        }
    }
    return result;
}
270
271
QPDFObjectHandle
QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
{
    // /MediaBox has no fallback; it is only subject to the inheritance handled by getAttribute.
    auto media_box = getAttribute("/MediaBox", copy_if_shared);
    return media_box;
}
276
277
QPDFObjectHandle
QPDFPageObjectHelper::getCropBox(bool copy_if_shared, bool copy_if_fallback)
{
    // When /CropBox is absent, the media box is used in its place.
    auto media_box_fallback = [this, copy_if_shared]() { return getMediaBox(copy_if_shared); };
    return getAttribute("/CropBox", copy_if_shared, media_box_fallback, copy_if_fallback);
}
286
287
QPDFObjectHandle
QPDFPageObjectHelper::getTrimBox(bool copy_if_shared, bool copy_if_fallback)
{
    // When /TrimBox is absent, the crop box (with its own fallback) is used in its place.
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/TrimBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
298
299
QPDFObjectHandle
QPDFPageObjectHelper::getArtBox(bool copy_if_shared, bool copy_if_fallback)
{
    // When /ArtBox is absent, the crop box (with its own fallback) is used in its place.
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/ArtBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
310
311
QPDFObjectHandle
QPDFPageObjectHelper::getBleedBox(bool copy_if_shared, bool copy_if_fallback)
{
    // When /BleedBox is absent, the crop box (with its own fallback) is used in its place.
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/BleedBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
322
323
void
324
QPDFPageObjectHelper::forEachXObject(
325
    bool recursive,
326
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
327
        action,
328
    std::function<bool(QPDFObjectHandle)> selector)
329
0
{
330
0
    QTC::TC(
331
0
        "qpdf",
332
0
        "QPDFPageObjectHelper::forEachXObject",
333
0
        recursive ? (oh().isFormXObject() ? 0 : 1) : (oh().isFormXObject() ? 2 : 3));
334
0
    QPDFObjGen::set seen;
335
0
    std::list<QPDFPageObjectHelper> queue;
336
0
    queue.emplace_back(*this);
337
0
    while (!queue.empty()) {
338
0
        auto& ph = queue.front();
339
0
        if (seen.add(ph)) {
340
0
            auto xobj_dict = ph.getAttribute("/Resources", false).getKeyIfDict("/XObject");
341
0
            for (auto const& [key, value]: xobj_dict.as_dictionary()) {
342
0
                if (value.null()) {
343
0
                    continue;
344
0
                }
345
0
                auto obj = value;
346
0
                if ((!selector) || selector(obj)) {
347
0
                    action(obj, xobj_dict, key);
348
0
                }
349
0
                if (recursive && obj.isFormXObject()) {
350
0
                    queue.emplace_back(obj);
351
0
                }
352
0
            }
353
0
        }
354
0
        queue.pop_front();
355
0
    }
356
0
}
357
358
void
359
QPDFPageObjectHelper::forEachImage(
360
    bool recursive,
361
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
362
        action)
363
0
{
364
0
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isImage(); });
365
0
}
366
367
void
368
QPDFPageObjectHelper::forEachFormXObject(
369
    bool recursive,
370
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
371
        action)
372
0
{
373
0
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isFormXObject(); });
374
0
}
375
376
std::map<std::string, QPDFObjectHandle>
377
QPDFPageObjectHelper::getPageImages()
378
0
{
379
0
    return getImages();
380
0
}
381
382
std::map<std::string, QPDFObjectHandle>
383
QPDFPageObjectHelper::getImages()
384
0
{
385
0
    std::map<std::string, QPDFObjectHandle> result;
386
0
    forEachImage(
387
0
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
388
0
            result[key] = obj;
389
0
        });
390
0
    return result;
391
0
}
392
393
std::map<std::string, QPDFObjectHandle>
394
QPDFPageObjectHelper::getFormXObjects()
395
0
{
396
0
    std::map<std::string, QPDFObjectHandle> result;
397
0
    forEachFormXObject(
398
0
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
399
0
            result[key] = obj;
400
0
        });
401
0
    return result;
402
0
}
403
404
void
405
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
406
0
{
407
0
    if (shallow) {
408
0
        QPDFObjectHandle resources = getAttribute("/Resources", true);
409
        // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
410
        // other pages.
411
0
        resources.mergeResources("<< /XObject << >> >>"_qpdf);
412
0
        InlineImageTracker iit(oh().getOwningQPDF(), min_size, resources);
413
0
        Pl_Buffer b("new page content");
414
0
        bool filtered = false;
415
0
        try {
416
0
            filterContents(&iit, &b);
417
0
            filtered = true;
418
0
        } catch (std::exception& e) {
419
0
            warn(
420
0
                "Unable to filter content stream: "s + e.what() +
421
0
                "; not attempting to externalize inline images from this stream");
422
0
        }
423
0
        if (filtered && iit.any_images) {
424
0
            if (oh().isFormXObject()) {
425
0
                oh().replaceStreamData(
426
0
                    b.getBufferSharedPointer(),
427
0
                    QPDFObjectHandle::newNull(),
428
0
                    QPDFObjectHandle::newNull());
429
0
            } else {
430
0
                oh().replaceKey(
431
0
                    "/Contents",
432
0
                    QPDFObjectHandle::newStream(&oh().getQPDF(), b.getBufferSharedPointer()));
433
0
            }
434
0
        }
435
0
    } else {
436
0
        externalizeInlineImages(min_size, true);
437
0
        forEachFormXObject(
438
0
            true, [min_size](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
439
0
                QPDFPageObjectHelper(obj).externalizeInlineImages(min_size, true);
440
0
            });
441
0
    }
442
0
}
443
444
std::vector<QPDFAnnotationObjectHelper>
445
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
446
0
{
447
0
    std::vector<QPDFAnnotationObjectHelper> result;
448
0
    for (auto const& annot: oh().getKey("/Annots").as_array()) {
449
0
        if (annot.isDictionaryOfType("", only_subtype)) {
450
0
            result.emplace_back(annot);
451
0
        }
452
0
    }
453
0
    return result;
454
0
}
455
456
std::vector<QPDFObjectHandle>
457
QPDFPageObjectHelper::getPageContents()
458
0
{
459
0
    return oh().getPageContents();
460
0
}
461
462
void
463
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
464
0
{
465
0
    oh().addPageContents(contents, first);
466
0
}
467
468
void
469
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
470
0
{
471
0
    oh().rotatePage(angle, relative);
472
0
}
473
474
void
475
QPDFPageObjectHelper::coalesceContentStreams()
476
0
{
477
0
    oh().coalesceContentStreams();
478
0
}
479
480
void
481
QPDFPageObjectHelper::parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks)
482
0
{
483
0
    parseContents(callbacks);
484
0
}
485
486
void
487
QPDFPageObjectHelper::parseContents(QPDFObjectHandle::ParserCallbacks* callbacks)
488
0
{
489
0
    if (oh().isFormXObject()) {
490
0
        oh().parseAsContents(callbacks);
491
0
    } else {
492
0
        oh().parsePageContents(callbacks);
493
0
    }
494
0
}
495
496
void
497
QPDFPageObjectHelper::filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
498
0
{
499
0
    return filterContents(filter, next);
500
0
}
501
502
void
503
QPDFPageObjectHelper::filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
504
0
{
505
0
    if (oh().isFormXObject()) {
506
0
        oh().filterAsContents(filter, next);
507
0
    } else {
508
0
        oh().filterPageContents(filter, next);
509
0
    }
510
0
}
511
512
void
513
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
514
0
{
515
0
    pipeContents(p);
516
0
}
517
518
void
519
QPDFPageObjectHelper::pipeContents(Pipeline* p)
520
0
{
521
0
    if (oh().isFormXObject()) {
522
0
        oh().pipeStreamData(p, 0, qpdf_dl_specialized);
523
0
    } else {
524
0
        oh().pipePageContents(p);
525
0
    }
526
0
}
527
528
void
529
QPDFPageObjectHelper::addContentTokenFilter(
530
    std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter)
531
0
{
532
0
    if (oh().isFormXObject()) {
533
0
        oh().addTokenFilter(token_filter);
534
0
    } else {
535
0
        oh().addContentTokenFilter(token_filter);
536
0
    }
537
0
}
538
539
bool
540
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
541
    QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
542
0
{
543
0
    const bool is_page = !ph.oh().isFormXObject();
544
0
    if (!is_page) {
545
0
        QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
546
0
    }
547
548
0
    ResourceFinder rf;
549
0
    try {
550
0
        auto q = ph.qpdf();
551
0
        size_t before_nw = (q ? q->numWarnings() : 0);
552
0
        ph.parseContents(&rf);
553
0
        size_t after_nw = (q ? q->numWarnings() : 0);
554
0
        if (after_nw > before_nw) {
555
0
            ph.warn(
556
0
                "Bad token found while scanning content stream; "
557
0
                "not attempting to remove unreferenced objects from this object");
558
0
            return false;
559
0
        }
560
0
    } catch (std::exception& e) {
561
0
        QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
562
0
        ph.warn(
563
0
            "Unable to parse content stream: "s + e.what() +
564
0
            "; not attempting to remove unreferenced objects from this object");
565
0
        return false;
566
0
    }
567
568
    // We will walk through /Font and /XObject dictionaries, removing any resources that are not
569
    // referenced. We must make copies of resource dictionaries down into the dictionaries are
570
    // mutating to prevent mutating one dictionary from having the side effect of mutating the one
571
    // it was copied from.
572
0
    QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
573
0
    std::vector<QPDFObjectHandle> rdicts;
574
0
    std::set<std::string> known_names;
575
0
    if (resources.isDictionary()) {
576
0
        for (auto const& iter: {"/Font", "/XObject"}) {
577
0
            QPDFObjectHandle dict = resources.getKey(iter);
578
0
            if (dict.isDictionary()) {
579
0
                dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy());
580
0
                rdicts.push_back(dict);
581
0
                known_names.merge(dict.getKeys());
582
0
            }
583
0
        }
584
0
    }
585
586
0
    std::set<std::string> local_unresolved;
587
0
    auto const& names_by_rtype = rf.getNamesByResourceType();
588
0
    for (auto const& i1: {"/Font", "/XObject"}) {
589
0
        auto it = names_by_rtype.find(i1);
590
0
        if (it == names_by_rtype.end()) {
591
0
            continue;
592
0
        }
593
0
        for (auto const& n_iter: it->second) {
594
0
            std::string const& name = n_iter.first;
595
0
            if (!known_names.contains(name)) {
596
0
                unresolved.insert(name);
597
0
                local_unresolved.insert(name);
598
0
            }
599
0
        }
600
0
    }
601
    // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
602
    // which case names were resolved from the containing page. This behavior seems to be widely
603
    // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved
604
    // names, some viewers fail to resolve them, and others allow them to be inherited from the page
605
    // or from another form XObjects that contains them. Since this behavior is inconsistent across
606
    // viewers, we consider an unresolved name when a resources dictionary is present to be reason
607
    // not to remove unreferenced resources. An unresolved name in the absence of a resource
608
    // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
609
    // unresolved names, and for page objects, we avoid removing any such names found in nested form
610
    // XObjects.
611
612
0
    if (!local_unresolved.empty() && resources.isDictionary()) {
613
        // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
614
        // looking at names that are referencing fonts and XObjects, but until we're certain that we
615
        // know the meaning of every name in a content stream, we don't want to give warnings that
616
        // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
617
        // there's nothing to be done about it, so there's no good reason to issue a warning. The
618
        // only sad thing is that it was a false positive that alerted me to a logic error in the
619
        // code, and any future such errors would now be hidden.
620
0
        QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
621
0
        return false;
622
0
    }
623
624
0
    for (auto& dict: rdicts) {
625
0
        for (auto const& key: dict.getKeys()) {
626
0
            if (is_page && unresolved.contains(key)) {
627
                // This name is referenced by some nested form xobject, so don't remove it.
628
0
                QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
629
0
            } else if (!rf.getNames().contains(key)) {
630
0
                dict.removeKey(key);
631
0
            }
632
0
        }
633
0
    }
634
0
    return true;
635
0
}
636
637
void
638
QPDFPageObjectHelper::removeUnreferencedResources()
639
0
{
640
    // Accumulate a list of unresolved names across all nested form XObjects.
641
0
    std::set<std::string> unresolved;
642
0
    bool any_failures = false;
643
0
    forEachFormXObject(
644
0
        true,
645
0
        [&any_failures, &unresolved](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
646
0
            if (!removeUnreferencedResourcesHelper(QPDFPageObjectHelper(obj), unresolved)) {
647
0
                any_failures = true;
648
0
            }
649
0
        });
650
0
    if (oh().isFormXObject() || (!any_failures)) {
651
0
        removeUnreferencedResourcesHelper(*this, unresolved);
652
0
    }
653
0
}
654
655
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
    // Make a shallow copy of the page object and register it as a new indirect object in the
    // QPDF that owns this page. The message below is passed to getQPDF for the case where this
    // page is a direct object.
    QPDF& owner = oh().getQPDF("QPDFPageObjectHelper::shallowCopyPage called with a direct object");
    QPDFObjectHandle copy = oh().shallowCopy();
    return QPDFPageObjectHelper(owner.makeIndirectObject(copy));
}
662
663
QPDFObjectHandle::Matrix
QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
{
    // Build the matrix that accounts for the page's /Rotate and /UserUnit, sized against the
    // trim box. With invert set, the inverse scale and opposite rotation are used. The identity
    // matrix is returned when the trim box is not a rectangle, when neither attribute is
    // present, or when inversion is requested with a scale of zero.
    QPDFObjectHandle::Matrix const identity(1, 0, 0, 1, 0, 0);

    QPDFObjectHandle bbox = getTrimBox(false);
    if (!bbox.isRectangle()) {
        return identity;
    }
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (rotate_obj.isNull() && scale_obj.isNull()) {
        return identity;
    }

    QPDFObjectHandle::Rectangle rect = bbox.getArrayAsRectangle();
    double const width = rect.urx - rect.llx;
    double const height = rect.ury - rect.lly;
    double scale = 1.0;
    if (scale_obj.isNumber()) {
        scale = scale_obj.getNumericValue();
    }
    int rotate = 0;
    if (rotate_obj.isInteger()) {
        rotate = rotate_obj.getIntValueAsInt();
    }
    if (invert) {
        if (scale == 0.0) {
            return identity;
        }
        scale = 1.0 / scale;
        rotate = 360 - rotate;
    }

    // Rotations other than 90, 180 and 270 are ignored: only the scale is applied.
    if (rotate == 90) {
        return QPDFObjectHandle::Matrix(0, -scale, scale, 0, 0, width * scale);
    }
    if (rotate == 180) {
        return QPDFObjectHandle::Matrix(-scale, 0, 0, -scale, width * scale, height * scale);
    }
    if (rotate == 270) {
        return QPDFObjectHandle::Matrix(0, scale, -scale, 0, height * scale, 0);
    }
    return QPDFObjectHandle::Matrix(scale, 0, 0, scale, 0, 0);
}
705
706
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
    // Create a new form XObject stream, in the QPDF that owns this page, that draws this page.
    //
    // The stream dictionary gets shallow copies of the page's /Resources and /Group, and a /BBox
    // taken from the trim box. The stream data is supplied on demand from the page's content by
    // a ContentProvider. If handle_transformations is set and the page has /Rotate or /UserUnit,
    // a /Matrix from getMatrixForTransformations() is added as well.
    //
    // The message below is passed to getQPDF for the case where this page is a direct object.
    auto result =
        oh().getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object")
            .newStream();
    QPDFObjectHandle newdict = result.getDict();
    newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
    newdict.replaceKey("/Resources", getAttribute("/Resources", false).shallowCopy());
    newdict.replaceKey("/Group", getAttribute("/Group", false).shallowCopy());
    QPDFObjectHandle bbox = getTrimBox(false).shallowCopy();
    if (!bbox.isRectangle()) {
        // Warn but carry on: the (invalid) value is still stored as /BBox below.
        warn("bounding box is invalid; form XObject created from page will not work");
    }
    newdict.replaceKey("/BBox", bbox);

    // make_shared instead of a raw new: one allocation and no naked owning pointer.
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider =
        std::make_shared<ContentProvider>(oh());
    result.replaceStreamData(provider, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());

    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (handle_transformations && (!(rotate_obj.isNull() && scale_obj.isNull()))) {
        newdict.replaceKey("/Matrix", QPDFObjectHandle::newArray(getMatrixForTransformations()));
    }

    return result;
}
733
734
QPDFMatrix
QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
    QPDFObjectHandle fo,
    QPDFObjectHandle::Rectangle rect,
    bool invert_transformations,
    bool allow_shrink,
    bool allow_expand)
{
    // Calculate the transformation matrix that will place the given form XObject fully inside the
    // given rectangle, centering it and shrinking or expanding as needed if requested. A
    // default-constructed matrix is returned when the form XObject has no valid /BBox or when its
    // transformed bounding box has zero width or height.

    // When rendering a form XObject, the transformation in the graphics state (cm) is applied
    // first (when it is applied, the PDF interpreter doesn't even know a form XObject is about to
    // be drawn), and then the object's matrix (M) is applied. The resulting matrix, when applied
    // to the form XObject's bounding box, generates a new rectangle. We want a transformation
    // matrix that makes the form XObject's bounding box land in exactly the right spot.

    QPDFObjectHandle fdict = fo.getDict();
    QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
    if (!bbox_obj.isRectangle()) {
        return QPDFMatrix();
    }

    QPDFMatrix probe;        // combined transformation before scaling
    QPDFMatrix page_inverse; // "to" matrix: undoes the destination page's transformations
    QPDFMatrix form_matrix;  // "from" matrix: the form XObject's own /Matrix
    if (invert_transformations) {
        // page_inverse inverts scaling and rotation of the destination page. Applying it allows
        // the overlaid form XObject's placement to be absolute rather than relative to properties
        // of the destination page. It is part of the computed transformation matrix.
        page_inverse = QPDFMatrix(getMatrixForTransformations(true));
        probe.concat(page_inverse);
    }
    QPDFObjectHandle matrix_obj = fdict.getKey("/Matrix");
    if (matrix_obj.isMatrix()) {
        // form_matrix is the transformation applied to the form XObject itself. It is needed for
        // the calculations but is not part of the final result, because the PDF rendering system
        // automatically applies it last, before drawing the form XObject.
        form_matrix = QPDFMatrix(matrix_obj.getArrayAsMatrix());
        probe.concat(form_matrix);
    }

    // probe now handles the transformation from the form XObject and, if requested, the
    // destination page. Next, adjust for scale and position.

    // Step 1: figure out what scale factor makes the form XObject's bounding box fit within the
    // destination rectangle.
    QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
    QPDFObjectHandle::Rectangle placed = probe.transformRectangle(bbox);
    if ((placed.urx == placed.llx) || (placed.ury == placed.lly)) {
        // avoid division by zero
        return QPDFMatrix();
    }
    double const rect_w = rect.urx - rect.llx;
    double const rect_h = rect.ury - rect.lly;
    double const placed_w = placed.urx - placed.llx;
    double const placed_h = placed.ury - placed.lly;
    double const xscale = rect_w / placed_w;
    double const yscale = rect_h / placed_h;
    double scale = yscale;
    if (xscale < yscale) {
        scale = xscale;
    }
    // Only expand or shrink when the caller allows it.
    bool const expand_denied = (scale > 1.0) && !allow_expand;
    bool const shrink_denied = (scale < 1.0) && !allow_shrink;
    if (expand_denied || shrink_denied) {
        scale = 1.0;
    }

    // Step 2: figure out what translation is required to get the rectangle to the right spot:
    // centered within the destination.
    QPDFMatrix scaled;
    scaled.scale(scale, scale);
    scaled.concat(page_inverse);
    scaled.concat(form_matrix);

    placed = scaled.transformRectangle(bbox);
    double const placed_cx = (placed.llx + placed.urx) / 2.0;
    double const placed_cy = (placed.lly + placed.ury) / 2.0;
    double const rect_cx = (rect.llx + rect.urx) / 2.0;
    double const rect_cy = (rect.lly + rect.ury) / 2.0;
    double const tx = rect_cx - placed_cx;
    double const ty = rect_cy - placed_cy;

    // Now calculate the final matrix. It does not include form_matrix because that is applied
    // automatically by the PDF interpreter.
    QPDFMatrix cm;
    cm.translate(tx, ty);
    cm.scale(scale, scale);
    cm.concat(page_inverse);
    return cm;
}
832
833
std::string
834
QPDFPageObjectHelper::placeFormXObject(
835
    QPDFObjectHandle fo,
836
    std::string const& name,
837
    QPDFObjectHandle::Rectangle rect,
838
    bool invert_transformations,
839
    bool allow_shrink,
840
    bool allow_expand)
841
0
{
842
0
    QPDFMatrix cm;
843
0
    return placeFormXObject(fo, name, rect, cm, invert_transformations, allow_shrink, allow_expand);
844
0
}
845
846
std::string
847
QPDFPageObjectHelper::placeFormXObject(
848
    QPDFObjectHandle fo,
849
    std::string const& name,
850
    QPDFObjectHandle::Rectangle rect,
851
    QPDFMatrix& cm,
852
    bool invert_transformations,
853
    bool allow_shrink,
854
    bool allow_expand)
855
0
{
856
0
    cm = getMatrixForFormXObjectPlacement(
857
0
        fo, rect, invert_transformations, allow_shrink, allow_expand);
858
0
    return ("q\n" + cm.unparse() + " cm\n" + name + " Do\n" + "Q\n");
859
0
}
860
861
void
862
QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
863
0
{
864
0
    QPDF& qpdf = oh().getQPDF("QPDFPageObjectHelper::flattenRotation called with a direct object");
865
0
    auto rotate_oh = oh().getKey("/Rotate");
866
0
    int rotate = 0;
867
0
    if (rotate_oh.isInteger()) {
868
0
        rotate = rotate_oh.getIntValueAsInt();
869
0
    }
870
0
    if (!((rotate == 90) || (rotate == 180) || (rotate == 270))) {
871
0
        return;
872
0
    }
873
0
    auto mediabox = oh().getKey("/MediaBox");
874
0
    if (!mediabox.isRectangle()) {
875
0
        return;
876
0
    }
877
0
    auto media_rect = mediabox.getArrayAsRectangle();
878
879
0
    std::vector<std::string> boxes = {
880
0
        "/MediaBox",
881
0
        "/CropBox",
882
0
        "/BleedBox",
883
0
        "/TrimBox",
884
0
        "/ArtBox",
885
0
    };
886
0
    for (auto const& boxkey: boxes) {
887
0
        auto box = oh().getKey(boxkey);
888
0
        if (!box.isRectangle()) {
889
0
            continue;
890
0
        }
891
0
        auto rect = box.getArrayAsRectangle();
892
0
        decltype(rect) new_rect;
893
894
        // How far are the edges of our rectangle from the edges of the media box?
895
0
        auto left_x = rect.llx - media_rect.llx;
896
0
        auto right_x = media_rect.urx - rect.urx;
897
0
        auto bottom_y = rect.lly - media_rect.lly;
898
0
        auto top_y = media_rect.ury - rect.ury;
899
900
        // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
901
        // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the
902
        // corners to be the correct distance away from the corners of the mediabox.
903
0
        switch (rotate) {
904
0
        case 90:
905
0
            new_rect.llx = media_rect.lly + bottom_y;
906
0
            new_rect.urx = media_rect.ury - top_y;
907
0
            new_rect.lly = media_rect.llx + right_x;
908
0
            new_rect.ury = media_rect.urx - left_x;
909
0
            break;
910
911
0
        case 180:
912
0
            new_rect.llx = media_rect.llx + right_x;
913
0
            new_rect.urx = media_rect.urx - left_x;
914
0
            new_rect.lly = media_rect.lly + top_y;
915
0
            new_rect.ury = media_rect.ury - bottom_y;
916
0
            break;
917
918
0
        case 270:
919
0
            new_rect.llx = media_rect.lly + top_y;
920
0
            new_rect.urx = media_rect.ury - bottom_y;
921
0
            new_rect.lly = media_rect.llx + left_x;
922
0
            new_rect.ury = media_rect.urx - right_x;
923
0
            break;
924
925
0
        default:
926
            // ignore
927
0
            break;
928
0
        }
929
930
0
        oh().replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
931
0
    }
932
933
    // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
934
    // with the origin point being the same offset from the lower left corner of the media box.
935
    // These calculations have been verified empirically with various
936
    // PDF readers.
937
0
    QPDFMatrix cm(0, 0, 0, 0, 0, 0);
938
0
    switch (rotate) {
939
0
    case 90:
940
0
        cm.b = -1;
941
0
        cm.c = 1;
942
0
        cm.f = media_rect.urx + media_rect.llx;
943
0
        break;
944
945
0
    case 180:
946
0
        cm.a = -1;
947
0
        cm.d = -1;
948
0
        cm.e = media_rect.urx + media_rect.llx;
949
0
        cm.f = media_rect.ury + media_rect.lly;
950
0
        break;
951
952
0
    case 270:
953
0
        cm.b = 1;
954
0
        cm.c = -1;
955
0
        cm.e = media_rect.ury + media_rect.lly;
956
0
        break;
957
958
0
    default:
959
0
        break;
960
0
    }
961
0
    std::string cm_str = std::string("q\n") + cm.unparse() + " cm\n";
962
0
    oh().addPageContents(QPDFObjectHandle::newStream(&qpdf, cm_str), true);
963
0
    oh().addPageContents(qpdf.newStream("\nQ\n"), false);
964
0
    oh().removeKey("/Rotate");
965
0
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
966
0
    if (!rotate_obj.isNull()) {
967
0
        QTC::TC("qpdf", "QPDFPageObjectHelper flatten inherit rotate");
968
0
        oh().replaceKey("/Rotate", QPDFObjectHandle::newInteger(0));
969
0
    }
970
971
0
    QPDFObjectHandle annots = oh().getKey("/Annots");
972
0
    if (annots.isArray()) {
973
0
        std::vector<QPDFObjectHandle> new_annots;
974
0
        std::vector<QPDFObjectHandle> new_fields;
975
0
        std::set<QPDFObjGen> old_fields;
976
0
        std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
977
0
        if (!afdh) {
978
0
            afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(qpdf);
979
0
            afdh = afdhph.get();
980
0
        }
981
0
        afdh->transformAnnotations(annots, new_annots, new_fields, old_fields, cm);
982
0
        afdh->removeFormFields(old_fields);
983
0
        for (auto const& f: new_fields) {
984
0
            afdh->addFormField(QPDFFormFieldObjectHelper(f));
985
0
        }
986
0
        oh().replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots));
987
0
    }
988
0
}
989
990
void
991
QPDFPageObjectHelper::copyAnnotations(
992
    QPDFPageObjectHelper from_page,
993
    QPDFMatrix const& cm,
994
    QPDFAcroFormDocumentHelper* afdh,
995
    QPDFAcroFormDocumentHelper* from_afdh)
996
0
{
997
0
    auto old_annots = from_page.getObjectHandle().getKey("/Annots");
998
0
    if (!old_annots.isArray()) {
999
0
        return;
1000
0
    }
1001
1002
0
    QPDF& from_qpdf = from_page.getObjectHandle().getQPDF(
1003
0
        "QPDFPageObjectHelper::copyAnnotations: from page is a direct object");
1004
0
    QPDF& this_qpdf =
1005
0
        oh().getQPDF("QPDFPageObjectHelper::copyAnnotations: this page is a direct object");
1006
1007
0
    std::vector<QPDFObjectHandle> new_annots;
1008
0
    std::vector<QPDFObjectHandle> new_fields;
1009
0
    std::set<QPDFObjGen> old_fields;
1010
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
1011
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> from_afdhph;
1012
0
    if (!afdh) {
1013
0
        afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(this_qpdf);
1014
0
        afdh = afdhph.get();
1015
0
    }
1016
0
    if (&this_qpdf == &from_qpdf) {
1017
0
        from_afdh = afdh;
1018
0
    } else if (from_afdh) {
1019
0
        if (from_afdh->getQPDF().getUniqueId() != from_qpdf.getUniqueId()) {
1020
0
            throw std::logic_error(
1021
0
                "QPDFAcroFormDocumentHelper::copyAnnotations: from_afdh"
1022
0
                " is not from the same QPDF as from_page");
1023
0
        }
1024
0
    } else {
1025
0
        from_afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(from_qpdf);
1026
0
        from_afdh = from_afdhph.get();
1027
0
    }
1028
1029
0
    afdh->transformAnnotations(
1030
0
        old_annots, new_annots, new_fields, old_fields, cm, &from_qpdf, from_afdh);
1031
0
    afdh->addAndRenameFormFields(new_fields);
1032
0
    auto annots = oh().getKey("/Annots");
1033
0
    if (!annots.isArray()) {
1034
0
        annots = oh().replaceKeyAndGetNew("/Annots", QPDFObjectHandle::newArray());
1035
0
    }
1036
0
    for (auto const& annot: new_annots) {
1037
0
        annots.appendItem(annot);
1038
0
    }
1039
0
}