Coverage Report

Created: 2025-06-22 06:28

/src/qpdf/libqpdf/QPDFPageObjectHelper.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDFPageObjectHelper.hh>
2
3
#include <qpdf/Pl_Buffer.hh>
4
#include <qpdf/Pl_Concatenate.hh>
5
#include <qpdf/QIntC.hh>
6
#include <qpdf/QPDF.hh>
7
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
8
#include <qpdf/QPDFExc.hh>
9
#include <qpdf/QPDFMatrix.hh>
10
#include <qpdf/QPDFObjectHandle_private.hh>
11
#include <qpdf/QTC.hh>
12
#include <qpdf/QUtil.hh>
13
#include <qpdf/ResourceFinder.hh>
14
15
namespace
16
{
17
    class ContentProvider: public QPDFObjectHandle::StreamDataProvider
18
    {
19
      public:
20
        ContentProvider(QPDFObjectHandle from_page) :
21
0
            from_page(from_page)
22
0
        {
23
0
        }
24
0
        ~ContentProvider() override = default;
25
        void provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override;
26
27
      private:
28
        QPDFObjectHandle from_page;
29
    };
30
} // namespace
31
32
void
33
ContentProvider::provideStreamData(QPDFObjGen const&, Pipeline* p)
34
0
{
35
0
    Pl_Concatenate concat("concatenate", p);
36
0
    std::string description = "contents from page object " + from_page.getObjGen().unparse(' ');
37
0
    std::string all_description;
38
0
    from_page.getKey("/Contents").pipeContentStreams(&concat, description, all_description);
39
0
    concat.manualFinish();
40
0
}
41
42
namespace
43
{
44
    class InlineImageTracker: public QPDFObjectHandle::TokenFilter
45
    {
46
      public:
47
        InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
48
0
        ~InlineImageTracker() override = default;
49
        void handleToken(QPDFTokenizer::Token const&) override;
50
        QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
51
52
        QPDF* qpdf;
53
        size_t min_size;
54
        QPDFObjectHandle resources;
55
        std::string dict_str;
56
        std::string bi_str;
57
        int min_suffix{1};
58
        bool any_images{false};
59
        enum { st_top, st_bi } state{st_top};
60
    };
61
} // namespace
62
63
// Store the owning QPDF, the size threshold and the resources dictionary; the remaining members
// keep their in-class default values.
InlineImageTracker::InlineImageTracker(QPDF* q, size_t threshold, QPDFObjectHandle res) :
    qpdf(q),
    min_size(threshold),
    resources(res)
{
}
69
70
// Convert an inline image dictionary into a dictionary suitable for an image XObject.
//
// The result always has /Type /XObject and /Subtype /Image. Every non-null entry of odict is
// copied, with abbreviated inline-image keys (/BPC, /CS, /D, /DP, /F, /H, /IM, /I, /W) replaced by
// their full names. Abbreviated color space and filter names are expanded as well, because the
// abbreviations are only meaningful inside inline images.
//
// Color space handling:
//   - an abbreviated name (/G, /RGB, /CMYK, /I) is replaced by the full name;
//   - a full standard name (/DeviceGray, /DeviceRGB, /DeviceCMYK, /Indexed) is kept as is, without
//     consulting the resources dictionary and without a warning;
//   - any other name is looked up in the /ColorSpace subdictionary of the resources; a warning is
//     issued if it cannot be resolved, and the value is left unchanged in that case;
//   - for an array (e.g. [/I /RGB 255 <...>]), abbreviated names in the first two positions (the
//     family name and the base color space) are expanded.
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
    using NameTable = std::map<std::string, std::string>;
    static NameTable const key_names{
        {"/BPC", "/BitsPerComponent"},
        {"/CS", "/ColorSpace"},
        {"/D", "/Decode"},
        {"/DP", "/DecodeParms"},
        {"/F", "/Filter"},
        {"/H", "/Height"},
        {"/IM", "/ImageMask"},
        {"/I", "/Interpolate"},
        {"/W", "/Width"},
    };
    static NameTable const colorspace_names{
        {"/G", "/DeviceGray"},
        {"/RGB", "/DeviceRGB"},
        {"/CMYK", "/DeviceCMYK"},
        {"/I", "/Indexed"},
    };
    static NameTable const filter_names{
        {"/AHx", "/ASCIIHexDecode"},
        {"/A85", "/ASCII85Decode"},
        {"/LZW", "/LZWDecode"},
        {"/Fl", "/FlateDecode"},
        {"/RL", "/RunLengthDecode"},
        {"/CCF", "/CCITTFaxDecode"},
        {"/DCT", "/DCTDecode"},
    };

    QPDFObjectHandle dict = QPDFObjectHandle::newDictionary();
    dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));
    for (auto const& [k, v]: odict.as_dictionary()) {
        if (v.null()) {
            continue;
        }
        auto const key_entry = key_names.find(k);
        std::string const key = (key_entry == key_names.end()) ? k : key_entry->second;
        auto value = v;

        if (key == "/ColorSpace") {
            if (value.isName()) {
                std::string const name = value.getName();
                auto const cs_entry = colorspace_names.find(name);
                if (cs_entry != colorspace_names.end()) {
                    value = QPDFObjectHandle::newName(cs_entry->second);
                } else if (
                    name == "/DeviceGray" || name == "/DeviceRGB" || name == "/DeviceCMYK" ||
                    name == "/Indexed") {
                    // Already a full standard name: it identifies the color space directly, so
                    // there is nothing to resolve and nothing to warn about.
                } else {
                    // This is a key in the page's /Resources -> /ColorSpace dictionary. We need to
                    // look it up and use its value as the color space for the image.
                    QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
                    if (colorspace.isDictionary() && colorspace.hasKey(name)) {
                        QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
                        value = colorspace.getKey(name);
                    } else {
                        resources.warnIfPossible("unable to resolve colorspace " + name);
                    }
                }
            } else if (value.isArray()) {
                // Limited Indexed form: [/I base hival lookup]. Only the family name and the base
                // color space can be abbreviated names, so only those positions are examined.
                std::vector<QPDFObjectHandle> items = value.getArrayAsVector();
                bool expanded = false;
                for (size_t i = 0; i < items.size() && i < 2; ++i) {
                    if (!items[i].isName()) {
                        continue;
                    }
                    auto const cs_entry = colorspace_names.find(items[i].getName());
                    if (cs_entry != colorspace_names.end()) {
                        items[i] = QPDFObjectHandle::newName(cs_entry->second);
                        expanded = true;
                    }
                }
                if (expanded) {
                    value = QPDFObjectHandle::newArray(items);
                }
            }
        } else if (key == "/Filter") {
            // A filter may be a single name or an array of names; normalize to a vector, expand
            // abbreviations, then restore the original shape.
            std::vector<QPDFObjectHandle> filters;
            if (value.isName()) {
                filters.push_back(value);
            } else if (value.isArray()) {
                filters = value.getArrayAsVector();
            }
            for (auto& filter: filters) {
                if (!filter.isName()) {
                    continue;
                }
                auto const f_entry = filter_names.find(filter.getName());
                if (f_entry != filter_names.end()) {
                    filter = QPDFObjectHandle::newName(f_entry->second);
                }
            }
            if (value.isName() && (filters.size() == 1)) {
                value = filters.at(0);
            } else if (value.isArray()) {
                value = QPDFObjectHandle::newArray(filters);
            }
        }
        dict.replaceKey(key, value);
    }
    return dict;
}
172
173
void
174
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
175
0
{
176
0
    if (state == st_bi) {
177
0
        if (token.getType() == QPDFTokenizer::tt_inline_image) {
178
0
            std::string image_data(token.getValue());
179
0
            size_t len = image_data.length();
180
0
            if (len >= min_size) {
181
0
                QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
182
0
                QPDFObjectHandle dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
183
0
                dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
184
0
                std::string name = resources.getUniqueResourceName("/IIm", min_suffix);
185
0
                QPDFObjectHandle image = QPDFObjectHandle::newStream(
186
0
                    qpdf, std::make_shared<Buffer>(std::move(image_data)));
187
0
                image.replaceDict(dict);
188
0
                resources.getKey("/XObject").replaceKey(name, image);
189
0
                write(name);
190
0
                write(" Do\n");
191
0
                any_images = true;
192
0
            } else {
193
0
                QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
194
0
                write(bi_str);
195
0
                writeToken(token);
196
0
                state = st_top;
197
0
            }
198
0
        } else if (token.isWord("ID")) {
199
0
            bi_str += token.getValue();
200
0
            dict_str += " >>";
201
0
        } else if (token.isWord("EI")) {
202
0
            state = st_top;
203
0
        } else {
204
0
            bi_str += token.getRawValue();
205
0
            dict_str += token.getRawValue();
206
0
        }
207
0
    } else if (token.isWord("BI")) {
208
0
        bi_str = token.getValue();
209
0
        dict_str = "<< ";
210
0
        state = st_bi;
211
0
    } else {
212
0
        writeToken(token);
213
0
    }
214
0
}
215
216
// Wrap the given object handle; all state lives in the QPDFObjectHelper base.
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle oh) :
    QPDFObjectHelper(oh)
{
}
220
221
// Convenience overload: look up an attribute with no fallback provider and no fallback copying.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(std::string const& name, bool copy_if_shared)
{
    std::function<QPDFObjectHandle()> const no_fallback{};
    return getAttribute(name, copy_if_shared, no_fallback, false);
}
226
227
// Look up the attribute `name` on this page or form XObject.
//
// For a form XObject the stream dictionary is consulted; for a page the page dictionary is used
// and, for /MediaBox, /CropBox, /Resources and /Rotate, a missing value is searched for by
// following /Parent links (each node is visited at most once).
//
// If copy_if_shared is true and the value was inherited or is indirect, a shallow copy is stored
// directly in the dictionary and that copy is returned. If the value is still null and
// get_fallback is set, its result is used instead; a non-null fallback is likewise stored as a
// shallow copy when copy_if_fallback is true.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(
    std::string const& name,
    bool copy_if_shared,
    std::function<QPDFObjectHandle()> get_fallback,
    bool copy_if_fallback)
{
    bool const is_form_xobject = oh().isFormXObject();
    auto dict = is_form_xobject ? oh().getDict() : oh();
    auto result = dict.getKey(name);
    bool inherited = false;

    bool const inheritable =
        (name == "/MediaBox" || name == "/CropBox" || name == "/Resources" || name == "/Rotate");
    if (!is_form_xobject && result.isNull() && inheritable) {
        QPDFObjGen::set seen{};
        for (QPDFObjectHandle node = dict; seen.add(node) && node.hasKey("/Parent");) {
            node = node.getKey("/Parent");
            result = node.getKey(name);
            if (!result.isNull()) {
                QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
                inherited = true;
                break;
            }
        }
    }

    bool const shared = inherited || result.isIndirect();
    if (copy_if_shared && shared) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute", is_form_xobject ? 0 : 1);
        result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
    }

    if (result.isNull() && get_fallback) {
        result = get_fallback();
        if (copy_if_fallback && !result.isNull()) {
            QTC::TC("qpdf", "QPDFPageObjectHelper copied fallback");
            result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
        } else {
            QTC::TC("qpdf", "QPDFPageObjectHelper used fallback without copying");
        }
    }
    return result;
}
268
269
// Return the /MediaBox attribute; there is no fallback for this box.
QPDFObjectHandle
QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
{
    QPDFObjectHandle media_box = getAttribute("/MediaBox", copy_if_shared);
    return media_box;
}
274
275
// Return the /CropBox attribute, falling back to the media box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getCropBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto media_box_fallback = [this, copy_if_shared]() { return getMediaBox(copy_if_shared); };
    return getAttribute("/CropBox", copy_if_shared, media_box_fallback, copy_if_fallback);
}
284
285
// Return the /TrimBox attribute, falling back to the crop box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getTrimBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/TrimBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
296
297
// Return the /ArtBox attribute, falling back to the crop box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getArtBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/ArtBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
308
309
// Return the /BleedBox attribute, falling back to the crop box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getBleedBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/BleedBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
320
321
void
322
QPDFPageObjectHelper::forEachXObject(
323
    bool recursive,
324
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
325
        action,
326
    std::function<bool(QPDFObjectHandle)> selector)
327
13.0k
{
328
13.0k
    QTC::TC(
329
13.0k
        "qpdf",
330
13.0k
        "QPDFPageObjectHelper::forEachXObject",
331
13.0k
        recursive ? (oh().isFormXObject() ? 0 : 1) : (oh().isFormXObject() ? 2 : 3));
332
13.0k
    QPDFObjGen::set seen;
333
13.0k
    std::list<QPDFPageObjectHelper> queue;
334
13.0k
    queue.emplace_back(*this);
335
26.1k
    while (!queue.empty()) {
336
13.0k
        auto& ph = queue.front();
337
13.0k
        if (seen.add(ph)) {
338
13.0k
            auto xobj_dict = ph.getAttribute("/Resources", false).getKeyIfDict("/XObject");
339
20.2k
            for (auto const& [key, value]: xobj_dict.as_dictionary()) {
340
20.2k
                if (value.null()) {
341
1.15k
                    continue;
342
1.15k
                }
343
19.0k
                auto obj = value;
344
19.0k
                if ((!selector) || selector(obj)) {
345
26
                    action(obj, xobj_dict, key);
346
26
                }
347
19.0k
                if (recursive && obj.isFormXObject()) {
348
0
                    queue.emplace_back(obj);
349
0
                }
350
19.0k
            }
351
13.0k
        }
352
13.0k
        queue.pop_front();
353
13.0k
    }
354
13.0k
}
355
356
void
357
QPDFPageObjectHelper::forEachImage(
358
    bool recursive,
359
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
360
        action)
361
13.0k
{
362
19.0k
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isImage(); });
363
13.0k
}
364
365
void
366
QPDFPageObjectHelper::forEachFormXObject(
367
    bool recursive,
368
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
369
        action)
370
0
{
371
0
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isFormXObject(); });
372
0
}
373
374
std::map<std::string, QPDFObjectHandle>
375
QPDFPageObjectHelper::getPageImages()
376
0
{
377
0
    return getImages();
378
0
}
379
380
std::map<std::string, QPDFObjectHandle>
381
QPDFPageObjectHelper::getImages()
382
13.0k
{
383
13.0k
    std::map<std::string, QPDFObjectHandle> result;
384
13.0k
    forEachImage(
385
13.0k
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
386
26
            result[key] = obj;
387
26
        });
388
13.0k
    return result;
389
13.0k
}
390
391
std::map<std::string, QPDFObjectHandle>
392
QPDFPageObjectHelper::getFormXObjects()
393
0
{
394
0
    std::map<std::string, QPDFObjectHandle> result;
395
0
    forEachFormXObject(
396
0
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
397
0
            result[key] = obj;
398
0
        });
399
0
    return result;
400
0
}
401
402
void
403
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
404
0
{
405
0
    if (shallow) {
406
0
        QPDFObjectHandle resources = getAttribute("/Resources", true);
407
        // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
408
        // other pages.
409
0
        resources.mergeResources("<< /XObject << >> >>"_qpdf);
410
0
        InlineImageTracker iit(oh().getOwningQPDF(), min_size, resources);
411
0
        Pl_Buffer b("new page content");
412
0
        bool filtered = false;
413
0
        try {
414
0
            filterContents(&iit, &b);
415
0
            filtered = true;
416
0
        } catch (std::exception& e) {
417
0
            oh().warnIfPossible(
418
0
                std::string("Unable to filter content stream: ") + e.what() +
419
0
                "; not attempting to externalize inline images from this stream");
420
0
        }
421
0
        if (filtered && iit.any_images) {
422
0
            if (oh().isFormXObject()) {
423
0
                oh().replaceStreamData(
424
0
                    b.getBufferSharedPointer(),
425
0
                    QPDFObjectHandle::newNull(),
426
0
                    QPDFObjectHandle::newNull());
427
0
            } else {
428
0
                oh().replaceKey(
429
0
                    "/Contents",
430
0
                    QPDFObjectHandle::newStream(&oh().getQPDF(), b.getBufferSharedPointer()));
431
0
            }
432
0
        }
433
0
    } else {
434
0
        externalizeInlineImages(min_size, true);
435
0
        forEachFormXObject(
436
0
            true, [min_size](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
437
0
                QPDFPageObjectHelper(obj).externalizeInlineImages(min_size, true);
438
0
            });
439
0
    }
440
0
}
441
442
std::vector<QPDFAnnotationObjectHelper>
443
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
444
36.3k
{
445
36.3k
    std::vector<QPDFAnnotationObjectHelper> result;
446
36.3k
    QPDFObjectHandle annots = oh().getKey("/Annots");
447
36.3k
    if (annots.isArray()) {
448
12.5k
        int nannots = annots.getArrayNItems();
449
521k
        for (int i = 0; i < nannots; ++i) {
450
509k
            QPDFObjectHandle annot = annots.getArrayItem(i);
451
509k
            if (annot.isDictionaryOfType("", only_subtype)) {
452
182k
                result.emplace_back(annot);
453
182k
            }
454
509k
        }
455
12.5k
    }
456
36.3k
    return result;
457
36.3k
}
458
459
std::vector<QPDFObjectHandle>
460
QPDFPageObjectHelper::getPageContents()
461
0
{
462
0
    return oh().getPageContents();
463
0
}
464
465
void
466
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
467
5.08k
{
468
5.08k
    oh().addPageContents(contents, first);
469
5.08k
}
470
471
void
472
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
473
0
{
474
0
    oh().rotatePage(angle, relative);
475
0
}
476
477
void
478
QPDFPageObjectHelper::coalesceContentStreams()
479
14.6k
{
480
14.6k
    oh().coalesceContentStreams();
481
14.6k
}
482
483
void
484
QPDFPageObjectHelper::parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks)
485
0
{
486
0
    parseContents(callbacks);
487
0
}
488
489
void
490
QPDFPageObjectHelper::parseContents(QPDFObjectHandle::ParserCallbacks* callbacks)
491
14.5k
{
492
14.5k
    if (oh().isFormXObject()) {
493
0
        oh().parseAsContents(callbacks);
494
14.5k
    } else {
495
14.5k
        oh().parsePageContents(callbacks);
496
14.5k
    }
497
14.5k
}
498
499
void
500
QPDFPageObjectHelper::filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
501
0
{
502
0
    return filterContents(filter, next);
503
0
}
504
505
void
506
QPDFPageObjectHelper::filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
507
0
{
508
0
    if (oh().isFormXObject()) {
509
0
        oh().filterAsContents(filter, next);
510
0
    } else {
511
0
        oh().filterPageContents(filter, next);
512
0
    }
513
0
}
514
515
void
516
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
517
0
{
518
0
    pipeContents(p);
519
0
}
520
521
void
522
QPDFPageObjectHelper::pipeContents(Pipeline* p)
523
0
{
524
0
    if (oh().isFormXObject()) {
525
0
        oh().pipeStreamData(p, 0, qpdf_dl_specialized);
526
0
    } else {
527
0
        oh().pipePageContents(p);
528
0
    }
529
0
}
530
531
void
532
QPDFPageObjectHelper::addContentTokenFilter(
533
    std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter)
534
0
{
535
0
    if (oh().isFormXObject()) {
536
0
        oh().addTokenFilter(token_filter);
537
0
    } else {
538
0
        oh().addContentTokenFilter(token_filter);
539
0
    }
540
0
}
541
542
bool
543
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
544
    QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
545
0
{
546
0
    bool is_page = (!ph.oh().isFormXObject());
547
0
    if (!is_page) {
548
0
        QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
549
0
    }
550
551
0
    ResourceFinder rf;
552
0
    try {
553
0
        auto q = ph.oh().getOwningQPDF();
554
0
        size_t before_nw = (q ? q->numWarnings() : 0);
555
0
        ph.parseContents(&rf);
556
0
        size_t after_nw = (q ? q->numWarnings() : 0);
557
0
        if (after_nw > before_nw) {
558
0
            ph.oh().warnIfPossible(
559
0
                "Bad token found while scanning content stream; "
560
0
                "not attempting to remove unreferenced objects from this object");
561
0
            return false;
562
0
        }
563
0
    } catch (std::exception& e) {
564
0
        QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
565
0
        ph.oh().warnIfPossible(
566
0
            std::string("Unable to parse content stream: ") + e.what() +
567
0
            "; not attempting to remove unreferenced objects from this object");
568
0
        return false;
569
0
    }
570
571
    // We will walk through /Font and /XObject dictionaries, removing any resources that are not
572
    // referenced. We must make copies of resource dictionaries down into the dictionaries are
573
    // mutating to prevent mutating one dictionary from having the side effect of mutating the one
574
    // it was copied from.
575
0
    QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
576
0
    std::vector<QPDFObjectHandle> rdicts;
577
0
    std::set<std::string> known_names;
578
0
    std::vector<std::string> to_filter = {"/Font", "/XObject"};
579
0
    if (resources.isDictionary()) {
580
0
        for (auto const& iter: to_filter) {
581
0
            QPDFObjectHandle dict = resources.getKey(iter);
582
0
            if (dict.isDictionary()) {
583
0
                dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy());
584
0
                rdicts.push_back(dict);
585
0
                auto keys = dict.getKeys();
586
0
                known_names.insert(keys.begin(), keys.end());
587
0
            }
588
0
        }
589
0
    }
590
591
0
    std::set<std::string> local_unresolved;
592
0
    auto names_by_rtype = rf.getNamesByResourceType();
593
0
    for (auto const& i1: to_filter) {
594
0
        for (auto const& n_iter: names_by_rtype[i1]) {
595
0
            std::string const& name = n_iter.first;
596
0
            if (!known_names.contains(name)) {
597
0
                unresolved.insert(name);
598
0
                local_unresolved.insert(name);
599
0
            }
600
0
        }
601
0
    }
602
    // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
603
    // which case names were resolved from the containing page. This behavior seems to be widely
604
    // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved
605
    // names, some viewers fail to resolve them, and others allow them to be inherited from the page
606
    // or from another form XObjects that contains them. Since this behavior is inconsistent across
607
    // viewers, we consider an unresolved name when a resources dictionary is present to be reason
608
    // not to remove unreferenced resources. An unresolved name in the absence of a resource
609
    // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
610
    // unresolved names, and for page objects, we avoid removing any such names found in nested form
611
    // XObjects.
612
613
0
    if ((!local_unresolved.empty()) && resources.isDictionary()) {
614
        // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
615
        // looking at names that are referencing fonts and XObjects, but until we're certain that we
616
        // know the meaning of every name in a content stream, we don't want to give warnings that
617
        // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
618
        // there's nothing to be done about it, so there's no good reason to issue a warning. The
619
        // only sad thing is that it was a false positive that alerted me to a logic error in the
620
        // code, and any future such errors would now be hidden.
621
0
        QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
622
0
        return false;
623
0
    }
624
625
0
    for (auto& dict: rdicts) {
626
0
        for (auto const& key: dict.getKeys()) {
627
0
            if (is_page && unresolved.contains(key)) {
628
                // This name is referenced by some nested form xobject, so don't remove it.
629
0
                QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
630
0
            } else if (!rf.getNames().contains(key)) {
631
0
                dict.removeKey(key);
632
0
            }
633
0
        }
634
0
    }
635
0
    return true;
636
0
}
637
638
void
639
QPDFPageObjectHelper::removeUnreferencedResources()
640
0
{
641
    // Accumulate a list of unresolved names across all nested form XObjects.
642
0
    std::set<std::string> unresolved;
643
0
    bool any_failures = false;
644
0
    forEachFormXObject(
645
0
        true,
646
0
        [&any_failures, &unresolved](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
647
0
            if (!removeUnreferencedResourcesHelper(QPDFPageObjectHelper(obj), unresolved)) {
648
0
                any_failures = true;
649
0
            }
650
0
        });
651
0
    if (oh().isFormXObject() || (!any_failures)) {
652
0
        removeUnreferencedResourcesHelper(*this, unresolved);
653
0
    }
654
0
}
655
656
// Make a shallow copy of the wrapped object, register it as a new indirect object in the owning
// QPDF and return a helper for it. The owning QPDF is obtained first via getQPDF(), which is
// given the message to use when the wrapped object is direct.
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
    QPDF& owner = oh().getQPDF("QPDFPageObjectHelper::shallowCopyPage called with a direct object");
    QPDFObjectHandle copy = oh().shallowCopy();
    return QPDFPageObjectHelper(owner.makeIndirectObject(copy));
}
663
664
// Compute the matrix that accounts for the page's /Rotate and /UserUnit entries, relative to the
// trim box. With invert set, the scale is inverted and the rotation becomes 360 - rotate.
//
// The identity matrix is returned when the trim box is not a rectangle, when neither /Rotate nor
// /UserUnit is present, or when inversion is requested with a scale of zero. Rotation values
// other than 90, 180 and 270 result in scaling only.
QPDFObjectHandle::Matrix
QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
{
    using Matrix = QPDFObjectHandle::Matrix;
    Matrix const identity(1, 0, 0, 1, 0, 0);

    QPDFObjectHandle bbox = getTrimBox(false);
    if (!bbox.isRectangle()) {
        return identity;
    }
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (rotate_obj.isNull() && scale_obj.isNull()) {
        return identity;
    }

    QPDFObjectHandle::Rectangle rect = bbox.getArrayAsRectangle();
    double const width = rect.urx - rect.llx;
    double const height = rect.ury - rect.lly;
    double scale = scale_obj.isNumber() ? scale_obj.getNumericValue() : 1.0;
    int rotate = rotate_obj.isInteger() ? rotate_obj.getIntValueAsInt() : 0;
    if (invert) {
        if (scale == 0.0) {
            // A zero scale cannot be inverted.
            return identity;
        }
        scale = 1.0 / scale;
        rotate = 360 - rotate;
    }

    if (rotate == 90) {
        return Matrix(0, -scale, scale, 0, 0, width * scale);
    }
    if (rotate == 180) {
        return Matrix(-scale, 0, 0, -scale, width * scale, height * scale);
    }
    if (rotate == 270) {
        return Matrix(0, scale, -scale, 0, height * scale, 0);
    }
    // Ignore invalid rotation angle
    return Matrix(scale, 0, 0, scale, 0, 0);
}
706
707
// Create a new form XObject stream that represents this page.
//
// The new stream's dictionary gets /Type /XObject, /Subtype /Form, shallow copies of the page's
// /Resources and /Group attributes, and the trim box as /BBox (a warning is issued if that is not
// a rectangle). The stream data comes from a ContentProvider bound to the page object. When
// handle_transformations is true and the page has a /Rotate or /UserUnit attribute, /Matrix is set
// from getMatrixForTransformations().
//
// getQPDF() is given the message to use when the wrapped object is direct.
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
    auto result =
        oh().getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object")
            .newStream();
    QPDFObjectHandle newdict = result.getDict();
    newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
    newdict.replaceKey("/Resources", getAttribute("/Resources", false).shallowCopy());
    newdict.replaceKey("/Group", getAttribute("/Group", false).shallowCopy());
    QPDFObjectHandle bbox = getTrimBox(false).shallowCopy();
    if (!bbox.isRectangle()) {
        oh().warnIfPossible(
            "bounding box is invalid; form XObject created from page will not work");
    }
    newdict.replaceKey("/BBox", bbox);
    // make_shared avoids a naked new; the declared type keeps the replaceStreamData overload
    // selection explicit.
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider =
        std::make_shared<ContentProvider>(oh());
    result.replaceStreamData(provider, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (handle_transformations && (!(rotate_obj.isNull() && scale_obj.isNull()))) {
        newdict.replaceKey("/Matrix", QPDFObjectHandle::newArray(getMatrixForTransformations()));
    }

    return result;
}
735
736
QPDFMatrix
737
QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
738
    QPDFObjectHandle fo,
739
    QPDFObjectHandle::Rectangle rect,
740
    bool invert_transformations,
741
    bool allow_shrink,
742
    bool allow_expand)
743
0
{
744
    // Calculate the transformation matrix that will place the given form XObject fully inside the
745
    // given rectangle, center and shrinking or expanding as needed if requested.
746
747
    // When rendering a form XObject, the transformation in the graphics state (cm) is applied first
748
    // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be
749
    // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting
750
    // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We
751
    // want to create a transformation matrix that make the form XObject's bounding box land in
752
    // exactly the right spot.
753
754
0
    QPDFObjectHandle fdict = fo.getDict();
755
0
    QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
756
0
    if (!bbox_obj.isRectangle()) {
757
0
        return {};
758
0
    }
759
760
0
    QPDFMatrix wmatrix; // work matrix
761
0
    QPDFMatrix tmatrix; // "to" matrix
762
0
    QPDFMatrix fmatrix; // "from" matrix
763
0
    if (invert_transformations) {
764
        // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows
765
        // the overlaid form XObject's to be absolute rather than relative to properties of the
766
        // destination page. tmatrix is part of the computed transformation matrix.
767
0
        tmatrix = QPDFMatrix(getMatrixForTransformations(true));
768
0
        wmatrix.concat(tmatrix);
769
0
    }
770
0
    if (fdict.getKey("/Matrix").isMatrix()) {
771
        // fmatrix is the transformation matrix that is applied to the form XObject itself. We need
772
        // this for calculations, but we don't explicitly use it in the final result because the PDF
773
        // rendering system automatically applies this last before
774
        // drawing the form XObject.
775
0
        fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
776
0
        wmatrix.concat(fmatrix);
777
0
    }
778
779
    // The current wmatrix handles transformation from the form xobject and, if requested, the
780
    // destination page. Next, we have to adjust this for scale and position.
781
782
    // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit
783
    // within the destination rectangle.
784
785
    // Transform bounding box
786
0
    QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
787
0
    QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox);
788
789
    // Calculate a scale factor, if needed. Shrink or expand if needed and allowed.
790
0
    if ((T.urx == T.llx) || (T.ury == T.lly)) {
791
        // avoid division by zero
792
0
        return {};
793
0
    }
794
0
    double rect_w = rect.urx - rect.llx;
795
0
    double rect_h = rect.ury - rect.lly;
796
0
    double t_w = T.urx - T.llx;
797
0
    double t_h = T.ury - T.lly;
798
0
    double xscale = rect_w / t_w;
799
0
    double yscale = rect_h / t_h;
800
0
    double scale = (xscale < yscale ? xscale : yscale);
801
0
    if (scale > 1.0) {
802
0
        if (!allow_expand) {
803
0
            scale = 1.0;
804
0
        }
805
0
    } else if (scale < 1.0) {
806
0
        if (!allow_shrink) {
807
0
            scale = 1.0;
808
0
        }
809
0
    }
810
811
    // Step 2: figure out what translation is required to get the rectangle to the right spot:
812
    // centered within the destination.
813
0
    wmatrix = QPDFMatrix();
814
0
    wmatrix.scale(scale, scale);
815
0
    wmatrix.concat(tmatrix);
816
0
    wmatrix.concat(fmatrix);
817
818
0
    T = wmatrix.transformRectangle(bbox);
819
0
    double t_cx = (T.llx + T.urx) / 2.0;
820
0
    double t_cy = (T.lly + T.ury) / 2.0;
821
0
    double r_cx = (rect.llx + rect.urx) / 2.0;
822
0
    double r_cy = (rect.lly + rect.ury) / 2.0;
823
0
    double tx = r_cx - t_cx;
824
0
    double ty = r_cy - t_cy;
825
826
    // Now we can calculate the final matrix. The final matrix does not include fmatrix because that
827
    // is applied automatically by the PDF interpreter.
828
0
    QPDFMatrix cm;
829
0
    cm.translate(tx, ty);
830
0
    cm.scale(scale, scale);
831
0
    cm.concat(tmatrix);
832
0
    return cm;
833
0
}
834
835
std::string
836
QPDFPageObjectHelper::placeFormXObject(
837
    QPDFObjectHandle fo,
838
    std::string const& name,
839
    QPDFObjectHandle::Rectangle rect,
840
    bool invert_transformations,
841
    bool allow_shrink,
842
    bool allow_expand)
843
0
{
844
0
    QPDFMatrix cm;
845
0
    return placeFormXObject(fo, name, rect, cm, invert_transformations, allow_shrink, allow_expand);
846
0
}
847
848
std::string
849
QPDFPageObjectHelper::placeFormXObject(
850
    QPDFObjectHandle fo,
851
    std::string const& name,
852
    QPDFObjectHandle::Rectangle rect,
853
    QPDFMatrix& cm,
854
    bool invert_transformations,
855
    bool allow_shrink,
856
    bool allow_expand)
857
0
{
858
0
    cm = getMatrixForFormXObjectPlacement(
859
0
        fo, rect, invert_transformations, allow_shrink, allow_expand);
860
0
    return ("q\n" + cm.unparse() + " cm\n" + name + " Do\n" + "Q\n");
861
0
}
862
863
void
864
QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
865
0
{
866
0
    QPDF& qpdf = oh().getQPDF("QPDFPageObjectHelper::flattenRotation called with a direct object");
867
0
    auto rotate_oh = oh().getKey("/Rotate");
868
0
    int rotate = 0;
869
0
    if (rotate_oh.isInteger()) {
870
0
        rotate = rotate_oh.getIntValueAsInt();
871
0
    }
872
0
    if (!((rotate == 90) || (rotate == 180) || (rotate == 270))) {
873
0
        return;
874
0
    }
875
0
    auto mediabox = oh().getKey("/MediaBox");
876
0
    if (!mediabox.isRectangle()) {
877
0
        return;
878
0
    }
879
0
    auto media_rect = mediabox.getArrayAsRectangle();
880
881
0
    std::vector<std::string> boxes = {
882
0
        "/MediaBox",
883
0
        "/CropBox",
884
0
        "/BleedBox",
885
0
        "/TrimBox",
886
0
        "/ArtBox",
887
0
    };
888
0
    for (auto const& boxkey: boxes) {
889
0
        auto box = oh().getKey(boxkey);
890
0
        if (!box.isRectangle()) {
891
0
            continue;
892
0
        }
893
0
        auto rect = box.getArrayAsRectangle();
894
0
        decltype(rect) new_rect;
895
896
        // How far are the edges of our rectangle from the edges of the media box?
897
0
        auto left_x = rect.llx - media_rect.llx;
898
0
        auto right_x = media_rect.urx - rect.urx;
899
0
        auto bottom_y = rect.lly - media_rect.lly;
900
0
        auto top_y = media_rect.ury - rect.ury;
901
902
        // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
903
        // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the
904
        // corners to be the correct distance away from the corners of the mediabox.
905
0
        switch (rotate) {
906
0
        case 90:
907
0
            new_rect.llx = media_rect.lly + bottom_y;
908
0
            new_rect.urx = media_rect.ury - top_y;
909
0
            new_rect.lly = media_rect.llx + right_x;
910
0
            new_rect.ury = media_rect.urx - left_x;
911
0
            break;
912
913
0
        case 180:
914
0
            new_rect.llx = media_rect.llx + right_x;
915
0
            new_rect.urx = media_rect.urx - left_x;
916
0
            new_rect.lly = media_rect.lly + top_y;
917
0
            new_rect.ury = media_rect.ury - bottom_y;
918
0
            break;
919
920
0
        case 270:
921
0
            new_rect.llx = media_rect.lly + top_y;
922
0
            new_rect.urx = media_rect.ury - bottom_y;
923
0
            new_rect.lly = media_rect.llx + left_x;
924
0
            new_rect.ury = media_rect.urx - right_x;
925
0
            break;
926
927
0
        default:
928
            // ignore
929
0
            break;
930
0
        }
931
932
0
        oh().replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
933
0
    }
934
935
    // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
936
    // with the origin point being the same offset from the lower left corner of the media box.
937
    // These calculations have been verified empirically with various
938
    // PDF readers.
939
0
    QPDFMatrix cm(0, 0, 0, 0, 0, 0);
940
0
    switch (rotate) {
941
0
    case 90:
942
0
        cm.b = -1;
943
0
        cm.c = 1;
944
0
        cm.f = media_rect.urx + media_rect.llx;
945
0
        break;
946
947
0
    case 180:
948
0
        cm.a = -1;
949
0
        cm.d = -1;
950
0
        cm.e = media_rect.urx + media_rect.llx;
951
0
        cm.f = media_rect.ury + media_rect.lly;
952
0
        break;
953
954
0
    case 270:
955
0
        cm.b = 1;
956
0
        cm.c = -1;
957
0
        cm.e = media_rect.ury + media_rect.lly;
958
0
        break;
959
960
0
    default:
961
0
        break;
962
0
    }
963
0
    std::string cm_str = std::string("q\n") + cm.unparse() + " cm\n";
964
0
    oh().addPageContents(QPDFObjectHandle::newStream(&qpdf, cm_str), true);
965
0
    oh().addPageContents(qpdf.newStream("\nQ\n"), false);
966
0
    oh().removeKey("/Rotate");
967
0
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
968
0
    if (!rotate_obj.isNull()) {
969
0
        QTC::TC("qpdf", "QPDFPageObjectHelper flatten inherit rotate");
970
0
        oh().replaceKey("/Rotate", QPDFObjectHandle::newInteger(0));
971
0
    }
972
973
0
    QPDFObjectHandle annots = oh().getKey("/Annots");
974
0
    if (annots.isArray()) {
975
0
        std::vector<QPDFObjectHandle> new_annots;
976
0
        std::vector<QPDFObjectHandle> new_fields;
977
0
        std::set<QPDFObjGen> old_fields;
978
0
        std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
979
0
        if (!afdh) {
980
0
            afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(qpdf);
981
0
            afdh = afdhph.get();
982
0
        }
983
0
        afdh->transformAnnotations(annots, new_annots, new_fields, old_fields, cm);
984
0
        afdh->removeFormFields(old_fields);
985
0
        for (auto const& f: new_fields) {
986
0
            afdh->addFormField(QPDFFormFieldObjectHelper(f));
987
0
        }
988
0
        oh().replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots));
989
0
    }
990
0
}
991
992
void
993
QPDFPageObjectHelper::copyAnnotations(
994
    QPDFPageObjectHelper from_page,
995
    QPDFMatrix const& cm,
996
    QPDFAcroFormDocumentHelper* afdh,
997
    QPDFAcroFormDocumentHelper* from_afdh)
998
0
{
999
0
    auto old_annots = from_page.getObjectHandle().getKey("/Annots");
1000
0
    if (!old_annots.isArray()) {
1001
0
        return;
1002
0
    }
1003
1004
0
    QPDF& from_qpdf = from_page.getObjectHandle().getQPDF(
1005
0
        "QPDFPageObjectHelper::copyAnnotations: from page is a direct object");
1006
0
    QPDF& this_qpdf =
1007
0
        oh().getQPDF("QPDFPageObjectHelper::copyAnnotations: this page is a direct object");
1008
1009
0
    std::vector<QPDFObjectHandle> new_annots;
1010
0
    std::vector<QPDFObjectHandle> new_fields;
1011
0
    std::set<QPDFObjGen> old_fields;
1012
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
1013
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> from_afdhph;
1014
0
    if (!afdh) {
1015
0
        afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(this_qpdf);
1016
0
        afdh = afdhph.get();
1017
0
    }
1018
0
    if (&this_qpdf == &from_qpdf) {
1019
0
        from_afdh = afdh;
1020
0
    } else if (from_afdh) {
1021
0
        if (from_afdh->getQPDF().getUniqueId() != from_qpdf.getUniqueId()) {
1022
0
            throw std::logic_error(
1023
0
                "QPDFAcroFormDocumentHelper::copyAnnotations: from_afdh"
1024
0
                " is not from the same QPDF as from_page");
1025
0
        }
1026
0
    } else {
1027
0
        from_afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(from_qpdf);
1028
0
        from_afdh = from_afdhph.get();
1029
0
    }
1030
1031
0
    afdh->transformAnnotations(
1032
0
        old_annots, new_annots, new_fields, old_fields, cm, &from_qpdf, from_afdh);
1033
0
    afdh->addAndRenameFormFields(new_fields);
1034
0
    auto annots = oh().getKey("/Annots");
1035
0
    if (!annots.isArray()) {
1036
0
        annots = oh().replaceKeyAndGetNew("/Annots", QPDFObjectHandle::newArray());
1037
0
    }
1038
0
    for (auto const& annot: new_annots) {
1039
0
        annots.appendItem(annot);
1040
0
    }
1041
0
}