Coverage Report

Created: 2024-09-08 06:06

/src/qpdf/libqpdf/QPDFPageObjectHelper.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDFPageObjectHelper.hh>
2
3
#include <qpdf/Pl_Buffer.hh>
4
#include <qpdf/Pl_Concatenate.hh>
5
#include <qpdf/QIntC.hh>
6
#include <qpdf/QPDF.hh>
7
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
8
#include <qpdf/QPDFExc.hh>
9
#include <qpdf/QPDFMatrix.hh>
10
#include <qpdf/QTC.hh>
11
#include <qpdf/QUtil.hh>
12
#include <qpdf/ResourceFinder.hh>
13
14
namespace
15
{
16
    class ContentProvider: public QPDFObjectHandle::StreamDataProvider
17
    {
18
      public:
19
        ContentProvider(QPDFObjectHandle from_page) :
20
            from_page(from_page)
21
0
        {
22
0
        }
23
0
        ~ContentProvider() override = default;
24
        void provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override;
25
26
      private:
27
        QPDFObjectHandle from_page;
28
    };
29
} // namespace
30
31
void
32
ContentProvider::provideStreamData(QPDFObjGen const&, Pipeline* p)
33
0
{
34
0
    Pl_Concatenate concat("concatenate", p);
35
0
    std::string description = "contents from page object " + from_page.getObjGen().unparse(' ');
36
0
    std::string all_description;
37
0
    from_page.getKey("/Contents").pipeContentStreams(&concat, description, all_description);
38
0
    concat.manualFinish();
39
0
}
40
41
namespace
42
{
43
    class InlineImageTracker: public QPDFObjectHandle::TokenFilter
44
    {
45
      public:
46
        InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
47
0
        ~InlineImageTracker() override = default;
48
        void handleToken(QPDFTokenizer::Token const&) override;
49
        QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
50
51
        QPDF* qpdf;
52
        size_t min_size;
53
        QPDFObjectHandle resources;
54
        std::string dict_str;
55
        std::string bi_str;
56
        int min_suffix{1};
57
        bool any_images{false};
58
        enum { st_top, st_bi } state{st_top};
59
    };
60
} // namespace
61
62
// Store the owning QPDF, the size threshold, and the resources dictionary. All remaining members
// keep their in-class default values.
InlineImageTracker::InlineImageTracker(
    QPDF* owner, size_t threshold, QPDFObjectHandle page_resources) :
    qpdf(owner),
    min_size(threshold),
    resources(page_resources)
{
}
68
69
// Build an image XObject dictionary from an inline image dictionary. /Type and /Subtype are added,
// abbreviated keys are replaced by their full names, and abbreviated color space and filter names
// are expanded. A color space name that is not a recognized abbreviation is looked up in the
// /ColorSpace dictionary of the tracker's resources; if it cannot be found, a warning is issued and
// the value is copied unchanged.
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
    using abbreviation_table = std::map<std::string, std::string>;

    static abbreviation_table const full_keys = {
        {"/BPC", "/BitsPerComponent"},
        {"/CS", "/ColorSpace"},
        {"/D", "/Decode"},
        {"/DP", "/DecodeParms"},
        {"/F", "/Filter"},
        {"/H", "/Height"},
        {"/IM", "/ImageMask"},
        {"/I", "/Interpolate"},
        {"/W", "/Width"},
    };
    static abbreviation_table const full_colorspaces = {
        {"/G", "/DeviceGray"},
        {"/RGB", "/DeviceRGB"},
        {"/CMYK", "/DeviceCMYK"},
        {"/I", "/Indexed"},
    };
    static abbreviation_table const full_filters = {
        {"/AHx", "/ASCIIHexDecode"},
        {"/A85", "/ASCII85Decode"},
        {"/LZW", "/LZWDecode"},
        {"/Fl", "/FlateDecode"},
        {"/RL", "/RunLengthDecode"},
        {"/CCF", "/CCITTFaxDecode"},
        {"/DCT", "/DCTDecode"},
    };

    // Returns the expansion of an abbreviation, or an empty string if it is not in the table.
    auto const expand = [](abbreviation_table const& table, std::string const& abbreviation) {
        auto const found = table.find(abbreviation);
        return found == table.end() ? std::string() : found->second;
    };

    QPDFObjectHandle xobject_dict = QPDFObjectHandle::newDictionary();
    xobject_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    xobject_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));

    for (auto const& original_key: odict.getKeys()) {
        // The value is always fetched using the key as it appears in the inline dictionary.
        QPDFObjectHandle value = odict.getKey(original_key);
        std::string const long_key = expand(full_keys, original_key);
        std::string const& key = long_key.empty() ? original_key : long_key;

        if (key == "/ColorSpace") {
            if (value.isName()) {
                std::string const cs_name = value.getName();
                std::string const long_name = expand(full_colorspaces, cs_name);
                if (!long_name.empty()) {
                    value = QPDFObjectHandle::newName(long_name);
                } else {
                    // This is a key in the page's /Resources -> /ColorSpace dictionary. Look it up
                    // and use what it refers to as the color space for the image.
                    QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
                    if (colorspace.isDictionary() && colorspace.hasKey(cs_name)) {
                        QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
                        value = colorspace.getKey(cs_name);
                    } else {
                        resources.warnIfPossible("unable to resolve colorspace " + cs_name);
                    }
                }
            }
        } else if (key == "/Filter") {
            // Treat a single name as a one-element list so both forms share the expansion loop.
            std::vector<QPDFObjectHandle> filters;
            if (value.isName()) {
                filters.push_back(value);
            } else if (value.isArray()) {
                filters = value.getArrayAsVector();
            }
            for (auto& filter: filters) {
                if (!filter.isName()) {
                    continue;
                }
                std::string const long_name = expand(full_filters, filter.getName());
                if (!long_name.empty()) {
                    filter = QPDFObjectHandle::newName(long_name);
                }
            }
            if (value.isName() && (filters.size() == 1)) {
                value = filters.at(0);
            } else if (value.isArray()) {
                value = QPDFObjectHandle::newArray(filters);
            }
        }
        xobject_dict.replaceKey(key, value);
    }
    return xobject_dict;
}
168
169
void
170
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
171
0
{
172
0
    if (state == st_bi) {
173
0
        if (token.getType() == QPDFTokenizer::tt_inline_image) {
174
0
            std::string image_data(token.getValue());
175
0
            size_t len = image_data.length();
176
0
            if (len >= this->min_size) {
177
0
                QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
178
0
                QPDFObjectHandle dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
179
0
                dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
180
0
                std::string name = resources.getUniqueResourceName("/IIm", this->min_suffix);
181
0
                QPDFObjectHandle image = QPDFObjectHandle::newStream(
182
0
                    this->qpdf, std::make_shared<Buffer>(std::move(image_data)));
183
0
                image.replaceDict(dict);
184
0
                resources.getKey("/XObject").replaceKey(name, image);
185
0
                write(name);
186
0
                write(" Do\n");
187
0
                any_images = true;
188
0
            } else {
189
0
                QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
190
0
                write(bi_str);
191
0
                writeToken(token);
192
0
                state = st_top;
193
0
            }
194
0
        } else if (token.isWord("ID")) {
195
0
            bi_str += token.getValue();
196
0
            dict_str += " >>";
197
0
        } else if (token.isWord("EI")) {
198
0
            state = st_top;
199
0
        } else {
200
0
            bi_str += token.getRawValue();
201
0
            dict_str += token.getRawValue();
202
0
        }
203
0
    } else if (token.isWord("BI")) {
204
0
        bi_str = token.getValue();
205
0
        dict_str = "<< ";
206
0
        state = st_bi;
207
0
    } else {
208
0
        writeToken(token);
209
0
    }
210
0
}
211
212
// Wrap the given object handle; all state lives in the QPDFObjectHelper base.
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle page) :
    QPDFObjectHelper(page)
{
}
216
217
// Convenience overload: look up an attribute with no fallback function and no fallback copying.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(std::string const& name, bool copy_if_shared)
{
    std::function<QPDFObjectHandle()> const no_fallback; // empty: never invoked
    return getAttribute(name, copy_if_shared, no_fallback, false);
}
222
223
// Look up an attribute of this page or form XObject.
//
// For a form XObject the stream dictionary is consulted; otherwise the page object itself. For a
// page, /MediaBox, /CropBox, /Resources, and /Rotate that are missing from the page are searched
// for by following /Parent links, with loop protection.
//
// If copy_if_shared is true and the value was inherited or is indirect, a shallow copy is stored
// directly in the dictionary and returned. If the value is still null and get_fallback is set, the
// fallback's result is used; it is shallow-copied into the dictionary when copy_if_fallback is true
// and the fallback result is not null.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(
    std::string const& name,
    bool copy_if_shared,
    std::function<QPDFObjectHandle()> get_fallback,
    bool copy_if_fallback)
{
    bool const is_form_xobject = oh.isFormXObject();
    auto dict = is_form_xobject ? oh.getDict() : oh;
    auto result = dict.getKey(name);
    bool inherited = false;

    auto const is_inheritable = [&name]() {
        return name == "/MediaBox" || name == "/CropBox" || name == "/Resources" ||
            name == "/Rotate";
    };

    if (!is_form_xobject && result.isNull() && is_inheritable()) {
        QPDFObjGen::set visited{};
        QPDFObjectHandle node = dict;
        // Stop at the first ancestor that has the key, at a node already visited, or at a node
        // with no /Parent.
        while (!inherited && visited.add(node) && node.hasKey("/Parent")) {
            node = node.getKey("/Parent");
            result = node.getKey(name);
            if (!result.isNull()) {
                QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
                inherited = true;
            }
        }
    }

    if (copy_if_shared && (inherited || result.isIndirect())) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute", is_form_xobject ? 0 : 1);
        result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
    }

    if (!result.isNull() || !get_fallback) {
        return result;
    }

    result = get_fallback();
    if (copy_if_fallback && !result.isNull()) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copied fallback");
        result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
    } else {
        QTC::TC("qpdf", "QPDFPageObjectHelper used fallback without copying");
    }
    return result;
}
264
265
// Return the /MediaBox attribute; there is no fallback.
QPDFObjectHandle
QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
{
    QPDFObjectHandle media_box = getAttribute("/MediaBox", copy_if_shared);
    return media_box;
}
270
271
// Return the /CropBox attribute, falling back to the media box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getCropBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto const media_box_fallback = [this, copy_if_shared]() {
        return this->getMediaBox(copy_if_shared);
    };
    return getAttribute("/CropBox", copy_if_shared, media_box_fallback, copy_if_fallback);
}
280
281
// Return the /TrimBox attribute, falling back to the crop box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getTrimBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto const crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return this->getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/TrimBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
292
293
// Return the /ArtBox attribute, falling back to the crop box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getArtBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto const crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return this->getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/ArtBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
304
305
// Return the /BleedBox attribute, falling back to the crop box when it is absent.
QPDFObjectHandle
QPDFPageObjectHelper::getBleedBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto const crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return this->getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/BleedBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
316
317
void
318
QPDFPageObjectHelper::forEachXObject(
319
    bool recursive,
320
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
321
        action,
322
    std::function<bool(QPDFObjectHandle)> selector)
323
31.8k
{
324
31.8k
    QTC::TC(
325
31.8k
        "qpdf",
326
31.8k
        "QPDFPageObjectHelper::forEachXObject",
327
31.8k
        recursive ? (this->oh.isFormXObject() ? 0 : 1) : (this->oh.isFormXObject() ? 2 : 3));
328
31.8k
    QPDFObjGen::set seen;
329
31.8k
    std::list<QPDFPageObjectHelper> queue;
330
31.8k
    queue.push_back(*this);
331
63.6k
    while (!queue.empty()) {
332
31.8k
        auto& ph = queue.front();
333
31.8k
        if (seen.add(ph)) {
334
31.8k
            auto xobj_dict = ph.getAttribute("/Resources", false).getKeyIfDict("/XObject");
335
31.8k
            if (xobj_dict.isDictionary()) {
336
6.94k
                for (auto const& key: xobj_dict.getKeys()) {
337
6.94k
                    QPDFObjectHandle obj = xobj_dict.getKey(key);
338
6.94k
                    if ((!selector) || selector(obj)) {
339
206
                        action(obj, xobj_dict, key);
340
206
                    }
341
6.94k
                    if (recursive && obj.isFormXObject()) {
342
0
                        queue.emplace_back(obj);
343
0
                    }
344
6.94k
                }
345
2.25k
            }
346
31.8k
        }
347
31.8k
        queue.pop_front();
348
31.8k
    }
349
31.8k
}
350
351
void
352
QPDFPageObjectHelper::forEachImage(
353
    bool recursive,
354
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
355
        action)
356
31.8k
{
357
31.8k
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isImage(); });
358
31.8k
}
359
360
void
361
QPDFPageObjectHelper::forEachFormXObject(
362
    bool recursive,
363
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
364
        action)
365
0
{
366
0
    forEachXObject(recursive, action, [](QPDFObjectHandle obj) { return obj.isFormXObject(); });
367
0
}
368
369
std::map<std::string, QPDFObjectHandle>
370
QPDFPageObjectHelper::getPageImages()
371
0
{
372
0
    return getImages();
373
0
}
374
375
std::map<std::string, QPDFObjectHandle>
376
QPDFPageObjectHelper::getImages()
377
31.8k
{
378
31.8k
    std::map<std::string, QPDFObjectHandle> result;
379
31.8k
    forEachImage(
380
31.8k
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
381
206
            result[key] = obj;
382
206
        });
383
31.8k
    return result;
384
31.8k
}
385
386
std::map<std::string, QPDFObjectHandle>
387
QPDFPageObjectHelper::getFormXObjects()
388
0
{
389
0
    std::map<std::string, QPDFObjectHandle> result;
390
0
    forEachFormXObject(
391
0
        false, [&result](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
392
0
            result[key] = obj;
393
0
        });
394
0
    return result;
395
0
}
396
397
void
398
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
399
0
{
400
0
    if (shallow) {
401
0
        QPDFObjectHandle resources = getAttribute("/Resources", true);
402
        // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
403
        // other pages.
404
0
        resources.mergeResources("<< /XObject << >> >>"_qpdf);
405
0
        InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
406
0
        Pl_Buffer b("new page content");
407
0
        bool filtered = false;
408
0
        try {
409
0
            filterContents(&iit, &b);
410
0
            filtered = true;
411
0
        } catch (std::exception& e) {
412
0
            this->oh.warnIfPossible(
413
0
                std::string("Unable to filter content stream: ") + e.what() +
414
0
                "; not attempting to externalize inline images"
415
0
                " from this stream");
416
0
        }
417
0
        if (filtered && iit.any_images) {
418
0
            if (this->oh.isFormXObject()) {
419
0
                this->oh.replaceStreamData(
420
0
                    b.getBufferSharedPointer(),
421
0
                    QPDFObjectHandle::newNull(),
422
0
                    QPDFObjectHandle::newNull());
423
0
            } else {
424
0
                this->oh.replaceKey(
425
0
                    "/Contents",
426
0
                    QPDFObjectHandle::newStream(&this->oh.getQPDF(), b.getBufferSharedPointer()));
427
0
            }
428
0
        }
429
0
    } else {
430
0
        externalizeInlineImages(min_size, true);
431
0
        forEachFormXObject(
432
0
            true, [min_size](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
433
0
                QPDFPageObjectHelper(obj).externalizeInlineImages(min_size, true);
434
0
            });
435
0
    }
436
0
}
437
438
std::vector<QPDFAnnotationObjectHelper>
439
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
440
73.0k
{
441
73.0k
    std::vector<QPDFAnnotationObjectHelper> result;
442
73.0k
    QPDFObjectHandle annots = this->oh.getKey("/Annots");
443
73.0k
    if (annots.isArray()) {
444
12.0k
        int nannots = annots.getArrayNItems();
445
322k
        for (int i = 0; i < nannots; ++i) {
446
310k
            QPDFObjectHandle annot = annots.getArrayItem(i);
447
310k
            if (annot.isDictionaryOfType("", only_subtype)) {
448
71.0k
                result.emplace_back(annot);
449
71.0k
            }
450
310k
        }
451
12.0k
    }
452
73.0k
    return result;
453
73.0k
}
454
455
std::vector<QPDFObjectHandle>
456
QPDFPageObjectHelper::getPageContents()
457
0
{
458
0
    return this->oh.getPageContents();
459
0
}
460
461
void
462
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
463
4.87k
{
464
4.87k
    this->oh.addPageContents(contents, first);
465
4.87k
}
466
467
void
468
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
469
0
{
470
0
    this->oh.rotatePage(angle, relative);
471
0
}
472
473
void
474
QPDFPageObjectHelper::coalesceContentStreams()
475
34.1k
{
476
34.1k
    this->oh.coalesceContentStreams();
477
34.1k
}
478
479
void
480
QPDFPageObjectHelper::parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks)
481
0
{
482
0
    parseContents(callbacks);
483
0
}
484
485
void
486
QPDFPageObjectHelper::parseContents(QPDFObjectHandle::ParserCallbacks* callbacks)
487
33.8k
{
488
33.8k
    if (this->oh.isFormXObject()) {
489
0
        this->oh.parseAsContents(callbacks);
490
33.8k
    } else {
491
33.8k
        this->oh.parsePageContents(callbacks);
492
33.8k
    }
493
33.8k
}
494
495
void
496
QPDFPageObjectHelper::filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
497
0
{
498
0
    return filterContents(filter, next);
499
0
}
500
501
void
502
QPDFPageObjectHelper::filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
503
0
{
504
0
    if (this->oh.isFormXObject()) {
505
0
        this->oh.filterAsContents(filter, next);
506
0
    } else {
507
0
        this->oh.filterPageContents(filter, next);
508
0
    }
509
0
}
510
511
void
512
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
513
0
{
514
0
    pipeContents(p);
515
0
}
516
517
void
518
QPDFPageObjectHelper::pipeContents(Pipeline* p)
519
0
{
520
0
    if (this->oh.isFormXObject()) {
521
0
        this->oh.pipeStreamData(p, 0, qpdf_dl_specialized);
522
0
    } else {
523
0
        this->oh.pipePageContents(p);
524
0
    }
525
0
}
526
527
void
528
QPDFPageObjectHelper::addContentTokenFilter(
529
    std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter)
530
0
{
531
0
    if (this->oh.isFormXObject()) {
532
0
        this->oh.addTokenFilter(token_filter);
533
0
    } else {
534
0
        this->oh.addContentTokenFilter(token_filter);
535
0
    }
536
0
}
537
538
bool
539
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
540
    QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
541
0
{
542
0
    bool is_page = (!ph.oh.isFormXObject());
543
0
    if (!is_page) {
544
0
        QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
545
0
    }
546
547
0
    ResourceFinder rf;
548
0
    try {
549
0
        auto q = ph.oh.getOwningQPDF();
550
0
        size_t before_nw = (q ? q->numWarnings() : 0);
551
0
        ph.parseContents(&rf);
552
0
        size_t after_nw = (q ? q->numWarnings() : 0);
553
0
        if (after_nw > before_nw) {
554
0
            ph.oh.warnIfPossible("Bad token found while scanning content stream; "
555
0
                                 "not attempting to remove unreferenced objects from"
556
0
                                 " this object");
557
0
            return false;
558
0
        }
559
0
    } catch (std::exception& e) {
560
0
        QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
561
0
        ph.oh.warnIfPossible(
562
0
            std::string("Unable to parse content stream: ") + e.what() +
563
0
            "; not attempting to remove unreferenced objects"
564
0
            " from this object");
565
0
        return false;
566
0
    }
567
568
    // We will walk through /Font and /XObject dictionaries, removing any resources that are not
569
    // referenced. We must make copies of resource dictionaries down into the dictionaries are
570
    // mutating to prevent mutating one dictionary from having the side effect of mutating the one
571
    // it was copied from.
572
0
    QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
573
0
    std::vector<QPDFObjectHandle> rdicts;
574
0
    std::set<std::string> known_names;
575
0
    std::vector<std::string> to_filter = {"/Font", "/XObject"};
576
0
    if (resources.isDictionary()) {
577
0
        for (auto const& iter: to_filter) {
578
0
            QPDFObjectHandle dict = resources.getKey(iter);
579
0
            if (dict.isDictionary()) {
580
0
                dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy());
581
0
                rdicts.push_back(dict);
582
0
                auto keys = dict.getKeys();
583
0
                known_names.insert(keys.begin(), keys.end());
584
0
            }
585
0
        }
586
0
    }
587
588
0
    std::set<std::string> local_unresolved;
589
0
    auto names_by_rtype = rf.getNamesByResourceType();
590
0
    for (auto const& i1: to_filter) {
591
0
        for (auto const& n_iter: names_by_rtype[i1]) {
592
0
            std::string const& name = n_iter.first;
593
0
            if (!known_names.count(name)) {
594
0
                unresolved.insert(name);
595
0
                local_unresolved.insert(name);
596
0
            }
597
0
        }
598
0
    }
599
    // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
600
    // which case names were resolved from the containing page. This behavior seems to be widely
601
    // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved
602
    // names, some viewers fail to resolve them, and others allow them to be inherited from the page
603
    // or from another form XObjects that contains them. Since this behavior is inconsistent across
604
    // viewers, we consider an unresolved name when a resources dictionary is present to be reason
605
    // not to remove unreferenced resources. An unresolved name in the absence of a resource
606
    // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
607
    // unresolved names, and for page objects, we avoid removing any such names found in nested form
608
    // XObjects.
609
610
0
    if ((!local_unresolved.empty()) && resources.isDictionary()) {
611
        // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
612
        // looking at names that are referencing fonts and XObjects, but until we're certain that we
613
        // know the meaning of every name in a content stream, we don't want to give warnings that
614
        // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
615
        // there's nothing to be done about it, so there's no good reason to issue a warning. The
616
        // only sad thing is that it was a false positive that alerted me to a logic error in the
617
        // code, and any future such errors would now be hidden.
618
0
        QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
619
0
        return false;
620
0
    }
621
622
0
    for (auto& dict: rdicts) {
623
0
        for (auto const& key: dict.getKeys()) {
624
0
            if (is_page && unresolved.count(key)) {
625
                // This name is referenced by some nested form xobject, so don't remove it.
626
0
                QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
627
0
            } else if (!rf.getNames().count(key)) {
628
0
                dict.removeKey(key);
629
0
            }
630
0
        }
631
0
    }
632
0
    return true;
633
0
}
634
635
void
636
QPDFPageObjectHelper::removeUnreferencedResources()
637
0
{
638
    // Accumulate a list of unresolved names across all nested form XObjects.
639
0
    std::set<std::string> unresolved;
640
0
    bool any_failures = false;
641
0
    forEachFormXObject(
642
0
        true,
643
0
        [&any_failures, &unresolved](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
644
0
            if (!removeUnreferencedResourcesHelper(QPDFPageObjectHelper(obj), unresolved)) {
645
0
                any_failures = true;
646
0
            }
647
0
        });
648
0
    if (this->oh.isFormXObject() || (!any_failures)) {
649
0
        removeUnreferencedResourcesHelper(*this, unresolved);
650
0
    }
651
0
}
652
653
// Return a helper for a new indirect object that is a shallow copy of this page. The owning QPDF
// is fetched first, using an error message that applies when the page is a direct object.
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
    QPDF& owner =
        this->oh.getQPDF("QPDFPageObjectHelper::shallowCopyPage called with a direct object");
    QPDFObjectHandle copy = this->oh.shallowCopy();
    return QPDFPageObjectHelper(owner.makeIndirectObject(copy));
}
661
662
// Return the matrix that applies this page's /Rotate and /UserUnit values, or, when invert is
// true, the matrix for the inverse scale and the complementary rotation (360 - rotate).
//
// The identity matrix is returned when the trim box is not a rectangle, when neither /Rotate nor
// /UserUnit is present, or when inversion is requested and the scale is zero. Rotation values other
// than 90, 180, and 270 are ignored and only scaling is applied.
QPDFObjectHandle::Matrix
QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
{
    QPDFObjectHandle::Matrix matrix(1, 0, 0, 1, 0, 0);
    QPDFObjectHandle bbox = getTrimBox(false);
    if (!bbox.isRectangle()) {
        return matrix;
    }
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (rotate_obj.isNull() && scale_obj.isNull()) {
        return matrix;
    }

    QPDFObjectHandle::Rectangle rect = bbox.getArrayAsRectangle();
    double const width = rect.urx - rect.llx;
    double const height = rect.ury - rect.lly;
    double scale = (scale_obj.isNumber() ? scale_obj.getNumericValue() : 1.0);
    // The rotation comes from the document and may be any int. It is held in a wider type so that
    // "360 - rotate" below cannot overflow (undefined behavior for int) when the stored value is
    // close to the most negative int.
    long long rotate = (rotate_obj.isInteger() ? rotate_obj.getIntValueAsInt() : 0);
    if (invert) {
        if (scale == 0.0) {
            // A zero scale cannot be inverted.
            return matrix;
        }
        scale = 1.0 / scale;
        rotate = 360 - rotate;
    }

    // Ignore invalid rotation angle
    switch (rotate) {
    case 90:
        matrix = QPDFObjectHandle::Matrix(0, -scale, scale, 0, 0, width * scale);
        break;
    case 180:
        matrix = QPDFObjectHandle::Matrix(-scale, 0, 0, -scale, width * scale, height * scale);
        break;
    case 270:
        matrix = QPDFObjectHandle::Matrix(0, scale, -scale, 0, height * scale, 0);
        break;
    default:
        matrix = QPDFObjectHandle::Matrix(scale, 0, 0, scale, 0, 0);
        break;
    }
    return matrix;
}
704
705
// Create a new stream in this page's owning QPDF that represents the page as a form XObject.
//
// The stream dictionary gets /Type /XObject, /Subtype /Form, shallow copies of the page's
// /Resources and /Group attributes, and the page's trim box as /BBox. If the trim box is not a
// rectangle, a warning is issued but the XObject is still created. The stream data is supplied by
// a ContentProvider built from the page. When handle_transformations is true and the page has a
// /Rotate or /UserUnit attribute, /Matrix is set from getMatrixForTransformations().
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
    auto result =
        this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object")
            .newStream();
    QPDFObjectHandle newdict = result.getDict();
    newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
    newdict.replaceKey("/Resources", getAttribute("/Resources", false).shallowCopy());
    newdict.replaceKey("/Group", getAttribute("/Group", false).shallowCopy());
    QPDFObjectHandle bbox = getTrimBox(false).shallowCopy();
    if (!bbox.isRectangle()) {
        this->oh.warnIfPossible("bounding box is invalid; form"
                                " XObject created from page will not work");
    }
    newdict.replaceKey("/BBox", bbox);
    // make_shared replaces a naked new. The variable is declared with the base-class pointer type
    // so that the same replaceStreamData overload is selected as before.
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider =
        std::make_shared<ContentProvider>(this->oh);
    result.replaceStreamData(provider, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (handle_transformations && (!(rotate_obj.isNull() && scale_obj.isNull()))) {
        newdict.replaceKey("/Matrix", QPDFObjectHandle::newArray(getMatrixForTransformations()));
    }

    return result;
}
733
734
QPDFMatrix
735
QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
736
    QPDFObjectHandle fo,
737
    QPDFObjectHandle::Rectangle rect,
738
    bool invert_transformations,
739
    bool allow_shrink,
740
    bool allow_expand)
741
0
{
742
    // Calculate the transformation matrix that will place the given form XObject fully inside the
743
    // given rectangle, center and shrinking or expanding as needed if requested.
744
745
    // When rendering a form XObject, the transformation in the graphics state (cm) is applied first
746
    // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be
747
    // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting
748
    // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We
749
    // want to create a transformation matrix that make the form XObject's bounding box land in
750
    // exactly the right spot.
751
752
0
    QPDFObjectHandle fdict = fo.getDict();
753
0
    QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
754
0
    if (!bbox_obj.isRectangle()) {
755
0
        return {};
756
0
    }
757
758
0
    QPDFMatrix wmatrix; // work matrix
759
0
    QPDFMatrix tmatrix; // "to" matrix
760
0
    QPDFMatrix fmatrix; // "from" matrix
761
0
    if (invert_transformations) {
762
        // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows
763
        // the overlaid form XObject's to be absolute rather than relative to properties of the
764
        // destination page. tmatrix is part of the computed transformation matrix.
765
0
        tmatrix = QPDFMatrix(getMatrixForTransformations(true));
766
0
        wmatrix.concat(tmatrix);
767
0
    }
768
0
    if (fdict.getKey("/Matrix").isMatrix()) {
769
        // fmatrix is the transformation matrix that is applied to the form XObject itself. We need
770
        // this for calculations, but we don't explicitly use it in the final result because the PDF
771
        // rendering system automatically applies this last before
772
        // drawing the form XObject.
773
0
        fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
774
0
        wmatrix.concat(fmatrix);
775
0
    }
776
777
    // The current wmatrix handles transformation from the form xobject and, if requested, the
778
    // destination page. Next, we have to adjust this for scale and position.
779
780
    // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit
781
    // within the destination rectangle.
782
783
    // Transform bounding box
784
0
    QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
785
0
    QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox);
786
787
    // Calculate a scale factor, if needed. Shrink or expand if needed and allowed.
788
0
    if ((T.urx == T.llx) || (T.ury == T.lly)) {
789
        // avoid division by zero
790
0
        return {};
791
0
    }
792
0
    double rect_w = rect.urx - rect.llx;
793
0
    double rect_h = rect.ury - rect.lly;
794
0
    double t_w = T.urx - T.llx;
795
0
    double t_h = T.ury - T.lly;
796
0
    double xscale = rect_w / t_w;
797
0
    double yscale = rect_h / t_h;
798
0
    double scale = (xscale < yscale ? xscale : yscale);
799
0
    if (scale > 1.0) {
800
0
        if (!allow_expand) {
801
0
            scale = 1.0;
802
0
        }
803
0
    } else if (scale < 1.0) {
804
0
        if (!allow_shrink) {
805
0
            scale = 1.0;
806
0
        }
807
0
    }
808
809
    // Step 2: figure out what translation is required to get the rectangle to the right spot:
810
    // centered within the destination.
811
0
    wmatrix = QPDFMatrix();
812
0
    wmatrix.scale(scale, scale);
813
0
    wmatrix.concat(tmatrix);
814
0
    wmatrix.concat(fmatrix);
815
816
0
    T = wmatrix.transformRectangle(bbox);
817
0
    double t_cx = (T.llx + T.urx) / 2.0;
818
0
    double t_cy = (T.lly + T.ury) / 2.0;
819
0
    double r_cx = (rect.llx + rect.urx) / 2.0;
820
0
    double r_cy = (rect.lly + rect.ury) / 2.0;
821
0
    double tx = r_cx - t_cx;
822
0
    double ty = r_cy - t_cy;
823
824
    // Now we can calculate the final matrix. The final matrix does not include fmatrix because that
825
    // is applied automatically by the PDF interpreter.
826
0
    QPDFMatrix cm;
827
0
    cm.translate(tx, ty);
828
0
    cm.scale(scale, scale);
829
0
    cm.concat(tmatrix);
830
0
    return cm;
831
0
}
832
833
std::string
834
QPDFPageObjectHelper::placeFormXObject(
835
    QPDFObjectHandle fo,
836
    std::string const& name,
837
    QPDFObjectHandle::Rectangle rect,
838
    bool invert_transformations,
839
    bool allow_shrink,
840
    bool allow_expand)
841
0
{
842
0
    QPDFMatrix cm;
843
0
    return placeFormXObject(fo, name, rect, cm, invert_transformations, allow_shrink, allow_expand);
844
0
}
845
846
std::string
847
QPDFPageObjectHelper::placeFormXObject(
848
    QPDFObjectHandle fo,
849
    std::string const& name,
850
    QPDFObjectHandle::Rectangle rect,
851
    QPDFMatrix& cm,
852
    bool invert_transformations,
853
    bool allow_shrink,
854
    bool allow_expand)
855
0
{
856
0
    cm = getMatrixForFormXObjectPlacement(
857
0
        fo, rect, invert_transformations, allow_shrink, allow_expand);
858
0
    return ("q\n" + cm.unparse() + " cm\n" + name + " Do\n" + "Q\n");
859
0
}
860
861
void
862
QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
863
0
{
864
0
    QPDF& qpdf =
865
0
        this->oh.getQPDF("QPDFPageObjectHelper::flattenRotation called with a direct object");
866
0
    auto rotate_oh = this->oh.getKey("/Rotate");
867
0
    int rotate = 0;
868
0
    if (rotate_oh.isInteger()) {
869
0
        rotate = rotate_oh.getIntValueAsInt();
870
0
    }
871
0
    if (!((rotate == 90) || (rotate == 180) || (rotate == 270))) {
872
0
        return;
873
0
    }
874
0
    auto mediabox = this->oh.getKey("/MediaBox");
875
0
    if (!mediabox.isRectangle()) {
876
0
        return;
877
0
    }
878
0
    auto media_rect = mediabox.getArrayAsRectangle();
879
880
0
    std::vector<std::string> boxes = {
881
0
        "/MediaBox",
882
0
        "/CropBox",
883
0
        "/BleedBox",
884
0
        "/TrimBox",
885
0
        "/ArtBox",
886
0
    };
887
0
    for (auto const& boxkey: boxes) {
888
0
        auto box = this->oh.getKey(boxkey);
889
0
        if (!box.isRectangle()) {
890
0
            continue;
891
0
        }
892
0
        auto rect = box.getArrayAsRectangle();
893
0
        decltype(rect) new_rect;
894
895
        // How far are the edges of our rectangle from the edges of the media box?
896
0
        auto left_x = rect.llx - media_rect.llx;
897
0
        auto right_x = media_rect.urx - rect.urx;
898
0
        auto bottom_y = rect.lly - media_rect.lly;
899
0
        auto top_y = media_rect.ury - rect.ury;
900
901
        // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
902
        // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the
903
        // corners to be the correct distance away from the corners of the mediabox.
904
0
        switch (rotate) {
905
0
        case 90:
906
0
            new_rect.llx = media_rect.lly + bottom_y;
907
0
            new_rect.urx = media_rect.ury - top_y;
908
0
            new_rect.lly = media_rect.llx + right_x;
909
0
            new_rect.ury = media_rect.urx - left_x;
910
0
            break;
911
912
0
        case 180:
913
0
            new_rect.llx = media_rect.llx + right_x;
914
0
            new_rect.urx = media_rect.urx - left_x;
915
0
            new_rect.lly = media_rect.lly + top_y;
916
0
            new_rect.ury = media_rect.ury - bottom_y;
917
0
            break;
918
919
0
        case 270:
920
0
            new_rect.llx = media_rect.lly + top_y;
921
0
            new_rect.urx = media_rect.ury - bottom_y;
922
0
            new_rect.lly = media_rect.llx + left_x;
923
0
            new_rect.ury = media_rect.urx - right_x;
924
0
            break;
925
926
0
        default:
927
            // ignore
928
0
            break;
929
0
        }
930
931
0
        this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
932
0
    }
933
934
    // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
935
    // with the origin point being the same offset from the lower left corner of the media box.
936
    // These calculations have been verified empirically with various
937
    // PDF readers.
938
0
    QPDFMatrix cm(0, 0, 0, 0, 0, 0);
939
0
    switch (rotate) {
940
0
    case 90:
941
0
        cm.b = -1;
942
0
        cm.c = 1;
943
0
        cm.f = media_rect.urx + media_rect.llx;
944
0
        break;
945
946
0
    case 180:
947
0
        cm.a = -1;
948
0
        cm.d = -1;
949
0
        cm.e = media_rect.urx + media_rect.llx;
950
0
        cm.f = media_rect.ury + media_rect.lly;
951
0
        break;
952
953
0
    case 270:
954
0
        cm.b = 1;
955
0
        cm.c = -1;
956
0
        cm.e = media_rect.ury + media_rect.lly;
957
0
        break;
958
959
0
    default:
960
0
        break;
961
0
    }
962
0
    std::string cm_str = std::string("q\n") + cm.unparse() + " cm\n";
963
0
    this->oh.addPageContents(QPDFObjectHandle::newStream(&qpdf, cm_str), true);
964
0
    this->oh.addPageContents(qpdf.newStream("\nQ\n"), false);
965
0
    this->oh.removeKey("/Rotate");
966
0
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
967
0
    if (!rotate_obj.isNull()) {
968
0
        QTC::TC("qpdf", "QPDFPageObjectHelper flatten inherit rotate");
969
0
        this->oh.replaceKey("/Rotate", QPDFObjectHandle::newInteger(0));
970
0
    }
971
972
0
    QPDFObjectHandle annots = this->oh.getKey("/Annots");
973
0
    if (annots.isArray()) {
974
0
        std::vector<QPDFObjectHandle> new_annots;
975
0
        std::vector<QPDFObjectHandle> new_fields;
976
0
        std::set<QPDFObjGen> old_fields;
977
0
        std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
978
0
        if (!afdh) {
979
0
            afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(qpdf);
980
0
            afdh = afdhph.get();
981
0
        }
982
0
        afdh->transformAnnotations(annots, new_annots, new_fields, old_fields, cm);
983
0
        afdh->removeFormFields(old_fields);
984
0
        for (auto const& f: new_fields) {
985
0
            afdh->addFormField(QPDFFormFieldObjectHelper(f));
986
0
        }
987
0
        this->oh.replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots));
988
0
    }
989
0
}
990
991
void
992
QPDFPageObjectHelper::copyAnnotations(
993
    QPDFPageObjectHelper from_page,
994
    QPDFMatrix const& cm,
995
    QPDFAcroFormDocumentHelper* afdh,
996
    QPDFAcroFormDocumentHelper* from_afdh)
997
0
{
998
0
    auto old_annots = from_page.getObjectHandle().getKey("/Annots");
999
0
    if (!old_annots.isArray()) {
1000
0
        return;
1001
0
    }
1002
1003
0
    QPDF& from_qpdf = from_page.getObjectHandle().getQPDF(
1004
0
        "QPDFPageObjectHelper::copyAnnotations: from page is a direct object");
1005
0
    QPDF& this_qpdf =
1006
0
        this->oh.getQPDF("QPDFPageObjectHelper::copyAnnotations: this page is a direct object");
1007
1008
0
    std::vector<QPDFObjectHandle> new_annots;
1009
0
    std::vector<QPDFObjectHandle> new_fields;
1010
0
    std::set<QPDFObjGen> old_fields;
1011
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
1012
0
    std::shared_ptr<QPDFAcroFormDocumentHelper> from_afdhph;
1013
0
    if (!afdh) {
1014
0
        afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(this_qpdf);
1015
0
        afdh = afdhph.get();
1016
0
    }
1017
0
    if (&this_qpdf == &from_qpdf) {
1018
0
        from_afdh = afdh;
1019
0
    } else if (from_afdh) {
1020
0
        if (from_afdh->getQPDF().getUniqueId() != from_qpdf.getUniqueId()) {
1021
0
            throw std::logic_error("QPDFAcroFormDocumentHelper::copyAnnotations: from_afdh"
1022
0
                                   " is not from the same QPDF as from_page");
1023
0
        }
1024
0
    } else {
1025
0
        from_afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(from_qpdf);
1026
0
        from_afdh = from_afdhph.get();
1027
0
    }
1028
1029
0
    afdh->transformAnnotations(
1030
0
        old_annots, new_annots, new_fields, old_fields, cm, &from_qpdf, from_afdh);
1031
0
    afdh->addAndRenameFormFields(new_fields);
1032
0
    auto annots = this->oh.getKey("/Annots");
1033
0
    if (!annots.isArray()) {
1034
0
        annots = this->oh.replaceKeyAndGetNew("/Annots", QPDFObjectHandle::newArray());
1035
0
    }
1036
0
    for (auto const& annot: new_annots) {
1037
0
        annots.appendItem(annot);
1038
0
    }
1039
0
}