Coverage Report

Created: 2025-07-01 06:10

/src/qpdf/libqpdf/QPDF.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/qpdf-config.h> // include first for large file support
2
3
#include <qpdf/QPDF_private.hh>
4
5
#include <array>
6
#include <atomic>
7
#include <cstring>
8
#include <limits>
9
#include <map>
10
#include <regex>
11
#include <sstream>
12
#include <vector>
13
14
#include <qpdf/BufferInputSource.hh>
15
#include <qpdf/FileInputSource.hh>
16
#include <qpdf/InputSource_private.hh>
17
#include <qpdf/OffsetInputSource.hh>
18
#include <qpdf/Pipeline.hh>
19
#include <qpdf/QPDFExc.hh>
20
#include <qpdf/QPDFLogger.hh>
21
#include <qpdf/QPDFObjectHandle_private.hh>
22
#include <qpdf/QPDFObject_private.hh>
23
#include <qpdf/QPDFParser.hh>
24
#include <qpdf/QTC.hh>
25
#include <qpdf/QUtil.hh>
26
#include <qpdf/Util.hh>
27
28
using namespace qpdf;
29
using namespace std::literals;
30
31
// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
32
// being static as well.
33
std::string const QPDF::qpdf_version(QPDF_VERSION);
34
35
// Minimal PDF document fed to the parser by emptyPDF(): a catalog (object 1) that points at an
// empty page tree (object 2). The offsets in the xref table (9 and 58) and after startxref (110)
// are byte positions within this exact text, so they must be recomputed if any line changes.
static constexpr char const* EMPTY_PDF =
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n";
53
54
namespace
55
{
56
    class InvalidInputSource: public InputSource
57
    {
58
      public:
59
        ~InvalidInputSource() override = default;
60
        qpdf_offset_t
61
        findAndSkipNextEOL() override
62
0
        {
63
0
            throwException();
64
0
            return 0;
65
0
        }
66
        std::string const&
67
        getName() const override
68
0
        {
69
0
            static std::string name("closed input source");
70
0
            return name;
71
0
        }
72
        qpdf_offset_t
73
        tell() override
74
0
        {
75
0
            throwException();
76
0
            return 0;
77
0
        }
78
        void
79
        seek(qpdf_offset_t offset, int whence) override
80
0
        {
81
0
            throwException();
82
0
        }
83
        void
84
        rewind() override
85
0
        {
86
0
            throwException();
87
0
        }
88
        size_t
89
        read(char* buffer, size_t length) override
90
0
        {
91
0
            throwException();
92
0
            return 0;
93
0
        }
94
        void
95
        unreadCh(char ch) override
96
0
        {
97
0
            throwException();
98
0
        }
99
100
      private:
101
        void
102
        throwException()
103
0
        {
104
0
            throw std::logic_error(
105
0
                "QPDF operation attempted on a QPDF object with no input "
106
0
                "source. QPDF operations are invalid before processFile (or "
107
0
                "another process method) or after closeInputSource");
108
0
        }
109
    };
110
} // namespace
111
112
QPDF::ForeignStreamData::ForeignStreamData(
113
    std::shared_ptr<EncryptionParameters> encp,
114
    std::shared_ptr<InputSource> file,
115
    QPDFObjGen foreign_og,
116
    qpdf_offset_t offset,
117
    size_t length,
118
    QPDFObjectHandle local_dict,
119
    bool is_root_metadata) :
120
0
    encp(encp),
121
0
    file(file),
122
0
    foreign_og(foreign_og),
123
0
    offset(offset),
124
0
    length(length),
125
0
    local_dict(local_dict),
126
0
    is_root_metadata(is_root_metadata)
127
0
{
128
0
}
129
130
QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
131
0
    QPDFObjectHandle::StreamDataProvider(true),
132
0
    destination_qpdf(destination_qpdf)
133
0
{
134
0
}
135
136
bool
137
QPDF::CopiedStreamDataProvider::provideStreamData(
138
    QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
139
0
{
140
0
    std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
141
0
    bool result = false;
142
0
    if (foreign_data.get()) {
143
0
        result = destination_qpdf.pipeForeignStreamData(
144
0
            foreign_data, pipeline, suppress_warnings, will_retry);
145
0
        QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
146
0
    } else {
147
0
        auto foreign_stream = foreign_streams[og];
148
0
        result = foreign_stream.pipeStreamData(
149
0
            pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
150
0
        QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
151
0
    }
152
0
    return result;
153
0
}
154
155
void
156
QPDF::CopiedStreamDataProvider::registerForeignStream(
157
    QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
158
0
{
159
0
    this->foreign_streams[local_og] = foreign_stream;
160
0
}
161
162
void
163
QPDF::CopiedStreamDataProvider::registerForeignStream(
164
    QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
165
0
{
166
0
    this->foreign_stream_data[local_og] = foreign_stream;
167
0
}
168
169
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
170
0
    qpdf(qpdf),
171
0
    og(og)
172
0
{
173
0
}
174
175
std::string const&
176
QPDF::QPDFVersion()
177
0
{
178
    // The C API relies on this being a static value.
179
0
    return QPDF::qpdf_version;
180
0
}
181
182
// Start with the default logger, an input source that rejects every operation, and fresh
// encryption parameters.
QPDF::Members::Members() :
    log(QPDFLogger::defaultLogger()),
    file(std::make_shared<InvalidInputSource>()),
    encp(new EncryptionParameters)
{
}
188
189
QPDF::QPDF() :
    m(std::make_unique<Members>())
{
    m->tokenizer.allowEOF();
    // Each QPDF gets an ID that is unique among all QPDF objects created during the lifetime of
    // the running application. The counter is atomic; post-increment yields the value held before
    // the increment.
    static std::atomic<unsigned long long> next_unique_id{0};
    m->unique_id = next_unique_id++;
}
198
199
// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache
200
// (future Objects::Entry) to centralize all QPDF access to QPDFObject.
201
class Disconnect: BaseHandle
202
{
203
  public:
204
    Disconnect(std::shared_ptr<QPDFObject> const& obj) :
205
0
        BaseHandle(obj)
206
0
    {
207
0
    }
208
    void
209
    disconnect()
210
0
    {
211
0
        BaseHandle::disconnect(false);
212
0
        if (raw_type_code() != ::ot_null) {
213
0
            obj->value = QPDF_Destroyed();
214
0
        }
215
0
    }
216
};
217
218
QPDF::~QPDF()
{
    // Objects that refer to each other through indirect references held in arrays or dictionaries
    // form std::shared_ptr cycles that would never be released on their own. To break them, every
    // object in the object cache (the objects read from the file) is disconnected and replaced by
    // an internal "destroyed" value. This is only safe here, where the QPDF is no longer active.
    // Disconnecting also makes reachable direct QPDFObjectHandle objects drop their association
    // with this QPDF; direct objects themselves survive because they can safely be moved to other
    // QPDF objects.

    // Nobody should be using this QPDF any more, but the xref table is cleared first anyway so
    // that resolve() cannot possibly succeed.
    m->xref_table.clear();
    for (auto const& cached: m->obj_cache) {
        Disconnect handle(cached.second.object);
        handle.disconnect();
    }
}
237
238
std::shared_ptr<QPDF>
239
QPDF::create()
240
0
{
241
0
    return std::make_shared<QPDF>();
242
0
}
243
244
void
245
QPDF::processFile(char const* filename, char const* password)
246
0
{
247
0
    auto* fi = new FileInputSource(filename);
248
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
249
0
}
250
251
void
252
QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
253
0
{
254
0
    auto* fi = new FileInputSource(description, filep, close_file);
255
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
256
0
}
257
258
void
259
QPDF::processMemoryFile(
260
    char const* description, char const* buf, size_t length, char const* password)
261
0
{
262
0
    processInputSource(
263
0
        std::shared_ptr<InputSource>(
264
            // line-break
265
0
            new BufferInputSource(
266
0
                description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),
267
0
        password);
268
0
}
269
270
void
271
QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
272
0
{
273
0
    m->file = source;
274
0
    parse(password);
275
0
}
276
277
void
278
QPDF::closeInputSource()
279
0
{
280
0
    m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
281
0
}
282
283
void
284
QPDF::setPasswordIsHexKey(bool val)
285
0
{
286
0
    m->provided_password_is_hex_key = val;
287
0
}
288
289
void
290
QPDF::emptyPDF()
291
0
{
292
0
    processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
293
0
}
294
295
void
296
QPDF::registerStreamFilter(
297
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
298
0
{
299
0
    qpdf::Stream::registerStreamFilter(filter_name, factory);
300
0
}
301
302
void
303
QPDF::setIgnoreXRefStreams(bool val)
304
0
{
305
0
    m->ignore_xref_streams = val;
306
0
}
307
308
std::shared_ptr<QPDFLogger>
309
QPDF::getLogger()
310
0
{
311
0
    return m->log;
312
0
}
313
314
void
315
QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
316
0
{
317
0
    m->log = l;
318
0
}
319
320
void
321
QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
322
0
{
323
0
    setLogger(QPDFLogger::create());
324
0
    m->log->setOutputStreams(out, err);
325
0
}
326
327
void
328
QPDF::setSuppressWarnings(bool val)
329
0
{
330
0
    m->suppress_warnings = val;
331
0
}
332
333
void
334
QPDF::setMaxWarnings(size_t val)
335
0
{
336
0
    m->max_warnings = val;
337
0
}
338
339
void
340
QPDF::setAttemptRecovery(bool val)
341
0
{
342
0
    m->attempt_recovery = val;
343
0
}
344
345
void
346
QPDF::setImmediateCopyFrom(bool val)
347
0
{
348
0
    m->immediate_copy_from = val;
349
0
}
350
351
std::vector<QPDFExc>
352
QPDF::getWarnings()
353
0
{
354
0
    std::vector<QPDFExc> result = m->warnings;
355
0
    m->warnings.clear();
356
0
    return result;
357
0
}
358
359
bool
360
QPDF::anyWarnings() const
361
0
{
362
0
    return !m->warnings.empty();
363
0
}
364
365
size_t
366
QPDF::numWarnings() const
367
0
{
368
0
    return m->warnings.size();
369
0
}
370
371
bool
372
QPDF::validatePDFVersion(char const*& p, std::string& version)
373
0
{
374
0
    bool valid = util::is_digit(*p);
375
0
    if (valid) {
376
0
        while (util::is_digit(*p)) {
377
0
            version.append(1, *p++);
378
0
        }
379
0
        if ((*p == '.') && util::is_digit(*(p + 1))) {
380
0
            version.append(1, *p++);
381
0
            while (util::is_digit(*p)) {
382
0
                version.append(1, *p++);
383
0
            }
384
0
        } else {
385
0
            valid = false;
386
0
        }
387
0
    }
388
0
    return valid;
389
0
}
390
391
bool
392
QPDF::findHeader()
393
0
{
394
0
    qpdf_offset_t global_offset = m->file->tell();
395
0
    std::string line = m->file->readLine(1024);
396
0
    char const* p = line.c_str();
397
0
    if (strncmp(p, "%PDF-", 5) != 0) {
398
0
        throw std::logic_error("findHeader is not looking at %PDF-");
399
0
    }
400
0
    p += 5;
401
0
    std::string version;
402
    // Note: The string returned by line.c_str() is always null-terminated. The code below never
403
    // overruns the buffer because a null character always short-circuits further advancement.
404
0
    bool valid = validatePDFVersion(p, version);
405
0
    if (valid) {
406
0
        m->pdf_version = version;
407
0
        if (global_offset != 0) {
408
            // Empirical evidence strongly suggests that when there is leading material prior to the
409
            // PDF header, all explicit offsets in the file are such that 0 points to the beginning
410
            // of the header.
411
0
            QTC::TC("qpdf", "QPDF global offset");
412
0
            m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
413
0
        }
414
0
    }
415
0
    return valid;
416
0
}
417
418
void
419
QPDF::warn(QPDFExc const& e)
420
0
{
421
0
    if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
422
0
        stopOnError("Too many warnings - file is too badly damaged");
423
0
    }
424
0
    m->warnings.push_back(e);
425
0
    if (!m->suppress_warnings) {
426
0
        *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
427
0
    }
428
0
}
429
430
void
431
QPDF::warn(
432
    qpdf_error_code_e error_code,
433
    std::string const& object,
434
    qpdf_offset_t offset,
435
    std::string const& message)
436
0
{
437
0
    warn(QPDFExc(error_code, getFilename(), object, offset, message));
438
0
}
439
440
// Create a new indirect object whose value is a QPDF_Reserved placeholder.
QPDFObjectHandle
QPDF::newReserved()
{
    auto reserved = QPDFObject::create<QPDF_Reserved>();
    return makeIndirectFromQPDFObject(reserved);
}
445
446
// Create a new indirect object whose value is a QPDF_Null.
QPDFObjectHandle
QPDF::newIndirectNull()
{
    auto null_object = QPDFObject::create<QPDF_Null>();
    return makeIndirectFromQPDFObject(null_object);
}
451
452
// Create a new indirect stream owned by this QPDF: next free object/generation, a new empty
// dictionary, and 0 for both trailing constructor arguments.
QPDFObjectHandle
QPDF::newStream()
{
    return makeIndirectObject(
        qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
}
458
459
// Create a new stream and set its data from `data`, with null filter and decode parameters.
QPDFObjectHandle
QPDF::newStream(std::shared_ptr<Buffer> data)
{
    QPDFObjectHandle stream = newStream();
    stream.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    return stream;
}
466
467
// Create a new stream and set its data from the string, with null filter and decode parameters.
QPDFObjectHandle
QPDF::newStream(std::string const& data)
{
    QPDFObjectHandle stream = newStream();
    stream.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    return stream;
}
474
475
// Look up an object by numeric ID and generation; forwards to the QPDFObjGen overload.
QPDFObjectHandle
QPDF::getObject(int objid, int generation)
{
    QPDFObjGen const og(objid, generation);
    return getObject(og);
}
480
481
// Alternate name for getObject(QPDFObjGen); simply forwards.
QPDFObjectHandle
QPDF::getObjectByObjGen(QPDFObjGen og)
{
    return getObject(og);
}
486
487
// Alternate name for getObject(int, int); builds the QPDFObjGen and forwards.
QPDFObjectHandle
QPDF::getObjectByID(int objid, int generation)
{
    QPDFObjGen const og(objid, generation);
    return getObject(og);
}
492
493
QPDFObjectHandle
QPDF::copyForeignObject(QPDFObjectHandle foreign)
{
    // Overview.
    //
    // An indirect QPDFObjectHandle belongs to a QPDF, and its object ID and generation refer to
    // an object in that QPDF. Copying a handle from a foreign QPDF into this one therefore means
    // replacing every indirect reference with a reference to the corresponding local object.
    //
    // For each foreign QPDF we copy from, an ObjCopier holds the mapping from foreign ObjGen to
    // local QPDFObjectHandle.
    //
    // Phase 1 (reserveObjects): a deep traversal of the foreign object, with loop detection and
    // stopping at page boundaries, discovers all indirect objects. For each one not yet mapped, a
    // "reserved" local object (or, for a stream, a new stream) is allocated and recorded in the
    // map, and the foreign object is queued for copying.
    //
    // Phase 2 (replaceForeignIndirectObjects): each queued object is copied with all indirect
    // references replaced by their local counterparts, and the copy then replaces the local
    // reservation. Because reservations exist before anything is copied, objects with circular
    // references can be copied in any order.
    //
    // Streams are not copied directly. Their data is arranged to be pulled from the original
    // stream through a stream data provider, in a way that does not require the original QPDF
    // object but may require the original source of the stream data, with special handling for
    // immediate_copy_from. That logic is also reached through replaceForeignIndirectObjects.
    //
    // Using copyForeignObject on page objects is explicitly allowed. It is a documented use case
    // for copying pages when the pages tree is intentionally not updated.
    if (!foreign.isIndirect()) {
        QTC::TC("qpdf", "QPDF copyForeign direct");
        throw std::logic_error("QPDF::copyForeign called with direct object handle");
    }
    QPDF& other = foreign.getQPDF();
    if (&other == this) {
        QTC::TC("qpdf", "QPDF copyForeign not foreign");
        throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
    }

    ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
    if (!obj_copier.visiting.empty()) {
        throw std::logic_error(
            "obj_copier.visiting is not empty at the beginning of copyForeignObject");
    }

    // Ensure that every object referenced in the foreign file has a counterpart here.
    // obj_copier.object_map maps foreign QPDFObjGen to local objects. Anything new is a
    // reservation locally, except streams, which are already real streams.
    reserveObjects(foreign, obj_copier, true);

    if (!obj_copier.visiting.empty()) {
        throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
    }

    // Copy the queued objects and swap them in for their reservations.
    for (auto& pending: obj_copier.to_copy) {
        QPDFObjectHandle copy = replaceForeignIndirectObjects(pending, obj_copier, true);
        if (pending.isStream()) {
            continue;
        }
        QPDFObjGen pending_og(pending.getObjGen());
        replaceReserved(obj_copier.object_map[pending_og], copy);
    }
    obj_copier.to_copy.clear();

    auto const og = foreign.getObjGen();
    auto const mapped = obj_copier.object_map.find(og);
    if (mapped == obj_copier.object_map.end()) {
        warn(damagedPDF(
            other.getFilename() + " object " + og.unparse(' '),
            foreign.getParsedOffset(),
            "unexpected reference to /Pages object while copying foreign object; replacing with "
            "null"));
        return QPDFObjectHandle::newNull();
    }
    return mapped->second;
}
577
578
void
579
QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
580
0
{
581
0
    auto foreign_tc = foreign.getTypeCode();
582
0
    if (foreign_tc == ::ot_reserved) {
583
0
        throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
584
0
    }
585
586
0
    if (foreign.isPagesObject()) {
587
0
        QTC::TC("qpdf", "QPDF not copying pages object");
588
0
        return;
589
0
    }
590
591
0
    if (foreign.isIndirect()) {
592
0
        QPDFObjGen foreign_og(foreign.getObjGen());
593
0
        if (!obj_copier.visiting.add(foreign_og)) {
594
0
            QTC::TC("qpdf", "QPDF loop reserving objects");
595
0
            return;
596
0
        }
597
0
        if (obj_copier.object_map.contains(foreign_og)) {
598
0
            QTC::TC("qpdf", "QPDF already reserved object");
599
0
            if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
600
0
                obj_copier.visiting.erase(foreign);
601
0
                return;
602
0
            }
603
0
        } else {
604
0
            QTC::TC("qpdf", "QPDF copy indirect");
605
0
            obj_copier.object_map[foreign_og] =
606
0
                foreign.isStream() ? newStream() : newIndirectNull();
607
0
            if ((!top) && foreign.isPageObject()) {
608
0
                QTC::TC("qpdf", "QPDF not crossing page boundary");
609
0
                obj_copier.visiting.erase(foreign_og);
610
0
                return;
611
0
            }
612
0
        }
613
0
        obj_copier.to_copy.push_back(foreign);
614
0
    }
615
616
0
    if (foreign_tc == ::ot_array) {
617
0
        QTC::TC("qpdf", "QPDF reserve array");
618
0
        for (auto const& item: foreign.as_array()) {
619
0
            reserveObjects(item, obj_copier, false);
620
0
        }
621
0
    } else if (foreign_tc == ::ot_dictionary) {
622
0
        QTC::TC("qpdf", "QPDF reserve dictionary");
623
0
        for (auto const& item: foreign.as_dictionary()) {
624
0
            if (!item.second.null()) {
625
0
                reserveObjects(item.second, obj_copier, false);
626
0
            }
627
0
        }
628
0
    } else if (foreign_tc == ::ot_stream) {
629
0
        QTC::TC("qpdf", "QPDF reserve stream");
630
0
        reserveObjects(foreign.getDict(), obj_copier, false);
631
0
    }
632
633
0
    obj_copier.visiting.erase(foreign);
634
0
}
635
636
// Build the local equivalent of `foreign`.
//
// Below the top level, an indirect reference is replaced by its entry in obj_copier.object_map,
// or by null if it has none. Arrays and dictionaries are rebuilt recursively. For streams, the
// already-mapped local stream gets its dictionary entries replaced and its data arranged via
// copyStreamData. Anything else must be a scalar and is returned as a direct copy.
//
// Throws std::logic_error if a top-level, non-stream result turns out to be indirect.
QPDFObjectHandle
QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
{
    auto const type_code = foreign.getTypeCode();

    if (!top && foreign.isIndirect()) {
        QTC::TC("qpdf", "QPDF replace indirect");
        auto const mapping = obj_copier.object_map.find(foreign.getObjGen());
        if (mapping != obj_copier.object_map.end()) {
            return mapping->second;
        }
        // This happens for a reference to a Pages object that was not traversed into.
        QTC::TC("qpdf", "QPDF replace foreign indirect with null");
        return QPDFObjectHandle::newNull();
    }

    QPDFObjectHandle result;
    if (type_code == ::ot_array) {
        QTC::TC("qpdf", "QPDF replace array");
        result = QPDFObjectHandle::newArray();
        for (auto const& element: foreign.as_array()) {
            result.appendItem(replaceForeignIndirectObjects(element, obj_copier, false));
        }
    } else if (type_code == ::ot_dictionary) {
        QTC::TC("qpdf", "QPDF replace dictionary");
        result = QPDFObjectHandle::newDictionary();
        for (auto const& [key, value]: foreign.as_dictionary()) {
            if (value.null()) {
                continue;
            }
            result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
        }
    } else if (type_code == ::ot_stream) {
        QTC::TC("qpdf", "QPDF replace stream");
        result = obj_copier.object_map[foreign.getObjGen()];
        QPDFObjectHandle new_dict = result.getDict();
        QPDFObjectHandle source_dict = foreign.getDict();
        for (auto const& [key, value]: source_dict.as_dictionary()) {
            if (value.null()) {
                continue;
            }
            new_dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
        }
        copyStreamData(result, foreign);
    } else {
        foreign.assertScalar();
        result = foreign;
        result.makeDirect();
    }

    if (top && !result.isStream() && result.isIndirect()) {
        throw std::logic_error("replacement for foreign object is indirect");
    }

    return result;
}
689
690
void
691
QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
692
0
{
693
    // This method was originally written for copying foreign streams, but it is used by
694
    // QPDFObjectHandle to copy streams from the same QPDF object as well.
695
696
0
    QPDFObjectHandle dict = result.getDict();
697
0
    QPDFObjectHandle old_dict = foreign.getDict();
698
0
    if (m->copied_stream_data_provider == nullptr) {
699
0
        m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
700
0
        m->copied_streams =
701
0
            std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
702
0
    }
703
0
    QPDFObjGen local_og(result.getObjGen());
704
    // Copy information from the foreign stream so we can pipe its data later without keeping the
705
    // original QPDF object around.
706
707
0
    QPDF& foreign_stream_qpdf =
708
0
        foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
709
710
0
    auto stream = foreign.as_stream();
711
0
    if (!stream) {
712
0
        throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
713
0
    }
714
0
    std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();
715
0
    if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
716
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
717
        // source stream so that if the source stream is copied multiple times, we don't have to
718
        // keep duplicating the memory.
719
0
        QTC::TC("qpdf", "QPDF immediate copy stream data");
720
0
        foreign.replaceStreamData(
721
0
            foreign.getRawStreamData(),
722
0
            old_dict.getKey("/Filter"),
723
0
            old_dict.getKey("/DecodeParms"));
724
0
        stream_buffer = stream.getStreamDataBuffer();
725
0
    }
726
0
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
727
0
        stream.getStreamDataProvider();
728
0
    if (stream_buffer.get()) {
729
0
        QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
730
0
        result.replaceStreamData(
731
0
            stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
732
0
    } else if (stream_provider.get()) {
733
        // In this case, the remote stream's QPDF must stay in scope.
734
0
        QTC::TC("qpdf", "QPDF copy foreign stream with provider");
735
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
736
0
        result.replaceStreamData(
737
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
738
0
    } else {
739
0
        auto foreign_stream_data = std::make_shared<ForeignStreamData>(
740
0
            foreign_stream_qpdf.m->encp,
741
0
            foreign_stream_qpdf.m->file,
742
0
            foreign,
743
0
            foreign.getParsedOffset(),
744
0
            stream.getLength(),
745
0
            dict,
746
0
            stream.isRootMetadata());
747
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
748
0
        result.replaceStreamData(
749
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
750
0
    }
751
0
}
752
753
unsigned long long
754
QPDF::getUniqueId() const
755
0
{
756
0
    return m->unique_id;
757
0
}
758
759
std::string
760
QPDF::getFilename() const
761
0
{
762
0
    return m->file->getName();
763
0
}
764
765
// Return the document's version as a PDFVersion (major, minor, extension level).
//
// Major and minor are parsed from m->pdf_version; if it does not start with <digits>.<digits>
// (optionally preceded by whitespace), they default to 1 and 3. The extension level comes from
// getExtensionLevel().
PDFVersion
QPDF::getVersionAsPDFVersion()
{
    // Compiling a std::regex is expensive, so it is done once rather than on every call.
    static std::regex const version_re("^[[:space:]]*([0-9]+)\\.([0-9]+)");

    int major = 1;
    int minor = 3;
    int extension_level = getExtensionLevel();

    std::smatch match;
    if (std::regex_search(m->pdf_version, match, version_re)) {
        major = QUtil::string_to_int(match[1].str().c_str());
        minor = QUtil::string_to_int(match[2].str().c_str());
    }

    return {major, minor, extension_level};
}
781
782
std::string
783
QPDF::getPDFVersion() const
784
0
{
785
0
    return m->pdf_version;
786
0
}
787
788
int
789
QPDF::getExtensionLevel()
790
0
{
791
0
    int result = 0;
792
0
    QPDFObjectHandle obj = getRoot();
793
0
    if (obj.hasKey("/Extensions")) {
794
0
        obj = obj.getKey("/Extensions");
795
0
        if (obj.isDictionary() && obj.hasKey("/ADBE")) {
796
0
            obj = obj.getKey("/ADBE");
797
0
            if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
798
0
                obj = obj.getKey("/ExtensionLevel");
799
0
                if (obj.isInteger()) {
800
0
                    result = obj.getIntValueAsInt();
801
0
                }
802
0
            }
803
0
        }
804
0
    }
805
0
    return result;
806
0
}
807
808
// Return the handle stored in m->trailer.
QPDFObjectHandle
QPDF::getTrailer()
{
    return m->trailer;
}
813
814
// Return the trailer's /Root dictionary. Throws a damaged-PDF exception if /Root is not a
// dictionary. In check mode, a missing or wrong /Type produces a warning and is repaired to
// /Catalog.
QPDFObjectHandle
QPDF::getRoot()
{
    QPDFObjectHandle root = m->trailer.getKey("/Root");
    if (!root.isDictionary()) {
        throw damagedPDF("", -1, "unable to find /Root dictionary");
    }
    // Check_mode is an interim solution to request #810 pending a more comprehensive review of
    // the approach to more extensive checks and warning levels.
    if (m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
        warn(damagedPDF("", -1, "catalog /Type entry missing or invalid"));
        root.replaceKey("/Type", "/Catalog"_qpdf);
    }
    return root;
}
829
830
std::map<QPDFObjGen, QPDFXRefEntry>
831
QPDF::getXRefTable()
832
0
{
833
0
    return getXRefTableInternal();
834
0
}
835
836
std::map<QPDFObjGen, QPDFXRefEntry> const&
837
QPDF::getXRefTableInternal()
838
0
{
839
0
    if (!m->parsed) {
840
0
        throw std::logic_error("QPDF::getXRefTable called before parsing.");
841
0
    }
842
843
0
    return m->xref_table;
844
0
}
845
846
bool
847
QPDF::pipeStreamData(
848
    std::shared_ptr<EncryptionParameters> encp,
849
    std::shared_ptr<InputSource> file,
850
    QPDF& qpdf_for_warning,
851
    QPDFObjGen og,
852
    qpdf_offset_t offset,
853
    size_t length,
854
    QPDFObjectHandle stream_dict,
855
    bool is_root_metadata,
856
    Pipeline* pipeline,
857
    bool suppress_warnings,
858
    bool will_retry)
859
0
{
860
0
    std::unique_ptr<Pipeline> to_delete;
861
0
    if (encp->encrypted) {
862
0
        decryptStream(
863
0
            encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete);
864
0
    }
865
866
0
    bool attempted_finish = false;
867
0
    try {
868
0
        auto buf = file->read(length, offset);
869
0
        if (buf.size() != length) {
870
0
            throw damagedPDF(
871
0
                *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
872
0
        }
873
0
        pipeline->write(buf.data(), length);
874
0
        attempted_finish = true;
875
0
        pipeline->finish();
876
0
        return true;
877
0
    } catch (QPDFExc& e) {
878
0
        if (!suppress_warnings) {
879
0
            qpdf_for_warning.warn(e);
880
0
        }
881
0
    } catch (std::exception& e) {
882
0
        if (!suppress_warnings) {
883
0
            QTC::TC("qpdf", "QPDF decoding error warning");
884
0
            qpdf_for_warning.warn(
885
                // line-break
886
0
                damagedPDF(
887
0
                    *file,
888
0
                    "",
889
0
                    file->getLastOffset(),
890
0
                    ("error decoding stream data for object " + og.unparse(' ') + ": " +
891
0
                     e.what())));
892
0
            if (will_retry) {
893
0
                qpdf_for_warning.warn(
894
                    // line-break
895
0
                    damagedPDF(
896
0
                        *file,
897
0
                        "",
898
0
                        file->getLastOffset(),
899
0
                        "stream will be re-processed without filtering to avoid data loss"));
900
0
            }
901
0
        }
902
0
    }
903
0
    if (!attempted_finish) {
904
0
        try {
905
0
            pipeline->finish();
906
0
        } catch (std::exception&) {
907
            // ignore
908
0
        }
909
0
    }
910
0
    return false;
911
0
}
912
913
bool
914
QPDF::pipeStreamData(
915
    QPDFObjGen og,
916
    qpdf_offset_t offset,
917
    size_t length,
918
    QPDFObjectHandle stream_dict,
919
    bool is_root_metadata,
920
    Pipeline* pipeline,
921
    bool suppress_warnings,
922
    bool will_retry)
923
0
{
924
0
    return pipeStreamData(
925
0
        m->encp,
926
0
        m->file,
927
0
        *this,
928
0
        og,
929
0
        offset,
930
0
        length,
931
0
        stream_dict,
932
0
        is_root_metadata,
933
0
        pipeline,
934
0
        suppress_warnings,
935
0
        will_retry);
936
0
}
937
938
bool
939
QPDF::pipeForeignStreamData(
940
    std::shared_ptr<ForeignStreamData> foreign,
941
    Pipeline* pipeline,
942
    bool suppress_warnings,
943
    bool will_retry)
944
0
{
945
0
    if (foreign->encp->encrypted) {
946
0
        QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
947
0
    }
948
0
    return pipeStreamData(
949
0
        foreign->encp,
950
0
        foreign->file,
951
0
        *this,
952
0
        foreign->foreign_og,
953
0
        foreign->offset,
954
0
        foreign->length,
955
0
        foreign->local_dict,
956
0
        foreign->is_root_metadata,
957
0
        pipeline,
958
0
        suppress_warnings,
959
0
        will_retry);
960
0
}
961
962
// Throw a generic exception when we lack context for something more specific. New code should not
963
// use this. This method exists to improve somewhat from calling assert in very old code.
964
void
965
QPDF::stopOnError(std::string const& message)
966
0
{
967
0
    throw damagedPDF("", message);
968
0
}
969
970
// Return an exception of type qpdf_e_damaged_pdf.
971
QPDFExc
972
QPDF::damagedPDF(
973
    InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
974
0
{
975
0
    return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true};
976
0
}
977
978
// Return an exception of type qpdf_e_damaged_pdf.  The object is taken from
979
// m->last_object_description.
980
QPDFExc
981
QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
982
0
{
983
0
    return damagedPDF(input, m->last_object_description, offset, message);
984
0
}
985
986
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file.
987
QPDFExc
988
QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
989
0
{
990
0
    return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true};
991
0
}
992
993
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file and the
994
// offset from m->file->getLastOffset().
995
QPDFExc
QPDF::damagedPDF(std::string const& object, std::string const& message)
{
    // Report the problem at the last offset recorded by the current input source.
    qpdf_offset_t const last_offset = m->file->getLastOffset();
    return damagedPDF(object, last_offset, message);
}
1000
1001
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
1002
// from m->last_object_description.
1003
QPDFExc
QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
{
    // Attribute the problem to the most recently recorded object description.
    auto const& description = m->last_object_description;
    return damagedPDF(description, offset, message);
}
1008
1009
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file, the object
1010
// from m->last_object_description and the offset from m->file->getLastOffset().
1011
QPDFExc
1012
QPDF::damagedPDF(std::string const& message)
1013
0
{
1014
0
    return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
1015
0
}
1016
1017
bool
1018
QPDF::everCalledGetAllPages() const
1019
0
{
1020
0
    return m->ever_called_get_all_pages;
1021
0
}
1022
1023
bool
1024
QPDF::everPushedInheritedAttributesToPages() const
1025
0
{
1026
0
    return m->ever_pushed_inherited_attributes_to_pages;
1027
0
}
1028
1029
void
1030
QPDF::removeSecurityRestrictions()
1031
0
{
1032
0
    auto root = getRoot();
1033
0
    root.removeKey("/Perms");
1034
0
    auto acroform = root.getKey("/AcroForm");
1035
0
    if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
1036
0
        acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
1037
0
    }
1038
0
}