Coverage Report

Created: 2025-08-28 06:32

/src/qpdf/libqpdf/QPDF.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/qpdf-config.h> // include first for large file support
2
3
#include <qpdf/QPDF_private.hh>
4
5
#include <array>
6
#include <atomic>
7
#include <cstring>
8
#include <limits>
9
#include <map>
10
#include <regex>
11
#include <sstream>
12
#include <vector>
13
14
#include <qpdf/FileInputSource.hh>
15
#include <qpdf/InputSource_private.hh>
16
#include <qpdf/OffsetInputSource.hh>
17
#include <qpdf/Pipeline.hh>
18
#include <qpdf/QPDFExc.hh>
19
#include <qpdf/QPDFLogger.hh>
20
#include <qpdf/QPDFObjectHandle_private.hh>
21
#include <qpdf/QPDFObject_private.hh>
22
#include <qpdf/QPDFParser.hh>
23
#include <qpdf/QTC.hh>
24
#include <qpdf/QUtil.hh>
25
#include <qpdf/Util.hh>
26
27
using namespace qpdf;
28
using namespace std::literals;
29
30
// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
31
// being static as well.
32
std::string const QPDF::qpdf_version(QPDF_VERSION);
33
34
static char const* EMPTY_PDF = (
35
    // force line break
36
    "%PDF-1.3\n"
37
    "1 0 obj\n"
38
    "<< /Type /Catalog /Pages 2 0 R >>\n"
39
    "endobj\n"
40
    "2 0 obj\n"
41
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
42
    "endobj\n"
43
    "xref\n"
44
    "0 3\n"
45
    "0000000000 65535 f \n"
46
    "0000000009 00000 n \n"
47
    "0000000058 00000 n \n"
48
    "trailer << /Size 3 /Root 1 0 R >>\n"
49
    "startxref\n"
50
    "110\n"
51
    "%%EOF\n");
52
53
namespace
54
{
55
    class InvalidInputSource: public InputSource
56
    {
57
      public:
58
        ~InvalidInputSource() override = default;
59
        qpdf_offset_t
60
        findAndSkipNextEOL() override
61
0
        {
62
0
            throwException();
63
0
            return 0;
64
0
        }
65
        std::string const&
66
        getName() const override
67
0
        {
68
0
            static std::string name("closed input source");
69
0
            return name;
70
0
        }
71
        qpdf_offset_t
72
        tell() override
73
0
        {
74
0
            throwException();
75
0
            return 0;
76
0
        }
77
        void
78
        seek(qpdf_offset_t offset, int whence) override
79
0
        {
80
0
            throwException();
81
0
        }
82
        void
83
        rewind() override
84
0
        {
85
0
            throwException();
86
0
        }
87
        size_t
88
        read(char* buffer, size_t length) override
89
0
        {
90
0
            throwException();
91
0
            return 0;
92
0
        }
93
        void
94
        unreadCh(char ch) override
95
0
        {
96
0
            throwException();
97
0
        }
98
99
      private:
100
        void
101
        throwException()
102
0
        {
103
0
            throw std::logic_error(
104
0
                "QPDF operation attempted on a QPDF object with no input "
105
0
                "source. QPDF operations are invalid before processFile (or "
106
0
                "another process method) or after closeInputSource");
107
0
        }
108
    };
109
} // namespace
110
111
QPDF::ForeignStreamData::ForeignStreamData(
112
    std::shared_ptr<EncryptionParameters> encp,
113
    std::shared_ptr<InputSource> file,
114
    QPDFObjGen foreign_og,
115
    qpdf_offset_t offset,
116
    size_t length,
117
    QPDFObjectHandle local_dict,
118
    bool is_root_metadata) :
119
0
    encp(encp),
120
0
    file(file),
121
0
    foreign_og(foreign_og),
122
0
    offset(offset),
123
0
    length(length),
124
0
    local_dict(local_dict),
125
0
    is_root_metadata(is_root_metadata)
126
0
{
127
0
}
128
129
QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
130
0
    QPDFObjectHandle::StreamDataProvider(true),
131
0
    destination_qpdf(destination_qpdf)
132
0
{
133
0
}
134
135
bool
136
QPDF::CopiedStreamDataProvider::provideStreamData(
137
    QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
138
0
{
139
0
    std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
140
0
    bool result = false;
141
0
    if (foreign_data.get()) {
142
0
        result = destination_qpdf.pipeForeignStreamData(
143
0
            foreign_data, pipeline, suppress_warnings, will_retry);
144
0
        QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
145
0
    } else {
146
0
        auto foreign_stream = foreign_streams[og];
147
0
        result = foreign_stream.pipeStreamData(
148
0
            pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
149
0
        QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
150
0
    }
151
0
    return result;
152
0
}
153
154
void
155
QPDF::CopiedStreamDataProvider::registerForeignStream(
156
    QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
157
0
{
158
0
    this->foreign_streams[local_og] = foreign_stream;
159
0
}
160
161
void
162
QPDF::CopiedStreamDataProvider::registerForeignStream(
163
    QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
164
0
{
165
0
    this->foreign_stream_data[local_og] = foreign_stream;
166
0
}
167
168
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
169
84.3k
    qpdf(qpdf),
170
84.3k
    og(og)
171
84.3k
{
172
84.3k
}
173
174
std::string const&
175
QPDF::QPDFVersion()
176
0
{
177
    // The C API relies on this being a static value.
178
0
    return QPDF::qpdf_version;
179
0
}
180
181
QPDF::Members::Members() :
182
19.3k
    log(QPDFLogger::defaultLogger()),
183
19.3k
    file(new InvalidInputSource()),
184
19.3k
    encp(new EncryptionParameters)
185
19.3k
{
186
19.3k
}
187
188
QPDF::QPDF() :
189
19.3k
    m(std::make_unique<Members>())
190
19.3k
{
191
19.3k
    m->tokenizer.allowEOF();
192
    // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
193
    // the lifetime of this running application.
194
19.3k
    static std::atomic<unsigned long long> unique_id{0};
195
19.3k
    m->unique_id = unique_id.fetch_add(1ULL);
196
19.3k
}
197
198
// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache
199
// (future Objects::Entry) to centralize all QPDF access to QPDFObject.
200
class Disconnect: BaseHandle
201
{
202
  public:
203
    Disconnect(std::shared_ptr<QPDFObject> const& obj) :
204
228k
        BaseHandle(obj)
205
228k
    {
206
228k
    }
207
    void
208
    disconnect()
209
228k
    {
210
228k
        BaseHandle::disconnect(false);
211
228k
        if (raw_type_code() != ::ot_null) {
212
84.6k
            obj->value = QPDF_Destroyed();
213
84.6k
        }
214
228k
    }
215
};
216
217
QPDF::~QPDF()
218
19.3k
{
219
    // If two objects are mutually referential (through each object having an array or dictionary
220
    // that contains an indirect reference to the other), the circular references in the
221
    // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
222
    // in the object cache, which is those objects that we read from the file, and break all
223
    // resolved indirect references by replacing them with an internal object type representing that
224
    // they have been destroyed. Note that we can't break references like this at any time when the
225
    // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
226
    // are reachable from this object to release their association with this QPDF. Direct objects
227
    // are not destroyed since they can be moved to other QPDF objects safely.
228
229
    // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
230
    // the xref table anyway just to prevent any possibility of resolve() succeeding.
231
19.3k
    m->xref_table.clear();
232
228k
    for (auto const& iter: m->obj_cache) {
233
228k
        Disconnect(iter.second.object).disconnect();
234
228k
    }
235
19.3k
}
236
237
std::shared_ptr<QPDF>
238
QPDF::create()
239
19.3k
{
240
19.3k
    return std::make_shared<QPDF>();
241
19.3k
}
242
243
void
244
QPDF::processFile(char const* filename, char const* password)
245
0
{
246
0
    auto* fi = new FileInputSource(filename);
247
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
248
0
}
249
250
void
251
QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
252
0
{
253
0
    auto* fi = new FileInputSource(description, filep, close_file);
254
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
255
0
}
256
257
void
258
QPDF::processMemoryFile(
259
    char const* description, char const* buf, size_t length, char const* password)
260
0
{
261
0
    auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length});
262
0
    processInputSource(is, password);
263
0
}
264
265
void
266
QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
267
19.3k
{
268
19.3k
    m->file = source;
269
19.3k
    parse(password);
270
19.3k
}
271
272
void
273
QPDF::closeInputSource()
274
0
{
275
0
    m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
276
0
}
277
278
void
279
QPDF::setPasswordIsHexKey(bool val)
280
0
{
281
0
    m->provided_password_is_hex_key = val;
282
0
}
283
284
void
285
QPDF::emptyPDF()
286
0
{
287
0
    processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
288
0
}
289
290
void
291
QPDF::registerStreamFilter(
292
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
293
0
{
294
0
    qpdf::Stream::registerStreamFilter(filter_name, factory);
295
0
}
296
297
void
298
QPDF::setIgnoreXRefStreams(bool val)
299
0
{
300
0
    m->ignore_xref_streams = val;
301
0
}
302
303
std::shared_ptr<QPDFLogger>
304
QPDF::getLogger()
305
0
{
306
0
    return m->log;
307
0
}
308
309
void
310
QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
311
0
{
312
0
    m->log = l;
313
0
}
314
315
void
316
QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
317
0
{
318
0
    setLogger(QPDFLogger::create());
319
0
    m->log->setOutputStreams(out, err);
320
0
}
321
322
void
323
QPDF::setSuppressWarnings(bool val)
324
0
{
325
0
    m->suppress_warnings = val;
326
0
}
327
328
void
329
QPDF::setMaxWarnings(size_t val)
330
19.3k
{
331
19.3k
    m->max_warnings = val;
332
19.3k
}
333
334
void
335
QPDF::setAttemptRecovery(bool val)
336
0
{
337
0
    m->attempt_recovery = val;
338
0
}
339
340
void
341
QPDF::setImmediateCopyFrom(bool val)
342
0
{
343
0
    m->immediate_copy_from = val;
344
0
}
345
346
std::vector<QPDFExc>
347
QPDF::getWarnings()
348
0
{
349
0
    std::vector<QPDFExc> result = m->warnings;
350
0
    m->warnings.clear();
351
0
    return result;
352
0
}
353
354
bool
355
QPDF::anyWarnings() const
356
0
{
357
0
    return !m->warnings.empty();
358
0
}
359
360
size_t
361
QPDF::numWarnings() const
362
0
{
363
0
    return m->warnings.size();
364
0
}
365
366
bool
367
QPDF::validatePDFVersion(char const*& p, std::string& version)
368
4.44k
{
369
4.44k
    if (!util::is_digit(*p)) {
370
2.10k
        return false;
371
2.10k
    }
372
6.81k
    while (util::is_digit(*p)) {
373
4.48k
        version.append(1, *p++);
374
4.48k
    }
375
2.33k
    if (!(*p == '.' && util::is_digit(*(p + 1)))) {
376
1.02k
        return false;
377
1.02k
    }
378
1.31k
    version.append(1, *p++);
379
2.96k
    while (util::is_digit(*p)) {
380
1.65k
        version.append(1, *p++);
381
1.65k
    }
382
1.31k
    return true;
383
2.33k
}
384
385
bool
386
QPDF::findHeader()
387
4.45k
{
388
4.45k
    qpdf_offset_t global_offset = m->file->tell();
389
4.45k
    std::string line = m->file->readLine(1024);
390
4.45k
    char const* p = line.data();
391
4.45k
    if (strncmp(p, "%PDF-", 5) != 0) {
392
0
        throw std::logic_error("findHeader is not looking at %PDF-");
393
0
    }
394
4.45k
    p += 5;
395
4.45k
    std::string version;
396
    // Note: The string returned by line.data() is always null-terminated. The code below never
397
    // overruns the buffer because a null character always short-circuits further advancement.
398
4.45k
    if (!validatePDFVersion(p, version)) {
399
3.12k
        return false;
400
3.12k
    }
401
1.32k
    m->pdf_version = version;
402
1.32k
    if (global_offset != 0) {
403
        // Empirical evidence strongly suggests (codified in PDF 2.0 spec) that when there is
404
        // leading material prior to the PDF header, all explicit offsets in the file are such that
405
        // 0 points to the beginning of the header.
406
851
        QTC::TC("qpdf", "QPDF global offset");
407
851
        m->file = std::make_shared<OffsetInputSource>(m->file, global_offset);
408
851
    }
409
1.32k
    return true;
410
4.45k
}
411
412
void
413
QPDF::warn(QPDFExc const& e)
414
673k
{
415
673k
    if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
416
45.3k
        stopOnError("Too many warnings - file is too badly damaged");
417
45.3k
    }
418
673k
    m->warnings.push_back(e);
419
673k
    if (!m->suppress_warnings) {
420
627k
        *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
421
627k
    }
422
673k
}
423
424
void
425
QPDF::warn(
426
    qpdf_error_code_e error_code,
427
    std::string const& object,
428
    qpdf_offset_t offset,
429
    std::string const& message)
430
7.33k
{
431
7.33k
    warn(QPDFExc(error_code, getFilename(), object, offset, message));
432
7.33k
}
433
434
QPDFObjectHandle
435
QPDF::newReserved()
436
0
{
437
0
    return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());
438
0
}
439
440
QPDFObjectHandle
441
QPDF::newIndirectNull()
442
0
{
443
0
    return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());
444
0
}
445
446
QPDFObjectHandle
447
QPDF::newStream()
448
0
{
449
0
    return makeIndirectObject(
450
0
        qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
451
0
}
452
453
QPDFObjectHandle
454
QPDF::newStream(std::shared_ptr<Buffer> data)
455
0
{
456
0
    auto result = newStream();
457
0
    result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
458
0
    return result;
459
0
}
460
461
QPDFObjectHandle
462
QPDF::newStream(std::string const& data)
463
0
{
464
0
    auto result = newStream();
465
0
    result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
466
0
    return result;
467
0
}
468
469
QPDFObjectHandle
470
QPDF::getObject(int objid, int generation)
471
18.0k
{
472
18.0k
    return getObject(QPDFObjGen(objid, generation));
473
18.0k
}
474
475
QPDFObjectHandle
476
QPDF::getObjectByObjGen(QPDFObjGen og)
477
0
{
478
0
    return getObject(og);
479
0
}
480
481
QPDFObjectHandle
482
QPDF::getObjectByID(int objid, int generation)
483
0
{
484
0
    return getObject(QPDFObjGen(objid, generation));
485
0
}
486
487
QPDFObjectHandle
488
QPDF::copyForeignObject(QPDFObjectHandle foreign)
489
0
{
490
    // Here's an explanation of what's going on here.
491
    //
492
    // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
493
    // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
494
    // foreign QPDF into the local QPDF, we have to replace all indirect object references with
495
    // references to the corresponding object in the local file.
496
    //
497
    // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
498
    // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
499
    // mapping from the foreign ObjGen to the local QPDFObjectHandle.
500
    //
501
    // To copy, we do a deep traversal of the foreign object with loop detection to discover all
502
    // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
503
    // indirect object, we check to see if we have already created a local copy of it. If not, we
504
    // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
505
    // mapping from the foreign object ID to the new object. While we
506
    // do this, we keep a list of objects to copy.
507
    //
508
    // Once we are done with the traversal, we copy all the objects that we need to copy. However,
509
    // the copies will contain indirect object IDs that refer to objects in the foreign file. We
510
    // need to replace them with references to objects in the local file. This is what
511
    // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
512
    // all the indirect references replaced with new ones in the local context, we can replace the
513
    // local reserved object with the copy. This mechanism allows us to copy objects with circular
514
    // references in any order.
515
516
    // For streams, rather than copying the objects, we set up the stream data to pull from the
517
    // original stream by using a stream data provider. This is done in a manner that doesn't
518
    // require the original QPDF object but may require the original source of the stream data with
519
    // special handling for immediate_copy_from. This logic is also in
520
    // replaceForeignIndirectObjects.
521
522
    // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
523
    // use case to copy pages this way if the intention is to not update the pages tree.
524
0
    if (!foreign.isIndirect()) {
525
0
        QTC::TC("qpdf", "QPDF copyForeign direct");
526
0
        throw std::logic_error("QPDF::copyForeign called with direct object handle");
527
0
    }
528
0
    QPDF& other = foreign.getQPDF();
529
0
    if (&other == this) {
530
0
        QTC::TC("qpdf", "QPDF copyForeign not foreign");
531
0
        throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
532
0
    }
533
534
0
    ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
535
0
    if (!obj_copier.visiting.empty()) {
536
0
        throw std::logic_error(
537
0
            "obj_copier.visiting is not empty at the beginning of copyForeignObject");
538
0
    }
539
540
    // Make sure we have an object in this file for every referenced object in the old file.
541
    // obj_copier.object_map maps foreign QPDFObjGen to local objects.  For everything new that we
542
    // have to copy, the local object will be a reservation, unless it is a stream, in which case
543
    // the local object will already be a stream.
544
0
    reserveObjects(foreign, obj_copier, true);
545
546
0
    if (!obj_copier.visiting.empty()) {
547
0
        throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
548
0
    }
549
550
    // Copy any new objects and replace the reservations.
551
0
    for (auto& to_copy: obj_copier.to_copy) {
552
0
        QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);
553
0
        if (!to_copy.isStream()) {
554
0
            QPDFObjGen og(to_copy.getObjGen());
555
0
            replaceReserved(obj_copier.object_map[og], copy);
556
0
        }
557
0
    }
558
0
    obj_copier.to_copy.clear();
559
560
0
    auto og = foreign.getObjGen();
561
0
    if (!obj_copier.object_map.contains(og)) {
562
0
        warn(damagedPDF(
563
0
            other.getFilename() + " object " + og.unparse(' '),
564
0
            foreign.getParsedOffset(),
565
0
            "unexpected reference to /Pages object while copying foreign object; replacing with "
566
0
            "null"));
567
0
        return QPDFObjectHandle::newNull();
568
0
    }
569
0
    return obj_copier.object_map[foreign.getObjGen()];
570
0
}
571
572
void
573
QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
574
0
{
575
0
    auto foreign_tc = foreign.getTypeCode();
576
0
    if (foreign_tc == ::ot_reserved) {
577
0
        throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
578
0
    }
579
580
0
    if (foreign.isPagesObject()) {
581
0
        QTC::TC("qpdf", "QPDF not copying pages object");
582
0
        return;
583
0
    }
584
585
0
    if (foreign.isIndirect()) {
586
0
        QPDFObjGen foreign_og(foreign.getObjGen());
587
0
        if (!obj_copier.visiting.add(foreign_og)) {
588
0
            QTC::TC("qpdf", "QPDF loop reserving objects");
589
0
            return;
590
0
        }
591
0
        if (obj_copier.object_map.contains(foreign_og)) {
592
0
            QTC::TC("qpdf", "QPDF already reserved object");
593
0
            if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
594
0
                obj_copier.visiting.erase(foreign);
595
0
                return;
596
0
            }
597
0
        } else {
598
0
            QTC::TC("qpdf", "QPDF copy indirect");
599
0
            obj_copier.object_map[foreign_og] =
600
0
                foreign.isStream() ? newStream() : newIndirectNull();
601
0
            if ((!top) && foreign.isPageObject()) {
602
0
                QTC::TC("qpdf", "QPDF not crossing page boundary");
603
0
                obj_copier.visiting.erase(foreign_og);
604
0
                return;
605
0
            }
606
0
        }
607
0
        obj_copier.to_copy.push_back(foreign);
608
0
    }
609
610
0
    if (foreign_tc == ::ot_array) {
611
0
        QTC::TC("qpdf", "QPDF reserve array");
612
0
        for (auto const& item: foreign.as_array()) {
613
0
            reserveObjects(item, obj_copier, false);
614
0
        }
615
0
    } else if (foreign_tc == ::ot_dictionary) {
616
0
        QTC::TC("qpdf", "QPDF reserve dictionary");
617
0
        for (auto const& item: foreign.as_dictionary()) {
618
0
            if (!item.second.null()) {
619
0
                reserveObjects(item.second, obj_copier, false);
620
0
            }
621
0
        }
622
0
    } else if (foreign_tc == ::ot_stream) {
623
0
        QTC::TC("qpdf", "QPDF reserve stream");
624
0
        reserveObjects(foreign.getDict(), obj_copier, false);
625
0
    }
626
627
0
    obj_copier.visiting.erase(foreign);
628
0
}
629
630
QPDFObjectHandle
631
QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
632
0
{
633
0
    auto foreign_tc = foreign.getTypeCode();
634
0
    QPDFObjectHandle result;
635
0
    if ((!top) && foreign.isIndirect()) {
636
0
        QTC::TC("qpdf", "QPDF replace indirect");
637
0
        auto mapping = obj_copier.object_map.find(foreign.getObjGen());
638
0
        if (mapping == obj_copier.object_map.end()) {
639
            // This case would occur if this is a reference to a Pages object that we didn't
640
            // traverse into.
641
0
            QTC::TC("qpdf", "QPDF replace foreign indirect with null");
642
0
            result = QPDFObjectHandle::newNull();
643
0
        } else {
644
0
            result = mapping->second;
645
0
        }
646
0
    } else if (foreign_tc == ::ot_array) {
647
0
        QTC::TC("qpdf", "QPDF replace array");
648
0
        result = QPDFObjectHandle::newArray();
649
0
        for (auto const& item: foreign.as_array()) {
650
0
            result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false));
651
0
        }
652
0
    } else if (foreign_tc == ::ot_dictionary) {
653
0
        QTC::TC("qpdf", "QPDF replace dictionary");
654
0
        result = QPDFObjectHandle::newDictionary();
655
0
        for (auto const& [key, value]: foreign.as_dictionary()) {
656
0
            if (!value.null()) {
657
0
                result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
658
0
            }
659
0
        }
660
0
    } else if (foreign_tc == ::ot_stream) {
661
0
        QTC::TC("qpdf", "QPDF replace stream");
662
0
        result = obj_copier.object_map[foreign.getObjGen()];
663
0
        QPDFObjectHandle dict = result.getDict();
664
0
        QPDFObjectHandle old_dict = foreign.getDict();
665
0
        for (auto const& [key, value]: old_dict.as_dictionary()) {
666
0
            if (!value.null()) {
667
0
                dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
668
0
            }
669
0
        }
670
0
        copyStreamData(result, foreign);
671
0
    } else {
672
0
        foreign.assertScalar();
673
0
        result = foreign;
674
0
        result.makeDirect();
675
0
    }
676
677
0
    if (top && (!result.isStream()) && result.isIndirect()) {
678
0
        throw std::logic_error("replacement for foreign object is indirect");
679
0
    }
680
681
0
    return result;
682
0
}
683
684
void
685
QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
686
0
{
687
    // This method was originally written for copying foreign streams, but it is used by
688
    // QPDFObjectHandle to copy streams from the same QPDF object as well.
689
690
0
    QPDFObjectHandle dict = result.getDict();
691
0
    QPDFObjectHandle old_dict = foreign.getDict();
692
0
    if (m->copied_stream_data_provider == nullptr) {
693
0
        m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
694
0
        m->copied_streams =
695
0
            std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
696
0
    }
697
0
    QPDFObjGen local_og(result.getObjGen());
698
    // Copy information from the foreign stream so we can pipe its data later without keeping the
699
    // original QPDF object around.
700
701
0
    QPDF& foreign_stream_qpdf =
702
0
        foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
703
704
0
    auto stream = foreign.as_stream();
705
0
    if (!stream) {
706
0
        throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
707
0
    }
708
0
    std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();
709
0
    if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
710
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
711
        // source stream so that if the source stream is copied multiple times, we don't have to
712
        // keep duplicating the memory.
713
0
        QTC::TC("qpdf", "QPDF immediate copy stream data");
714
0
        foreign.replaceStreamData(
715
0
            foreign.getRawStreamData(),
716
0
            old_dict.getKey("/Filter"),
717
0
            old_dict.getKey("/DecodeParms"));
718
0
        stream_buffer = stream.getStreamDataBuffer();
719
0
    }
720
0
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
721
0
        stream.getStreamDataProvider();
722
0
    if (stream_buffer.get()) {
723
0
        QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
724
0
        result.replaceStreamData(
725
0
            stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
726
0
    } else if (stream_provider.get()) {
727
        // In this case, the remote stream's QPDF must stay in scope.
728
0
        QTC::TC("qpdf", "QPDF copy foreign stream with provider");
729
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
730
0
        result.replaceStreamData(
731
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
732
0
    } else {
733
0
        auto foreign_stream_data = std::make_shared<ForeignStreamData>(
734
0
            foreign_stream_qpdf.m->encp,
735
0
            foreign_stream_qpdf.m->file,
736
0
            foreign,
737
0
            foreign.getParsedOffset(),
738
0
            stream.getLength(),
739
0
            dict,
740
0
            stream.isRootMetadata());
741
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
742
0
        result.replaceStreamData(
743
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
744
0
    }
745
0
}
746
747
unsigned long long
748
QPDF::getUniqueId() const
749
0
{
750
0
    return m->unique_id;
751
0
}
752
753
std::string
754
QPDF::getFilename() const
755
45.8k
{
756
45.8k
    return m->file->getName();
757
45.8k
}
758
759
PDFVersion
760
QPDF::getVersionAsPDFVersion()
761
0
{
762
0
    int major = 1;
763
0
    int minor = 3;
764
0
    int extension_level = getExtensionLevel();
765
766
0
    std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");
767
0
    std::smatch match;
768
0
    if (std::regex_search(m->pdf_version, match, v)) {
769
0
        major = QUtil::string_to_int(match[1].str().c_str());
770
0
        minor = QUtil::string_to_int(match[2].str().c_str());
771
0
    }
772
773
0
    return {major, minor, extension_level};
774
0
}
775
776
std::string
777
QPDF::getPDFVersion() const
778
0
{
779
0
    return m->pdf_version;
780
0
}
781
782
int
783
QPDF::getExtensionLevel()
784
0
{
785
0
    int result = 0;
786
0
    QPDFObjectHandle obj = getRoot();
787
0
    if (obj.hasKey("/Extensions")) {
788
0
        obj = obj.getKey("/Extensions");
789
0
        if (obj.isDictionary() && obj.hasKey("/ADBE")) {
790
0
            obj = obj.getKey("/ADBE");
791
0
            if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
792
0
                obj = obj.getKey("/ExtensionLevel");
793
0
                if (obj.isInteger()) {
794
0
                    result = obj.getIntValueAsInt();
795
0
                }
796
0
            }
797
0
        }
798
0
    }
799
0
    return result;
800
0
}
801
802
QPDFObjectHandle
803
QPDF::getTrailer()
804
0
{
805
0
    return m->trailer;
806
0
}
807
808
QPDFObjectHandle
809
QPDF::getRoot()
810
17.4k
{
811
17.4k
    QPDFObjectHandle root = m->trailer.getKey("/Root");
812
17.4k
    if (!root.isDictionary()) {
813
6.43k
        throw damagedPDF("", -1, "unable to find /Root dictionary");
814
11.0k
    } else if (
815
        // Check_mode is an interim solution to request #810 pending a more comprehensive review of
816
        // the approach to more extensive checks and warning levels.
817
11.0k
        m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
818
0
        warn(damagedPDF("", -1, "catalog /Type entry missing or invalid"));
819
0
        root.replaceKey("/Type", "/Catalog"_qpdf);
820
0
    }
821
11.0k
    return root;
822
17.4k
}
823
824
std::map<QPDFObjGen, QPDFXRefEntry>
825
QPDF::getXRefTable()
826
0
{
827
0
    return getXRefTableInternal();
828
0
}
829
830
std::map<QPDFObjGen, QPDFXRefEntry> const&
831
QPDF::getXRefTableInternal()
832
0
{
833
0
    if (!m->parsed) {
834
0
        throw std::logic_error("QPDF::getXRefTable called before parsing.");
835
0
    }
836
837
0
    return m->xref_table;
838
0
}
839
840
bool
841
QPDF::pipeStreamData(
842
    std::shared_ptr<EncryptionParameters> encp,
843
    std::shared_ptr<InputSource> file,
844
    QPDF& qpdf_for_warning,
845
    QPDFObjGen og,
846
    qpdf_offset_t offset,
847
    size_t length,
848
    QPDFObjectHandle stream_dict,
849
    bool is_root_metadata,
850
    Pipeline* pipeline,
851
    bool suppress_warnings,
852
    bool will_retry)
853
9.42k
{
854
9.42k
    std::unique_ptr<Pipeline> to_delete;
855
9.42k
    if (encp->encrypted) {
856
36
        decryptStream(
857
36
            encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete);
858
36
    }
859
860
9.42k
    bool attempted_finish = false;
861
9.42k
    try {
862
9.42k
        auto buf = file->read(length, offset);
863
9.42k
        if (buf.size() != length) {
864
0
            throw damagedPDF(
865
0
                *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
866
0
        }
867
9.42k
        pipeline->write(buf.data(), length);
868
9.42k
        attempted_finish = true;
869
9.42k
        pipeline->finish();
870
9.42k
        return true;
871
9.42k
    } catch (QPDFExc& e) {
872
0
        if (!suppress_warnings) {
873
0
            qpdf_for_warning.warn(e);
874
0
        }
875
1.80k
    } catch (std::exception& e) {
876
1.80k
        if (!suppress_warnings) {
877
1.80k
            QTC::TC("qpdf", "QPDF decoding error warning");
878
1.80k
            qpdf_for_warning.warn(
879
                // line-break
880
1.80k
                damagedPDF(
881
1.80k
                    *file,
882
1.80k
                    "",
883
1.80k
                    file->getLastOffset(),
884
1.80k
                    ("error decoding stream data for object " + og.unparse(' ') + ": " +
885
1.80k
                     e.what())));
886
1.80k
            if (will_retry) {
887
0
                qpdf_for_warning.warn(
888
                    // line-break
889
0
                    damagedPDF(
890
0
                        *file,
891
0
                        "",
892
0
                        file->getLastOffset(),
893
0
                        "stream will be re-processed without filtering to avoid data loss"));
894
0
            }
895
1.80k
        }
896
1.80k
    }
897
1.75k
    if (!attempted_finish) {
898
1.59k
        try {
899
1.59k
            pipeline->finish();
900
1.59k
        } catch (std::exception&) {
901
            // ignore
902
1.09k
        }
903
1.59k
    }
904
1.75k
    return false;
905
1.75k
}
906
907
bool
908
QPDF::pipeStreamData(
909
    QPDFObjGen og,
910
    qpdf_offset_t offset,
911
    size_t length,
912
    QPDFObjectHandle stream_dict,
913
    bool is_root_metadata,
914
    Pipeline* pipeline,
915
    bool suppress_warnings,
916
    bool will_retry)
917
9.42k
{
918
9.42k
    return pipeStreamData(
919
9.42k
        m->encp,
920
9.42k
        m->file,
921
9.42k
        *this,
922
9.42k
        og,
923
9.42k
        offset,
924
9.42k
        length,
925
9.42k
        stream_dict,
926
9.42k
        is_root_metadata,
927
9.42k
        pipeline,
928
9.42k
        suppress_warnings,
929
9.42k
        will_retry);
930
9.42k
}
931
932
bool
933
QPDF::pipeForeignStreamData(
934
    std::shared_ptr<ForeignStreamData> foreign,
935
    Pipeline* pipeline,
936
    bool suppress_warnings,
937
    bool will_retry)
938
0
{
939
0
    if (foreign->encp->encrypted) {
940
0
        QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
941
0
    }
942
0
    return pipeStreamData(
943
0
        foreign->encp,
944
0
        foreign->file,
945
0
        *this,
946
0
        foreign->foreign_og,
947
0
        foreign->offset,
948
0
        foreign->length,
949
0
        foreign->local_dict,
950
0
        foreign->is_root_metadata,
951
0
        pipeline,
952
0
        suppress_warnings,
953
0
        will_retry);
954
0
}
955
956
// Throw a generic exception when we lack context for something more specific. New code should not
957
// use this. This method exists to improve somewhat from calling assert in very old code.
958
void
959
QPDF::stopOnError(std::string const& message)
960
45.3k
{
961
45.3k
    throw damagedPDF("", message);
962
45.3k
}
963
964
// Return an exception of type qpdf_e_damaged_pdf.
965
QPDFExc
966
QPDF::damagedPDF(
967
    InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
968
37.5k
{
969
37.5k
    return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true};
970
37.5k
}
971
972
// Return an exception of type qpdf_e_damaged_pdf.  The object is taken from
973
// m->last_object_description.
974
QPDFExc
975
QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
976
35.7k
{
977
35.7k
    return damagedPDF(input, m->last_object_description, offset, message);
978
35.7k
}
979
980
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file.
981
QPDFExc
982
QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
983
256k
{
984
256k
    return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true};
985
256k
}
986
987
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file and the
988
// offset from .m->file->getLastOffset().
989
QPDFExc
990
QPDF::damagedPDF(std::string const& object, std::string const& message)
991
77.2k
{
992
77.2k
    return damagedPDF(object, m->file->getLastOffset(), message);
993
77.2k
}
994
995
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
996
// from .m->last_object_description.
997
QPDFExc
998
QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
999
42.1k
{
1000
42.1k
    return damagedPDF(m->last_object_description, offset, message);
1001
42.1k
}
1002
1003
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file, the object
1004
// from m->last_object_description and the offset from m->file->getLastOffset().
1005
QPDFExc
1006
QPDF::damagedPDF(std::string const& message)
1007
31.8k
{
1008
31.8k
    return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
1009
31.8k
}
1010
1011
bool
1012
QPDF::everCalledGetAllPages() const
1013
0
{
1014
0
    return m->ever_called_get_all_pages;
1015
0
}
1016
1017
bool
1018
QPDF::everPushedInheritedAttributesToPages() const
1019
0
{
1020
0
    return m->ever_pushed_inherited_attributes_to_pages;
1021
0
}
1022
1023
void
1024
QPDF::removeSecurityRestrictions()
1025
0
{
1026
0
    auto root = getRoot();
1027
0
    root.removeKey("/Perms");
1028
0
    auto acroform = root.getKey("/AcroForm");
1029
0
    if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
1030
0
        acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
1031
0
    }
1032
0
}