Coverage Report

Created: 2025-08-03 06:15

/src/qpdf/libqpdf/QPDF.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/qpdf-config.h> // include first for large file support
2
3
#include <qpdf/QPDF_private.hh>
4
5
#include <array>
6
#include <atomic>
7
#include <cstring>
8
#include <limits>
9
#include <map>
10
#include <regex>
11
#include <sstream>
12
#include <vector>
13
14
#include <qpdf/FileInputSource.hh>
15
#include <qpdf/InputSource_private.hh>
16
#include <qpdf/OffsetInputSource.hh>
17
#include <qpdf/Pipeline.hh>
18
#include <qpdf/QPDFExc.hh>
19
#include <qpdf/QPDFLogger.hh>
20
#include <qpdf/QPDFObjectHandle_private.hh>
21
#include <qpdf/QPDFObject_private.hh>
22
#include <qpdf/QPDFParser.hh>
23
#include <qpdf/QTC.hh>
24
#include <qpdf/QUtil.hh>
25
#include <qpdf/Util.hh>
26
27
using namespace qpdf;
28
using namespace std::literals;
29
30
// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
31
// being static as well.
32
std::string const QPDF::qpdf_version(QPDF_VERSION);
33
34
// Text of a minimal PDF: a catalog (object 1) that refers to a page tree (object 2) with no kids.
// The xref entries (9 and 58) and the startxref value (110) are byte offsets into this exact text,
// so any change to the literals below must be matched by updated offsets. Both the characters and
// the pointer are const so that the template cannot be re-pointed at run time.
static char const* const EMPTY_PDF = (
    // force line break
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
52
53
namespace
54
{
55
    class InvalidInputSource: public InputSource
56
    {
57
      public:
58
        ~InvalidInputSource() override = default;
59
        qpdf_offset_t
60
        findAndSkipNextEOL() override
61
0
        {
62
0
            throwException();
63
0
            return 0;
64
0
        }
65
        std::string const&
66
        getName() const override
67
0
        {
68
0
            static std::string name("closed input source");
69
0
            return name;
70
0
        }
71
        qpdf_offset_t
72
        tell() override
73
0
        {
74
0
            throwException();
75
0
            return 0;
76
0
        }
77
        void
78
        seek(qpdf_offset_t offset, int whence) override
79
0
        {
80
0
            throwException();
81
0
        }
82
        void
83
        rewind() override
84
0
        {
85
0
            throwException();
86
0
        }
87
        size_t
88
        read(char* buffer, size_t length) override
89
0
        {
90
0
            throwException();
91
0
            return 0;
92
0
        }
93
        void
94
        unreadCh(char ch) override
95
0
        {
96
0
            throwException();
97
0
        }
98
99
      private:
100
        void
101
        throwException()
102
0
        {
103
0
            throw std::logic_error(
104
0
                "QPDF operation attempted on a QPDF object with no input "
105
0
                "source. QPDF operations are invalid before processFile (or "
106
0
                "another process method) or after closeInputSource");
107
0
        }
108
    };
109
} // namespace
110
111
// Store, member by member, everything pipeForeignStreamData() later hands to pipeStreamData():
// the encryption parameters and input source, the foreign object's id, the offset and length
// passed in by the caller, the local stream dictionary, and the root-metadata flag.
QPDF::ForeignStreamData::ForeignStreamData(
    std::shared_ptr<EncryptionParameters> encp,
    std::shared_ptr<InputSource> file,
    QPDFObjGen foreign_og,
    qpdf_offset_t offset,
    size_t length,
    QPDFObjectHandle local_dict,
    bool is_root_metadata) :
    encp(encp),
    file(file),
    foreign_og(foreign_og),
    offset(offset),
    length(length),
    local_dict(local_dict),
    is_root_metadata(is_root_metadata)
{
}
128
129
// Construct the provider, passing `true` to the StreamDataProvider base constructor, and remember
// the QPDF that provideStreamData() pipes foreign stream data through.
QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
    QPDFObjectHandle::StreamDataProvider(true),
    destination_qpdf(destination_qpdf)
{
}
134
135
bool
136
QPDF::CopiedStreamDataProvider::provideStreamData(
137
    QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
138
0
{
139
0
    std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
140
0
    bool result = false;
141
0
    if (foreign_data.get()) {
142
0
        result = destination_qpdf.pipeForeignStreamData(
143
0
            foreign_data, pipeline, suppress_warnings, will_retry);
144
0
        QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
145
0
    } else {
146
0
        auto foreign_stream = foreign_streams[og];
147
0
        result = foreign_stream.pipeStreamData(
148
0
            pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
149
0
        QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
150
0
    }
151
0
    return result;
152
0
}
153
154
void
155
QPDF::CopiedStreamDataProvider::registerForeignStream(
156
    QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
157
0
{
158
0
    this->foreign_streams[local_og] = foreign_stream;
159
0
}
160
161
void
162
QPDF::CopiedStreamDataProvider::registerForeignStream(
163
    QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
164
0
{
165
0
    this->foreign_stream_data[local_og] = foreign_stream;
166
0
}
167
168
// Remember the QPDF and the object id/generation this decrypter was created for.
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
    qpdf(qpdf),
    og(og)
{
}
173
174
std::string const&
175
QPDF::QPDFVersion()
176
0
{
177
    // The C API relies on this being a static value.
178
0
    return QPDF::qpdf_version;
179
0
}
180
181
// Initial state: the default logger, an InvalidInputSource as the file (so that any I/O attempted
// before an input source is supplied throws), and a fresh EncryptionParameters object.
QPDF::Members::Members() :
    log(QPDFLogger::defaultLogger()),
    file(new InvalidInputSource()),
    encp(new EncryptionParameters)
{
}
187
188
// Create the Members object, allow the tokenizer to see EOF, and assign this object a unique id.
QPDF::QPDF() :
    m(std::make_unique<Members>())
{
    m->tokenizer.allowEOF();
    // The id only has to be unique among all QPDF objects allocated throughout the lifetime of
    // this running application, so an atomic counter is enough.
    static std::atomic<unsigned long long> next_unique_id{0};
    m->unique_id = next_unique_id++;
}
197
198
// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache
199
// (future Objects::Entry) to centralize all QPDF access to QPDFObject.
200
class Disconnect: BaseHandle
201
{
202
  public:
203
    Disconnect(std::shared_ptr<QPDFObject> const& obj) :
204
15.1k
        BaseHandle(obj)
205
15.1k
    {
206
15.1k
    }
207
    void
208
    disconnect()
209
15.1k
    {
210
15.1k
        BaseHandle::disconnect(false);
211
15.1k
        if (raw_type_code() != ::ot_null) {
212
10.3k
            obj->value = QPDF_Destroyed();
213
10.3k
        }
214
15.1k
    }
215
};
216
217
QPDF::~QPDF()
{
    // Objects that refer to each other (each holding an array or dictionary that contains an
    // indirect reference to the other) form cycles of std::shared_ptr that would otherwise never be
    // freed. To break the cycles, visit every entry in the object cache, i.e. every object read
    // from the file, and replace each resolved indirect object with an internal "destroyed"
    // object. This must never be done while the QPDF object is still active. Disconnecting also
    // makes all direct QPDFObjectHandle objects reachable from here release their association with
    // this QPDF; direct objects are not destroyed since they can safely be moved to other QPDF
    // objects.

    // No one can still be using this QPDF, but clear the xref table explicitly anyway so that
    // resolve() cannot possibly succeed.
    m->xref_table.clear();
    for (auto const& [og, entry]: m->obj_cache) {
        Disconnect(entry.object).disconnect();
    }
}
236
237
std::shared_ptr<QPDF>
238
QPDF::create()
239
0
{
240
0
    return std::make_shared<QPDF>();
241
0
}
242
243
void
244
QPDF::processFile(char const* filename, char const* password)
245
0
{
246
0
    auto* fi = new FileInputSource(filename);
247
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
248
0
}
249
250
void
251
QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
252
0
{
253
0
    auto* fi = new FileInputSource(description, filep, close_file);
254
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
255
0
}
256
257
void
258
QPDF::processMemoryFile(
259
    char const* description, char const* buf, size_t length, char const* password)
260
7.71k
{
261
7.71k
    auto is = std::make_shared<is::OffsetBuffer>(description, std::string_view{buf, length});
262
7.71k
    processInputSource(is, password);
263
7.71k
}
264
265
void
266
QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
267
7.71k
{
268
7.71k
    m->file = source;
269
7.71k
    parse(password);
270
7.71k
}
271
272
void
273
QPDF::closeInputSource()
274
0
{
275
0
    m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
276
0
}
277
278
void
279
QPDF::setPasswordIsHexKey(bool val)
280
0
{
281
0
    m->provided_password_is_hex_key = val;
282
0
}
283
284
void
285
QPDF::emptyPDF()
286
0
{
287
0
    processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
288
0
}
289
290
void
291
QPDF::registerStreamFilter(
292
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
293
0
{
294
0
    qpdf::Stream::registerStreamFilter(filter_name, factory);
295
0
}
296
297
void
298
QPDF::setIgnoreXRefStreams(bool val)
299
0
{
300
0
    m->ignore_xref_streams = val;
301
0
}
302
303
std::shared_ptr<QPDFLogger>
304
QPDF::getLogger()
305
0
{
306
0
    return m->log;
307
0
}
308
309
void
310
QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
311
0
{
312
0
    m->log = l;
313
0
}
314
315
void
316
QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
317
0
{
318
0
    setLogger(QPDFLogger::create());
319
0
    m->log->setOutputStreams(out, err);
320
0
}
321
322
void
323
QPDF::setSuppressWarnings(bool val)
324
0
{
325
0
    m->suppress_warnings = val;
326
0
}
327
328
void
329
QPDF::setMaxWarnings(size_t val)
330
7.71k
{
331
7.71k
    m->max_warnings = val;
332
7.71k
}
333
334
void
335
QPDF::setAttemptRecovery(bool val)
336
0
{
337
0
    m->attempt_recovery = val;
338
0
}
339
340
void
341
QPDF::setImmediateCopyFrom(bool val)
342
0
{
343
0
    m->immediate_copy_from = val;
344
0
}
345
346
std::vector<QPDFExc>
347
QPDF::getWarnings()
348
0
{
349
0
    std::vector<QPDFExc> result = m->warnings;
350
0
    m->warnings.clear();
351
0
    return result;
352
0
}
353
354
bool
355
QPDF::anyWarnings() const
356
0
{
357
0
    return !m->warnings.empty();
358
0
}
359
360
size_t
361
QPDF::numWarnings() const
362
0
{
363
0
    return m->warnings.size();
364
0
}
365
366
bool
367
QPDF::validatePDFVersion(char const*& p, std::string& version)
368
11.0k
{
369
11.0k
    bool valid = util::is_digit(*p);
370
11.0k
    if (valid) {
371
42.1k
        while (util::is_digit(*p)) {
372
31.5k
            version.append(1, *p++);
373
31.5k
        }
374
10.5k
        if ((*p == '.') && util::is_digit(*(p + 1))) {
375
8.93k
            version.append(1, *p++);
376
21.0k
            while (util::is_digit(*p)) {
377
12.1k
                version.append(1, *p++);
378
12.1k
            }
379
8.93k
        } else {
380
1.61k
            valid = false;
381
1.61k
        }
382
10.5k
    }
383
11.0k
    return valid;
384
11.0k
}
385
386
bool
387
QPDF::findHeader()
388
7.71k
{
389
7.71k
    qpdf_offset_t global_offset = m->file->tell();
390
7.71k
    std::string line = m->file->readLine(1024);
391
7.71k
    char const* p = line.c_str();
392
7.71k
    if (strncmp(p, "%PDF-", 5) != 0) {
393
0
        throw std::logic_error("findHeader is not looking at %PDF-");
394
0
    }
395
7.71k
    p += 5;
396
7.71k
    std::string version;
397
    // Note: The string returned by line.c_str() is always null-terminated. The code below never
398
    // overruns the buffer because a null character always short-circuits further advancement.
399
7.71k
    bool valid = validatePDFVersion(p, version);
400
7.71k
    if (valid) {
401
7.71k
        m->pdf_version = version;
402
7.71k
        if (global_offset != 0) {
403
            // Empirical evidence strongly suggests that when there is leading material prior to the
404
            // PDF header, all explicit offsets in the file are such that 0 points to the beginning
405
            // of the header.
406
0
            QTC::TC("qpdf", "QPDF global offset");
407
0
            m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
408
0
        }
409
7.71k
    }
410
7.71k
    return valid;
411
7.71k
}
412
413
void
414
QPDF::warn(QPDFExc const& e)
415
86.6k
{
416
86.6k
    if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
417
27
        stopOnError("Too many warnings - file is too badly damaged");
418
27
    }
419
86.6k
    m->warnings.push_back(e);
420
86.6k
    if (!m->suppress_warnings) {
421
86.5k
        *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
422
86.5k
    }
423
86.6k
}
424
425
void
426
QPDF::warn(
427
    qpdf_error_code_e error_code,
428
    std::string const& object,
429
    qpdf_offset_t offset,
430
    std::string const& message)
431
86.6k
{
432
86.6k
    warn(QPDFExc(error_code, getFilename(), object, offset, message));
433
86.6k
}
434
435
// Create a new indirect object in this QPDF whose value is a QPDF_Reserved placeholder.
QPDFObjectHandle
QPDF::newReserved()
{
    return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());
}
440
441
// Create a new indirect object in this QPDF whose value is a QPDF_Null.
QPDFObjectHandle
QPDF::newIndirectNull()
{
    return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());
}
446
447
// Create a new indirect stream in this QPDF. The stream is built with the next free object id, a
// new empty dictionary, and 0 for both trailing constructor arguments.
QPDFObjectHandle
QPDF::newStream()
{
    return makeIndirectObject(
        qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
}
453
454
// Create a new indirect stream whose data is the given buffer. Null handles are passed for both
// the filter and the decode parameters.
QPDFObjectHandle
QPDF::newStream(std::shared_ptr<Buffer> data)
{
    QPDFObjectHandle stream = newStream();
    stream.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    return stream;
}
461
462
// Create a new indirect stream whose data is the given string. Null handles are passed for both
// the filter and the decode parameters.
QPDFObjectHandle
QPDF::newStream(std::string const& data)
{
    QPDFObjectHandle stream = newStream();
    stream.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    return stream;
}
469
470
// Return the object with the given id and generation by delegating to getObject(QPDFObjGen).
QPDFObjectHandle
QPDF::getObject(int objid, int generation)
{
    QPDFObjGen const og(objid, generation);
    return getObject(og);
}
475
476
// Alternative name for getObject(QPDFObjGen); simply forwards to it.
QPDFObjectHandle
QPDF::getObjectByObjGen(QPDFObjGen og)
{
    return getObject(og);
}
481
482
// Alternative name for getObject(int, int): builds a QPDFObjGen from the arguments and looks the
// object up.
QPDFObjectHandle
QPDF::getObjectByID(int objid, int generation)
{
    QPDFObjGen const og(objid, generation);
    return getObject(og);
}
487
488
// Copy the indirect object foreign, which must belong to a different QPDF, into this QPDF and
// return a handle to the local copy.
//
// Throws std::logic_error if foreign is a direct object or already belongs to this QPDF. If no
// local object was mapped for foreign after reserving, a warning is issued and a null object is
// returned.
QPDFObjectHandle
QPDF::copyForeignObject(QPDFObjectHandle foreign)
{
    // Here's an explanation of what's going on here.
    //
    // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
    // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
    // foreign QPDF into the local QPDF, we have to replace all indirect object references with
    // references to the corresponding object in the local file.
    //
    // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
    // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
    // mapping from the foreign ObjGen to the local QPDFObjectHandle.
    //
    // To copy, we do a deep traversal of the foreign object with loop detection to discover all
    // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
    // indirect object, we check to see if we have already created a local copy of it. If not, we
    // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
    // mapping from the foreign object ID to the new object. While we do this, we keep a list of
    // objects to copy.
    //
    // Once we are done with the traversal, we copy all the objects that we need to copy. However,
    // the copies will contain indirect object IDs that refer to objects in the foreign file. We
    // need to replace them with references to objects in the local file. This is what
    // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
    // all the indirect references replaced with new ones in the local context, we can replace the
    // local reserved object with the copy. This mechanism allows us to copy objects with circular
    // references in any order.

    // For streams, rather than copying the objects, we set up the stream data to pull from the
    // original stream by using a stream data provider. This is done in a manner that doesn't
    // require the original QPDF object but may require the original source of the stream data with
    // special handling for immediate_copy_from. This logic is also in
    // replaceForeignIndirectObjects.

    // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
    // use case to copy pages this way if the intention is to not update the pages tree.
    if (!foreign.isIndirect()) {
        QTC::TC("qpdf", "QPDF copyForeign direct");
        throw std::logic_error("QPDF::copyForeign called with direct object handle");
    }
    QPDF& other = foreign.getQPDF();
    if (&other == this) {
        QTC::TC("qpdf", "QPDF copyForeign not foreign");
        throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
    }

    ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
    if (!obj_copier.visiting.empty()) {
        throw std::logic_error(
            "obj_copier.visiting is not empty at the beginning of copyForeignObject");
    }

    // Make sure we have an object in this file for every referenced object in the old file.
    // obj_copier.object_map maps foreign QPDFObjGen to local objects.  For everything new that we
    // have to copy, the local object will be a reservation, unless it is a stream, in which case
    // the local object will already be a stream.
    reserveObjects(foreign, obj_copier, true);

    if (!obj_copier.visiting.empty()) {
        throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
    }

    // Copy any new objects and replace the reservations.
    for (auto& to_copy: obj_copier.to_copy) {
        QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);
        if (!to_copy.isStream()) {
            QPDFObjGen og(to_copy.getObjGen());
            replaceReserved(obj_copier.object_map[og], copy);
        }
    }
    obj_copier.to_copy.clear();

    // Look the result up exactly once; find() also guarantees that a missing mapping is never
    // inserted into object_map.
    auto og = foreign.getObjGen();
    auto const mapping = obj_copier.object_map.find(og);
    if (mapping == obj_copier.object_map.end()) {
        warn(damagedPDF(
            other.getFilename() + " object " + og.unparse(' '),
            foreign.getParsedOffset(),
            "unexpected reference to /Pages object while copying foreign object; replacing with "
            "null"));
        return QPDFObjectHandle::newNull();
    }
    return mapping->second;
}
572
573
void
574
QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
575
0
{
576
0
    auto foreign_tc = foreign.getTypeCode();
577
0
    if (foreign_tc == ::ot_reserved) {
578
0
        throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
579
0
    }
580
581
0
    if (foreign.isPagesObject()) {
582
0
        QTC::TC("qpdf", "QPDF not copying pages object");
583
0
        return;
584
0
    }
585
586
0
    if (foreign.isIndirect()) {
587
0
        QPDFObjGen foreign_og(foreign.getObjGen());
588
0
        if (!obj_copier.visiting.add(foreign_og)) {
589
0
            QTC::TC("qpdf", "QPDF loop reserving objects");
590
0
            return;
591
0
        }
592
0
        if (obj_copier.object_map.contains(foreign_og)) {
593
0
            QTC::TC("qpdf", "QPDF already reserved object");
594
0
            if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
595
0
                obj_copier.visiting.erase(foreign);
596
0
                return;
597
0
            }
598
0
        } else {
599
0
            QTC::TC("qpdf", "QPDF copy indirect");
600
0
            obj_copier.object_map[foreign_og] =
601
0
                foreign.isStream() ? newStream() : newIndirectNull();
602
0
            if ((!top) && foreign.isPageObject()) {
603
0
                QTC::TC("qpdf", "QPDF not crossing page boundary");
604
0
                obj_copier.visiting.erase(foreign_og);
605
0
                return;
606
0
            }
607
0
        }
608
0
        obj_copier.to_copy.push_back(foreign);
609
0
    }
610
611
0
    if (foreign_tc == ::ot_array) {
612
0
        QTC::TC("qpdf", "QPDF reserve array");
613
0
        for (auto const& item: foreign.as_array()) {
614
0
            reserveObjects(item, obj_copier, false);
615
0
        }
616
0
    } else if (foreign_tc == ::ot_dictionary) {
617
0
        QTC::TC("qpdf", "QPDF reserve dictionary");
618
0
        for (auto const& item: foreign.as_dictionary()) {
619
0
            if (!item.second.null()) {
620
0
                reserveObjects(item.second, obj_copier, false);
621
0
            }
622
0
        }
623
0
    } else if (foreign_tc == ::ot_stream) {
624
0
        QTC::TC("qpdf", "QPDF reserve stream");
625
0
        reserveObjects(foreign.getDict(), obj_copier, false);
626
0
    }
627
628
0
    obj_copier.visiting.erase(foreign);
629
0
}
630
631
// Build the local equivalent of foreign. Below the top level, an indirect reference becomes the
// local object recorded in obj_copier.object_map (or null if there is none). Arrays and
// dictionaries are rebuilt element by element, streams have their dictionary and data wired up on
// the already-reserved local stream, and scalars are copied as direct objects. Throws
// std::logic_error if a top-level, non-stream result turns out to be indirect.
QPDFObjectHandle
QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
{
    auto type_code = foreign.getTypeCode();
    QPDFObjectHandle result;
    if (foreign.isIndirect() && !top) {
        QTC::TC("qpdf", "QPDF replace indirect");
        auto mapping = obj_copier.object_map.find(foreign.getObjGen());
        if (mapping != obj_copier.object_map.end()) {
            result = mapping->second;
        } else {
            // This case would occur if this is a reference to a Pages object that we didn't
            // traverse into.
            QTC::TC("qpdf", "QPDF replace foreign indirect with null");
            result = QPDFObjectHandle::newNull();
        }
    } else if (type_code == ::ot_array) {
        QTC::TC("qpdf", "QPDF replace array");
        result = QPDFObjectHandle::newArray();
        for (auto const& element: foreign.as_array()) {
            result.appendItem(replaceForeignIndirectObjects(element, obj_copier, false));
        }
    } else if (type_code == ::ot_dictionary) {
        QTC::TC("qpdf", "QPDF replace dictionary");
        result = QPDFObjectHandle::newDictionary();
        for (auto const& member: foreign.as_dictionary()) {
            if (member.second.null()) {
                continue;
            }
            result.replaceKey(
                member.first, replaceForeignIndirectObjects(member.second, obj_copier, false));
        }
    } else if (type_code == ::ot_stream) {
        QTC::TC("qpdf", "QPDF replace stream");
        result = obj_copier.object_map[foreign.getObjGen()];
        QPDFObjectHandle local_dict = result.getDict();
        QPDFObjectHandle foreign_dict = foreign.getDict();
        for (auto const& member: foreign_dict.as_dictionary()) {
            if (member.second.null()) {
                continue;
            }
            local_dict.replaceKey(
                member.first, replaceForeignIndirectObjects(member.second, obj_copier, false));
        }
        copyStreamData(result, foreign);
    } else {
        foreign.assertScalar();
        result = foreign;
        result.makeDirect();
    }

    if (top && result.isIndirect() && !result.isStream()) {
        throw std::logic_error("replacement for foreign object is indirect");
    }

    return result;
}
684
685
void
686
QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
687
0
{
688
    // This method was originally written for copying foreign streams, but it is used by
689
    // QPDFObjectHandle to copy streams from the same QPDF object as well.
690
691
0
    QPDFObjectHandle dict = result.getDict();
692
0
    QPDFObjectHandle old_dict = foreign.getDict();
693
0
    if (m->copied_stream_data_provider == nullptr) {
694
0
        m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
695
0
        m->copied_streams =
696
0
            std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
697
0
    }
698
0
    QPDFObjGen local_og(result.getObjGen());
699
    // Copy information from the foreign stream so we can pipe its data later without keeping the
700
    // original QPDF object around.
701
702
0
    QPDF& foreign_stream_qpdf =
703
0
        foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
704
705
0
    auto stream = foreign.as_stream();
706
0
    if (!stream) {
707
0
        throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
708
0
    }
709
0
    std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();
710
0
    if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
711
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
712
        // source stream so that if the source stream is copied multiple times, we don't have to
713
        // keep duplicating the memory.
714
0
        QTC::TC("qpdf", "QPDF immediate copy stream data");
715
0
        foreign.replaceStreamData(
716
0
            foreign.getRawStreamData(),
717
0
            old_dict.getKey("/Filter"),
718
0
            old_dict.getKey("/DecodeParms"));
719
0
        stream_buffer = stream.getStreamDataBuffer();
720
0
    }
721
0
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
722
0
        stream.getStreamDataProvider();
723
0
    if (stream_buffer.get()) {
724
0
        QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
725
0
        result.replaceStreamData(
726
0
            stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
727
0
    } else if (stream_provider.get()) {
728
        // In this case, the remote stream's QPDF must stay in scope.
729
0
        QTC::TC("qpdf", "QPDF copy foreign stream with provider");
730
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
731
0
        result.replaceStreamData(
732
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
733
0
    } else {
734
0
        auto foreign_stream_data = std::make_shared<ForeignStreamData>(
735
0
            foreign_stream_qpdf.m->encp,
736
0
            foreign_stream_qpdf.m->file,
737
0
            foreign,
738
0
            foreign.getParsedOffset(),
739
0
            stream.getLength(),
740
0
            dict,
741
0
            stream.isRootMetadata());
742
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
743
0
        result.replaceStreamData(
744
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
745
0
    }
746
0
}
747
748
unsigned long long
749
QPDF::getUniqueId() const
750
0
{
751
0
    return m->unique_id;
752
0
}
753
754
std::string
755
QPDF::getFilename() const
756
178k
{
757
178k
    return m->file->getName();
758
178k
}
759
760
// Return the PDF version as a PDFVersion built from {major, minor, extension level}.
//
// Major and minor are parsed from m->pdf_version and default to 1 and 3 if it does not start
// (after optional whitespace) with <digits>.<digits>. The extension level comes from
// getExtensionLevel().
PDFVersion
QPDF::getVersionAsPDFVersion()
{
    // Compile the pattern once instead of on every call; initialization of a function-local
    // static is thread-safe.
    static std::regex const version_re("^[[:space:]]*([0-9]+)\\.([0-9]+)");

    int major = 1;
    int minor = 3;
    int extension_level = getExtensionLevel();

    std::smatch match;
    if (std::regex_search(m->pdf_version, match, version_re)) {
        major = QUtil::string_to_int(match[1].str().c_str());
        minor = QUtil::string_to_int(match[2].str().c_str());
    }

    return {major, minor, extension_level};
}
776
777
std::string
778
QPDF::getPDFVersion() const
779
0
{
780
0
    return m->pdf_version;
781
0
}
782
783
int
784
QPDF::getExtensionLevel()
785
0
{
786
0
    int result = 0;
787
0
    QPDFObjectHandle obj = getRoot();
788
0
    if (obj.hasKey("/Extensions")) {
789
0
        obj = obj.getKey("/Extensions");
790
0
        if (obj.isDictionary() && obj.hasKey("/ADBE")) {
791
0
            obj = obj.getKey("/ADBE");
792
0
            if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
793
0
                obj = obj.getKey("/ExtensionLevel");
794
0
                if (obj.isInteger()) {
795
0
                    result = obj.getIntValueAsInt();
796
0
                }
797
0
            }
798
0
        }
799
0
    }
800
0
    return result;
801
0
}
802
803
// Return a handle to the trailer dictionary.
QPDFObjectHandle
QPDF::getTrailer()
{
    return m->trailer;
}
808
809
// Return the document catalog, i.e. the trailer's /Root entry. Throws a damaged-PDF exception if
// /Root is not a dictionary. In check mode, a missing or wrong /Type is reported as a warning and
// repaired by setting it to /Catalog.
QPDFObjectHandle
QPDF::getRoot()
{
    QPDFObjectHandle root = m->trailer.getKey("/Root");
    if (!root.isDictionary()) {
        throw damagedPDF("", -1, "unable to find /Root dictionary");
    }

    // Check_mode is an interim solution to request #810 pending a more comprehensive review of the
    // approach to more extensive checks and warning levels.
    if (m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
        warn(damagedPDF("", -1, "catalog /Type entry missing or invalid"));
        root.replaceKey("/Type", "/Catalog"_qpdf);
    }
    return root;
}
824
825
std::map<QPDFObjGen, QPDFXRefEntry>
826
QPDF::getXRefTable()
827
0
{
828
0
    return getXRefTableInternal();
829
0
}
830
831
std::map<QPDFObjGen, QPDFXRefEntry> const&
832
QPDF::getXRefTableInternal()
833
0
{
834
0
    if (!m->parsed) {
835
0
        throw std::logic_error("QPDF::getXRefTable called before parsing.");
836
0
    }
837
838
0
    return m->xref_table;
839
0
}
840
841
bool
842
QPDF::pipeStreamData(
843
    std::shared_ptr<EncryptionParameters> encp,
844
    std::shared_ptr<InputSource> file,
845
    QPDF& qpdf_for_warning,
846
    QPDFObjGen og,
847
    qpdf_offset_t offset,
848
    size_t length,
849
    QPDFObjectHandle stream_dict,
850
    bool is_root_metadata,
851
    Pipeline* pipeline,
852
    bool suppress_warnings,
853
    bool will_retry)
854
0
{
855
0
    std::unique_ptr<Pipeline> to_delete;
856
0
    if (encp->encrypted) {
857
0
        decryptStream(
858
0
            encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete);
859
0
    }
860
861
0
    bool attempted_finish = false;
862
0
    try {
863
0
        auto buf = file->read(length, offset);
864
0
        if (buf.size() != length) {
865
0
            throw damagedPDF(
866
0
                *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
867
0
        }
868
0
        pipeline->write(buf.data(), length);
869
0
        attempted_finish = true;
870
0
        pipeline->finish();
871
0
        return true;
872
0
    } catch (QPDFExc& e) {
873
0
        if (!suppress_warnings) {
874
0
            qpdf_for_warning.warn(e);
875
0
        }
876
0
    } catch (std::exception& e) {
877
0
        if (!suppress_warnings) {
878
0
            QTC::TC("qpdf", "QPDF decoding error warning");
879
0
            qpdf_for_warning.warn(
880
                // line-break
881
0
                damagedPDF(
882
0
                    *file,
883
0
                    "",
884
0
                    file->getLastOffset(),
885
0
                    ("error decoding stream data for object " + og.unparse(' ') + ": " +
886
0
                     e.what())));
887
0
            if (will_retry) {
888
0
                qpdf_for_warning.warn(
889
                    // line-break
890
0
                    damagedPDF(
891
0
                        *file,
892
0
                        "",
893
0
                        file->getLastOffset(),
894
0
                        "stream will be re-processed without filtering to avoid data loss"));
895
0
            }
896
0
        }
897
0
    }
898
0
    if (!attempted_finish) {
899
0
        try {
900
0
            pipeline->finish();
901
0
        } catch (std::exception&) {
902
            // ignore
903
0
        }
904
0
    }
905
0
    return false;
906
0
}
907
908
bool
909
QPDF::pipeStreamData(
910
    QPDFObjGen og,
911
    qpdf_offset_t offset,
912
    size_t length,
913
    QPDFObjectHandle stream_dict,
914
    bool is_root_metadata,
915
    Pipeline* pipeline,
916
    bool suppress_warnings,
917
    bool will_retry)
918
0
{
919
0
    return pipeStreamData(
920
0
        m->encp,
921
0
        m->file,
922
0
        *this,
923
0
        og,
924
0
        offset,
925
0
        length,
926
0
        stream_dict,
927
0
        is_root_metadata,
928
0
        pipeline,
929
0
        suppress_warnings,
930
0
        will_retry);
931
0
}
932
933
bool
934
QPDF::pipeForeignStreamData(
935
    std::shared_ptr<ForeignStreamData> foreign,
936
    Pipeline* pipeline,
937
    bool suppress_warnings,
938
    bool will_retry)
939
0
{
940
0
    if (foreign->encp->encrypted) {
941
0
        QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
942
0
    }
943
0
    return pipeStreamData(
944
0
        foreign->encp,
945
0
        foreign->file,
946
0
        *this,
947
0
        foreign->foreign_og,
948
0
        foreign->offset,
949
0
        foreign->length,
950
0
        foreign->local_dict,
951
0
        foreign->is_root_metadata,
952
0
        pipeline,
953
0
        suppress_warnings,
954
0
        will_retry);
955
0
}
956
957
// Throw a generic exception when we lack context for something more specific. New code should not
958
// use this. This method exists to improve somewhat from calling assert in very old code.
959
void
960
QPDF::stopOnError(std::string const& message)
961
27
{
962
27
    throw damagedPDF("", message);
963
27
}
964
965
// Return an exception of type qpdf_e_damaged_pdf.
966
QPDFExc
967
QPDF::damagedPDF(
968
    InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
969
0
{
970
0
    return {qpdf_e_damaged_pdf, input.getName(), object, offset, message, true};
971
0
}
972
973
// Return an exception of type qpdf_e_damaged_pdf.  The object is taken from
974
// m->last_object_description.
975
QPDFExc
976
QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
977
0
{
978
0
    return damagedPDF(input, m->last_object_description, offset, message);
979
0
}
980
981
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file.
982
QPDFExc
983
QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
984
27
{
985
27
    return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message, true};
986
27
}
987
988
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file and the
989
// offset from .m->file->getLastOffset().
990
QPDFExc
991
QPDF::damagedPDF(std::string const& object, std::string const& message)
992
27
{
993
27
    return damagedPDF(object, m->file->getLastOffset(), message);
994
27
}
995
996
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
997
// from .m->last_object_description.
998
QPDFExc
999
QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
1000
0
{
1001
0
    return damagedPDF(m->last_object_description, offset, message);
1002
0
}
1003
1004
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file, the object
1005
// from m->last_object_description and the offset from m->file->getLastOffset().
1006
QPDFExc
1007
QPDF::damagedPDF(std::string const& message)
1008
0
{
1009
0
    return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
1010
0
}
1011
1012
bool
1013
QPDF::everCalledGetAllPages() const
1014
0
{
1015
0
    return m->ever_called_get_all_pages;
1016
0
}
1017
1018
bool
1019
QPDF::everPushedInheritedAttributesToPages() const
1020
0
{
1021
0
    return m->ever_pushed_inherited_attributes_to_pages;
1022
0
}
1023
1024
void
1025
QPDF::removeSecurityRestrictions()
1026
0
{
1027
0
    auto root = getRoot();
1028
0
    root.removeKey("/Perms");
1029
0
    auto acroform = root.getKey("/AcroForm");
1030
0
    if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
1031
0
        acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
1032
0
    }
1033
0
}