Coverage Report

Created: 2024-09-08 06:05

/src/qpdf/libqpdf/QPDF.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/qpdf-config.h> // include first for large file support
2
3
#include <qpdf/QPDF.hh>
4
5
#include <array>
6
#include <atomic>
7
#include <cstring>
8
#include <limits>
9
#include <map>
10
#include <regex>
11
#include <sstream>
12
#include <vector>
13
14
#include <qpdf/BufferInputSource.hh>
15
#include <qpdf/FileInputSource.hh>
16
#include <qpdf/OffsetInputSource.hh>
17
#include <qpdf/Pipeline.hh>
18
#include <qpdf/QPDFExc.hh>
19
#include <qpdf/QPDFLogger.hh>
20
#include <qpdf/QPDFObject_private.hh>
21
#include <qpdf/QPDFParser.hh>
22
#include <qpdf/QPDF_Array.hh>
23
#include <qpdf/QPDF_Dictionary.hh>
24
#include <qpdf/QPDF_Null.hh>
25
#include <qpdf/QPDF_Reserved.hh>
26
#include <qpdf/QPDF_Stream.hh>
27
#include <qpdf/QPDF_Unresolved.hh>
28
#include <qpdf/QTC.hh>
29
#include <qpdf/QUtil.hh>
30
31
// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
32
// being static as well.
33
std::string const QPDF::qpdf_version(QPDF_VERSION);
34
35
static char const* EMPTY_PDF = (
36
    // force line break
37
    "%PDF-1.3\n"
38
    "1 0 obj\n"
39
    "<< /Type /Catalog /Pages 2 0 R >>\n"
40
    "endobj\n"
41
    "2 0 obj\n"
42
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
43
    "endobj\n"
44
    "xref\n"
45
    "0 3\n"
46
    "0000000000 65535 f \n"
47
    "0000000009 00000 n \n"
48
    "0000000058 00000 n \n"
49
    "trailer << /Size 3 /Root 1 0 R >>\n"
50
    "startxref\n"
51
    "110\n"
52
    "%%EOF\n");
53
54
namespace
55
{
56
    class InvalidInputSource: public InputSource
57
    {
58
      public:
59
        ~InvalidInputSource() override = default;
60
        qpdf_offset_t
61
        findAndSkipNextEOL() override
62
0
        {
63
0
            throwException();
64
0
            return 0;
65
0
        }
66
        std::string const&
67
        getName() const override
68
0
        {
69
0
            static std::string name("closed input source");
70
0
            return name;
71
0
        }
72
        qpdf_offset_t
73
        tell() override
74
0
        {
75
0
            throwException();
76
0
            return 0;
77
0
        }
78
        void
79
        seek(qpdf_offset_t offset, int whence) override
80
0
        {
81
0
            throwException();
82
0
        }
83
        void
84
        rewind() override
85
0
        {
86
0
            throwException();
87
0
        }
88
        size_t
89
        read(char* buffer, size_t length) override
90
0
        {
91
0
            throwException();
92
0
            return 0;
93
0
        }
94
        void
95
        unreadCh(char ch) override
96
0
        {
97
0
            throwException();
98
0
        }
99
100
      private:
101
        void
102
        throwException()
103
0
        {
104
0
            throw std::logic_error("QPDF operation attempted on a QPDF object with no input "
105
0
                                   "source. QPDF operations are invalid before processFile (or "
106
0
                                   "another process method) or after closeInputSource");
107
0
        }
108
    };
109
} // namespace
110
111
// Plain value holder: stores each argument in the member of the same name. No other work is done.
QPDF::ForeignStreamData::ForeignStreamData(
    std::shared_ptr<EncryptionParameters> encp,
    std::shared_ptr<InputSource> file,
    QPDFObjGen const& foreign_og,
    qpdf_offset_t offset,
    size_t length,
    QPDFObjectHandle local_dict) :
    encp(encp),             // encryption parameters (shared)
    file(file),             // input source (shared)
    foreign_og(foreign_og), // object/generation of the foreign stream
    offset(offset),
    length(length),
    local_dict(local_dict)
{
    // Nothing to do beyond member initialization.
}
126
127
// Remember the QPDF that copied streams are piped through. The base class is constructed with
// `true`.
// NOTE(review): that flag presumably enables the provideStreamData overload that takes
// suppress_warnings/will_retry -- confirm against QPDFObjectHandle::StreamDataProvider.
QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
    QPDFObjectHandle::StreamDataProvider(true),
    destination_qpdf(destination_qpdf)
{
    // Nothing to do beyond member initialization.
}
132
133
bool
134
QPDF::CopiedStreamDataProvider::provideStreamData(
135
    QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
136
0
{
137
0
    std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
138
0
    bool result = false;
139
0
    if (foreign_data.get()) {
140
0
        result = destination_qpdf.pipeForeignStreamData(
141
0
            foreign_data, pipeline, suppress_warnings, will_retry);
142
0
        QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
143
0
    } else {
144
0
        auto foreign_stream = foreign_streams[og];
145
0
        result = foreign_stream.pipeStreamData(
146
0
            pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
147
0
        QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
148
0
    }
149
0
    return result;
150
0
}
151
152
void
153
QPDF::CopiedStreamDataProvider::registerForeignStream(
154
    QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
155
0
{
156
0
    this->foreign_streams[local_og] = foreign_stream;
157
0
}
158
159
void
160
QPDF::CopiedStreamDataProvider::registerForeignStream(
161
    QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
162
0
{
163
0
    this->foreign_stream_data[local_og] = foreign_stream;
164
0
}
165
166
// Record the owning QPDF and the object/generation that decryptString() later passes to
// QPDF::decryptString.
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen const& og) :
    qpdf(qpdf),
    og(og)
{
    // Nothing to do beyond member initialization.
}
171
172
void
173
QPDF::StringDecrypter::decryptString(std::string& val)
174
0
{
175
0
    qpdf->decryptString(val, og);
176
0
}
177
178
std::string const&
179
QPDF::QPDFVersion()
180
0
{
181
    // The C API relies on this being a static value.
182
0
    return QPDF::qpdf_version;
183
0
}
184
185
// Start out describing an unencrypted file: no encryption detected or initialized, V and R zero,
// metadata encryption on, all crypt filter methods e_none, and neither password matched.
QPDF::EncryptionParameters::EncryptionParameters() :
    encrypted(false),
    encryption_initialized(false),
    encryption_V(0),
    encryption_R(0),
    encrypt_metadata(true),
    cf_stream(e_none),
    cf_string(e_none),
    cf_file(e_none),
    user_password_matched(false),
    owner_password_matched(false)
{
    // Nothing to do beyond member initialization.
}
198
199
// Initial state for a QPDF: the default logger, an InvalidInputSource (so any use before a
// process method throws), and fresh encryption parameters.
QPDF::Members::Members() :
    log(QPDFLogger::defaultLogger()),
    file(new InvalidInputSource()),
    encp(new EncryptionParameters)
{
    // Nothing to do beyond member initialization.
}
205
206
// Construct an empty QPDF: allocate its Members, let the tokenizer accept EOF, and assign an id
// that is unique among all QPDF objects created during the lifetime of the running application.
QPDF::QPDF() :
    m(new Members())
{
    m->tokenizer.allowEOF();

    // A process-wide atomic counter hands out the ids; fetch_add returns the value before the
    // increment, so the first QPDF gets 0.
    static std::atomic<unsigned long long> next_unique_id{0};
    m->unique_id = next_unique_id.fetch_add(1ULL);
}
215
216
QPDF::~QPDF()
{
    // Objects that refer to each other through indirect references (via arrays or dictionaries)
    // form std::shared_ptr cycles that would otherwise never be freed. To break them, walk the
    // object cache -- the objects read from the file -- and replace every resolved indirect
    // reference with an internal "destroyed" object. This is only safe now, while nothing else is
    // using the QPDF. Disconnecting also makes every direct QPDFObjectHandle reachable from a
    // cached object release its association with this QPDF; direct objects themselves are not
    // destroyed because they can safely be moved to other QPDF objects.

    // Nobody should still be using this QPDF, but clear the xref table anyway so that resolve()
    // cannot possibly succeed from here on.
    m->xref_table.clear();

    for (auto const& cache_item: m->obj_cache) {
        auto const& cached = cache_item.second.object;
        cached->disconnect();
        if (cached->getTypeCode() != ::ot_null) {
            cached->destroy();
        }
    }
}
238
239
std::shared_ptr<QPDF>
240
QPDF::create()
241
0
{
242
0
    return std::make_shared<QPDF>();
243
0
}
244
245
void
246
QPDF::processFile(char const* filename, char const* password)
247
0
{
248
0
    auto* fi = new FileInputSource(filename);
249
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
250
0
}
251
252
void
253
QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
254
0
{
255
0
    auto* fi = new FileInputSource(description, filep, close_file);
256
0
    processInputSource(std::shared_ptr<InputSource>(fi), password);
257
0
}
258
259
void
260
QPDF::processMemoryFile(
261
    char const* description, char const* buf, size_t length, char const* password)
262
6.73k
{
263
6.73k
    processInputSource(
264
6.73k
        std::shared_ptr<InputSource>(
265
            // line-break
266
6.73k
            new BufferInputSource(
267
6.73k
                description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),
268
6.73k
        password);
269
6.73k
}
270
271
void
272
QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
273
6.73k
{
274
6.73k
    m->file = source;
275
6.73k
    parse(password);
276
6.73k
}
277
278
void
279
QPDF::closeInputSource()
280
0
{
281
0
    m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
282
0
}
283
284
void
285
QPDF::setPasswordIsHexKey(bool val)
286
0
{
287
0
    m->provided_password_is_hex_key = val;
288
0
}
289
290
void
291
QPDF::emptyPDF()
292
0
{
293
0
    processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
294
0
}
295
296
void
297
QPDF::registerStreamFilter(
298
    std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
299
0
{
300
0
    QPDF_Stream::registerStreamFilter(filter_name, factory);
301
0
}
302
303
void
304
QPDF::setIgnoreXRefStreams(bool val)
305
0
{
306
0
    m->ignore_xref_streams = val;
307
0
}
308
309
std::shared_ptr<QPDFLogger>
310
QPDF::getLogger()
311
0
{
312
0
    return m->log;
313
0
}
314
315
void
316
QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
317
0
{
318
0
    m->log = l;
319
0
}
320
321
void
322
QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
323
0
{
324
0
    setLogger(QPDFLogger::create());
325
0
    m->log->setOutputStreams(out, err);
326
0
}
327
328
void
329
QPDF::setSuppressWarnings(bool val)
330
0
{
331
0
    m->suppress_warnings = val;
332
0
}
333
334
void
335
QPDF::setMaxWarnings(size_t val)
336
6.73k
{
337
6.73k
    m->max_warnings = val;
338
6.73k
}
339
340
void
341
QPDF::setAttemptRecovery(bool val)
342
0
{
343
0
    m->attempt_recovery = val;
344
0
}
345
346
void
347
QPDF::setImmediateCopyFrom(bool val)
348
0
{
349
0
    m->immediate_copy_from = val;
350
0
}
351
352
std::vector<QPDFExc>
353
QPDF::getWarnings()
354
0
{
355
0
    std::vector<QPDFExc> result = m->warnings;
356
0
    m->warnings.clear();
357
0
    return result;
358
0
}
359
360
bool
361
QPDF::anyWarnings() const
362
0
{
363
0
    return !m->warnings.empty();
364
0
}
365
366
size_t
367
QPDF::numWarnings() const
368
0
{
369
0
    return m->warnings.size();
370
0
}
371
372
bool
373
QPDF::validatePDFVersion(char const*& p, std::string& version)
374
6.83k
{
375
6.83k
    bool valid = QUtil::is_digit(*p);
376
6.83k
    if (valid) {
377
13.8k
        while (QUtil::is_digit(*p)) {
378
7.02k
            version.append(1, *p++);
379
7.02k
        }
380
6.82k
        if ((*p == '.') && QUtil::is_digit(*(p + 1))) {
381
6.79k
            version.append(1, *p++);
382
13.8k
            while (QUtil::is_digit(*p)) {
383
7.04k
                version.append(1, *p++);
384
7.04k
            }
385
6.79k
        } else {
386
31
            valid = false;
387
31
        }
388
6.82k
    }
389
6.83k
    return valid;
390
6.83k
}
391
392
bool
393
QPDF::findHeader()
394
6.73k
{
395
6.73k
    qpdf_offset_t global_offset = m->file->tell();
396
6.73k
    std::string line = m->file->readLine(1024);
397
6.73k
    char const* p = line.c_str();
398
6.73k
    if (strncmp(p, "%PDF-", 5) != 0) {
399
0
        throw std::logic_error("findHeader is not looking at %PDF-");
400
0
    }
401
6.73k
    p += 5;
402
6.73k
    std::string version;
403
    // Note: The string returned by line.c_str() is always null-terminated. The code below never
404
    // overruns the buffer because a null character always short-circuits further advancement.
405
6.73k
    bool valid = validatePDFVersion(p, version);
406
6.73k
    if (valid) {
407
6.73k
        m->pdf_version = version;
408
6.73k
        if (global_offset != 0) {
409
            // Empirical evidence strongly suggests that when there is leading material prior to the
410
            // PDF header, all explicit offsets in the file are such that 0 points to the beginning
411
            // of the header.
412
0
            QTC::TC("qpdf", "QPDF global offset");
413
0
            m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
414
0
        }
415
6.73k
    }
416
6.73k
    return valid;
417
6.73k
}
418
419
bool
420
QPDF::findStartxref()
421
6.73k
{
422
6.73k
    if (readToken(m->file).isWord("startxref") && readToken(m->file).isInteger()) {
423
        // Position in front of offset token
424
6.73k
        m->file->seek(m->file->getLastOffset(), SEEK_SET);
425
6.73k
        return true;
426
6.73k
    }
427
0
    return false;
428
6.73k
}
429
430
void
431
QPDF::parse(char const* password)
432
6.73k
{
433
6.73k
    if (password) {
434
0
        m->encp->provided_password = password;
435
0
    }
436
437
    // Find the header anywhere in the first 1024 bytes of the file.
438
6.73k
    PatternFinder hf(*this, &QPDF::findHeader);
439
6.73k
    if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
440
0
        QTC::TC("qpdf", "QPDF not a pdf file");
441
0
        warn(damagedPDF("", 0, "can't find PDF header"));
442
        // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
443
0
        m->pdf_version = "1.2";
444
0
    }
445
446
    // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file.  We add an extra
447
    // 30 characters to leave room for the startxref stuff.
448
6.73k
    m->file->seek(0, SEEK_END);
449
6.73k
    qpdf_offset_t end_offset = m->file->tell();
450
6.73k
    m->xref_table_max_offset = end_offset;
451
    // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
452
    // scenarios at least 3 bytes are required.
453
6.73k
    if (m->xref_table_max_id > m->xref_table_max_offset / 3) {
454
6.73k
        m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
455
6.73k
    }
456
6.73k
    qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
457
6.73k
    PatternFinder sf(*this, &QPDF::findStartxref);
458
6.73k
    qpdf_offset_t xref_offset = 0;
459
6.73k
    if (m->file->findLast("startxref", start_offset, 0, sf)) {
460
6.73k
        xref_offset = QUtil::string_to_ll(readToken(m->file).getValue().c_str());
461
6.73k
    }
462
463
6.73k
    try {
464
6.73k
        if (xref_offset == 0) {
465
0
            QTC::TC("qpdf", "QPDF can't find startxref");
466
0
            throw damagedPDF("", 0, "can't find startxref");
467
0
        }
468
6.73k
        try {
469
6.73k
            read_xref(xref_offset);
470
6.73k
        } catch (QPDFExc&) {
471
0
            throw;
472
0
        } catch (std::exception& e) {
473
0
            throw damagedPDF("", 0, std::string("error reading xref: ") + e.what());
474
0
        }
475
6.73k
    } catch (QPDFExc& e) {
476
0
        if (m->attempt_recovery) {
477
0
            reconstruct_xref(e);
478
0
            QTC::TC("qpdf", "QPDF reconstructed xref table");
479
0
        } else {
480
0
            throw;
481
0
        }
482
0
    }
483
484
6.73k
    initializeEncryption();
485
6.73k
    m->parsed = true;
486
6.73k
    if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
487
        // QPDFs created from JSON have an empty xref table and no root object yet.
488
0
        throw damagedPDF("", 0, "unable to find page tree");
489
0
    }
490
6.73k
}
491
492
void
493
QPDF::inParse(bool v)
494
13.4k
{
495
13.4k
    if (m->in_parse == v) {
496
        // This happens if QPDFParser::parse tries to resolve an indirect object while it is
497
        // parsing.
498
0
        throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug."
499
0
                               " Please report at https://github.com/qpdf/qpdf/issues.");
500
0
    }
501
13.4k
    m->in_parse = v;
502
13.4k
}
503
504
void
505
QPDF::warn(QPDFExc const& e)
506
50.9k
{
507
50.9k
    if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
508
13
        stopOnError("Too many warnings - file is too badly damaged");
509
13
    }
510
50.9k
    m->warnings.push_back(e);
511
50.9k
    if (!m->suppress_warnings) {
512
50.9k
        *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
513
50.9k
    }
514
50.9k
}
515
516
void
517
QPDF::warn(
518
    qpdf_error_code_e error_code,
519
    std::string const& object,
520
    qpdf_offset_t offset,
521
    std::string const& message)
522
50.9k
{
523
50.9k
    warn(QPDFExc(error_code, getFilename(), object, offset, message));
524
50.9k
}
525
526
void
527
QPDF::setTrailer(QPDFObjectHandle obj)
528
6.73k
{
529
6.73k
    if (m->trailer.isInitialized()) {
530
0
        return;
531
0
    }
532
6.73k
    m->trailer = obj;
533
6.73k
}
534
535
void
536
QPDF::reconstruct_xref(QPDFExc& e)
537
0
{
538
0
    if (m->reconstructed_xref) {
539
        // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
540
        // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
541
0
        throw e;
542
0
    }
543
544
    // If recovery generates more than 1000 warnings, the file is so severely damaged that there
545
    // probably is no point trying to continue.
546
0
    const auto max_warnings = m->warnings.size() + 1000U;
547
0
    auto check_warnings = [this, max_warnings]() {
548
0
        if (m->warnings.size() > max_warnings) {
549
0
            throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
550
0
        }
551
0
    };
552
553
0
    m->reconstructed_xref = true;
554
    // We may find more objects, which may contain dangling references.
555
0
    m->fixed_dangling_refs = false;
556
557
0
    warn(damagedPDF("", 0, "file is damaged"));
558
0
    warn(e);
559
0
    warn(damagedPDF("", 0, "Attempting to reconstruct cross-reference table"));
560
561
    // Delete all references to type 1 (uncompressed) objects
562
0
    std::set<QPDFObjGen> to_delete;
563
0
    for (auto const& iter: m->xref_table) {
564
0
        if (iter.second.getType() == 1) {
565
0
            to_delete.insert(iter.first);
566
0
        }
567
0
    }
568
0
    for (auto const& iter: to_delete) {
569
0
        m->xref_table.erase(iter);
570
0
    }
571
572
0
    m->file->seek(0, SEEK_END);
573
0
    qpdf_offset_t eof = m->file->tell();
574
0
    m->file->seek(0, SEEK_SET);
575
    // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
576
0
    static size_t const MAX_LEN = 10;
577
0
    while (m->file->tell() < eof) {
578
0
        QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN);
579
0
        qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
580
0
        if (t1.isInteger()) {
581
0
            auto pos = m->file->tell();
582
0
            QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN);
583
0
            if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
584
0
                int obj = QUtil::string_to_int(t1.getValue().c_str());
585
0
                int gen = QUtil::string_to_int(t2.getValue().c_str());
586
0
                if (obj <= m->xref_table_max_id) {
587
0
                    insertReconstructedXrefEntry(obj, token_start, gen);
588
0
                } else {
589
0
                    warn(damagedPDF(
590
0
                        "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
591
0
                }
592
0
            }
593
0
            m->file->seek(pos, SEEK_SET);
594
0
        } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) {
595
0
            auto pos = m->file->tell();
596
0
            QPDFObjectHandle t = readTrailer();
597
0
            if (!t.isDictionary()) {
598
                // Oh well.  It was worth a try.
599
0
            } else {
600
0
                setTrailer(t);
601
0
            }
602
0
            m->file->seek(pos, SEEK_SET);
603
0
        }
604
0
        check_warnings();
605
0
        m->file->findAndSkipNextEOL();
606
0
    }
607
0
    m->deleted_objects.clear();
608
609
0
    if (!m->trailer.isInitialized()) {
610
0
        qpdf_offset_t max_offset{0};
611
        // If there are any xref streams, take the last one to appear.
612
0
        for (auto const& iter: m->xref_table) {
613
0
            auto entry = iter.second;
614
0
            if (entry.getType() != 1) {
615
0
                continue;
616
0
            }
617
0
            auto oh = getObjectByObjGen(iter.first);
618
0
            try {
619
0
                if (!oh.isStreamOfType("/XRef")) {
620
0
                    continue;
621
0
                }
622
0
            } catch (std::exception&) {
623
0
                continue;
624
0
            }
625
0
            auto offset = entry.getOffset();
626
0
            if (offset > max_offset) {
627
0
                max_offset = offset;
628
0
                setTrailer(oh.getDict());
629
0
            }
630
0
            check_warnings();
631
0
        }
632
0
        if (max_offset > 0) {
633
0
            try {
634
0
                read_xref(max_offset);
635
0
            } catch (std::exception&) {
636
0
                throw damagedPDF(
637
0
                    "", 0, "error decoding candidate xref stream while recovering damaged file");
638
0
            }
639
0
            QTC::TC("qpdf", "QPDF recover xref stream");
640
0
        }
641
0
    }
642
643
0
    if (!m->trailer.isInitialized()) {
644
        // We could check the last encountered object to see if it was an xref stream.  If so, we
645
        // could try to get the trailer from there.  This may make it possible to recover files with
646
        // bad startxref pointers even when they have object streams.
647
648
0
        throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
649
0
    }
650
0
    if (m->xref_table.empty()) {
651
        // We cannot check for an empty xref table in parse because empty tables are valid when
652
        // creating QPDF objects from JSON.
653
0
        throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
654
0
    }
655
0
    check_warnings();
656
0
    if (!m->parsed) {
657
0
        m->parsed = true;
658
0
        getAllPages();
659
0
        check_warnings();
660
0
        if (m->all_pages.empty()) {
661
0
            m->parsed = false;
662
0
            throw damagedPDF("", 0, "unable to find any pages while recovering damaged file");
663
0
        }
664
0
    }
665
    // We could iterate through the objects looking for streams and try to find objects inside of
666
    // them, but it's probably not worth the trouble.  Acrobat can't recover files with any errors
667
    // in an xref stream, and this would be a real long shot anyway.  If we wanted to do anything
668
    // that involved looking at stream contents, we'd also have to call initializeEncryption() here.
669
    // It's safe to call it more than once.
670
0
}
671
672
void
673
QPDF::read_xref(qpdf_offset_t xref_offset)
674
6.73k
{
675
6.73k
    std::map<int, int> free_table;
676
6.73k
    std::set<qpdf_offset_t> visited;
677
13.4k
    while (xref_offset) {
678
6.73k
        visited.insert(xref_offset);
679
6.73k
        char buf[7];
680
6.73k
        memset(buf, 0, sizeof(buf));
681
6.73k
        m->file->seek(xref_offset, SEEK_SET);
682
        // Some files miss the mark a little with startxref. We could do a better job of searching
683
        // in the neighborhood for something that looks like either an xref table or stream, but the
684
        // simple heuristic of skipping whitespace can help with the xref table case and is harmless
685
        // with the stream case.
686
6.73k
        bool done = false;
687
6.73k
        bool skipped_space = false;
688
13.4k
        while (!done) {
689
6.73k
            char ch;
690
6.73k
            if (1 == m->file->read(&ch, 1)) {
691
6.73k
                if (QUtil::is_space(ch)) {
692
0
                    skipped_space = true;
693
6.73k
                } else {
694
6.73k
                    m->file->unreadCh(ch);
695
6.73k
                    done = true;
696
6.73k
                }
697
6.73k
            } else {
698
0
                QTC::TC("qpdf", "QPDF eof skipping spaces before xref", skipped_space ? 0 : 1);
699
0
                done = true;
700
0
            }
701
6.73k
        }
702
703
6.73k
        m->file->read(buf, sizeof(buf) - 1);
704
        // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
705
        // where it is terminated by arbitrary whitespace.
706
6.73k
        if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
707
6.73k
            if (skipped_space) {
708
0
                QTC::TC("qpdf", "QPDF xref skipped space");
709
0
                warn(damagedPDF("", 0, "extraneous whitespace seen before xref"));
710
0
            }
711
6.73k
            QTC::TC(
712
6.73k
                "qpdf",
713
6.73k
                "QPDF xref space",
714
6.73k
                ((buf[4] == '\n')       ? 0
715
6.73k
                     : (buf[4] == '\r') ? 1
716
0
                     : (buf[4] == ' ')  ? 2
717
0
                                        : 9999));
718
6.73k
            int skip = 4;
719
            // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun.
720
13.4k
            while (QUtil::is_space(buf[skip])) {
721
6.73k
                ++skip;
722
6.73k
            }
723
6.73k
            xref_offset = read_xrefTable(xref_offset + skip);
724
6.73k
        } else {
725
0
            xref_offset = read_xrefStream(xref_offset);
726
0
        }
727
6.73k
        if (visited.count(xref_offset) != 0) {
728
0
            QTC::TC("qpdf", "QPDF xref loop");
729
0
            throw damagedPDF("", 0, "loop detected following xref tables");
730
0
        }
731
6.73k
    }
732
733
6.73k
    if (!m->trailer.isInitialized()) {
734
0
        throw damagedPDF("", 0, "unable to find trailer while reading xref");
735
0
    }
736
6.73k
    int size = m->trailer.getKey("/Size").getIntValueAsInt();
737
6.73k
    int max_obj = 0;
738
6.73k
    if (!m->xref_table.empty()) {
739
0
        max_obj = m->xref_table.rbegin()->first.getObj();
740
0
    }
741
6.73k
    if (!m->deleted_objects.empty()) {
742
6.73k
        max_obj = std::max(max_obj, *(m->deleted_objects.rbegin()));
743
6.73k
    }
744
6.73k
    if ((size < 1) || (size - 1 != max_obj)) {
745
0
        QTC::TC("qpdf", "QPDF xref size mismatch");
746
0
        warn(damagedPDF(
747
0
            "",
748
0
            0,
749
0
            ("reported number of objects (" + std::to_string(size) +
750
0
             ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
751
0
    }
752
753
    // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
754
    // never depend on its being set.
755
6.73k
    m->deleted_objects.clear();
756
757
    // Make sure we keep only the highest generation for any object.
758
6.73k
    QPDFObjGen last_og{-1, 0};
759
6.73k
    for (auto const& item: m->xref_table) {
760
0
        auto id = item.first.getObj();
761
0
        if (id == last_og.getObj() && id > 0) {
762
0
            removeObject(last_og);
763
0
        }
764
0
        last_og = item.first;
765
0
    }
766
6.73k
}
767
768
bool
769
QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
770
6.73k
{
771
    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
772
    // buffer.
773
6.73k
    char const* p = line.c_str();
774
6.73k
    char const* start = line.c_str();
775
776
    // Skip zero or more spaces
777
6.73k
    while (QUtil::is_space(*p)) {
778
0
        ++p;
779
0
    }
780
    // Require digit
781
6.73k
    if (!QUtil::is_digit(*p)) {
782
0
        return false;
783
0
    }
784
    // Gather digits
785
6.73k
    std::string obj_str;
786
13.4k
    while (QUtil::is_digit(*p)) {
787
6.73k
        obj_str.append(1, *p++);
788
6.73k
    }
789
    // Require space
790
6.73k
    if (!QUtil::is_space(*p)) {
791
0
        return false;
792
0
    }
793
    // Skip spaces
794
13.4k
    while (QUtil::is_space(*p)) {
795
6.73k
        ++p;
796
6.73k
    }
797
    // Require digit
798
6.73k
    if (!QUtil::is_digit(*p)) {
799
0
        return false;
800
0
    }
801
    // Gather digits
802
6.73k
    std::string num_str;
803
13.4k
    while (QUtil::is_digit(*p)) {
804
6.73k
        num_str.append(1, *p++);
805
6.73k
    }
806
    // Skip any space including line terminators
807
13.4k
    while (QUtil::is_space(*p)) {
808
6.73k
        ++p;
809
6.73k
    }
810
6.73k
    bytes = toI(p - start);
811
6.73k
    obj = QUtil::string_to_int(obj_str.c_str());
812
6.73k
    num = QUtil::string_to_int(num_str.c_str());
813
6.73k
    return true;
814
6.73k
}
815
816
bool
817
QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
818
0
{
819
    // Reposition after initial read attempt and reread.
820
0
    m->file->seek(m->file->getLastOffset(), SEEK_SET);
821
0
    auto line = m->file->readLine(30);
822
823
    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
824
    // buffer.
825
0
    char const* p = line.data();
826
827
    // Skip zero or more spaces. There aren't supposed to be any.
828
0
    bool invalid = false;
829
0
    while (QUtil::is_space(*p)) {
830
0
        ++p;
831
0
        QTC::TC("qpdf", "QPDF ignore first space in xref entry");
832
0
        invalid = true;
833
0
    }
834
    // Require digit
835
0
    if (!QUtil::is_digit(*p)) {
836
0
        return false;
837
0
    }
838
    // Gather digits
839
0
    std::string f1_str;
840
0
    while (QUtil::is_digit(*p)) {
841
0
        f1_str.append(1, *p++);
842
0
    }
843
    // Require space
844
0
    if (!QUtil::is_space(*p)) {
845
0
        return false;
846
0
    }
847
0
    if (QUtil::is_space(*(p + 1))) {
848
0
        QTC::TC("qpdf", "QPDF ignore first extra space in xref entry");
849
0
        invalid = true;
850
0
    }
851
    // Skip spaces
852
0
    while (QUtil::is_space(*p)) {
853
0
        ++p;
854
0
    }
855
    // Require digit
856
0
    if (!QUtil::is_digit(*p)) {
857
0
        return false;
858
0
    }
859
    // Gather digits
860
0
    std::string f2_str;
861
0
    while (QUtil::is_digit(*p)) {
862
0
        f2_str.append(1, *p++);
863
0
    }
864
    // Require space
865
0
    if (!QUtil::is_space(*p)) {
866
0
        return false;
867
0
    }
868
0
    if (QUtil::is_space(*(p + 1))) {
869
0
        QTC::TC("qpdf", "QPDF ignore second extra space in xref entry");
870
0
        invalid = true;
871
0
    }
872
    // Skip spaces
873
0
    while (QUtil::is_space(*p)) {
874
0
        ++p;
875
0
    }
876
0
    if ((*p == 'f') || (*p == 'n')) {
877
0
        type = *p;
878
0
    } else {
879
0
        return false;
880
0
    }
881
0
    if ((f1_str.length() != 10) || (f2_str.length() != 5)) {
882
0
        QTC::TC("qpdf", "QPDF ignore length error xref entry");
883
0
        invalid = true;
884
0
    }
885
886
0
    if (invalid) {
887
0
        warn(damagedPDF("xref table", "accepting invalid xref table entry"));
888
0
    }
889
890
0
    f1 = QUtil::string_to_ll(f1_str.c_str());
891
0
    f2 = QUtil::string_to_int(f2_str.c_str());
892
893
0
    return true;
894
0
}
895
896
// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
897
// result.
898
bool
899
QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
900
6.73k
{
901
6.73k
    std::array<char, 21> line;
902
6.73k
    if (m->file->read(line.data(), 20) != 20) {
903
        // C++20: [[unlikely]]
904
0
        return false;
905
0
    }
906
6.73k
    line[20] = '\0';
907
6.73k
    char const* p = line.data();
908
909
6.73k
    int f1_len = 0;
910
6.73k
    int f2_len = 0;
911
912
    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
913
    // buffer.
914
915
    // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
916
74.0k
    while (*p == '0') {
917
67.3k
        ++f1_len;
918
67.3k
        ++p;
919
67.3k
    }
920
6.73k
    while (QUtil::is_digit(*p) && f1_len++ < 10) {
921
0
        f1 *= 10;
922
0
        f1 += *p++ - '0';
923
0
    }
924
    // Require space
925
6.73k
    if (!QUtil::is_space(*p++)) {
926
        // Entry doesn't start with space or digit.
927
        // C++20: [[unlikely]]
928
0
        return false;
929
0
    }
930
    // Gather digits. NB No risk of overflow as 99'999 < max int.
931
6.73k
    while (*p == '0') {
932
0
        ++f2_len;
933
0
        ++p;
934
0
    }
935
40.3k
    while (QUtil::is_digit(*p) && f2_len++ < 5) {
936
33.6k
        f2 *= 10;
937
33.6k
        f2 += static_cast<int>(*p++ - '0');
938
33.6k
    }
939
6.73k
    if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) {
940
        // C++20: [[likely]]
941
6.73k
        type = *p;
942
        // No test for valid line[19].
943
6.73k
        if (*(++p) && *(++p) && (*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) {
944
            // C++20: [[likely]]
945
6.73k
            return true;
946
6.73k
        }
947
6.73k
    }
948
0
    return read_bad_xrefEntry(f1, f2, type);
949
6.73k
}
950
951
// Read a single cross-reference table section and associated trailer.
952
qpdf_offset_t
953
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
954
6.73k
{
955
6.73k
    std::vector<QPDFObjGen> deleted_items;
956
957
6.73k
    m->file->seek(xref_offset, SEEK_SET);
958
6.73k
    std::string line;
959
6.73k
    while (true) {
960
6.73k
        line.assign(50, '\0');
961
6.73k
        m->file->read(line.data(), line.size());
962
6.73k
        int obj = 0;
963
6.73k
        int num = 0;
964
6.73k
        int bytes = 0;
965
6.73k
        if (!parse_xrefFirst(line, obj, num, bytes)) {
966
0
            QTC::TC("qpdf", "QPDF invalid xref");
967
0
            throw damagedPDF("xref table", "xref syntax invalid");
968
0
        }
969
6.73k
        m->file->seek(m->file->getLastOffset() + bytes, SEEK_SET);
970
13.4k
        for (qpdf_offset_t i = obj; i - num < obj; ++i) {
971
6.73k
            if (i == 0) {
972
                // This is needed by checkLinearization()
973
6.73k
                m->first_xref_item_offset = m->file->tell();
974
6.73k
            }
975
            // For xref_table, these will always be small enough to be ints
976
6.73k
            qpdf_offset_t f1 = 0;
977
6.73k
            int f2 = 0;
978
6.73k
            char type = '\0';
979
6.73k
            if (!read_xrefEntry(f1, f2, type)) {
980
0
                QTC::TC("qpdf", "QPDF invalid xref entry");
981
0
                throw damagedPDF(
982
0
                    "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
983
0
            }
984
6.73k
            if (type == 'f') {
985
                // Save deleted items until after we've checked the XRefStm, if any.
986
6.73k
                deleted_items.emplace_back(toI(i), f2);
987
6.73k
            } else {
988
0
                insertXrefEntry(toI(i), 1, f1, f2);
989
0
            }
990
6.73k
        }
991
6.73k
        qpdf_offset_t pos = m->file->tell();
992
6.73k
        if (readToken(m->file).isWord("trailer")) {
993
6.73k
            break;
994
6.73k
        } else {
995
0
            m->file->seek(pos, SEEK_SET);
996
0
        }
997
6.73k
    }
998
999
    // Set offset to previous xref table if any
1000
6.73k
    QPDFObjectHandle cur_trailer = readTrailer();
1001
6.73k
    if (!cur_trailer.isDictionary()) {
1002
0
        QTC::TC("qpdf", "QPDF missing trailer");
1003
0
        throw damagedPDF("", "expected trailer dictionary");
1004
0
    }
1005
1006
6.73k
    if (!m->trailer.isInitialized()) {
1007
6.73k
        setTrailer(cur_trailer);
1008
1009
6.73k
        if (!m->trailer.hasKey("/Size")) {
1010
0
            QTC::TC("qpdf", "QPDF trailer lacks size");
1011
0
            throw damagedPDF("trailer", "trailer dictionary lacks /Size key");
1012
0
        }
1013
6.73k
        if (!m->trailer.getKey("/Size").isInteger()) {
1014
0
            QTC::TC("qpdf", "QPDF trailer size not integer");
1015
0
            throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
1016
0
        }
1017
6.73k
    }
1018
1019
6.73k
    if (cur_trailer.hasKey("/XRefStm")) {
1020
0
        if (m->ignore_xref_streams) {
1021
0
            QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
1022
0
        } else {
1023
0
            if (cur_trailer.getKey("/XRefStm").isInteger()) {
1024
                // Read the xref stream but disregard any return value -- we'll use our trailer's
1025
                // /Prev key instead of the xref stream's.
1026
0
                (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
1027
0
            } else {
1028
0
                throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
1029
0
            }
1030
0
        }
1031
0
    }
1032
1033
    // Handle any deleted items now that we've read the /XRefStm.
1034
6.73k
    for (auto const& og: deleted_items) {
1035
6.73k
        insertFreeXrefEntry(og);
1036
6.73k
    }
1037
1038
6.73k
    if (cur_trailer.hasKey("/Prev")) {
1039
0
        if (!cur_trailer.getKey("/Prev").isInteger()) {
1040
0
            QTC::TC("qpdf", "QPDF trailer prev not integer");
1041
0
            throw damagedPDF("trailer", "/Prev key in trailer dictionary is not an integer");
1042
0
        }
1043
0
        QTC::TC("qpdf", "QPDF prev key in trailer dictionary");
1044
0
        xref_offset = cur_trailer.getKey("/Prev").getIntValue();
1045
6.73k
    } else {
1046
6.73k
        xref_offset = 0;
1047
6.73k
    }
1048
1049
6.73k
    return xref_offset;
1050
6.73k
}
1051
1052
// Read a single cross-reference stream.
1053
qpdf_offset_t
1054
QPDF::read_xrefStream(qpdf_offset_t xref_offset)
1055
0
{
1056
0
    if (!m->ignore_xref_streams) {
1057
0
        QPDFObjGen x_og;
1058
0
        QPDFObjectHandle xref_obj;
1059
0
        try {
1060
0
            xref_obj =
1061
0
                readObjectAtOffset(false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
1062
0
        } catch (QPDFExc&) {
1063
            // ignore -- report error below
1064
0
        }
1065
0
        if (xref_obj.isStreamOfType("/XRef")) {
1066
0
            QTC::TC("qpdf", "QPDF found xref stream");
1067
0
            return processXRefStream(xref_offset, xref_obj);
1068
0
        }
1069
0
    }
1070
1071
0
    QTC::TC("qpdf", "QPDF can't find xref");
1072
0
    throw damagedPDF("", xref_offset, "xref not found");
1073
0
    return 0; // unreachable
1074
0
}
1075
1076
// Return the entry size of the xref stream and the processed W array.
1077
std::pair<int, std::array<int, 3>>
1078
QPDF::processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
1079
0
{
1080
0
    auto W_obj = dict.getKey("/W");
1081
0
    if (!(W_obj.isArray() && (W_obj.getArrayNItems() >= 3) && W_obj.getArrayItem(0).isInteger() &&
1082
0
          W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {
1083
0
        throw damaged("Cross-reference stream does not have a proper /W key");
1084
0
    }
1085
1086
0
    std::array<int, 3> W;
1087
0
    int entry_size = 0;
1088
0
    auto w_vector = W_obj.getArrayAsVector();
1089
0
    int max_bytes = sizeof(qpdf_offset_t);
1090
0
    for (size_t i = 0; i < 3; ++i) {
1091
0
        W[i] = w_vector[i].getIntValueAsInt();
1092
0
        if (W[i] > max_bytes) {
1093
0
            throw damaged("Cross-reference stream's /W contains impossibly large values");
1094
0
        }
1095
0
        if (W[i] < 0) {
1096
0
            throw damaged("Cross-reference stream's /W contains negative values");
1097
0
        }
1098
0
        entry_size += W[i];
1099
0
    }
1100
0
    if (entry_size == 0) {
1101
0
        throw damaged("Cross-reference stream's /W indicates entry size of 0");
1102
0
    }
1103
0
    return {entry_size, W};
1104
0
}
1105
1106
// Validate Size key and return the maximum number of entries that the xref stream can contain.
1107
int
1108
QPDF::processXRefSize(
1109
    QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
1110
0
{
1111
    // Number of entries is limited by the highest possible object id and stream size.
1112
0
    auto max_num_entries = std::numeric_limits<int>::max();
1113
0
    if (max_num_entries > (std::numeric_limits<qpdf_offset_t>::max() / entry_size)) {
1114
0
        max_num_entries = toI(std::numeric_limits<qpdf_offset_t>::max() / entry_size);
1115
0
    }
1116
1117
0
    auto Size_obj = dict.getKey("/Size");
1118
0
    long long size;
1119
0
    if (!dict.getKey("/Size").getValueAsInt(size)) {
1120
0
        throw damaged("Cross-reference stream does not have a proper /Size key");
1121
0
    } else if (size < 0) {
1122
0
        throw damaged("Cross-reference stream has a negative /Size key");
1123
0
    } else if (size >= max_num_entries) {
1124
0
        throw damaged("Cross-reference stream has an impossibly large /Size key");
1125
0
    }
1126
    // We are not validating that Size <= (Size key of parent xref / trailer).
1127
0
    return max_num_entries;
1128
0
}
1129
1130
// Return the number of entries of the xref stream and the processed Index array.
1131
std::pair<int, std::vector<std::pair<int, int>>>
1132
QPDF::processXRefIndex(
1133
    QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
1134
0
{
1135
0
    auto size = dict.getKey("/Size").getIntValueAsInt();
1136
0
    auto Index_obj = dict.getKey("/Index");
1137
1138
0
    if (Index_obj.isArray()) {
1139
0
        std::vector<std::pair<int, int>> indx;
1140
0
        int num_entries = 0;
1141
0
        auto index_vec = Index_obj.getArrayAsVector();
1142
0
        if ((index_vec.size() % 2) || index_vec.size() < 2) {
1143
0
            throw damaged("Cross-reference stream's /Index has an invalid number of values");
1144
0
        }
1145
1146
0
        int i = 0;
1147
0
        long long first = 0;
1148
0
        for (auto& val: index_vec) {
1149
0
            if (val.isInteger()) {
1150
0
                if (i % 2) {
1151
0
                    auto count = val.getIntValue();
1152
0
                    if (count <= 0) {
1153
0
                        throw damaged(
1154
0
                            "Cross-reference stream section claims to contain " +
1155
0
                            std::to_string(count) + " entries");
1156
0
                    }
1157
                    // We are guarding against the possibility of num_entries * entry_size
1158
                    // overflowing. We are not checking that entries are in ascending order as
1159
                    // required by the spec, which probably should generate a warning. We are also
1160
                    // not checking that for each subsection first object number + number of entries
1161
                    // <= /Size. The spec requires us to ignore object number > /Size.
1162
0
                    if (first > (max_num_entries - count) ||
1163
0
                        count > (max_num_entries - num_entries)) {
1164
0
                        throw damaged(
1165
0
                            "Cross-reference stream claims to contain too many entries: " +
1166
0
                            std::to_string(first) + " " + std::to_string(max_num_entries) + " " +
1167
0
                            std::to_string(num_entries));
1168
0
                    }
1169
0
                    indx.emplace_back(static_cast<int>(first), static_cast<int>(count));
1170
0
                    num_entries += static_cast<int>(count);
1171
0
                } else {
1172
0
                    first = val.getIntValue();
1173
0
                    if (first < 0) {
1174
0
                        throw damaged(
1175
0
                            "Cross-reference stream's /Index contains a negative object id");
1176
0
                    } else if (first > max_num_entries) {
1177
0
                        throw damaged("Cross-reference stream's /Index contains an impossibly "
1178
0
                                      "large object id");
1179
0
                    }
1180
0
                }
1181
0
            } else {
1182
0
                throw damaged(
1183
0
                    "Cross-reference stream's /Index's item " + std::to_string(i) +
1184
0
                    " is not an integer");
1185
0
            }
1186
0
            i++;
1187
0
        }
1188
0
        QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);
1189
0
        return {num_entries, indx};
1190
0
    } else if (Index_obj.isNull()) {
1191
0
        QTC::TC("qpdf", "QPDF xref /Index is null");
1192
0
        return {size, {{0, size}}};
1193
0
    } else {
1194
0
        throw damaged("Cross-reference stream does not have a proper /Index key");
1195
0
    }
1196
0
}
1197
1198
// Process the cross-reference stream xref_obj found at xref_offset: validate /W, /Size and
// /Index, decode every entry from the stream data and record it in the xref table. The stream
// dictionary becomes the trailer if no trailer has been set yet. Returns the dictionary's /Prev
// value, or 0 if it has no /Prev key. Throws the exception built by damagedPDF if the dictionary
// is invalid or the stream data is shorter than the entries it is supposed to contain.
qpdf_offset_t
QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{
    // Build the exception for a problem with this xref stream. The message is copied into a
    // std::string because std::string_view::data() is not guaranteed to be null-terminated.
    auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
        return damagedPDF("xref stream", xref_offset, std::string(msg));
    };

    auto dict = xref_obj.getDict();

    auto [entry_size, W] = processXRefW(dict, damaged);
    int max_num_entries = processXRefSize(dict, entry_size, damaged);
    auto [num_entries, indx] = processXRefIndex(dict, max_num_entries, damaged);

    std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
    size_t actual_size = bp->getSize();
    auto expected_size = toS(entry_size) * toS(num_entries);

    // Too little data is fatal; surplus data only produces a warning.
    if (expected_size != actual_size) {
        QPDFExc x = damaged(
            "Cross-reference stream data has the wrong size; expected = " +
            std::to_string(expected_size) + "; actual = " + std::to_string(actual_size));
        if (expected_size > actual_size) {
            throw x;
        } else {
            warn(x);
        }
    }

    bool saw_first_compressed_object = false;

    // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
    // We know that entry_size * num_entries is less or equal to the size of the buffer.
    auto p = bp->getBuffer();
    for (auto [obj, sec_entries]: indx) {
        // Process a subsection.
        for (int i = 0; i < sec_entries; ++i) {
            // Read this entry: each field is W[j] bytes, most significant byte first. A zero-width
            // first field defaults to 1.
            std::array<qpdf_offset_t, 3> fields{};
            if (W[0] == 0) {
                QTC::TC("qpdf", "QPDF default for xref stream field 0");
                fields[0] = 1;
            }
            for (size_t j = 0; j < 3; ++j) {
                for (int k = 0; k < W[j]; ++k) {
                    fields[j] <<= 8;
                    fields[j] |= *p++;
                }
            }

            // Remember whether an entry of a type other than 2 appears after the first type 2
            // entry.
            if (saw_first_compressed_object) {
                if (fields[0] != 2) {
                    m->uncompressed_after_compressed = true;
                }
            } else if (fields[0] == 2) {
                saw_first_compressed_object = true;
            }
            if (obj == 0) {
                // This is needed by checkLinearization()
                m->first_xref_item_offset = xref_offset;
            } else if (fields[0] == 0) {
                // Ignore fields[2], which we don't care about in this case. This works around the
                // issue of some PDF files that put invalid values, like -1, here for deleted
                // objects.
                insertFreeXrefEntry(QPDFObjGen(obj, 0));
            } else {
                insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
            }
            ++obj;
        }
    }

    if (!m->trailer.isInitialized()) {
        setTrailer(dict);
    }

    if (dict.hasKey("/Prev")) {
        if (!dict.getKey("/Prev").isInteger()) {
            throw damagedPDF(
                "xref stream", "/Prev key in xref stream dictionary is not an integer");
        }
        QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
        return dict.getKey("/Prev").getIntValue();
    } else {
        return 0;
    }
}
1286
1287
void
1288
QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1289
0
{
1290
    // Populate the xref table in such a way that the first reference to an object that we see,
1291
    // which is the one in the latest xref table in which it appears, is the one that gets stored.
1292
    // This works because we are reading more recent appends before older ones.
1293
1294
    // If there is already an entry for this object and generation in the table, it means that a
1295
    // later xref table has registered this object.  Disregard this one.
1296
1297
0
    if (obj > m->xref_table_max_id) {
1298
        // ignore impossibly large object ids or object ids > Size.
1299
0
        return;
1300
0
    }
1301
1302
0
    if (m->deleted_objects.count(obj)) {
1303
0
        QTC::TC("qpdf", "QPDF xref deleted object");
1304
0
        return;
1305
0
    }
1306
1307
0
    if (f0 == 2 && static_cast<int>(f1) == obj) {
1308
0
        warn(damagedPDF("xref stream", "self-referential object stream " + std::to_string(obj)));
1309
0
        return;
1310
0
    }
1311
1312
0
    auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
1313
0
    if (!created) {
1314
0
        QTC::TC("qpdf", "QPDF xref reused object");
1315
0
        return;
1316
0
    }
1317
1318
0
    switch (f0) {
1319
0
    case 1:
1320
        // f2 is generation
1321
0
        QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
1322
0
        iter->second = QPDFXRefEntry(f1);
1323
0
        break;
1324
1325
0
    case 2:
1326
0
        iter->second = QPDFXRefEntry(toI(f1), f2);
1327
0
        break;
1328
1329
0
    default:
1330
0
        throw damagedPDF("xref stream", "unknown xref stream entry type " + std::to_string(f0));
1331
0
        break;
1332
0
    }
1333
0
}
1334
1335
void
1336
QPDF::insertFreeXrefEntry(QPDFObjGen og)
1337
6.73k
{
1338
6.73k
    if (!m->xref_table.count(og)) {
1339
6.73k
        m->deleted_objects.insert(og.getObj());
1340
6.73k
    }
1341
6.73k
}
1342
1343
// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
1344
// beginning to end.
1345
void
1346
QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
1347
0
{
1348
0
    if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) {
1349
0
        QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1350
0
        return;
1351
0
    }
1352
1353
0
    QPDFObjGen og(obj, f2);
1354
0
    if (!m->deleted_objects.count(obj)) {
1355
        // deleted_objects stores the uncompressed objects removed from the xref table at the start
1356
        // of recovery.
1357
0
        QTC::TC("qpdf", "QPDF xref overwrite object");
1358
0
        m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1);
1359
0
    }
1360
0
}
1361
1362
void
1363
QPDF::showXRefTable()
1364
0
{
1365
0
    auto& cout = *m->log->getInfo();
1366
0
    for (auto const& iter: m->xref_table) {
1367
0
        QPDFObjGen const& og = iter.first;
1368
0
        QPDFXRefEntry const& entry = iter.second;
1369
0
        cout << og.unparse('/') << ": ";
1370
0
        switch (entry.getType()) {
1371
0
        case 1:
1372
0
            cout << "uncompressed; offset = " << entry.getOffset();
1373
0
            break;
1374
1375
0
        case 2:
1376
0
            *m->log->getInfo() << "compressed; stream = " << entry.getObjStreamNumber()
1377
0
                               << ", index = " << entry.getObjStreamIndex();
1378
0
            break;
1379
1380
0
        default:
1381
0
            throw std::logic_error("unknown cross-reference table type while"
1382
0
                                   " showing xref_table");
1383
0
            break;
1384
0
        }
1385
0
        m->log->info("\n");
1386
0
    }
1387
0
}
1388
1389
// Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and
1390
// return false. Otherwise return true.
1391
bool
1392
QPDF::resolveXRefTable()
1393
0
{
1394
0
    bool may_change = !m->reconstructed_xref;
1395
0
    for (auto& iter: m->xref_table) {
1396
0
        if (isUnresolved(iter.first)) {
1397
0
            resolve(iter.first);
1398
0
            if (may_change && m->reconstructed_xref) {
1399
0
                return false;
1400
0
            }
1401
0
        }
1402
0
    }
1403
0
    return true;
1404
0
}
1405
1406
// Ensure all objects in the pdf file, including those in indirect references, appear in the object
1407
// cache.
1408
void
1409
QPDF::fixDanglingReferences(bool force)
1410
0
{
1411
0
    if (m->fixed_dangling_refs) {
1412
0
        return;
1413
0
    }
1414
0
    if (!resolveXRefTable()) {
1415
0
        QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
1416
0
        resolveXRefTable();
1417
0
    }
1418
0
    m->fixed_dangling_refs = true;
1419
0
}
1420
1421
size_t
1422
QPDF::getObjectCount()
1423
0
{
1424
    // This method returns the next available indirect object number. makeIndirectObject uses it for
1425
    // this purpose. After fixDanglingReferences is called, all objects in the xref table will also
1426
    // be in obj_cache.
1427
0
    fixDanglingReferences();
1428
0
    QPDFObjGen og;
1429
0
    if (!m->obj_cache.empty()) {
1430
0
        og = (*(m->obj_cache.rbegin())).first;
1431
0
    }
1432
0
    return toS(og.getObj());
1433
0
}
1434
1435
std::vector<QPDFObjectHandle>
1436
QPDF::getAllObjects()
1437
0
{
1438
    // After fixDanglingReferences is called, all objects are in the object cache.
1439
0
    fixDanglingReferences();
1440
0
    std::vector<QPDFObjectHandle> result;
1441
0
    for (auto const& iter: m->obj_cache) {
1442
0
        result.push_back(newIndirect(iter.first, iter.second.object));
1443
0
    }
1444
0
    return result;
1445
0
}
1446
1447
void
1448
QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const& og)
1449
0
{
1450
0
    m->last_object_description.clear();
1451
0
    if (!description.empty()) {
1452
0
        m->last_object_description += description;
1453
0
        if (og.isIndirect()) {
1454
0
            m->last_object_description += ": ";
1455
0
        }
1456
0
    }
1457
0
    if (og.isIndirect()) {
1458
0
        m->last_object_description += "object " + og.unparse(' ');
1459
0
    }
1460
0
}
1461
1462
// Parse the trailer object at the current file position and return it. A warning is issued if the
// parser reports an empty object, or if a dictionary is followed by the keyword "stream". On
// return the file's last offset is the position at which the object started.
QPDFObjectHandle
QPDF::readTrailer()
{
    qpdf_offset_t const start = m->file->tell();
    bool empty = false;
    auto object =
        QPDFParser(m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false);
    if (!empty) {
        // The next token is only consumed when the parsed object is a dictionary.
        if (object.isDictionary() && readToken(m->file).isWord("stream")) {
            warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer"));
        }
    } else {
        // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
        // actual PDF files and Adobe Reader appears to ignore them.
        warn(damagedPDF("trailer", "empty object treated as null"));
    }
    // Override last_offset so that it points to the beginning of the object we just read
    m->file->setLastOffset(start);
    return object;
}
1480
1481
// Parse the object at the current file position and return it. description and og are used to
// build the description passed to the parser; strings are decrypted while parsing only if the
// file is encrypted. If the object is a dictionary followed by the keyword "stream", the stream is
// read as well and the returned handle refers to the stream. A warning is issued for an empty
// object (which is returned as parsed) and when the object is not terminated by "endobj".
QPDFObjectHandle
QPDF::readObject(std::string const& description, QPDFObjGen og)
{
    setLastObjectDescription(description, og);
    qpdf_offset_t const start = m->file->tell();
    bool empty = false;

    StringDecrypter decrypter{this, og};
    StringDecrypter* active_decrypter = nullptr;
    if (m->encp->encrypted) {
        active_decrypter = &decrypter;
    }
    auto object =
        QPDFParser(m->file, m->last_object_description, m->tokenizer, active_decrypter, this, true)
            .parse(empty, false);
    if (empty) {
        // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
        // actual PDF files and Adobe Reader appears to ignore them.
        warn(damagedPDF(m->file, m->file->getLastOffset(), "empty object treated as null"));
        return object;
    }

    auto next = readToken(m->file);
    bool const starts_stream = object.isDictionary() && next.isWord("stream");
    if (starts_stream) {
        readStream(object, og, start);
        next = readToken(m->file);
    }
    if (!next.isWord("endobj")) {
        QTC::TC("qpdf", "QPDF err expected endobj");
        warn(damagedPDF("expected endobj"));
    }
    return object;
}
1510
1511
// After reading stream dictionary and stream keyword, read rest of stream.
1512
void
1513
QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1514
0
{
1515
0
    validateStreamLineEnd(object, og, offset);
1516
1517
    // Must get offset before accessing any additional objects since resolving a previously
1518
    // unresolved indirect object will change file position.
1519
0
    qpdf_offset_t stream_offset = m->file->tell();
1520
0
    size_t length = 0;
1521
1522
0
    try {
1523
0
        auto length_obj = object.getKey("/Length");
1524
1525
0
        if (!length_obj.isInteger()) {
1526
0
            if (length_obj.isNull()) {
1527
0
                QTC::TC("qpdf", "QPDF stream without length");
1528
0
                throw damagedPDF(offset, "stream dictionary lacks /Length key");
1529
0
            }
1530
0
            QTC::TC("qpdf", "QPDF stream length not integer");
1531
0
            throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
1532
0
        }
1533
1534
0
        length = toS(length_obj.getUIntValue());
1535
        // Seek in two steps to avoid potential integer overflow
1536
0
        m->file->seek(stream_offset, SEEK_SET);
1537
0
        m->file->seek(toO(length), SEEK_CUR);
1538
0
        if (!readToken(m->file).isWord("endstream")) {
1539
0
            QTC::TC("qpdf", "QPDF missing endstream");
1540
0
            throw damagedPDF("expected endstream");
1541
0
        }
1542
0
    } catch (QPDFExc& e) {
1543
0
        if (m->attempt_recovery) {
1544
0
            warn(e);
1545
0
            length = recoverStreamLength(m->file, og, stream_offset);
1546
0
        } else {
1547
0
            throw;
1548
0
        }
1549
0
    }
1550
0
    object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
1551
0
}
1552
1553
void
1554
QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1555
0
{
1556
    // The PDF specification states that the word "stream" should be followed by either a carriage
1557
    // return and a newline or by a newline alone.  It specifically disallowed following it by a
1558
    // carriage return alone since, in that case, there would be no way to tell whether the NL in a
1559
    // CR NL sequence was part of the stream data.  However, some readers, including Adobe reader,
1560
    // accept a carriage return by itself when followed by a non-newline character, so that's what
1561
    // we do here. We have also seen files that have extraneous whitespace between the stream
1562
    // keyword and the newline.
1563
0
    while (true) {
1564
0
        char ch;
1565
0
        if (m->file->read(&ch, 1) == 0) {
1566
            // A premature EOF here will result in some other problem that will get reported at
1567
            // another time.
1568
0
            return;
1569
0
        }
1570
0
        if (ch == '\n') {
1571
            // ready to read stream data
1572
0
            QTC::TC("qpdf", "QPDF stream with NL only");
1573
0
            return;
1574
0
        }
1575
0
        if (ch == '\r') {
1576
            // Read another character
1577
0
            if (m->file->read(&ch, 1) != 0) {
1578
0
                if (ch == '\n') {
1579
                    // Ready to read stream data
1580
0
                    QTC::TC("qpdf", "QPDF stream with CRNL");
1581
0
                } else {
1582
                    // Treat the \r by itself as the whitespace after endstream and start reading
1583
                    // stream data in spite of not having seen a newline.
1584
0
                    QTC::TC("qpdf", "QPDF stream with CR only");
1585
0
                    m->file->unreadCh(ch);
1586
0
                    warn(damagedPDF(
1587
0
                        m->file->tell(), "stream keyword followed by carriage return only"));
1588
0
                }
1589
0
            }
1590
0
            return;
1591
0
        }
1592
0
        if (!QUtil::is_space(ch)) {
1593
0
            QTC::TC("qpdf", "QPDF stream without newline");
1594
0
            m->file->unreadCh(ch);
1595
0
            warn(damagedPDF(
1596
0
                m->file->tell(), "stream keyword not followed by proper line terminator"));
1597
0
            return;
1598
0
        }
1599
0
        warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
1600
0
    }
1601
0
}
1602
1603
// Parse object number obj (generation 0) from input at its current position. No string decrypter
// is passed to the parser.
//
// m->last_object_description is expected to start with "object "; everything after that prefix is
// replaced with "<obj> 0" before parsing.
QPDFObjectHandle
QPDF::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
{
    auto& description = m->last_object_description;
    description.erase(7); // keep only the leading "object "
    description += std::to_string(obj);
    description += " 0";

    bool is_empty = false;
    auto parsed = QPDFParser(input, description, m->tokenizer, nullptr, this, true)
                      .parse(is_empty, false);
    if (!is_empty) {
        return parsed;
    }
    // The PDF spec does not seem to permit empty objects, but real-world files contain them and
    // Adobe Reader appears to ignore them.
    warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null"));
    return parsed;
}
1620
1621
bool
1622
QPDF::findEndstream()
1623
0
{
1624
    // Find endstream or endobj. Position the input at that token.
1625
0
    auto t = readToken(m->file, 20);
1626
0
    if (t.isWord("endobj") || t.isWord("endstream")) {
1627
0
        m->file->seek(m->file->getLastOffset(), SEEK_SET);
1628
0
        return true;
1629
0
    }
1630
0
    return false;
1631
0
}
1632
1633
size_t
1634
QPDF::recoverStreamLength(
1635
    std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)
1636
0
{
1637
    // Try to reconstruct stream length by looking for endstream or endobj
1638
0
    warn(damagedPDF(input, stream_offset, "attempting to recover stream length"));
1639
1640
0
    PatternFinder ef(*this, &QPDF::findEndstream);
1641
0
    size_t length = 0;
1642
0
    if (m->file->findFirst("end", stream_offset, 0, ef)) {
1643
0
        length = toS(m->file->tell() - stream_offset);
1644
        // Reread endstream but, if it was endobj, don't skip that.
1645
0
        QPDFTokenizer::Token t = readToken(m->file);
1646
0
        if (t.getValue() == "endobj") {
1647
0
            m->file->seek(m->file->getLastOffset(), SEEK_SET);
1648
0
        }
1649
0
    }
1650
1651
0
    if (length) {
1652
0
        qpdf_offset_t this_obj_offset = 0;
1653
0
        QPDFObjGen this_og;
1654
1655
        // Make sure this is inside this object
1656
0
        for (auto const& iter: m->xref_table) {
1657
0
            QPDFXRefEntry const& entry = iter.second;
1658
0
            if (entry.getType() == 1) {
1659
0
                qpdf_offset_t obj_offset = entry.getOffset();
1660
0
                if ((obj_offset > stream_offset) &&
1661
0
                    ((this_obj_offset == 0) || (this_obj_offset > obj_offset))) {
1662
0
                    this_obj_offset = obj_offset;
1663
0
                    this_og = iter.first;
1664
0
                }
1665
0
            }
1666
0
        }
1667
0
        if (this_obj_offset && (this_og == og)) {
1668
            // Well, we found endstream\nendobj within the space allowed for this object, so we're
1669
            // probably in good shape.
1670
0
        } else {
1671
0
            QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
1672
0
        }
1673
0
    }
1674
1675
0
    if (length == 0) {
1676
0
        warn(damagedPDF(
1677
0
            input, stream_offset, "unable to recover stream data; treating stream as empty"));
1678
0
    } else {
1679
0
        warn(
1680
0
            damagedPDF(input, stream_offset, "recovered stream length: " + std::to_string(length)));
1681
0
    }
1682
1683
0
    QTC::TC("qpdf", "QPDF recovered stream length");
1684
0
    return length;
1685
0
}
1686
1687
// Read the next token from input using the shared tokenizer, passing m->last_object_description,
// a literal true, and max_len through to QPDFTokenizer::readToken.
QPDFTokenizer::Token
QPDF::readToken(std::shared_ptr<InputSource> input, size_t max_len)
{
    auto& tokenizer = m->tokenizer;
    return tokenizer.readToken(input, m->last_object_description, true, max_len);
}
1692
1693
// Read the indirect object whose "n n obj" header is expected at the given file offset.
//
// try_recovery: if true, a failure while reading the header triggers reconstruction of the xref
//     table followed by a single retry. Forced to false when exp_og has object ID 0 or when
//     m->attempt_recovery is false.
// offset: file offset of the object header. An offset of 0 produces a warning and a null object.
// description: recorded via setLastObjectDescription() and passed on to readObject().
// exp_og: the ID/generation expected at offset. An object ID of 0 means "unknown / don't care"
//     and disables the comparison against what is read from the file.
// og: output parameter; set to the ID/generation actually read from the file.
// skip_cache_if_in_xref: if true and og is present in the xref table, the object read here is not
//     stored in the object cache.
//
// Returns the object read. Returns a null object for offset 0, or when the object is no longer in
// the xref table after reconstruction. Rethrows the QPDFExc from header parsing when recovery is
// not being attempted, and throws a QPDFExc on EOF while skipping whitespace after the object.
QPDFObjectHandle
QPDF::readObjectAtOffset(
    bool try_recovery,
    qpdf_offset_t offset,
    std::string const& description,
    QPDFObjGen exp_og,
    QPDFObjGen& og,
    bool skip_cache_if_in_xref)
{
    bool check_og = true;
    if (exp_og.getObj() == 0) {
        // This method uses an expect object ID of 0 to indicate that we don't know or don't care
        // what the actual object ID is at this offset. This is true when we read the xref stream
        // and linearization hint streams. In this case, we don't verify the expect object
        // ID/generation against what was read from the file. There is also no reason to attempt
        // xref recovery if we get a failure in this case since the read attempt was not triggered
        // by an xref lookup.
        check_og = false;
        try_recovery = false;
    }
    setLastObjectDescription(description, exp_og);

    if (!m->attempt_recovery) {
        try_recovery = false;
    }

    // Special case: if offset is 0, just return null.  Some PDF writers, in particular
    // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as
    // "0000000000 00000 n", which is not correct, but it won't hurt anything for us to ignore
    // these.
    if (offset == 0) {
        QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
        warn(damagedPDF(0, "object has offset 0"));
        return QPDFObjectHandle::newNull();
    }

    m->file->seek(offset, SEEK_SET);
    try {
        // Expect three tokens: <integer> <integer> obj
        QPDFTokenizer::Token tobjid = readToken(m->file);
        bool objidok = tobjid.isInteger();
        QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
        if (!objidok) {
            QTC::TC("qpdf", "QPDF expected n n obj");
            throw damagedPDF(offset, "expected n n obj");
        }
        QPDFTokenizer::Token tgen = readToken(m->file);
        bool genok = tgen.isInteger();
        QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
        if (!genok) {
            throw damagedPDF(offset, "expected n n obj");
        }
        QPDFTokenizer::Token tobj = readToken(m->file);

        bool objok = tobj.isWord("obj");
        QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);

        if (!objok) {
            throw damagedPDF(offset, "expected n n obj");
        }
        int objid = QUtil::string_to_int(tobjid.getValue().c_str());
        int generation = QUtil::string_to_int(tgen.getValue().c_str());
        og = QPDFObjGen(objid, generation);
        if (objid == 0) {
            QTC::TC("qpdf", "QPDF object id 0");
            throw damagedPDF(offset, "object with ID 0");
        }
        if (check_og && (exp_og != og)) {
            QTC::TC("qpdf", "QPDF err wrong objid/generation");
            QPDFExc e = damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");
            if (try_recovery) {
                // Will be retried below
                throw e;
            } else {
                // We can try reading the object anyway even if the ID doesn't match.
                warn(e);
            }
        }
    } catch (QPDFExc& e) {
        if (!try_recovery) {
            throw;
        }
        // Try again after reconstructing xref table
        reconstruct_xref(e);
        // Look the entry up once; the iterator is obtained after reconstruction and is only used
        // before the recursive call.
        auto const recovered = m->xref_table.find(exp_og);
        if (recovered != m->xref_table.end() && recovered->second.getType() == 1) {
            qpdf_offset_t new_offset = recovered->second.getOffset();
            QPDFObjectHandle result =
                readObjectAtOffset(false, new_offset, description, exp_og, og, false);
            QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
            return result;
        }
        QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
        warn(damagedPDF(
            "",
            0,
            ("object " + exp_og.unparse(' ') +
             " not found in file after regenerating cross reference "
             "table")));
        return QPDFObjectHandle::newNull();
    }

    QPDFObjectHandle oh = readObject(description, og);

    if (isUnresolved(og)) {
        // Store the object in the cache here so it gets cached whether we first know the offset or
        // whether we first know the object ID and generation (in which we case we would get here
        // through resolve).

        // Determine the end offset of this object before and after white space.  We use these
        // numbers to validate linearization hint tables.  Offsets and lengths of objects may imply
        // the end of an object to be anywhere between these values.
        qpdf_offset_t end_before_space = m->file->tell();

        // skip over spaces
        while (true) {
            char ch;
            if (m->file->read(&ch, 1)) {
                if (!isspace(static_cast<unsigned char>(ch))) {
                    m->file->seek(-1, SEEK_CUR);
                    break;
                }
            } else {
                throw damagedPDF(m->file->tell(), "EOF after endobj");
            }
        }
        qpdf_offset_t end_after_space = m->file->tell();
        if (skip_cache_if_in_xref && m->xref_table.count(og)) {
            // Ordinarily, an object gets read here when resolved through xref table or stream. In
            // the special case of the xref stream and linearization hint tables, the offset comes
            // from another source. For the specific case of xref streams, the xref stream is read
            // and loaded into the object cache very early in parsing. Ordinarily, when a file is
            // updated by appending, items inserted into the xref table in later updates take
            // precedence over earlier items. In the special case of reusing the object number
            // previously used as the xref stream, we have the following order of events:
            //
            // * reused object gets loaded into the xref table
            // * old object is read here while reading xref streams
            // * original xref entry is ignored (since already in xref table)
            //
            // It is the second step that causes a problem. Even though the xref table is correct in
            // this case, the old object is already in the cache and so effectively prevails over
            // the reused object. To work around this issue, we have a special case for the xref
            // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream,
            // don't cache what we read here.
            //
            // It is likely that the same bug may exist for linearization hint tables, but the
            // existing code uses end_before_space and end_after_space from the cache, so fixing
            // that would require more significant rework. The chances of a linearization hint
            // stream being reused seems smaller because the xref stream is probably the highest
            // object in the file and the linearization hint stream would be some random place in
            // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we
            // could use !check_og in place of skip_cache_if_in_xref.
            QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
        } else {
            updateCache(og, oh.getObj(), end_before_space, end_after_space);
        }
    }

    return oh;
}
1854
1855
// Resolve the object identified by og and return a raw pointer to the cached object.
//
// If og is already resolved, the cached object is returned directly. If og is currently being
// resolved (a loop), a warning is issued and og is cached as null. Otherwise the xref entry, if
// any, is used to read the object either from its file offset (type 1) or from an object stream
// (type 2). Errors raised while reading are converted to warnings. If the object is still
// unresolved afterwards it is cached as null.
QPDFObject*
QPDF::resolve(QPDFObjGen og)
{
    if (!isUnresolved(og)) {
        return m->obj_cache[og].object.get();
    }

    if (m->resolving.count(og)) {
        // This can happen if an object references itself directly or indirectly in some key that
        // has to be resolved during object parsing, such as stream length.
        QTC::TC("qpdf", "QPDF recursion loop in resolve");
        warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
        updateCache(og, QPDF_Null::create(), -1, -1);
        return m->obj_cache[og].object.get();
    }
    ResolveRecorder rr(this, og);

    // Single lookup instead of count() followed by operator[].
    if (auto const xref_it = m->xref_table.find(og); xref_it != m->xref_table.end()) {
        QPDFXRefEntry const& entry = xref_it->second;
        try {
            switch (entry.getType()) {
            case 1:
                {
                    // Copy the offset out before reading; the read is attempted with recovery
                    // enabled.
                    qpdf_offset_t offset = entry.getOffset();
                    // Object stored in cache by readObjectAtOffset
                    QPDFObjGen a_og;
                    QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false);
                }
                break;

            case 2:
                resolveObjectsInStream(entry.getObjStreamNumber());
                break;

            default:
                throw damagedPDF(
                    "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
            }
        } catch (QPDFExc& e) {
            warn(e);
        } catch (std::exception& e) {
            warn(damagedPDF(
                "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
        }
    }

    if (isUnresolved(og)) {
        // PDF spec says unknown objects resolve to the null object.
        QTC::TC("qpdf", "QPDF resolve failure to null");
        updateCache(og, QPDF_Null::create(), -1, -1);
    }

    auto result(m->obj_cache[og].object);
    result->setDefaultDescription(this, og);
    return result.get();
}
1911
1912
void
1913
QPDF::resolveObjectsInStream(int obj_stream_number)
1914
0
{
1915
0
    if (m->resolved_object_streams.count(obj_stream_number)) {
1916
0
        return;
1917
0
    }
1918
0
    m->resolved_object_streams.insert(obj_stream_number);
1919
    // Force resolution of object stream
1920
0
    QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0);
1921
0
    if (!obj_stream.isStream()) {
1922
0
        throw damagedPDF(
1923
0
            "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
1924
0
    }
1925
1926
    // For linearization data in the object, use the data from the object stream for the objects in
1927
    // the stream.
1928
0
    QPDFObjGen stream_og(obj_stream_number, 0);
1929
0
    qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
1930
0
    qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
1931
1932
0
    QPDFObjectHandle dict = obj_stream.getDict();
1933
0
    if (!dict.isDictionaryOfType("/ObjStm")) {
1934
0
        QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
1935
0
        warn(damagedPDF(
1936
0
            "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type"));
1937
0
    }
1938
1939
0
    if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {
1940
0
        throw damagedPDF(
1941
0
            ("object stream " + std::to_string(obj_stream_number) + " has incorrect keys"));
1942
0
    }
1943
1944
0
    int n = dict.getKey("/N").getIntValueAsInt();
1945
0
    int first = dict.getKey("/First").getIntValueAsInt();
1946
1947
0
    std::map<int, int> offsets;
1948
1949
0
    std::shared_ptr<Buffer> bp = obj_stream.getStreamData(qpdf_dl_specialized);
1950
0
    auto input = std::shared_ptr<InputSource>(
1951
        // line-break
1952
0
        new BufferInputSource(
1953
0
            (m->file->getName() + " object stream " + std::to_string(obj_stream_number)),
1954
0
            bp.get()));
1955
1956
0
    for (int i = 0; i < n; ++i) {
1957
0
        QPDFTokenizer::Token tnum = readToken(input);
1958
0
        QPDFTokenizer::Token toffset = readToken(input);
1959
0
        if (!(tnum.isInteger() && toffset.isInteger())) {
1960
0
            throw damagedPDF(
1961
0
                input,
1962
0
                m->last_object_description,
1963
0
                input->getLastOffset(),
1964
0
                "expected integer in object stream header");
1965
0
        }
1966
1967
0
        int num = QUtil::string_to_int(tnum.getValue().c_str());
1968
0
        long long offset = QUtil::string_to_int(toffset.getValue().c_str());
1969
0
        if (num > m->xref_table_max_id) {
1970
0
            continue;
1971
0
        }
1972
0
        if (num == obj_stream_number) {
1973
0
            QTC::TC("qpdf", "QPDF ignore self-referential object stream");
1974
0
            warn(damagedPDF(
1975
0
                input,
1976
0
                m->last_object_description,
1977
0
                input->getLastOffset(),
1978
0
                "object stream claims to contain itself"));
1979
0
            continue;
1980
0
        }
1981
0
        offsets[num] = toI(offset + first);
1982
0
    }
1983
1984
    // To avoid having to read the object stream multiple times, store all objects that would be
1985
    // found here in the cache.  Remember that some objects stored here might have been overridden
1986
    // by new objects appended to the file, so it is necessary to recheck the xref table and only
1987
    // cache what would actually be resolved here.
1988
0
    m->last_object_description.clear();
1989
0
    m->last_object_description += "object ";
1990
0
    for (auto const& iter: offsets) {
1991
0
        QPDFObjGen og(iter.first, 0);
1992
0
        auto entry = m->xref_table.find(og);
1993
0
        if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
1994
0
            entry->second.getObjStreamNumber() == obj_stream_number) {
1995
0
            int offset = iter.second;
1996
0
            input->seek(offset, SEEK_SET);
1997
0
            QPDFObjectHandle oh = readObjectInStream(input, iter.first);
1998
0
            updateCache(og, oh.getObj(), end_before_space, end_after_space);
1999
0
        } else {
2000
0
            QTC::TC("qpdf", "QPDF not caching overridden objstm object");
2001
0
        }
2002
0
    }
2003
0
}
2004
2005
// Set the default description of obj to refer to this QPDF and og, then wrap obj in a
// QPDFObjectHandle.
QPDFObjectHandle
QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj)
{
    obj->setDefaultDescription(this, og);
    QPDFObjectHandle handle{obj};
    return handle;
}
2011
2012
void
2013
QPDF::updateCache(
2014
    QPDFObjGen const& og,
2015
    std::shared_ptr<QPDFObject> const& object,
2016
    qpdf_offset_t end_before_space,
2017
    qpdf_offset_t end_after_space)
2018
10.7k
{
2019
10.7k
    object->setObjGen(this, og);
2020
10.7k
    if (isCached(og)) {
2021
10.7k
        auto& cache = m->obj_cache[og];
2022
10.7k
        cache.object->assign(object);
2023
10.7k
        cache.end_before_space = end_before_space;
2024
10.7k
        cache.end_after_space = end_after_space;
2025
10.7k
    } else {
2026
0
        m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space);
2027
0
    }
2028
10.7k
}
2029
2030
bool
2031
QPDF::isCached(QPDFObjGen const& og)
2032
10.7k
{
2033
10.7k
    return m->obj_cache.count(og) != 0;
2034
10.7k
}
2035
2036
bool
2037
QPDF::isUnresolved(QPDFObjGen const& og)
2038
0
{
2039
0
    return !isCached(og) || m->obj_cache[og].object->isUnresolved();
2040
0
}
2041
2042
// Return the object ID/generation to use for a newly created object: one more than the value
// reported by getObjectCount(), with generation 0.
//
// Throws std::range_error if that value is already the largest representable int.
QPDFObjGen
QPDF::nextObjGen()
{
    int const highest_id = toI(getObjectCount());
    if (highest_id != std::numeric_limits<int>::max()) {
        return QPDFObjGen(highest_id + 1, 0);
    }
    throw std::range_error("max object id is too high to create new objects");
}
2051
2052
// Store obj in the object cache under the next available object ID (end offsets -1, -1) and
// return an indirect handle to the cached object.
QPDFObjectHandle
QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
{
    QPDFObjGen const new_og = nextObjGen();
    auto& slot = m->obj_cache[new_og];
    slot = ObjCache(obj, -1, -1);
    return newIndirect(new_og, slot.object);
}
2059
2060
// Make the object held by oh an indirect object of this QPDF and return a handle to it.
//
// Throws std::logic_error if oh is not initialized.
QPDFObjectHandle
QPDF::makeIndirectObject(QPDFObjectHandle oh)
{
    if (oh.isInitialized()) {
        return makeIndirectFromQPDFObject(oh.getObj());
    }
    throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");
}
2068
2069
// Create a new indirect object whose value is a freshly created QPDF_Reserved object.
QPDFObjectHandle
QPDF::newReserved()
{
    auto placeholder = QPDF_Reserved::create();
    return makeIndirectFromQPDFObject(placeholder);
}
2074
2075
// Create a new indirect object whose value is a freshly created QPDF_Null object.
QPDFObjectHandle
QPDF::newIndirectNull()
{
    auto null_object = QPDF_Null::create();
    return makeIndirectFromQPDFObject(null_object);
}
2080
2081
// Create a new indirect stream object with an empty dictionary, offset 0 and length 0. The stream
// is created with the ID returned by nextObjGen().
QPDFObjectHandle
QPDF::newStream()
{
    auto stream =
        QPDF_Stream::create(this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0);
    return makeIndirectFromQPDFObject(stream);
}
2087
2088
// Create a new indirect stream and set its data from the given buffer, passing null objects for
// both the filter and decode-parameters arguments of replaceStreamData().
QPDFObjectHandle
QPDF::newStream(std::shared_ptr<Buffer> data)
{
    QPDFObjectHandle stream = newStream();
    stream.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    return stream;
}
2095
2096
// Create a new indirect stream and set its data from the given string, passing null objects for
// both the filter and decode-parameters arguments of replaceStreamData().
QPDFObjectHandle
QPDF::newStream(std::string const& data)
{
    QPDFObjectHandle stream = newStream();
    stream.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
    return stream;
}
2103
2104
// Return a handle to a newly created stream object for og with an empty dictionary, offset 0 and
// length 0. This function does not itself add the stream to the object cache.
QPDFObjectHandle
QPDF::reserveStream(QPDFObjGen const& og)
{
    auto stream = QPDF_Stream::create(this, og, QPDFObjectHandle::newDictionary(), 0, 0);
    return {stream};
}
2109
2110
std::shared_ptr<QPDFObject>
2111
QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2112
0
{
2113
    // This method is called by the parser and therefore must not resolve any objects.
2114
0
    auto og = QPDFObjGen(id, gen);
2115
0
    if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2116
0
        return iter->second.object;
2117
0
    }
2118
0
    if (m->xref_table.count(og) || !m->parsed) {
2119
0
        return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2120
0
    }
2121
0
    if (parse_pdf) {
2122
0
        return QPDF_Null::create();
2123
0
    }
2124
0
    return m->obj_cache.insert({og, QPDF_Null::create(this, og)}).first->second.object;
2125
0
}
2126
2127
std::shared_ptr<QPDFObject>
2128
QPDF::getObjectForJSON(int id, int gen)
2129
306k
{
2130
306k
    auto og = QPDFObjGen(id, gen);
2131
306k
    auto [it, inserted] = m->obj_cache.try_emplace(og);
2132
306k
    auto& obj = it->second.object;
2133
306k
    if (inserted) {
2134
16.1k
        obj = (m->parsed && !m->xref_table.count(og)) ? QPDF_Null::create(this, og)
2135
16.1k
                                                      : QPDF_Unresolved::create(this, og);
2136
16.1k
    }
2137
306k
    return obj;
2138
306k
}
2139
2140
// Return a handle for the object identified by og.
//
// A cached object is returned as is. If the file has been parsed and og is not in the xref table,
// an uncached null object is returned. Otherwise an unresolved placeholder is added to the cache
// (end offsets -1, -1) and returned.
QPDFObjectHandle
QPDF::getObject(QPDFObjGen const& og)
{
    auto const cached = m->obj_cache.find(og);
    if (cached != m->obj_cache.end()) {
        return {cached->second.object};
    }

    bool const known_missing = m->parsed && !m->xref_table.count(og);
    if (known_missing) {
        return QPDF_Null::create();
    }

    auto emplaced = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
    return {emplaced.first->second.object};
}
2152
2153
// Convenience overload: builds a QPDFObjGen from objid and generation and forwards to
// getObject(QPDFObjGen const&).
QPDFObjectHandle
QPDF::getObject(int objid, int generation)
{
    QPDFObjGen const og(objid, generation);
    return getObject(og);
}
2158
2159
// Forwards to getObject(QPDFObjGen const&) and returns its result unchanged.
QPDFObjectHandle
QPDF::getObjectByObjGen(QPDFObjGen const& og)
{
    QPDFObjectHandle handle = getObject(og);
    return handle;
}
2164
2165
// Builds a QPDFObjGen from objid and generation and forwards to getObject(QPDFObjGen const&).
QPDFObjectHandle
QPDF::getObjectByID(int objid, int generation)
{
    QPDFObjGen const og(objid, generation);
    return getObject(og);
}
2170
2171
void
2172
QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
2173
0
{
2174
0
    replaceObject(QPDFObjGen(objid, generation), oh);
2175
0
}
2176
2177
void
2178
QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
2179
10.7k
{
2180
10.7k
    if (oh.isIndirect() || !oh.isInitialized()) {
2181
0
        QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
2182
0
        throw std::logic_error("QPDF::replaceObject called with indirect object handle");
2183
0
    }
2184
10.7k
    updateCache(og, oh.getObj(), -1, -1);
2185
10.7k
}
2186
2187
void
2188
QPDF::removeObject(QPDFObjGen og)
2189
0
{
2190
0
    m->xref_table.erase(og);
2191
0
    if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {
2192
        // Take care of any object handles that may be floating around.
2193
0
        cached->second.object->assign(QPDF_Null::create());
2194
0
        cached->second.object->setObjGen(nullptr, QPDFObjGen());
2195
0
        m->obj_cache.erase(cached);
2196
0
    }
2197
0
}
2198
2199
void
2200
QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
2201
0
{
2202
0
    QTC::TC("qpdf", "QPDF replaceReserved");
2203
0
    auto tc = reserved.getTypeCode();
2204
0
    if (!(tc == ::ot_reserved || tc == ::ot_null)) {
2205
0
        throw std::logic_error("replaceReserved called with non-reserved object");
2206
0
    }
2207
0
    replaceObject(reserved.getObjGen(), replacement);
2208
0
}
2209
2210
// Copy the indirect object `foreign`, which must be owned by a different QPDF, into this QPDF and
// return the handle of the corresponding local object.
//
// Throws std::logic_error if `foreign` is a direct object, if it is owned by this QPDF, or if the
// ObjCopier's `visiting` set is unexpectedly non-empty before or after the reservation pass. If no
// local object ends up mapped to `foreign`, a warning is issued and a null object is returned.
QPDFObjectHandle
QPDF::copyForeignObject(QPDFObjectHandle foreign)
{
    // Overview.
    //
    // An indirect QPDFObjectHandle has an owning QPDF, and its object ID and generation refer to
    // an object in that QPDF. Copying such a handle into the local QPDF therefore requires every
    // indirect reference it contains to be replaced with a reference to the matching local object.
    //
    // For each foreign QPDF we copy from, an ObjCopier holds a map from the foreign ObjGen to the
    // local QPDFObjectHandle.
    //
    // Phase 1 (reserveObjects): deep-traverse the foreign object with loop detection, stopping at
    // page boundaries. Each indirect object that has no local counterpart yet gets a "reserved"
    // placeholder (or, for a stream, a new stream) recorded in the map, and is queued in to_copy.
    //
    // Phase 2 (replaceForeignIndirectObjects): build a copy of each queued object in which every
    // foreign indirect reference is replaced by its local counterpart, then swap that copy in for
    // the local placeholder. Working through placeholders is what allows objects with circular
    // references to be copied in any order.
    //
    // Streams are not copied eagerly. Their data is set up to be pulled from the original stream
    // through a stream data provider, in a way that doesn't require the original QPDF object but
    // may require the original source of the stream data, with special handling for
    // immediate_copy_from. That logic also lives in replaceForeignIndirectObjects.
    //
    // Calling copyForeignObject on page objects is explicitly allowed: it is a documented use case
    // for copying pages when the pages tree is intentionally not being updated.

    if (!foreign.isIndirect()) {
        QTC::TC("qpdf", "QPDF copyForeign direct");
        throw std::logic_error("QPDF::copyForeign called with direct object handle");
    }
    QPDF& source_qpdf = foreign.getQPDF();
    if (this == &source_qpdf) {
        QTC::TC("qpdf", "QPDF copyForeign not foreign");
        throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
    }

    ObjCopier& copier = m->object_copiers[source_qpdf.m->unique_id];
    if (!copier.visiting.empty()) {
        throw std::logic_error("obj_copier.visiting is not empty"
                               " at the beginning of copyForeignObject");
    }

    // Ensure this file has an object for every object referenced from the foreign one.
    // copier.object_map maps foreign QPDFObjGen to local objects. Anything newly discovered is
    // mapped to a reservation, except streams, which are mapped to a real local stream right away.
    reserveObjects(foreign, copier, true);

    if (!copier.visiting.empty()) {
        throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
    }

    // Build the local copies and substitute them for the reservations. Streams were already
    // created as real local objects, so there is no reservation to replace for them.
    for (auto& pending: copier.to_copy) {
        QPDFObjectHandle local_copy = replaceForeignIndirectObjects(pending, copier, true);
        if (pending.isStream()) {
            continue;
        }
        QPDFObjGen pending_og(pending.getObjGen());
        replaceReserved(copier.object_map[pending_og], local_copy);
    }
    copier.to_copy.clear();

    auto mapped = copier.object_map.find(foreign.getObjGen());
    if (mapped == copier.object_map.end()) {
        warn(damagedPDF("unexpected reference to /Pages object while copying foreign object; "
                        "replacing with null"));
        return QPDFObjectHandle::newNull();
    }
    return mapped->second;
}
2291
2292
void
2293
QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
2294
0
{
2295
0
    auto foreign_tc = foreign.getTypeCode();
2296
0
    if (foreign_tc == ::ot_reserved) {
2297
0
        throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
2298
0
    }
2299
2300
0
    if (foreign.isPagesObject()) {
2301
0
        QTC::TC("qpdf", "QPDF not copying pages object");
2302
0
        return;
2303
0
    }
2304
2305
0
    if (foreign.isIndirect()) {
2306
0
        QPDFObjGen foreign_og(foreign.getObjGen());
2307
0
        if (!obj_copier.visiting.add(foreign_og)) {
2308
0
            QTC::TC("qpdf", "QPDF loop reserving objects");
2309
0
            return;
2310
0
        }
2311
0
        if (obj_copier.object_map.count(foreign_og) > 0) {
2312
0
            QTC::TC("qpdf", "QPDF already reserved object");
2313
0
            if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
2314
0
                obj_copier.visiting.erase(foreign);
2315
0
                return;
2316
0
            }
2317
0
        } else {
2318
0
            QTC::TC("qpdf", "QPDF copy indirect");
2319
0
            obj_copier.object_map[foreign_og] =
2320
0
                foreign.isStream() ? newStream() : newIndirectNull();
2321
0
            if ((!top) && foreign.isPageObject()) {
2322
0
                QTC::TC("qpdf", "QPDF not crossing page boundary");
2323
0
                obj_copier.visiting.erase(foreign_og);
2324
0
                return;
2325
0
            }
2326
0
        }
2327
0
        obj_copier.to_copy.push_back(foreign);
2328
0
    }
2329
2330
0
    if (foreign_tc == ::ot_array) {
2331
0
        QTC::TC("qpdf", "QPDF reserve array");
2332
0
        int n = foreign.getArrayNItems();
2333
0
        for (int i = 0; i < n; ++i) {
2334
0
            reserveObjects(foreign.getArrayItem(i), obj_copier, false);
2335
0
        }
2336
0
    } else if (foreign_tc == ::ot_dictionary) {
2337
0
        QTC::TC("qpdf", "QPDF reserve dictionary");
2338
0
        for (auto const& key: foreign.getKeys()) {
2339
0
            reserveObjects(foreign.getKey(key), obj_copier, false);
2340
0
        }
2341
0
    } else if (foreign_tc == ::ot_stream) {
2342
0
        QTC::TC("qpdf", "QPDF reserve stream");
2343
0
        reserveObjects(foreign.getDict(), obj_copier, false);
2344
0
    }
2345
2346
0
    obj_copier.visiting.erase(foreign);
2347
0
}
2348
2349
// Second phase of copyForeignObject: return a local equivalent of `foreign`.
//
// Below the top level, an indirect reference is replaced by the local object recorded for it in
// obj_copier.object_map, or by null if none was recorded. Arrays and dictionaries are rebuilt
// recursively. A stream reuses the local stream recorded in the map: its dictionary entries are
// replaced recursively and its data is hooked up via copyStreamData. Anything else must be a
// scalar and is returned as a direct copy.
//
// Throws std::logic_error if a top-level, non-stream result turns out to be indirect.
QPDFObjectHandle
QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
{
    auto const type_code = foreign.getTypeCode();

    if (!top && foreign.isIndirect()) {
        QTC::TC("qpdf", "QPDF replace indirect");
        auto found = obj_copier.object_map.find(foreign.getObjGen());
        if (found != obj_copier.object_map.end()) {
            return found->second;
        }
        // This case would occur if this is a reference to a Pages object that we didn't traverse
        // into.
        QTC::TC("qpdf", "QPDF replace foreign indirect with null");
        return QPDFObjectHandle::newNull();
    }

    QPDFObjectHandle result;
    if (type_code == ::ot_array) {
        QTC::TC("qpdf", "QPDF replace array");
        result = QPDFObjectHandle::newArray();
        int const item_count = foreign.getArrayNItems();
        for (int idx = 0; idx < item_count; ++idx) {
            QPDFObjectHandle item =
                replaceForeignIndirectObjects(foreign.getArrayItem(idx), obj_copier, false);
            result.appendItem(item);
        }
    } else if (type_code == ::ot_dictionary) {
        QTC::TC("qpdf", "QPDF replace dictionary");
        result = QPDFObjectHandle::newDictionary();
        std::set<std::string> keys = foreign.getKeys();
        for (auto const& key: keys) {
            QPDFObjectHandle value =
                replaceForeignIndirectObjects(foreign.getKey(key), obj_copier, false);
            result.replaceKey(key, value);
        }
    } else if (type_code == ::ot_stream) {
        QTC::TC("qpdf", "QPDF replace stream");
        result = obj_copier.object_map[foreign.getObjGen()];
        result.assertStream();
        QPDFObjectHandle local_dict = result.getDict();
        QPDFObjectHandle foreign_dict = foreign.getDict();
        std::set<std::string> keys = foreign_dict.getKeys();
        for (auto const& key: keys) {
            QPDFObjectHandle value =
                replaceForeignIndirectObjects(foreign_dict.getKey(key), obj_copier, false);
            local_dict.replaceKey(key, value);
        }
        copyStreamData(result, foreign);
    } else {
        foreign.assertScalar();
        result = foreign;
        result.makeDirect();
    }

    if (top && !result.isStream() && result.isIndirect()) {
        throw std::logic_error("replacement for foreign object is indirect");
    }

    return result;
}
2406
2407
void
2408
QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2409
0
{
2410
    // This method was originally written for copying foreign streams, but it is used by
2411
    // QPDFObjectHandle to copy streams from the same QPDF object as well.
2412
2413
0
    QPDFObjectHandle dict = result.getDict();
2414
0
    QPDFObjectHandle old_dict = foreign.getDict();
2415
0
    if (m->copied_stream_data_provider == nullptr) {
2416
0
        m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
2417
0
        m->copied_streams =
2418
0
            std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
2419
0
    }
2420
0
    QPDFObjGen local_og(result.getObjGen());
2421
    // Copy information from the foreign stream so we can pipe its data later without keeping the
2422
    // original QPDF object around.
2423
2424
0
    QPDF& foreign_stream_qpdf =
2425
0
        foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
2426
2427
0
    auto stream = foreign.getObjectPtr()->as<QPDF_Stream>();
2428
0
    if (stream == nullptr) {
2429
0
        throw std::logic_error("unable to retrieve underlying"
2430
0
                               " stream object from foreign stream");
2431
0
    }
2432
0
    std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer();
2433
0
    if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
2434
        // Pull the stream data into a buffer before attempting the copy operation. Do it on the
2435
        // source stream so that if the source stream is copied multiple times, we don't have to
2436
        // keep duplicating the memory.
2437
0
        QTC::TC("qpdf", "QPDF immediate copy stream data");
2438
0
        foreign.replaceStreamData(
2439
0
            foreign.getRawStreamData(),
2440
0
            old_dict.getKey("/Filter"),
2441
0
            old_dict.getKey("/DecodeParms"));
2442
0
        stream_buffer = stream->getStreamDataBuffer();
2443
0
    }
2444
0
    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
2445
0
        stream->getStreamDataProvider();
2446
0
    if (stream_buffer.get()) {
2447
0
        QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
2448
0
        result.replaceStreamData(
2449
0
            stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
2450
0
    } else if (stream_provider.get()) {
2451
        // In this case, the remote stream's QPDF must stay in scope.
2452
0
        QTC::TC("qpdf", "QPDF copy foreign stream with provider");
2453
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
2454
0
        result.replaceStreamData(
2455
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
2456
0
    } else {
2457
0
        auto foreign_stream_data = std::make_shared<ForeignStreamData>(
2458
0
            foreign_stream_qpdf.m->encp,
2459
0
            foreign_stream_qpdf.m->file,
2460
0
            foreign.getObjGen(),
2461
0
            stream->getParsedOffset(),
2462
0
            stream->getLength(),
2463
0
            dict);
2464
0
        m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
2465
0
        result.replaceStreamData(
2466
0
            m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
2467
0
    }
2468
0
}
2469
2470
void
2471
QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2472
0
{
2473
0
    swapObjects(QPDFObjGen(objid1, generation1), QPDFObjGen(objid2, generation2));
2474
0
}
2475
2476
void
2477
QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
2478
0
{
2479
    // Force objects to be read from the input source if needed, then swap them in the cache.
2480
0
    resolve(og1);
2481
0
    resolve(og2);
2482
0
    m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
2483
0
}
2484
2485
unsigned long long
2486
QPDF::getUniqueId() const
2487
0
{
2488
0
    return m->unique_id;
2489
0
}
2490
2491
std::string
2492
QPDF::getFilename() const
2493
105k
{
2494
105k
    return m->file->getName();
2495
105k
}
2496
2497
// Return the PDF version as a PDFVersion built from {major, minor, extension level}.
//
// Major and minor are parsed from the leading "<digits>.<digits>" of m->pdf_version (leading
// whitespace is ignored). If the string does not match, the version defaults to 1.3. The extension
// level comes from getExtensionLevel().
PDFVersion
QPDF::getVersionAsPDFVersion()
{
    // Compiling a std::regex is expensive, so build the pattern once rather than on every call.
    static std::regex const version_re("^[[:space:]]*([0-9]+)\\.([0-9]+)");

    int major = 1;
    int minor = 3;
    int extension_level = getExtensionLevel();

    std::smatch match;
    if (std::regex_search(m->pdf_version, match, version_re)) {
        major = QUtil::string_to_int(match[1].str().c_str());
        minor = QUtil::string_to_int(match[2].str().c_str());
    }

    return {major, minor, extension_level};
}
2513
2514
std::string
2515
QPDF::getPDFVersion() const
2516
0
{
2517
0
    return m->pdf_version;
2518
0
}
2519
2520
int
2521
QPDF::getExtensionLevel()
2522
0
{
2523
0
    int result = 0;
2524
0
    QPDFObjectHandle obj = getRoot();
2525
0
    if (obj.hasKey("/Extensions")) {
2526
0
        obj = obj.getKey("/Extensions");
2527
0
        if (obj.isDictionary() && obj.hasKey("/ADBE")) {
2528
0
            obj = obj.getKey("/ADBE");
2529
0
            if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
2530
0
                obj = obj.getKey("/ExtensionLevel");
2531
0
                if (obj.isInteger()) {
2532
0
                    result = obj.getIntValueAsInt();
2533
0
                }
2534
0
            }
2535
0
        }
2536
0
    }
2537
0
    return result;
2538
0
}
2539
2540
// Return a handle to the trailer dictionary (m->trailer).
QPDFObjectHandle
QPDF::getTrailer()
{
    auto& members = *m;
    return members.trailer;
}
2545
2546
// Return the trailer's /Root entry (the document catalog).
//
// Throws a damaged-PDF exception if /Root is not a dictionary. In check mode, a catalog whose
// /Type is not the name /Catalog triggers a warning and has its /Type set to /Catalog.
QPDFObjectHandle
QPDF::getRoot()
{
    QPDFObjectHandle root = m->trailer.getKey("/Root");
    if (!root.isDictionary()) {
        throw damagedPDF("", 0, "unable to find /Root dictionary");
    }

    // Check_mode is an interim solution to request #810 pending a more comprehensive review of
    // the approach to more extensive checks and warning levels.
    if (m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
        warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
        root.replaceKey("/Type", "/Catalog"_qpdf);
    }
    return root;
}
2561
2562
std::map<QPDFObjGen, QPDFXRefEntry>
2563
QPDF::getXRefTable()
2564
0
{
2565
0
    return getXRefTableInternal();
2566
0
}
2567
2568
std::map<QPDFObjGen, QPDFXRefEntry> const&
2569
QPDF::getXRefTableInternal()
2570
0
{
2571
0
    if (!m->parsed) {
2572
0
        throw std::logic_error("QPDF::getXRefTable called before parsing.");
2573
0
    }
2574
2575
0
    return m->xref_table;
2576
0
}
2577
2578
size_t
2579
QPDF::tableSize()
2580
0
{
2581
    // If obj_cache is dense, accommodate all object in tables,else accommodate only original
2582
    // objects.
2583
0
    auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0;
2584
0
    auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
2585
0
    auto max_id = std::numeric_limits<int>::max() - 1;
2586
0
    if (max_obj >= max_id || max_xref >= max_id) {
2587
        // Temporary fix. Long-term solution is
2588
        // - QPDFObjGen to enforce objgens are valid and sensible
2589
        // - xref table and obj cache to protect against insertion of impossibly large obj ids
2590
0
        stopOnError("Impossibly large object id encountered.");
2591
0
    }
2592
0
    if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {
2593
0
        return toS(++max_obj);
2594
0
    }
2595
0
    return toS(++max_xref);
2596
0
}
2597
2598
std::vector<QPDFObjGen>
2599
QPDF::getCompressibleObjVector()
2600
0
{
2601
0
    return getCompressibleObjGens<QPDFObjGen>();
2602
0
}
2603
2604
std::vector<bool>
2605
QPDF::getCompressibleObjSet()
2606
0
{
2607
0
    return getCompressibleObjGens<bool>();
2608
0
}
2609
2610
template <typename T>
2611
std::vector<T>
2612
QPDF::getCompressibleObjGens()
2613
0
{
2614
    // Return a list of objects that are allowed to be in object streams.  Walk through the objects
2615
    // by traversing the document from the root, including a traversal of the pages tree.  This
2616
    // makes that objects that are on the same page are more likely to be in the same object stream,
2617
    // which is slightly more efficient, particularly with linearized files.  This is better than
2618
    // iterating through the xref table since it avoids preserving orphaned items.
2619
2620
    // Exclude encryption dictionary, if any
2621
0
    QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
2622
0
    QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
2623
2624
0
    const size_t max_obj = getObjectCount();
2625
0
    std::vector<bool> visited(max_obj, false);
2626
0
    std::vector<QPDFObjectHandle> queue;
2627
0
    queue.reserve(512);
2628
0
    queue.push_back(m->trailer);
2629
0
    std::vector<T> result;
2630
0
    if constexpr (std::is_same_v<T, QPDFObjGen>) {
2631
0
        result.reserve(m->obj_cache.size());
2632
0
    } else if constexpr (std::is_same_v<T, bool>) {
2633
0
        result.resize(max_obj + 1U, false);
2634
0
    } else {
2635
0
        throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
2636
0
    }
2637
0
    while (!queue.empty()) {
2638
0
        auto obj = queue.back();
2639
0
        queue.pop_back();
2640
0
        if (obj.getObjectID() > 0) {
2641
0
            QPDFObjGen og = obj.getObjGen();
2642
0
            const size_t id = toS(og.getObj() - 1);
2643
0
            if (id >= max_obj) {
2644
0
                throw std::logic_error(
2645
0
                    "unexpected object id encountered in getCompressibleObjGens");
2646
0
            }
2647
0
            if (visited[id]) {
2648
0
                QTC::TC("qpdf", "QPDF loop detected traversing objects");
2649
0
                continue;
2650
0
            }
2651
2652
            // Check whether this is the current object. If not, remove it (which changes it into a
2653
            // direct null and therefore stops us from revisiting it) and move on to the next object
2654
            // in the queue.
2655
0
            auto upper = m->obj_cache.upper_bound(og);
2656
0
            if (upper != m->obj_cache.end() && upper->first.getObj() == og.getObj()) {
2657
0
                removeObject(og);
2658
0
                continue;
2659
0
            }
2660
2661
0
            visited[id] = true;
2662
2663
0
            if (og == encryption_dict_og) {
2664
0
                QTC::TC("qpdf", "QPDF exclude encryption dictionary");
2665
0
            } else if (!(obj.isStream() ||
2666
0
                         (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
2667
0
                          obj.hasKey("/Contents")))) {
2668
0
                if constexpr (std::is_same_v<T, QPDFObjGen>) {
2669
0
                    result.push_back(og);
2670
0
                } else if constexpr (std::is_same_v<T, bool>) {
2671
0
                    result[id + 1U] = true;
2672
0
                }
2673
0
            }
2674
0
        }
2675
0
        if (obj.isStream()) {
2676
0
            QPDFObjectHandle dict = obj.getDict();
2677
0
            std::set<std::string> keys = dict.getKeys();
2678
0
            for (auto iter = keys.rbegin(); iter != keys.rend(); ++iter) {
2679
0
                std::string const& key = *iter;
2680
0
                QPDFObjectHandle value = dict.getKey(key);
2681
0
                if (key == "/Length") {
2682
                    // omit stream lengths
2683
0
                    if (value.isIndirect()) {
2684
0
                        QTC::TC("qpdf", "QPDF exclude indirect length");
2685
0
                    }
2686
0
                } else {
2687
0
                    queue.push_back(value);
2688
0
                }
2689
0
            }
2690
0
        } else if (obj.isDictionary()) {
2691
0
            std::set<std::string> keys = obj.getKeys();
2692
0
            for (auto iter = keys.rbegin(); iter != keys.rend(); ++iter) {
2693
0
                queue.push_back(obj.getKey(*iter));
2694
0
            }
2695
0
        } else if (obj.isArray()) {
2696
0
            int n = obj.getArrayNItems();
2697
0
            for (int i = 1; i <= n; ++i) {
2698
0
                queue.push_back(obj.getArrayItem(n - i));
2699
0
            }
2700
0
        }
2701
0
    }
2702
2703
0
    return result;
2704
0
}
Unexecuted instantiation: std::__1::vector<QPDFObjGen, std::__1::allocator<QPDFObjGen> > QPDF::getCompressibleObjGens<QPDFObjGen>()
Unexecuted instantiation: std::__1::vector<bool, std::__1::allocator<bool> > QPDF::getCompressibleObjGens<bool>()
2705
2706
bool
2707
QPDF::pipeStreamData(
2708
    std::shared_ptr<EncryptionParameters> encp,
2709
    std::shared_ptr<InputSource> file,
2710
    QPDF& qpdf_for_warning,
2711
    QPDFObjGen const& og,
2712
    qpdf_offset_t offset,
2713
    size_t length,
2714
    QPDFObjectHandle stream_dict,
2715
    Pipeline* pipeline,
2716
    bool suppress_warnings,
2717
    bool will_retry)
2718
0
{
2719
0
    std::unique_ptr<Pipeline> to_delete;
2720
0
    if (encp->encrypted) {
2721
0
        decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
2722
0
    }
2723
2724
0
    bool attempted_finish = false;
2725
0
    try {
2726
0
        file->seek(offset, SEEK_SET);
2727
0
        auto buf = std::make_unique<char[]>(length);
2728
0
        if (auto read = file->read(buf.get(), length); read != length) {
2729
0
            throw damagedPDF(file, "", offset + toO(read), "unexpected EOF reading stream data");
2730
0
        }
2731
0
        pipeline->write(buf.get(), length);
2732
0
        attempted_finish = true;
2733
0
        pipeline->finish();
2734
0
        return true;
2735
0
    } catch (QPDFExc& e) {
2736
0
        if (!suppress_warnings) {
2737
0
            qpdf_for_warning.warn(e);
2738
0
        }
2739
0
    } catch (std::exception& e) {
2740
0
        if (!suppress_warnings) {
2741
0
            QTC::TC("qpdf", "QPDF decoding error warning");
2742
0
            qpdf_for_warning.warn(
2743
                // line-break
2744
0
                damagedPDF(
2745
0
                    file,
2746
0
                    "",
2747
0
                    file->getLastOffset(),
2748
0
                    ("error decoding stream data for object " + og.unparse(' ') + ": " +
2749
0
                     e.what())));
2750
0
            if (will_retry) {
2751
0
                qpdf_for_warning.warn(
2752
                    // line-break
2753
0
                    damagedPDF(
2754
0
                        file,
2755
0
                        "",
2756
0
                        file->getLastOffset(),
2757
0
                        "stream will be re-processed without filtering to avoid data loss"));
2758
0
            }
2759
0
        }
2760
0
    }
2761
0
    if (!attempted_finish) {
2762
0
        try {
2763
0
            pipeline->finish();
2764
0
        } catch (std::exception&) {
2765
            // ignore
2766
0
        }
2767
0
    }
2768
0
    return false;
2769
0
}
2770
2771
bool
2772
QPDF::pipeStreamData(
2773
    QPDFObjGen const& og,
2774
    qpdf_offset_t offset,
2775
    size_t length,
2776
    QPDFObjectHandle stream_dict,
2777
    Pipeline* pipeline,
2778
    bool suppress_warnings,
2779
    bool will_retry)
2780
0
{
2781
0
    return pipeStreamData(
2782
0
        m->encp,
2783
0
        m->file,
2784
0
        *this,
2785
0
        og,
2786
0
        offset,
2787
0
        length,
2788
0
        stream_dict,
2789
0
        pipeline,
2790
0
        suppress_warnings,
2791
0
        will_retry);
2792
0
}
2793
2794
bool
2795
QPDF::pipeForeignStreamData(
2796
    std::shared_ptr<ForeignStreamData> foreign,
2797
    Pipeline* pipeline,
2798
    bool suppress_warnings,
2799
    bool will_retry)
2800
0
{
2801
0
    if (foreign->encp->encrypted) {
2802
0
        QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
2803
0
    }
2804
0
    return pipeStreamData(
2805
0
        foreign->encp,
2806
0
        foreign->file,
2807
0
        *this,
2808
0
        foreign->foreign_og,
2809
0
        foreign->offset,
2810
0
        foreign->length,
2811
0
        foreign->local_dict,
2812
0
        pipeline,
2813
0
        suppress_warnings,
2814
0
        will_retry);
2815
0
}
2816
2817
// Throw a generic exception when we lack context for something more specific. New code should not
2818
// use this. This method exists to improve somewhat from calling assert in very old code.
2819
void
2820
QPDF::stopOnError(std::string const& message)
2821
13
{
2822
13
    throw damagedPDF("", message);
2823
13
}
2824
2825
// Return an exception of type qpdf_e_damaged_pdf.
2826
QPDFExc
2827
QPDF::damagedPDF(
2828
    std::shared_ptr<InputSource> const& input,
2829
    std::string const& object,
2830
    qpdf_offset_t offset,
2831
    std::string const& message)
2832
0
{
2833
0
    return {qpdf_e_damaged_pdf, input->getName(), object, offset, message};
2834
0
}
2835
2836
// Return an exception of type qpdf_e_damaged_pdf.  The object is taken from
2837
// m->last_object_description.
2838
QPDFExc
2839
QPDF::damagedPDF(
2840
    std::shared_ptr<InputSource> const& input, qpdf_offset_t offset, std::string const& message)
2841
0
{
2842
0
    return damagedPDF(input, m->last_object_description, offset, message);
2843
0
}
2844
2845
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file.
2846
QPDFExc
2847
QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
2848
13
{
2849
13
    return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};
2850
13
}
2851
2852
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file and the
2853
// offset from .m->file->getLastOffset().
2854
QPDFExc
2855
QPDF::damagedPDF(std::string const& object, std::string const& message)
2856
13
{
2857
13
    return damagedPDF(object, m->file->getLastOffset(), message);
2858
13
}
2859
2860
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
2861
// from .m->last_object_description.
2862
QPDFExc
2863
QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
2864
0
{
2865
0
    return damagedPDF(m->last_object_description, offset, message);
2866
0
}
2867
2868
// Return an exception of type qpdf_e_damaged_pdf.  The filename is taken from m->file, the object
2869
// from m->last_object_description and the offset from m->file->getLastOffset().
2870
QPDFExc
2871
QPDF::damagedPDF(std::string const& message)
2872
0
{
2873
0
    return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
2874
0
}
2875
2876
bool
2877
QPDF::everCalledGetAllPages() const
2878
0
{
2879
0
    return m->ever_called_get_all_pages;
2880
0
}
2881
2882
bool
2883
QPDF::everPushedInheritedAttributesToPages() const
2884
0
{
2885
0
    return m->ever_pushed_inherited_attributes_to_pages;
2886
0
}
2887
2888
void
2889
QPDF::removeSecurityRestrictions()
2890
0
{
2891
0
    auto root = getRoot();
2892
0
    root.removeKey("/Perms");
2893
0
    auto acroform = root.getKey("/AcroForm");
2894
0
    if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
2895
0
        acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
2896
0
    }
2897
0
}