Coverage Report

Created: 2025-10-10 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDF_objects.cc
Line
Count
Source
1
#include <qpdf/qpdf-config.h> // include first for large file support
2
3
#include <qpdf/QPDF_private.hh>
4
5
#include <qpdf/InputSource_private.hh>
6
#include <qpdf/Pipeline.hh>
7
#include <qpdf/QPDFExc.hh>
8
#include <qpdf/QPDFLogger.hh>
9
#include <qpdf/QPDFObjectHandle_private.hh>
10
#include <qpdf/QPDFObject_private.hh>
11
#include <qpdf/QPDFParser.hh>
12
#include <qpdf/QTC.hh>
13
#include <qpdf/QUtil.hh>
14
#include <qpdf/Util.hh>
15
16
#include <array>
17
#include <atomic>
18
#include <cstring>
19
#include <limits>
20
#include <map>
21
#include <vector>
22
23
using namespace qpdf;
24
using namespace std::literals;
25
26
using Objects = QPDF::Doc::Objects;
27
28
namespace
29
{
30
    class InvalidInputSource: public InputSource
31
    {
32
      public:
33
        ~InvalidInputSource() override = default;
34
        qpdf_offset_t
35
        findAndSkipNextEOL() override
36
0
        {
37
0
            throwException();
38
0
            return 0;
39
0
        }
40
        std::string const&
41
        getName() const override
42
0
        {
43
0
            static std::string name("closed input source");
44
0
            return name;
45
0
        }
46
        qpdf_offset_t
47
        tell() override
48
0
        {
49
0
            throwException();
50
0
            return 0;
51
0
        }
52
        void
53
        seek(qpdf_offset_t offset, int whence) override
54
0
        {
55
0
            throwException();
56
0
        }
57
        void
58
        rewind() override
59
0
        {
60
0
            throwException();
61
0
        }
62
        size_t
63
        read(char* buffer, size_t length) override
64
0
        {
65
0
            throwException();
66
0
            return 0;
67
0
        }
68
        void
69
        unreadCh(char ch) override
70
0
        {
71
0
            throwException();
72
0
        }
73
74
      private:
75
        void
76
        throwException()
77
0
        {
78
0
            throw std::logic_error(
79
0
                "QPDF operation attempted on a QPDF object with no input "
80
0
                "source. QPDF operations are invalid before processFile (or "
81
0
                "another process method) or after closeInputSource");
82
0
        }
83
    };
84
} // namespace
85
86
class QPDF::ResolveRecorder final
87
{
88
  public:
89
    ResolveRecorder(QPDF& qpdf, QPDFObjGen const& og) :
90
307k
        qpdf(qpdf),
91
307k
        iter(qpdf.m->resolving.insert(og).first)
92
307k
    {
93
307k
    }
94
    ~ResolveRecorder()
95
307k
    {
96
307k
        qpdf.m->resolving.erase(iter);
97
307k
    }
98
99
  private:
100
    QPDF& qpdf;
101
    std::set<QPDFObjGen>::const_iterator iter;
102
};
103
104
bool
105
QPDF::findStartxref()
106
7.61k
{
107
7.61k
    if (m->objects.readToken(*m->file).isWord("startxref") &&
108
7.09k
        m->objects.readToken(*m->file).isInteger()) {
109
        // Position in front of offset token
110
5.78k
        m->file->seek(m->file->getLastOffset(), SEEK_SET);
111
5.78k
        return true;
112
5.78k
    }
113
1.83k
    return false;
114
7.61k
}
115
116
void
117
Objects::parse(char const* password)
118
24.5k
{
119
24.5k
    if (password) {
120
0
        m->encp->provided_password = password;
121
0
    }
122
123
    // Find the header anywhere in the first 1024 bytes of the file.
124
24.5k
    PatternFinder hf(qpdf, &QPDF::findHeader);
125
24.5k
    if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
126
22.1k
        qpdf.warn(qpdf.damagedPDF("", -1, "can't find PDF header"));
127
        // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
128
22.1k
        m->pdf_version = "1.2";
129
22.1k
    }
130
131
    // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file.  We add an extra
132
    // 30 characters to leave room for the startxref stuff.
133
24.5k
    m->file->seek(0, SEEK_END);
134
24.5k
    qpdf_offset_t end_offset = m->file->tell();
135
24.5k
    m->xref_table_max_offset = end_offset;
136
    // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
137
    // scenarios at least 3 bytes are required.
138
24.5k
    if (m->xref_table_max_id > m->xref_table_max_offset / 3) {
139
24.4k
        m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3);
140
24.4k
    }
141
24.5k
    qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
142
24.5k
    PatternFinder sf(qpdf, &QPDF::findStartxref);
143
24.5k
    qpdf_offset_t xref_offset = 0;
144
24.5k
    if (m->file->findLast("startxref", start_offset, 0, sf)) {
145
5.46k
        xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
146
5.46k
    }
147
148
24.5k
    try {
149
24.5k
        if (xref_offset == 0) {
150
19.1k
            throw qpdf.damagedPDF("", -1, "can't find startxref");
151
19.1k
        }
152
5.34k
        try {
153
5.34k
            read_xref(xref_offset);
154
5.34k
        } catch (QPDFExc&) {
155
4.33k
            throw;
156
4.33k
        } catch (std::exception& e) {
157
435
            throw qpdf.damagedPDF("", -1, std::string("error reading xref: ") + e.what());
158
435
        }
159
23.9k
    } catch (QPDFExc& e) {
160
23.9k
        if (m->attempt_recovery) {
161
23.9k
            reconstruct_xref(e, xref_offset > 0);
162
23.9k
        } else {
163
0
            throw;
164
0
        }
165
23.9k
    }
166
167
23.9k
    qpdf.initializeEncryption();
168
10.0k
    m->parsed = true;
169
10.0k
    if (!m->xref_table.empty() && !qpdf.getRoot().getKey("/Pages").isDictionary()) {
170
        // QPDFs created from JSON have an empty xref table and no root object yet.
171
11
        throw qpdf.damagedPDF("", -1, "unable to find page tree");
172
11
    }
173
10.0k
}
174
175
void
176
Objects::inParse(bool v)
177
321k
{
178
321k
    if (m->in_parse == v) {
179
        // This happens if QPDFParser::parse tries to resolve an indirect object while it is
180
        // parsing.
181
0
        throw std::logic_error(
182
0
            "QPDF: re-entrant parsing detected. This is a qpdf bug."
183
0
            " Please report at https://github.com/qpdf/qpdf/issues.");
184
0
    }
185
321k
    m->in_parse = v;
186
321k
}
187
188
void
189
Objects::setTrailer(QPDFObjectHandle obj)
190
6.99k
{
191
6.99k
    if (m->trailer) {
192
146
        return;
193
146
    }
194
6.84k
    m->trailer = obj;
195
6.84k
}
196
197
void
198
Objects::reconstruct_xref(QPDFExc& e, bool found_startxref)
199
28.3k
{
200
28.3k
    if (m->reconstructed_xref) {
201
        // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
202
        // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
203
4.25k
        throw e;
204
4.25k
    }
205
206
    // If recovery generates more than 1000 warnings, the file is so severely damaged that there
207
    // probably is no point trying to continue.
208
24.0k
    const auto max_warnings = m->warnings.size() + 1000U;
209
1.37M
    auto check_warnings = [this, max_warnings]() {
210
1.37M
        if (m->warnings.size() > max_warnings) {
211
0
            throw qpdf.damagedPDF(
212
0
                "", -1, "too many errors while reconstructing cross-reference table");
213
0
        }
214
1.37M
    };
215
216
24.0k
    m->reconstructed_xref = true;
217
    // We may find more objects, which may contain dangling references.
218
24.0k
    m->fixed_dangling_refs = false;
219
220
24.0k
    qpdf.warn(qpdf.damagedPDF("", -1, "file is damaged"));
221
24.0k
    qpdf.warn(e);
222
24.0k
    qpdf.warn(qpdf.damagedPDF("", -1, "Attempting to reconstruct cross-reference table"));
223
224
    // Delete all references to type 1 (uncompressed) objects
225
24.0k
    std::vector<QPDFObjGen> to_delete;
226
93.3k
    for (auto const& iter: m->xref_table) {
227
93.3k
        if (iter.second.getType() == 1) {
228
84.9k
            to_delete.emplace_back(iter.first);
229
84.9k
        }
230
93.3k
    }
231
84.9k
    for (auto const& iter: to_delete) {
232
84.9k
        m->xref_table.erase(iter);
233
84.9k
    }
234
235
24.0k
    std::vector<std::tuple<int, int, qpdf_offset_t>> found_objects;
236
24.0k
    std::vector<qpdf_offset_t> trailers;
237
24.0k
    std::vector<qpdf_offset_t> startxrefs;
238
239
24.0k
    m->file->seek(0, SEEK_END);
240
24.0k
    qpdf_offset_t eof = m->file->tell();
241
24.0k
    m->file->seek(0, SEEK_SET);
242
    // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
243
24.0k
    static size_t const MAX_LEN = 10;
244
1.24M
    while (m->file->tell() < eof) {
245
1.21M
        QPDFTokenizer::Token t1 = m->objects.readToken(*m->file, MAX_LEN);
246
1.21M
        qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
247
1.21M
        if (t1.isInteger()) {
248
214k
            auto pos = m->file->tell();
249
214k
            auto t2 = m->objects.readToken(*m->file, MAX_LEN);
250
214k
            if (t2.isInteger() && m->objects.readToken(*m->file, MAX_LEN).isWord("obj")) {
251
114k
                int obj = QUtil::string_to_int(t1.getValue().c_str());
252
114k
                int gen = QUtil::string_to_int(t2.getValue().c_str());
253
114k
                if (obj <= m->xref_table_max_id) {
254
113k
                    found_objects.emplace_back(obj, gen, token_start);
255
113k
                } else {
256
1.05k
                    qpdf.warn(qpdf.damagedPDF(
257
1.05k
                        "", -1, "ignoring object with impossibly large id " + std::to_string(obj)));
258
1.05k
                }
259
114k
            }
260
214k
            m->file->seek(pos, SEEK_SET);
261
1.00M
        } else if (!m->trailer && t1.isWord("trailer")) {
262
84.9k
            trailers.emplace_back(m->file->tell());
263
918k
        } else if (!found_startxref && t1.isWord("startxref")) {
264
1.47k
            startxrefs.emplace_back(m->file->tell());
265
1.47k
        }
266
1.21M
        check_warnings();
267
1.21M
        m->file->findAndSkipNextEOL();
268
1.21M
    }
269
270
24.0k
    if (!found_startxref && !startxrefs.empty() && !found_objects.empty() &&
271
694
        startxrefs.back() > std::get<2>(found_objects.back())) {
272
389
        auto xref_backup{m->xref_table};
273
389
        try {
274
389
            m->file->seek(startxrefs.back(), SEEK_SET);
275
389
            if (auto offset =
276
389
                    QUtil::string_to_ll(m->objects.readToken(*m->file).getValue().data())) {
277
248
                m->objects.read_xref(offset);
278
279
248
                if (qpdf.getRoot().getKey("/Pages").isDictionary()) {
280
6
                    QTC::TC("qpdf", "QPDF startxref more than 1024 before end");
281
6
                    qpdf.warn(qpdf.damagedPDF(
282
6
                        "", -1, "startxref was more than 1024 bytes before end of file"));
283
6
                    qpdf.initializeEncryption();
284
6
                    m->parsed = true;
285
6
                    m->reconstructed_xref = false;
286
6
                    return;
287
6
                }
288
248
            }
289
389
        } catch (...) {
290
            // ok, bad luck. Do recovery.
291
242
        }
292
383
        m->xref_table = std::move(xref_backup);
293
383
    }
294
295
24.0k
    auto rend = found_objects.rend();
296
137k
    for (auto it = found_objects.rbegin(); it != rend; it++) {
297
113k
        auto [obj, gen, token_start] = *it;
298
113k
        insertXrefEntry(obj, 1, token_start, gen);
299
113k
        check_warnings();
300
113k
    }
301
24.0k
    m->deleted_objects.clear();
302
303
    // Search at most the last 100 trailer candidates. If none of them are valid, odds are this file
304
    // is deliberately broken.
305
24.0k
    int end_index = trailers.size() > 100 ? static_cast<int>(trailers.size()) - 100 : 0;
306
36.8k
    for (auto it = trailers.rbegin(); it != std::prev(trailers.rend(), end_index); it++) {
307
15.6k
        m->file->seek(*it, SEEK_SET);
308
15.6k
        auto t = readTrailer();
309
15.6k
        if (!t.isDictionary()) {
310
            // Oh well.  It was worth a try.
311
11.8k
        } else {
312
3.77k
            if (t.hasKey("/Root")) {
313
2.89k
                m->trailer = t;
314
2.89k
                break;
315
2.89k
            }
316
876
            qpdf.warn(qpdf.damagedPDF("trailer", *it, "recovered trailer has no /Root entry"));
317
876
        }
318
12.7k
        check_warnings();
319
12.7k
    }
320
321
24.0k
    if (!m->trailer) {
322
20.4k
        qpdf_offset_t max_offset{0};
323
20.4k
        size_t max_size{0};
324
        // If there are any xref streams, take the last one to appear.
325
69.6k
        for (auto const& iter: m->xref_table) {
326
69.6k
            auto entry = iter.second;
327
69.6k
            if (entry.getType() != 1) {
328
1.51k
                continue;
329
1.51k
            }
330
68.1k
            auto oh = qpdf.getObject(iter.first);
331
68.1k
            try {
332
68.1k
                if (!oh.isStreamOfType("/XRef")) {
333
59.6k
                    continue;
334
59.6k
                }
335
68.1k
            } catch (std::exception&) {
336
2.51k
                continue;
337
2.51k
            }
338
5.97k
            auto offset = entry.getOffset();
339
5.97k
            auto size = oh.getDict().getKey("/Size").getUIntValueAsUInt();
340
5.97k
            if (size > max_size || (size == max_size && offset > max_offset)) {
341
5.93k
                max_offset = offset;
342
5.93k
                setTrailer(oh.getDict());
343
5.93k
            }
344
5.97k
            check_warnings();
345
5.97k
        }
346
20.4k
        if (max_offset > 0) {
347
5.78k
            try {
348
5.78k
                read_xref(max_offset, true);
349
5.78k
            } catch (std::exception&) {
350
3.32k
                qpdf.warn(qpdf.damagedPDF(
351
3.32k
                    "", -1, "error decoding candidate xref stream while recovering damaged file"));
352
3.32k
            }
353
5.78k
            QTC::TC("qpdf", "QPDF recover xref stream");
354
5.75k
        }
355
20.4k
    }
356
357
24.0k
    if (!m->trailer || (!m->parsed && !m->trailer.getKey("/Root").isDictionary())) {
358
        // Try to find a Root dictionary. As a quick fix try the one with the highest object id.
359
20.7k
        QPDFObjectHandle root;
360
159k
        for (auto const& iter: m->obj_cache) {
361
159k
            try {
362
159k
                if (QPDFObjectHandle(iter.second.object).isDictionaryOfType("/Catalog")) {
363
7.87k
                    root = iter.second.object;
364
7.87k
                }
365
159k
            } catch (std::exception&) {
366
4.38k
                continue;
367
4.38k
            }
368
159k
        }
369
20.7k
        if (root) {
370
7.74k
            if (!m->trailer) {
371
6.56k
                qpdf.warn(qpdf.damagedPDF(
372
6.56k
                    "", -1, "unable to find trailer dictionary while recovering damaged file"));
373
6.56k
                m->trailer = QPDFObjectHandle::newDictionary();
374
6.56k
            }
375
7.74k
            m->trailer.replaceKey("/Root", root);
376
7.74k
        }
377
20.7k
    }
378
379
24.0k
    if (!m->trailer) {
380
        // We could check the last encountered object to see if it was an xref stream.  If so, we
381
        // could try to get the trailer from there.  This may make it possible to recover files with
382
        // bad startxref pointers even when they have object streams.
383
384
8.06k
        throw qpdf.damagedPDF(
385
8.06k
            "", -1, "unable to find trailer dictionary while recovering damaged file");
386
8.06k
    }
387
15.9k
    if (m->xref_table.empty()) {
388
        // We cannot check for an empty xref table in parse because empty tables are valid when
389
        // creating QPDF objects from JSON.
390
372
        throw qpdf.damagedPDF("", -1, "unable to find objects while recovering damaged file");
391
372
    }
392
15.6k
    check_warnings();
393
15.6k
    if (!m->parsed) {
394
15.3k
        m->parsed = true;
395
15.3k
        qpdf.getAllPages();
396
15.3k
        check_warnings();
397
15.3k
        if (m->all_pages.empty()) {
398
804
            m->parsed = false;
399
804
            throw qpdf.damagedPDF("", -1, "unable to find any pages while recovering damaged file");
400
804
        }
401
15.3k
    }
402
403
    // We could iterate through the objects looking for streams and try to find objects inside of
404
    // them, but it's probably not worth the trouble.  Acrobat can't recover files with any errors
405
    // in an xref stream, and this would be a real long shot anyway.  If we wanted to do anything
406
    // that involved looking at stream contents, we'd also have to call initializeEncryption() here.
407
    // It's safe to call it more than once.
408
15.6k
}
409
410
void
411
Objects::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery)
412
11.3k
{
413
11.3k
    std::map<int, int> free_table;
414
11.3k
    std::set<qpdf_offset_t> visited;
415
23.4k
    while (xref_offset) {
416
12.2k
        visited.insert(xref_offset);
417
12.2k
        char buf[7];
418
12.2k
        memset(buf, 0, sizeof(buf));
419
12.2k
        m->file->seek(xref_offset, SEEK_SET);
420
        // Some files miss the mark a little with startxref. We could do a better job of searching
421
        // in the neighborhood for something that looks like either an xref table or stream, but the
422
        // simple heuristic of skipping whitespace can help with the xref table case and is harmless
423
        // with the stream case.
424
12.2k
        bool done = false;
425
12.2k
        bool skipped_space = false;
426
27.1k
        while (!done) {
427
14.8k
            char ch;
428
14.8k
            if (1 == m->file->read(&ch, 1)) {
429
14.2k
                if (util::is_space(ch)) {
430
2.96k
                    skipped_space = true;
431
11.2k
                } else {
432
11.2k
                    m->file->unreadCh(ch);
433
11.2k
                    done = true;
434
11.2k
                }
435
14.2k
            } else {
436
617
                QTC::TC("qpdf", "QPDF eof skipping spaces before xref", skipped_space ? 0 : 1);
437
617
                done = true;
438
617
            }
439
14.8k
        }
440
441
12.2k
        m->file->read(buf, sizeof(buf) - 1);
442
        // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
443
        // where it is terminated by arbitrary whitespace.
444
12.2k
        if ((strncmp(buf, "xref", 4) == 0) && util::is_space(buf[4])) {
445
1.59k
            if (skipped_space) {
446
130
                qpdf.warn(qpdf.damagedPDF("", -1, "extraneous whitespace seen before xref"));
447
130
            }
448
1.59k
            QTC::TC(
449
1.59k
                "qpdf",
450
1.59k
                "QPDF xref space",
451
1.59k
                ((buf[4] == '\n')       ? 0
452
1.59k
                     : (buf[4] == '\r') ? 1
453
1.19k
                     : (buf[4] == ' ')  ? 2
454
550
                                        : 9999));
455
1.59k
            int skip = 4;
456
            // buf is null-terminated, and util::is_space('\0') is false, so this won't overrun.
457
3.42k
            while (util::is_space(buf[skip])) {
458
1.82k
                ++skip;
459
1.82k
            }
460
1.59k
            xref_offset = read_xrefTable(xref_offset + skip);
461
10.6k
        } else {
462
10.6k
            xref_offset = read_xrefStream(xref_offset, in_stream_recovery);
463
10.6k
        }
464
12.2k
        if (visited.contains(xref_offset)) {
465
184
            throw qpdf.damagedPDF("", -1, "loop detected following xref tables");
466
184
        }
467
12.2k
    }
468
469
11.1k
    if (!m->trailer) {
470
0
        throw qpdf.damagedPDF("", -1, "unable to find trailer while reading xref");
471
0
    }
472
11.1k
    int size = m->trailer.getKey("/Size").getIntValueAsInt();
473
11.1k
    int max_obj = 0;
474
11.1k
    if (!m->xref_table.empty()) {
475
2.83k
        max_obj = m->xref_table.rbegin()->first.getObj();
476
2.83k
    }
477
11.1k
    if (!m->deleted_objects.empty()) {
478
1.08k
        max_obj = std::max(max_obj, *(m->deleted_objects.rbegin()));
479
1.08k
    }
480
11.1k
    if ((size < 1) || (size - 1 != max_obj)) {
481
2.28k
        qpdf.warn(qpdf.damagedPDF(
482
2.28k
            "",
483
2.28k
            -1,
484
2.28k
            ("reported number of objects (" + std::to_string(size) +
485
2.28k
             ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
486
2.28k
    }
487
488
    // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
489
    // never depend on its being set.
490
11.1k
    m->deleted_objects.clear();
491
492
    // Make sure we keep only the highest generation for any object.
493
11.1k
    QPDFObjGen last_og{-1, 0};
494
313k
    for (auto const& item: m->xref_table) {
495
313k
        auto id = item.first.getObj();
496
313k
        if (id == last_og.getObj() && id > 0) {
497
20.9k
            qpdf.removeObject(last_og);
498
20.9k
        }
499
313k
        last_og = item.first;
500
313k
    }
501
11.1k
}
502
503
bool
504
Objects::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
505
6.97k
{
506
    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
507
    // buffer.
508
6.97k
    char const* p = line.c_str();
509
6.97k
    char const* start = line.c_str();
510
511
    // Skip zero or more spaces
512
8.26k
    while (util::is_space(*p)) {
513
1.28k
        ++p;
514
1.28k
    }
515
    // Require digit
516
6.97k
    if (!util::is_digit(*p)) {
517
215
        return false;
518
215
    }
519
    // Gather digits
520
6.76k
    std::string obj_str;
521
28.6k
    while (util::is_digit(*p)) {
522
21.8k
        obj_str.append(1, *p++);
523
21.8k
    }
524
    // Require space
525
6.76k
    if (!util::is_space(*p)) {
526
123
        return false;
527
123
    }
528
    // Skip spaces
529
17.8k
    while (util::is_space(*p)) {
530
11.1k
        ++p;
531
11.1k
    }
532
    // Require digit
533
6.64k
    if (!util::is_digit(*p)) {
534
102
        return false;
535
102
    }
536
    // Gather digits
537
6.53k
    std::string num_str;
538
21.4k
    while (util::is_digit(*p)) {
539
14.8k
        num_str.append(1, *p++);
540
14.8k
    }
541
    // Skip any space including line terminators
542
19.4k
    while (util::is_space(*p)) {
543
12.9k
        ++p;
544
12.9k
    }
545
6.53k
    bytes = toI(p - start);
546
6.53k
    obj = QUtil::string_to_int(obj_str.c_str());
547
6.53k
    num = QUtil::string_to_int(num_str.c_str());
548
6.53k
    return true;
549
6.64k
}
550
551
bool
552
Objects::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
553
7.75k
{
554
    // Reposition after initial read attempt and reread.
555
7.75k
    m->file->seek(m->file->getLastOffset(), SEEK_SET);
556
7.75k
    auto line = m->file->readLine(30);
557
558
    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
559
    // buffer.
560
7.75k
    char const* p = line.data();
561
562
    // Skip zero or more spaces. There aren't supposed to be any.
563
7.75k
    bool invalid = false;
564
17.4k
    while (util::is_space(*p)) {
565
9.68k
        ++p;
566
9.68k
        invalid = true;
567
9.68k
    }
568
    // Require digit
569
7.75k
    if (!util::is_digit(*p)) {
570
17
        return false;
571
17
    }
572
    // Gather digits
573
7.73k
    std::string f1_str;
574
27.4k
    while (util::is_digit(*p)) {
575
19.6k
        f1_str.append(1, *p++);
576
19.6k
    }
577
    // Require space
578
7.73k
    if (!util::is_space(*p)) {
579
19
        return false;
580
19
    }
581
7.71k
    if (util::is_space(*(p + 1))) {
582
2.32k
        invalid = true;
583
2.32k
    }
584
    // Skip spaces
585
18.7k
    while (util::is_space(*p)) {
586
11.0k
        ++p;
587
11.0k
    }
588
    // Require digit
589
7.71k
    if (!util::is_digit(*p)) {
590
70
        return false;
591
70
    }
592
    // Gather digits
593
7.64k
    std::string f2_str;
594
31.7k
    while (util::is_digit(*p)) {
595
24.1k
        f2_str.append(1, *p++);
596
24.1k
    }
597
    // Require space
598
7.64k
    if (!util::is_space(*p)) {
599
48
        return false;
600
48
    }
601
7.59k
    if (util::is_space(*(p + 1))) {
602
1.60k
        invalid = true;
603
1.60k
    }
604
    // Skip spaces
605
18.1k
    while (util::is_space(*p)) {
606
10.5k
        ++p;
607
10.5k
    }
608
7.59k
    if ((*p == 'f') || (*p == 'n')) {
609
7.45k
        type = *p;
610
7.45k
    } else {
611
147
        return false;
612
147
    }
613
7.45k
    if ((f1_str.length() != 10) || (f2_str.length() != 5)) {
614
7.27k
        invalid = true;
615
7.27k
    }
616
617
7.45k
    if (invalid) {
618
7.27k
        qpdf.warn(qpdf.damagedPDF("xref table", "accepting invalid xref table entry"));
619
7.27k
    }
620
621
7.45k
    f1 = QUtil::string_to_ll(f1_str.c_str());
622
7.45k
    f2 = QUtil::string_to_int(f2_str.c_str());
623
624
7.45k
    return true;
625
7.59k
}
626
627
// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
628
// result.
629
bool
630
Objects::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
631
20.7k
{
632
20.7k
    std::array<char, 21> line;
633
20.7k
    if (m->file->read(line.data(), 20) != 20) {
634
        // C++20: [[unlikely]]
635
229
        return false;
636
229
    }
637
20.4k
    line[20] = '\0';
638
20.4k
    char const* p = line.data();
639
640
20.4k
    int f1_len = 0;
641
20.4k
    int f2_len = 0;
642
643
    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
644
    // buffer.
645
646
    // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
647
102k
    while (*p == '0') {
648
82.1k
        ++f1_len;
649
82.1k
        ++p;
650
82.1k
    }
651
80.8k
    while (util::is_digit(*p) && f1_len++ < 10) {
652
60.3k
        f1 *= 10;
653
60.3k
        f1 += *p++ - '0';
654
60.3k
    }
655
    // Require space
656
20.4k
    if (!util::is_space(*p++)) {
657
        // Entry doesn't start with space or digit.
658
        // C++20: [[unlikely]]
659
107
        return false;
660
107
    }
661
    // Gather digits. NB No risk of overflow as 99'999 < max int.
662
76.9k
    while (*p == '0') {
663
56.5k
        ++f2_len;
664
56.5k
        ++p;
665
56.5k
    }
666
36.2k
    while (util::is_digit(*p) && f2_len++ < 5) {
667
15.9k
        f2 *= 10;
668
15.9k
        f2 += static_cast<int>(*p++ - '0');
669
15.9k
    }
670
20.3k
    if (util::is_space(*p++) && (*p == 'f' || *p == 'n')) {
671
        // C++20: [[likely]]
672
15.8k
        type = *p;
673
        // No test for valid line[19].
674
15.8k
        if (*(++p) && *(++p) && (*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) {
675
            // C++20: [[likely]]
676
12.6k
            return true;
677
12.6k
        }
678
15.8k
    }
679
7.75k
    return read_bad_xrefEntry(f1, f2, type);
680
20.3k
}
681
682
// Read a single cross-reference table section and associated trailer.
683
qpdf_offset_t
684
Objects::read_xrefTable(qpdf_offset_t xref_offset)
685
1.59k
{
686
1.59k
    m->file->seek(xref_offset, SEEK_SET);
687
1.59k
    std::string line;
688
7.01k
    while (true) {
689
6.97k
        line.assign(50, '\0');
690
6.97k
        m->file->read(line.data(), line.size());
691
6.97k
        int obj = 0;
692
6.97k
        int num = 0;
693
6.97k
        int bytes = 0;
694
6.97k
        if (!parse_xrefFirst(line, obj, num, bytes)) {
695
440
            throw qpdf.damagedPDF("xref table", "xref syntax invalid");
696
440
        }
697
6.53k
        m->file->seek(m->file->getLastOffset() + bytes, SEEK_SET);
698
26.6k
        for (qpdf_offset_t i = obj; i - num < obj; ++i) {
699
20.7k
            if (i == 0) {
700
                // This is needed by checkLinearization()
701
456
                m->first_xref_item_offset = m->file->tell();
702
456
            }
703
            // For xref_table, these will always be small enough to be ints
704
20.7k
            qpdf_offset_t f1 = 0;
705
20.7k
            int f2 = 0;
706
20.7k
            char type = '\0';
707
20.7k
            if (!read_xrefEntry(f1, f2, type)) {
708
637
                throw qpdf.damagedPDF(
709
637
                    "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
710
637
            }
711
20.0k
            if (type == 'f') {
712
4.09k
                insertFreeXrefEntry(QPDFObjGen(toI(i), f2));
713
15.9k
            } else {
714
15.9k
                insertXrefEntry(toI(i), 1, f1, f2);
715
15.9k
            }
716
20.0k
        }
717
5.90k
        qpdf_offset_t pos = m->file->tell();
718
5.90k
        if (readToken(*m->file).isWord("trailer")) {
719
487
            break;
720
5.41k
        } else {
721
5.41k
            m->file->seek(pos, SEEK_SET);
722
5.41k
        }
723
5.90k
    }
724
725
    // Set offset to previous xref table if any
726
521
    QPDFObjectHandle cur_trailer = m->objects.readTrailer();
727
521
    if (!cur_trailer.isDictionary()) {
728
67
        throw qpdf.damagedPDF("", "expected trailer dictionary");
729
67
    }
730
731
454
    if (!m->trailer) {
732
370
        setTrailer(cur_trailer);
733
734
370
        if (!m->trailer.hasKey("/Size")) {
735
137
            throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key");
736
137
        }
737
233
        if (!m->trailer.getKey("/Size").isInteger()) {
738
6
            throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
739
6
        }
740
233
    }
741
742
311
    if (cur_trailer.hasKey("/XRefStm")) {
743
23
        if (m->ignore_xref_streams) {
744
0
            QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
745
23
        } else {
746
23
            if (cur_trailer.getKey("/XRefStm").isInteger()) {
747
                // Read the xref stream but disregard any return value -- we'll use our trailer's
748
                // /Prev key instead of the xref stream's.
749
22
                (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
750
22
            } else {
751
1
                throw qpdf.damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
752
1
            }
753
23
        }
754
23
    }
755
756
310
    if (cur_trailer.hasKey("/Prev")) {
757
54
        if (!cur_trailer.getKey("/Prev").isInteger()) {
758
1
            throw qpdf.damagedPDF("trailer", "/Prev key in trailer dictionary is not an integer");
759
1
        }
760
53
        return cur_trailer.getKey("/Prev").getIntValue();
761
54
    }
762
763
256
    return 0;
764
310
}
765
766
// Read a single cross-reference stream.
767
qpdf_offset_t
768
Objects::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery)
769
10.3k
{
770
10.3k
    if (!m->ignore_xref_streams) {
771
10.3k
        QPDFObjectHandle xref_obj;
772
10.3k
        try {
773
10.3k
            m->in_read_xref_stream = true;
774
10.3k
            xref_obj = readObjectAtOffset(xref_offset, "xref stream", true);
775
10.3k
        } catch (QPDFExc&) {
776
            // ignore -- report error below
777
1.39k
        }
778
10.3k
        m->in_read_xref_stream = false;
779
10.2k
        if (xref_obj.isStreamOfType("/XRef")) {
780
7.76k
            return processXRefStream(xref_offset, xref_obj, in_stream_recovery);
781
7.76k
        }
782
10.2k
    }
783
784
2.51k
    throw qpdf.damagedPDF("", xref_offset, "xref not found");
785
0
    return 0; // unreachable
786
10.3k
}
787
788
// Return the entry size of the xref stream and the processed W array.
789
std::pair<int, std::array<int, 3>>
790
Objects::processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
791
7.76k
{
792
7.76k
    auto W_obj = dict.getKey("/W");
793
7.76k
    if (!(W_obj.size() >= 3 && W_obj.getArrayItem(0).isInteger() &&
794
7.50k
          W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {
795
277
        throw damaged("Cross-reference stream does not have a proper /W key");
796
277
    }
797
798
7.48k
    std::array<int, 3> W;
799
7.48k
    int entry_size = 0;
800
7.48k
    auto w_vector = W_obj.getArrayAsVector();
801
7.48k
    int max_bytes = sizeof(qpdf_offset_t);
802
29.8k
    for (size_t i = 0; i < 3; ++i) {
803
22.3k
        W[i] = w_vector[i].getIntValueAsInt();
804
22.3k
        if (W[i] > max_bytes) {
805
10
            throw damaged("Cross-reference stream's /W contains impossibly large values");
806
10
        }
807
22.3k
        if (W[i] < 0) {
808
47
            throw damaged("Cross-reference stream's /W contains negative values");
809
47
        }
810
22.3k
        entry_size += W[i];
811
22.3k
    }
812
7.42k
    if (entry_size == 0) {
813
4
        throw damaged("Cross-reference stream's /W indicates entry size of 0");
814
4
    }
815
7.42k
    return {entry_size, W};
816
7.42k
}
817
818
// Validate Size key and return the maximum number of entries that the xref stream can contain.
819
int
820
Objects::processXRefSize(
821
    QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
822
7.42k
{
823
    // Number of entries is limited by the highest possible object id and stream size.
824
7.42k
    auto max_num_entries = std::numeric_limits<int>::max();
825
7.42k
    if (max_num_entries > (std::numeric_limits<qpdf_offset_t>::max() / entry_size)) {
826
0
        max_num_entries = toI(std::numeric_limits<qpdf_offset_t>::max() / entry_size);
827
0
    }
828
829
7.42k
    auto Size_obj = dict.getKey("/Size");
830
7.42k
    long long size;
831
7.42k
    if (!dict.getKey("/Size").getValueAsInt(size)) {
832
92
        throw damaged("Cross-reference stream does not have a proper /Size key");
833
7.33k
    } else if (size < 0) {
834
70
        throw damaged("Cross-reference stream has a negative /Size key");
835
7.26k
    } else if (size >= max_num_entries) {
836
80
        throw damaged("Cross-reference stream has an impossibly large /Size key");
837
80
    }
838
    // We are not validating that Size <= (Size key of parent xref / trailer).
839
7.18k
    return max_num_entries;
840
7.42k
}
841
842
// Return the number of entries of the xref stream and the processed Index array.
843
std::pair<int, std::vector<std::pair<int, int>>>
844
Objects::processXRefIndex(
845
    QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
846
7.18k
{
847
7.18k
    auto size = dict.getKey("/Size").getIntValueAsInt();
848
7.18k
    auto Index_obj = dict.getKey("/Index");
849
850
7.18k
    if (Index_obj.isArray()) {
851
1.21k
        std::vector<std::pair<int, int>> indx;
852
1.21k
        int num_entries = 0;
853
1.21k
        auto index_vec = Index_obj.getArrayAsVector();
854
1.21k
        if ((index_vec.size() % 2) || index_vec.size() < 2) {
855
11
            throw damaged("Cross-reference stream's /Index has an invalid number of values");
856
11
        }
857
858
1.20k
        int i = 0;
859
1.20k
        long long first = 0;
860
12.9k
        for (auto& val: index_vec) {
861
12.9k
            if (val.isInteger()) {
862
12.9k
                if (i % 2) {
863
6.40k
                    auto count = val.getIntValue();
864
6.40k
                    if (count <= 0) {
865
70
                        throw damaged(
866
70
                            "Cross-reference stream section claims to contain " +
867
70
                            std::to_string(count) + " entries");
868
70
                    }
869
                    // We are guarding against the possibility of num_entries * entry_size
870
                    // overflowing. We are not checking that entries are in ascending order as
871
                    // required by the spec, which probably should generate a warning. We are also
872
                    // not checking that for each subsection first object number + number of entries
873
                    // <= /Size. The spec requires us to ignore object number > /Size.
874
6.33k
                    if (first > (max_num_entries - count) ||
875
6.27k
                        count > (max_num_entries - num_entries)) {
876
73
                        throw damaged(
877
73
                            "Cross-reference stream claims to contain too many entries: " +
878
73
                            std::to_string(first) + " " + std::to_string(max_num_entries) + " " +
879
73
                            std::to_string(num_entries));
880
73
                    }
881
6.26k
                    indx.emplace_back(static_cast<int>(first), static_cast<int>(count));
882
6.26k
                    num_entries += static_cast<int>(count);
883
6.55k
                } else {
884
6.55k
                    first = val.getIntValue();
885
6.55k
                    if (first < 0) {
886
70
                        throw damaged(
887
70
                            "Cross-reference stream's /Index contains a negative object id");
888
6.48k
                    } else if (first > max_num_entries) {
889
77
                        throw damaged(
890
77
                            "Cross-reference stream's /Index contains an impossibly "
891
77
                            "large object id");
892
77
                    }
893
6.55k
                }
894
12.9k
            } else {
895
12
                throw damaged(
896
12
                    "Cross-reference stream's /Index's item " + std::to_string(i) +
897
12
                    " is not an integer");
898
12
            }
899
12.6k
            i++;
900
12.6k
        }
901
900
        QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);
902
900
        return {num_entries, indx};
903
5.96k
    } else if (Index_obj.null()) {
904
5.96k
        return {size, {{0, size}}};
905
5.96k
    } else {
906
4
        throw damaged("Cross-reference stream does not have a proper /Index key");
907
4
    }
908
7.18k
}
909
910
// Process an already-read cross-reference stream.
//
// xref_offset is the offset the stream was read from (used for error reporting and recorded as
// first_xref_item_offset when an entry for object 0 is seen). xref_obj is the stream object.
// in_stream_recovery suppresses insertion of every entry whose type is not 2.
//
// The stream dictionary's /W, /Size and /Index keys are validated, the stream data is decoded
// entry by entry, and the entries are passed to insertFreeXrefEntry / insertXrefEntry. If no
// trailer has been set yet, the stream dictionary becomes the trailer. Returns the value of /Prev
// from the stream dictionary, or 0 if there is no /Prev key. Throws a damaged-PDF QPDFExc if the
// dictionary is invalid, if the stream data is shorter than expected, or if /Prev is not an
// integer; stream data that is longer than expected only produces a warning.
qpdf_offset_t
Objects::processXRefStream(
    qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery)
{
    auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
        // Build an owning string from the view: a string_view is not guaranteed to be
        // null-terminated, so msg.data() must not be handed over as a C string.
        return qpdf.damagedPDF("xref stream", xref_offset, std::string(msg));
    };

    auto dict = xref_obj.getDict();

    auto [entry_size, W] = processXRefW(dict, damaged);
    int max_num_entries = processXRefSize(dict, entry_size, damaged);
    auto [num_entries, indx] = processXRefIndex(dict, max_num_entries, damaged);

    std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
    size_t actual_size = bp->getSize();
    auto expected_size = toS(entry_size) * toS(num_entries);

    if (expected_size != actual_size) {
        QPDFExc x = damaged(
            "Cross-reference stream data has the wrong size; expected = " +
            std::to_string(expected_size) + "; actual = " + std::to_string(actual_size));
        if (expected_size > actual_size) {
            // Too little data: decoding would read past the end of the buffer.
            throw x;
        } else {
            // Surplus data is tolerated; only the expected number of bytes is decoded.
            qpdf.warn(x);
        }
    }

    bool saw_first_compressed_object = false;

    // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
    // We know that entry_size * num_entries is less or equal to the size of the buffer.
    auto p = bp->getBuffer();
    for (auto [obj, sec_entries]: indx) {
        // Process a subsection.
        for (int i = 0; i < sec_entries; ++i) {
            // Read this entry. Each field is a big-endian integer of W[j] bytes. When the type
            // field has width 0, the type is 1.
            std::array<qpdf_offset_t, 3> fields{};
            if (W[0] == 0) {
                fields[0] = 1;
            }
            for (size_t j = 0; j < 3; ++j) {
                for (int k = 0; k < W[j]; ++k) {
                    fields[j] <<= 8;
                    fields[j] |= *p++;
                }
            }

            // Record whether an entry that is not of type 2 appears after the first type 2
            // (compressed object) entry of this stream.
            if (saw_first_compressed_object) {
                if (fields[0] != 2) {
                    m->uncompressed_after_compressed = true;
                }
            } else if (fields[0] == 2) {
                saw_first_compressed_object = true;
            }
            if (obj == 0) {
                // This is needed by checkLinearization()
                m->first_xref_item_offset = xref_offset;
            } else if (fields[0] == 0) {
                // Ignore fields[2], which we don't care about in this case. This works around the
                // issue of some PDF files that put invalid values, like -1, here for deleted
                // objects.
                insertFreeXrefEntry(QPDFObjGen(obj, 0));
            } else {
                auto typ = toI(fields[0]);
                if (!in_stream_recovery || typ == 2) {
                    // If we are in xref stream recovery all actual uncompressed objects have
                    // already been inserted into the xref table. Avoid adding junk data into the
                    // xref table.
                    insertXrefEntry(obj, typ, fields[1], toI(fields[2]));
                }
            }
            ++obj;
        }
    }

    if (!m->trailer) {
        setTrailer(dict);
    }

    if (dict.hasKey("/Prev")) {
        if (!dict.getKey("/Prev").isInteger()) {
            throw qpdf.damagedPDF(
                "xref stream", "/Prev key in xref stream dictionary is not an integer");
        }
        return dict.getKey("/Prev").getIntValue();
    } else {
        return 0;
    }
}
1003
1004
void
1005
Objects::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1006
789k
{
1007
    // Populate the xref table in such a way that the first reference to an object that we see,
1008
    // which is the one in the latest xref table in which it appears, is the one that gets stored.
1009
    // This works because we are reading more recent appends before older ones.
1010
1011
    // If there is already an entry for this object and generation in the table, it means that a
1012
    // later xref table has registered this object.  Disregard this one.
1013
789k
    int new_gen = f0 == 2 ? 0 : f2;
1014
1015
789k
    if (!(f0 == 1 || f0 == 2)) {
1016
32.4k
        return;
1017
32.4k
    }
1018
1019
756k
    if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && new_gen < 65535)) {
1020
        // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
1021
        // is probably no point having another warning but we could count invalid items in order to
1022
        // decide when to give up.
1023
        // ignore impossibly large object ids or object ids > Size.
1024
238k
        return;
1025
238k
    }
1026
1027
518k
    if (m->deleted_objects.contains(obj)) {
1028
600
        return;
1029
600
    }
1030
1031
518k
    if (f0 == 2) {
1032
272k
        if (f1 == obj) {
1033
703
            qpdf.warn(qpdf.damagedPDF(
1034
703
                "xref stream", "self-referential object stream " + std::to_string(obj)));
1035
703
            return;
1036
703
        }
1037
271k
        if (f1 > m->xref_table_max_id) {
1038
            // ignore impossibly large object stream ids
1039
7.84k
            qpdf.warn(qpdf.damagedPDF(
1040
7.84k
                "xref stream",
1041
7.84k
                "object stream id " + std::to_string(f1) + " for object " + std::to_string(obj) +
1042
7.84k
                    " is impossibly large"));
1043
7.84k
            return;
1044
7.84k
        }
1045
271k
    }
1046
1047
509k
    auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
1048
509k
    if (!created) {
1049
26.3k
        return;
1050
26.3k
    }
1051
1052
483k
    switch (f0) {
1053
221k
    case 1:
1054
        // f2 is generation
1055
221k
        QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
1056
221k
        iter->second = QPDFXRefEntry(f1);
1057
221k
        break;
1058
1059
261k
    case 2:
1060
261k
        iter->second = QPDFXRefEntry(toI(f1), f2);
1061
261k
        break;
1062
1063
0
    default:
1064
0
        throw qpdf.damagedPDF(
1065
0
            "xref stream", "unknown xref stream entry type " + std::to_string(f0));
1066
0
        break;
1067
483k
    }
1068
483k
}
1069
1070
void
1071
Objects::insertFreeXrefEntry(QPDFObjGen og)
1072
75.1k
{
1073
75.1k
    if (!m->xref_table.contains(og)) {
1074
74.1k
        m->deleted_objects.insert(og.getObj());
1075
74.1k
    }
1076
75.1k
}
1077
1078
void
1079
QPDF::showXRefTable()
1080
0
{
1081
0
    auto& cout = *m->log->getInfo();
1082
0
    for (auto const& iter: m->xref_table) {
1083
0
        QPDFObjGen const& og = iter.first;
1084
0
        QPDFXRefEntry const& entry = iter.second;
1085
0
        cout << og.unparse('/') << ": ";
1086
0
        switch (entry.getType()) {
1087
0
        case 1:
1088
0
            cout << "uncompressed; offset = " << entry.getOffset();
1089
0
            break;
1090
1091
0
        case 2:
1092
0
            *m->log->getInfo() << "compressed; stream = " << entry.getObjStreamNumber()
1093
0
                               << ", index = " << entry.getObjStreamIndex();
1094
0
            break;
1095
1096
0
        default:
1097
0
            throw std::logic_error("unknown cross-reference table type while showing xref_table");
1098
0
            break;
1099
0
        }
1100
0
        m->log->info("\n");
1101
0
    }
1102
0
}
1103
1104
// Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and
1105
// return false. Otherwise return true.
1106
bool
1107
Objects::resolveXRefTable()
1108
9.76k
{
1109
9.76k
    bool may_change = !m->reconstructed_xref;
1110
221k
    for (auto& iter: m->xref_table) {
1111
221k
        if (isUnresolved(iter.first)) {
1112
133k
            resolve(iter.first);
1113
133k
            if (may_change && m->reconstructed_xref) {
1114
47
                return false;
1115
47
            }
1116
133k
        }
1117
221k
    }
1118
9.71k
    return true;
1119
9.76k
}
1120
1121
// Ensure all objects in the pdf file, including those in indirect references, appear in the object
1122
// cache.
1123
void
1124
QPDF::fixDanglingReferences(bool force)
1125
22.6k
{
1126
22.6k
    if (m->fixed_dangling_refs) {
1127
12.9k
        return;
1128
12.9k
    }
1129
9.71k
    if (!m->objects.resolveXRefTable()) {
1130
47
        m->objects.resolveXRefTable();
1131
47
    }
1132
9.71k
    m->fixed_dangling_refs = true;
1133
9.71k
}
1134
1135
size_t
1136
QPDF::getObjectCount()
1137
13.5k
{
1138
    // This method returns the next available indirect object number. makeIndirectObject uses it for
1139
    // this purpose. After fixDanglingReferences is called, all objects in the xref table will also
1140
    // be in obj_cache.
1141
13.5k
    fixDanglingReferences();
1142
13.5k
    QPDFObjGen og;
1143
13.5k
    if (!m->obj_cache.empty()) {
1144
13.4k
        og = (*(m->obj_cache.rbegin())).first;
1145
13.4k
    }
1146
13.5k
    return toS(og.getObj());
1147
13.5k
}
1148
1149
std::vector<QPDFObjectHandle>
1150
QPDF::getAllObjects()
1151
0
{
1152
    // After fixDanglingReferences is called, all objects are in the object cache.
1153
0
    fixDanglingReferences();
1154
0
    std::vector<QPDFObjectHandle> result;
1155
0
    for (auto const& iter: m->obj_cache) {
1156
0
        result.emplace_back(m->objects.newIndirect(iter.first, iter.second.object));
1157
0
    }
1158
0
    return result;
1159
0
}
1160
1161
void
1162
Objects::setLastObjectDescription(std::string const& description, QPDFObjGen og)
1163
222k
{
1164
222k
    m->last_object_description.clear();
1165
222k
    if (!description.empty()) {
1166
9.05k
        m->last_object_description += description;
1167
9.05k
        if (og.isIndirect()) {
1168
9.05k
            m->last_object_description += ": ";
1169
9.05k
        }
1170
9.05k
    }
1171
222k
    if (og.isIndirect()) {
1172
222k
        m->last_object_description += "object " + og.unparse(' ');
1173
222k
    }
1174
222k
}
1175
1176
// Parse the trailer object at the current position of the input file and return it. A warning is
// issued if the parser reports an empty object, or if a dictionary is followed by the "stream"
// keyword. Afterwards the file's last offset is set to the position where parsing started.
QPDFObjectHandle
Objects::readTrailer()
{
    qpdf_offset_t const start = m->file->tell();
    auto [object, empty] =
        QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref);

    if (empty) {
        // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
        // actual PDF files and Adobe Reader appears to ignore them.
        qpdf.warn(qpdf.damagedPDF("trailer", "empty object treated as null"));
    } else if (object.isDictionary()) {
        // The next token is only read when the trailer is a dictionary.
        if (m->objects.readToken(*m->file).isWord("stream")) {
            qpdf.warn(
                qpdf.damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer"));
        }
    }

    // Override last_offset so that it points to the beginning of the object we just read
    m->file->setLastOffset(start);
    return object;
}
1193
1194
// Parse the object body at the current position of the input file and return it.
//
// `description` and `og` are used to build the description used in parser messages; `og` is also
// handed to the string decrypter, which is only passed to the parser when the file is encrypted.
// If the parsed object is a dictionary followed by the "stream" keyword, the stream is read via
// readStream. Warnings are issued for an empty object and for a missing "endobj" keyword.
QPDFObjectHandle
Objects::readObject(std::string const& description, QPDFObjGen og)
{
    setLastObjectDescription(description, og);
    qpdf_offset_t const start = m->file->tell();

    StringDecrypter decrypter{&qpdf, og};
    StringDecrypter* decrypter_ptr = nullptr;
    if (m->encp->encrypted) {
        decrypter_ptr = &decrypter;
    }
    bool const context_flag = m->reconstructed_xref || m->in_read_xref_stream;

    auto [object, empty] = QPDFParser::parse(
        *m->file, m->last_object_description, m->tokenizer, decrypter_ptr, qpdf, context_flag);
    if (empty) {
        // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
        // actual PDF files and Adobe Reader appears to ignore them.
        qpdf.warn(
            qpdf.damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));
        return object;
    }

    auto next = readToken(*m->file);
    if (object.isDictionary() && next.isWord("stream")) {
        readStream(object, og, start);
        next = readToken(*m->file);
    }
    if (!next.isWord("endobj")) {
        qpdf.warn(qpdf.damagedPDF("expected endobj"));
    }
    return object;
}
1227
1228
// After reading stream dictionary and stream keyword, read rest of stream.
1229
void
1230
Objects::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1231
47.8k
{
1232
47.8k
    validateStreamLineEnd(object, og, offset);
1233
1234
    // Must get offset before accessing any additional objects since resolving a previously
1235
    // unresolved indirect object will change file position.
1236
47.8k
    qpdf_offset_t stream_offset = m->file->tell();
1237
47.8k
    size_t length = 0;
1238
1239
47.8k
    try {
1240
47.8k
        auto length_obj = object.getKey("/Length");
1241
1242
47.8k
        if (!length_obj.isInteger()) {
1243
21.3k
            if (length_obj.null()) {
1244
21.0k
                throw qpdf.damagedPDF(offset, "stream dictionary lacks /Length key");
1245
21.0k
            }
1246
227
            throw qpdf.damagedPDF(offset, "/Length key in stream dictionary is not an integer");
1247
21.3k
        }
1248
1249
26.5k
        length = toS(length_obj.getUIntValue());
1250
        // Seek in two steps to avoid potential integer overflow
1251
26.5k
        m->file->seek(stream_offset, SEEK_SET);
1252
26.5k
        m->file->seek(toO(length), SEEK_CUR);
1253
26.5k
        if (!readToken(*m->file).isWord("endstream")) {
1254
7.04k
            throw qpdf.damagedPDF("expected endstream");
1255
7.04k
        }
1256
34.5k
    } catch (QPDFExc& e) {
1257
34.5k
        if (m->attempt_recovery) {
1258
34.5k
            qpdf.warn(e);
1259
34.5k
            length = recoverStreamLength(m->file, og, stream_offset);
1260
34.5k
        } else {
1261
0
            throw;
1262
0
        }
1263
34.5k
    }
1264
40.2k
    object = QPDFObjectHandle(qpdf::Stream(qpdf, og, object, stream_offset, length));
1265
40.2k
}
1266
1267
void
1268
Objects::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1269
47.8k
{
1270
    // The PDF specification states that the word "stream" should be followed by either a carriage
1271
    // return and a newline or by a newline alone.  It specifically disallowed following it by a
1272
    // carriage return alone since, in that case, there would be no way to tell whether the NL in a
1273
    // CR NL sequence was part of the stream data.  However, some readers, including Adobe reader,
1274
    // accept a carriage return by itself when followed by a non-newline character, so that's what
1275
    // we do here. We have also seen files that have extraneous whitespace between the stream
1276
    // keyword and the newline.
1277
54.7k
    while (true) {
1278
54.7k
        char ch;
1279
54.7k
        if (m->file->read(&ch, 1) == 0) {
1280
            // A premature EOF here will result in some other problem that will get reported at
1281
            // another time.
1282
162
            return;
1283
162
        }
1284
54.5k
        if (ch == '\n') {
1285
            // ready to read stream data
1286
24.9k
            return;
1287
24.9k
        }
1288
29.6k
        if (ch == '\r') {
1289
            // Read another character
1290
18.2k
            if (m->file->read(&ch, 1) != 0) {
1291
18.2k
                if (ch == '\n') {
1292
                    // Ready to read stream data
1293
15.2k
                    QTC::TC("qpdf", "QPDF stream with CRNL");
1294
15.2k
                } else {
1295
                    // Treat the \r by itself as the whitespace after endstream and start reading
1296
                    // stream data in spite of not having seen a newline.
1297
3.04k
                    m->file->unreadCh(ch);
1298
3.04k
                    qpdf.warn(qpdf.damagedPDF(
1299
3.04k
                        m->file->tell(), "stream keyword followed by carriage return only"));
1300
3.04k
                }
1301
18.2k
            }
1302
18.2k
            return;
1303
18.2k
        }
1304
11.3k
        if (!util::is_space(ch)) {
1305
4.48k
            m->file->unreadCh(ch);
1306
4.48k
            qpdf.warn(qpdf.damagedPDF(
1307
4.48k
                m->file->tell(), "stream keyword not followed by proper line terminator"));
1308
4.48k
            return;
1309
4.48k
        }
1310
6.89k
        qpdf.warn(
1311
6.89k
            qpdf.damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
1312
6.89k
    }
1313
47.8k
}
1314
1315
// Parse object `obj_id` of object stream `stream_id` from `input` and return it. If the parser
// reports an empty object, a warning naming the file, the object stream, the object and the last
// offset of `input` is issued.
QPDFObjectHandle
Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id)
{
    auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, qpdf);
    if (empty) {
        // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
        // actual PDF files and Adobe Reader appears to ignore them.
        qpdf.warn(QPDFExc(
            qpdf_e_damaged_pdf,
            m->file->getName() + " object stream " + std::to_string(stream_id),
            "object " + std::to_string(obj_id) + " 0, offset " +
                std::to_string(input.getLastOffset()),
            0,
            "empty object treated as null"));
    }
    return object;
}
1332
1333
bool
1334
QPDF::findEndstream()
1335
30.9k
{
1336
    // Find endstream or endobj. Position the input at that token.
1337
30.9k
    auto t = m->objects.readToken(*m->file, 20);
1338
30.9k
    if (t.isWord("endobj") || t.isWord("endstream")) {
1339
26.5k
        m->file->seek(m->file->getLastOffset(), SEEK_SET);
1340
26.5k
        return true;
1341
26.5k
    }
1342
4.46k
    return false;
1343
30.9k
}
1344
1345
size_t
1346
Objects::recoverStreamLength(
1347
    std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset)
1348
27.1k
{
1349
    // Try to reconstruct stream length by looking for endstream or endobj
1350
27.1k
    qpdf.warn(qpdf.damagedPDF(*input, stream_offset, "attempting to recover stream length"));
1351
1352
27.1k
    PatternFinder ef(qpdf, &QPDF::findEndstream);
1353
27.1k
    size_t length = 0;
1354
27.1k
    if (m->file->findFirst("end", stream_offset, 0, ef)) {
1355
26.5k
        length = toS(m->file->tell() - stream_offset);
1356
        // Reread endstream but, if it was endobj, don't skip that.
1357
26.5k
        QPDFTokenizer::Token t = readToken(*m->file);
1358
26.5k
        if (t.getValue() == "endobj") {
1359
19.0k
            m->file->seek(m->file->getLastOffset(), SEEK_SET);
1360
19.0k
        }
1361
26.5k
    }
1362
1363
27.1k
    if (length) {
1364
25.4k
        auto end = stream_offset + toO(length);
1365
25.4k
        qpdf_offset_t found_offset = 0;
1366
25.4k
        QPDFObjGen found_og;
1367
1368
        // Make sure this is inside this object
1369
466k
        for (auto const& [current_og, entry]: m->xref_table) {
1370
466k
            if (entry.getType() == 1) {
1371
458k
                qpdf_offset_t obj_offset = entry.getOffset();
1372
458k
                if (found_offset < obj_offset && obj_offset < end) {
1373
128k
                    found_offset = obj_offset;
1374
128k
                    found_og = current_og;
1375
128k
                }
1376
458k
            }
1377
466k
        }
1378
25.4k
        if (!found_offset || found_og == og) {
1379
            // If we are trying to recover an XRef stream the xref table will not contain and
1380
            // won't contain any entries, therefore we cannot check the found length. Otherwise we
1381
            // found endstream\nendobj within the space allowed for this object, so we're probably
1382
            // in good shape.
1383
23.7k
        } else {
1384
1.64k
            length = 0;
1385
1.64k
        }
1386
25.4k
    }
1387
1388
27.1k
    if (length == 0) {
1389
3.40k
        qpdf.warn(qpdf.damagedPDF(
1390
3.40k
            *input, stream_offset, "unable to recover stream data; treating stream as empty"));
1391
23.7k
    } else {
1392
23.7k
        qpdf.warn(qpdf.damagedPDF(
1393
23.7k
            *input, stream_offset, "recovered stream length: " + std::to_string(length)));
1394
23.7k
    }
1395
1396
27.1k
    return length;
1397
27.1k
}
1398
1399
// Read the next token from `input` with the shared tokenizer, passing the last object description
// as context and max_len through unchanged.
QPDFTokenizer::Token
Objects::readToken(InputSource& input, size_t max_len)
{
    auto const& context = m->last_object_description;
    return m->tokenizer.readToken(input, context, true, max_len);
}
1404
1405
// Seek to `offset` and read the "<id> <generation> obj" header of an indirect object. Returns the
// object id and generation. Throws a damaged-PDF QPDFExc if the three tokens are not an integer,
// an integer and the word "obj", or if the object id is 0. Tokens are read one at a time and
// reading stops at the first token that does not match.
QPDFObjGen
Objects::read_object_start(qpdf_offset_t offset)
{
    auto malformed = [this, offset]() { return qpdf.damagedPDF(offset, "expected n n obj"); };

    m->file->seek(offset, SEEK_SET);

    QPDFTokenizer::Token id_token = readToken(*m->file);
    if (!id_token.isInteger()) {
        throw malformed();
    }
    QPDFTokenizer::Token gen_token = readToken(*m->file);
    if (!gen_token.isInteger()) {
        throw malformed();
    }
    QPDFTokenizer::Token obj_token = readToken(*m->file);
    if (!obj_token.isWord("obj")) {
        throw malformed();
    }

    int const objid = QUtil::string_to_int(id_token.getValue().c_str());
    int const generation = QUtil::string_to_int(gen_token.getValue().c_str());
    if (objid == 0) {
        throw qpdf.damagedPDF(offset, "object with ID 0");
    }
    return {objid, generation};
}
1433
1434
void
1435
Objects::readObjectAtOffset(
1436
    bool try_recovery, qpdf_offset_t offset, std::string const& description, QPDFObjGen exp_og)
1437
109k
{
1438
109k
    QPDFObjGen og;
1439
109k
    setLastObjectDescription(description, exp_og);
1440
1441
109k
    if (!m->attempt_recovery) {
1442
0
        try_recovery = false;
1443
0
    }
1444
1445
    // Special case: if offset is 0, just return null.  Some PDF writers, in particular
1446
    // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as
1447
    // "0000000000 00000 n", which is not correct, but it won't hurt anything for us to ignore
1448
    // these.
1449
109k
    if (offset == 0) {
1450
332
        qpdf.warn(qpdf.damagedPDF(-1, "object has offset 0"));
1451
332
        return;
1452
332
    }
1453
1454
109k
    try {
1455
109k
        og = read_object_start(offset);
1456
109k
        if (exp_og != og) {
1457
371
            QPDFExc e = qpdf.damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");
1458
371
            if (try_recovery) {
1459
                // Will be retried below
1460
371
                throw e;
1461
371
            } else {
1462
                // We can try reading the object anyway even if the ID doesn't match.
1463
0
                qpdf.warn(e);
1464
0
            }
1465
371
        }
1466
109k
    } catch (QPDFExc& e) {
1467
4.40k
        if (!try_recovery) {
1468
0
            throw;
1469
0
        }
1470
        // Try again after reconstructing xref table
1471
4.40k
        reconstruct_xref(e);
1472
4.40k
        if (m->xref_table.contains(exp_og) && m->xref_table[exp_og].getType() == 1) {
1473
64
            qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset();
1474
64
            readObjectAtOffset(false, new_offset, description, exp_og);
1475
64
            return;
1476
64
        }
1477
4.33k
        qpdf.warn(qpdf.damagedPDF(
1478
4.33k
            "",
1479
4.33k
            -1,
1480
4.33k
            ("object " + exp_og.unparse(' ') +
1481
4.33k
             " not found in file after regenerating cross reference table")));
1482
4.33k
        return;
1483
4.40k
    }
1484
1485
104k
    QPDFObjectHandle oh = readObject(description, og);
1486
1487
    // Determine the end offset of this object before and after white space.  We use these
1488
    // numbers to validate linearization hint tables.  Offsets and lengths of objects may imply
1489
    // the end of an object to be anywhere between these values.
1490
104k
    qpdf_offset_t end_before_space = m->file->tell();
1491
1492
    // skip over spaces
1493
209k
    while (true) {
1494
198k
        char ch;
1495
198k
        if (!m->file->read(&ch, 1)) {
1496
3.78k
            throw qpdf.damagedPDF(m->file->tell(), "EOF after endobj");
1497
3.78k
        }
1498
194k
        if (!isspace(static_cast<unsigned char>(ch))) {
1499
89.1k
            m->file->seek(-1, SEEK_CUR);
1500
89.1k
            break;
1501
89.1k
        }
1502
194k
    }
1503
100k
    m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell());
1504
100k
}
1505
1506
// Read the object that starts at `offset` without checking it against an expected object id.
// The object/generation is taken from the object header found at that offset. The parsed object
// is stored in the object cache unless it is already resolved there, or unless
// `skip_cache_if_in_xref` is set and the xref table already has an entry for it.
// Throws a damaged-PDF exception if the file ends while skipping white space after the object.
QPDFObjectHandle
Objects::readObjectAtOffset(
    qpdf_offset_t offset, std::string const& description, bool skip_cache_if_in_xref)
{
    auto og = read_object_start(offset);
    auto parsed = readObject(description, og);

    // An already resolved cache entry wins; hand back what we just read without caching it.
    if (!m->objects.isUnresolved(og)) {
        return parsed;
    }

    if (skip_cache_if_in_xref && m->xref_table.contains(og)) {
        // For xref streams and linearization hint tables the offset is supplied by something
        // other than the xref table. Xref streams in particular are read and cached very early
        // during parsing. Normally, for files updated by appending, xref entries from later
        // updates take precedence over earlier ones. If a later update reuses the object number
        // that an older xref stream had, the sequence is:
        //
        // * the reused object is entered into the xref table
        // * the old object is read here while reading xref streams
        // * the original xref entry is ignored (an entry already exists)
        //
        // The second step is the problem: the xref table is right, but the old object would
        // already sit in the cache and so effectively override the reused object. The
        // skip_cache_if_in_xref flag works around that: when the object is already in the xref
        // table, what was read here is not cached.
        //
        // The same bug probably exists for linearization hint tables, but the existing code
        // relies on end_before_space and end_after_space from the cache, so fixing it needs more
        // significant rework. Reuse of a hint stream's object number seems less likely (the xref
        // stream is probably the highest object in the file, the hint stream sits somewhere in
        // the middle), so that case is left unfixed for now. If the bug were to be fixed, we
        // could use !check_og in place of skip_cache_if_in_xref.
        QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
        return parsed;
    }

    // Record where the object ends both before and after trailing white space. These values are
    // used to validate linearization hint tables: offsets and lengths of objects may imply the
    // end of an object to be anywhere between the two.
    qpdf_offset_t const end_before_space = m->file->tell();

    // Consume white space; stop in front of the first non-space character.
    for (;;) {
        char next;
        if (!m->file->read(&next, 1)) {
            throw qpdf.damagedPDF(m->file->tell(), "EOF after endobj");
        }
        if (isspace(static_cast<unsigned char>(next))) {
            continue;
        }
        m->file->seek(-1, SEEK_CUR);
        break;
    }

    qpdf_offset_t const end_after_space = m->file->tell();
    m->objects.updateCache(og, parsed.getObj(), end_before_space, end_after_space);

    return parsed;
}
1566
1567
std::shared_ptr<QPDFObject> const&
1568
Objects::resolve(QPDFObjGen og)
1569
307k
{
1570
307k
    if (!isUnresolved(og)) {
1571
0
        return m->obj_cache[og].object;
1572
0
    }
1573
1574
307k
    if (m->resolving.contains(og)) {
1575
        // This can happen if an object references itself directly or indirectly in some key that
1576
        // has to be resolved during object parsing, such as stream length.
1577
191
        qpdf.warn(qpdf.damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
1578
191
        updateCache(og, QPDFObject::create<QPDF_Null>(), -1, -1);
1579
191
        return m->obj_cache[og].object;
1580
191
    }
1581
307k
    ResolveRecorder rr(qpdf, og);
1582
1583
307k
    if (m->xref_table.contains(og)) {
1584
254k
        QPDFXRefEntry const& entry = m->xref_table[og];
1585
254k
        try {
1586
254k
            switch (entry.getType()) {
1587
109k
            case 1:
1588
                // Object stored in cache by readObjectAtOffset
1589
109k
                readObjectAtOffset(true, entry.getOffset(), "", og);
1590
109k
                break;
1591
1592
144k
            case 2:
1593
144k
                resolveObjectsInStream(entry.getObjStreamNumber());
1594
144k
                break;
1595
1596
3
            default:
1597
3
                throw qpdf.damagedPDF(
1598
3
                    "", -1, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1599
254k
            }
1600
254k
        } catch (QPDFExc& e) {
1601
43.7k
            qpdf.warn(e);
1602
43.7k
        } catch (std::exception& e) {
1603
1.20k
            qpdf.warn(qpdf.damagedPDF(
1604
1.20k
                "", -1, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
1605
1.20k
        }
1606
254k
    }
1607
1608
293k
    if (isUnresolved(og)) {
1609
        // PDF spec says unknown objects resolve to the null object.
1610
203k
        updateCache(og, QPDFObject::create<QPDF_Null>(), -1, -1);
1611
203k
    }
1612
1613
293k
    auto& result(m->obj_cache[og].object);
1614
293k
    result->setDefaultDescription(&qpdf, og);
1615
293k
    return result;
1616
307k
}
1617
1618
void
1619
Objects::resolveObjectsInStream(int obj_stream_number)
1620
144k
{
1621
144k
    auto damaged =
1622
144k
        [this, obj_stream_number](int id, qpdf_offset_t offset, std::string const& msg) -> QPDFExc {
1623
19.7k
        return {
1624
19.7k
            qpdf_e_damaged_pdf,
1625
19.7k
            m->file->getName() + " object stream " + std::to_string(obj_stream_number),
1626
19.7k
            +"object " + std::to_string(id) + " 0",
1627
19.7k
            offset,
1628
19.7k
            msg,
1629
19.7k
            true};
1630
19.7k
    };
1631
1632
144k
    if (m->resolved_object_streams.contains(obj_stream_number)) {
1633
119k
        return;
1634
119k
    }
1635
25.6k
    m->resolved_object_streams.insert(obj_stream_number);
1636
    // Force resolution of object stream
1637
25.6k
    Stream obj_stream = qpdf.getObject(obj_stream_number, 0);
1638
25.6k
    if (!obj_stream) {
1639
22.4k
        throw qpdf.damagedPDF(
1640
22.4k
            "object " + std::to_string(obj_stream_number) + " 0",
1641
22.4k
            "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
1642
22.4k
    }
1643
1644
    // For linearization data in the object, use the data from the object stream for the objects in
1645
    // the stream.
1646
3.14k
    QPDFObjGen stream_og(obj_stream_number, 0);
1647
3.14k
    qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
1648
3.14k
    qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
1649
1650
3.14k
    QPDFObjectHandle dict = obj_stream.getDict();
1651
3.14k
    if (!dict.isDictionaryOfType("/ObjStm")) {
1652
1.10k
        qpdf.warn(qpdf.damagedPDF(
1653
1.10k
            "object " + std::to_string(obj_stream_number) + " 0",
1654
1.10k
            "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type"));
1655
1.10k
    }
1656
1657
3.14k
    unsigned int n{0};
1658
3.14k
    int first{0};
1659
3.14k
    if (!(dict.getKey("/N").getValueAsUInt(n) && dict.getKey("/First").getValueAsInt(first))) {
1660
244
        throw qpdf.damagedPDF(
1661
244
            "object " + std::to_string(obj_stream_number) + " 0",
1662
244
            "object stream " + std::to_string(obj_stream_number) + " has incorrect keys");
1663
244
    }
1664
1665
    // id, offset, size
1666
2.90k
    std::vector<std::tuple<int, qpdf_offset_t, size_t>> offsets;
1667
1668
2.90k
    auto stream_data = obj_stream.getStreamData(qpdf_dl_specialized);
1669
1670
2.90k
    is::OffsetBuffer input("", stream_data);
1671
1672
2.90k
    const auto b_size = stream_data.size();
1673
2.90k
    const auto end_offset = static_cast<qpdf_offset_t>(b_size);
1674
2.90k
    auto b_start = stream_data.data();
1675
1676
2.90k
    if (first >= end_offset) {
1677
79
        throw qpdf.damagedPDF(
1678
79
            "object " + std::to_string(obj_stream_number) + " 0",
1679
79
            "object stream " + std::to_string(obj_stream_number) + " has invalid /First entry");
1680
79
    }
1681
1682
2.82k
    int id = 0;
1683
2.82k
    long long last_offset = -1;
1684
2.82k
    bool is_first = true;
1685
66.0k
    for (unsigned int i = 0; i < n; ++i) {
1686
63.4k
        auto tnum = readToken(input);
1687
63.4k
        auto id_offset = input.getLastOffset();
1688
63.4k
        auto toffset = readToken(input);
1689
63.4k
        if (!(tnum.isInteger() && toffset.isInteger())) {
1690
201
            throw damaged(0, input.getLastOffset(), "expected integer in object stream header");
1691
201
        }
1692
1693
63.2k
        int num = QUtil::string_to_int(tnum.getValue().c_str());
1694
63.2k
        long long offset = QUtil::string_to_int(toffset.getValue().c_str());
1695
1696
63.2k
        if (num == obj_stream_number) {
1697
406
            qpdf.warn(damaged(num, id_offset, "object stream claims to contain itself"));
1698
406
            continue;
1699
406
        }
1700
1701
62.8k
        if (num < 1) {
1702
949
            qpdf.warn(damaged(num, id_offset, "object id is invalid"s));
1703
949
            continue;
1704
949
        }
1705
1706
61.8k
        if (offset <= last_offset) {
1707
9.76k
            qpdf.warn(damaged(
1708
9.76k
                num,
1709
9.76k
                input.getLastOffset(),
1710
9.76k
                "offset " + std::to_string(offset) +
1711
9.76k
                    " is invalid (must be larger than previous offset " +
1712
9.76k
                    std::to_string(last_offset) + ")"));
1713
9.76k
            continue;
1714
9.76k
        }
1715
1716
52.0k
        if (num > m->xref_table_max_id) {
1717
1.84k
            continue;
1718
1.84k
        }
1719
1720
50.2k
        if (first + offset >= end_offset) {
1721
8.40k
            qpdf.warn(damaged(
1722
8.40k
                num, input.getLastOffset(), "offset " + std::to_string(offset) + " is too large"));
1723
8.40k
            continue;
1724
8.40k
        }
1725
1726
41.8k
        if (is_first) {
1727
1.02k
            is_first = false;
1728
40.8k
        } else {
1729
40.8k
            offsets.emplace_back(
1730
40.8k
                id, last_offset + first, static_cast<size_t>(offset - last_offset));
1731
40.8k
        }
1732
1733
41.8k
        last_offset = offset;
1734
41.8k
        id = num;
1735
41.8k
    }
1736
1737
2.62k
    if (!is_first) {
1738
        // We found at least one valid entry.
1739
863
        offsets.emplace_back(
1740
863
            id, last_offset + first, b_size - static_cast<size_t>(last_offset + first));
1741
863
    }
1742
1743
    // To avoid having to read the object stream multiple times, store all objects that would be
1744
    // found here in the cache.  Remember that some objects stored here might have been overridden
1745
    // by new objects appended to the file, so it is necessary to recheck the xref table and only
1746
    // cache what would actually be resolved here.
1747
36.5k
    for (auto const& [obj_id, obj_offset, obj_size]: offsets) {
1748
36.5k
        QPDFObjGen og(obj_id, 0);
1749
36.5k
        auto entry = m->xref_table.find(og);
1750
36.5k
        if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
1751
35.2k
            entry->second.getObjStreamNumber() == obj_stream_number) {
1752
31.5k
            is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset);
1753
31.5k
            auto oh = readObjectInStream(in, obj_stream_number, obj_id);
1754
31.5k
            updateCache(og, oh.getObj(), end_before_space, end_after_space);
1755
31.5k
        } else {
1756
5.02k
            QTC::TC("qpdf", "QPDF not caching overridden objstm object");
1757
5.02k
        }
1758
36.5k
    }
1759
2.62k
}
1760
1761
// Wrap `obj` in an object handle after giving it the default description for `og` in this
// document.
QPDFObjectHandle
Objects::newIndirect(QPDFObjGen og, std::shared_ptr<QPDFObject> const& obj)
{
    obj->setDefaultDescription(&qpdf, og);
    return {obj};
}
1767
1768
void
1769
Objects::updateCache(
1770
    QPDFObjGen og,
1771
    std::shared_ptr<QPDFObject> const& object,
1772
    qpdf_offset_t end_before_space,
1773
    qpdf_offset_t end_after_space,
1774
    bool destroy)
1775
327k
{
1776
327k
    object->setObjGen(&qpdf, og);
1777
327k
    if (isCached(og)) {
1778
185k
        auto& cache = m->obj_cache[og];
1779
185k
        object->move_to(cache.object, destroy);
1780
185k
        cache.end_before_space = end_before_space;
1781
185k
        cache.end_after_space = end_after_space;
1782
185k
    } else {
1783
142k
        m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space);
1784
142k
    }
1785
327k
}
1786
1787
bool
1788
Objects::isCached(QPDFObjGen og)
1789
1.15M
{
1790
1.15M
    return m->obj_cache.contains(og);
1791
1.15M
}
1792
1793
bool
1794
Objects::isUnresolved(QPDFObjGen og)
1795
832k
{
1796
832k
    return !isCached(og) || m->obj_cache[og].object->isUnresolved();
1797
832k
}
1798
1799
// Return the object/generation for a new object: one more than the current object count, with
// generation 0. Throws std::range_error if that id would not fit into an int.
QPDFObjGen
Objects::nextObjGen()
{
    int const highest_id = toI(qpdf.getObjectCount());
    if (highest_id == std::numeric_limits<int>::max()) {
        throw std::range_error("max object id is too high to create new objects");
    }
    return QPDFObjGen(highest_id + 1, 0);
}
1808
1809
// Register `obj` in the cache under a newly allocated object id and return an indirect handle
// to the cached object. No file offsets are associated with the entry (-1, -1).
QPDFObjectHandle
Objects::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
{
    QPDFObjGen const fresh_og = nextObjGen();
    auto& slot = m->obj_cache[fresh_og];
    slot = ObjCache(obj, -1, -1);
    return newIndirect(fresh_og, slot.object);
}
1816
1817
// Turn the object held by `oh` into a new indirect object of this document.
// Throws std::logic_error if `oh` is uninitialized.
QPDFObjectHandle
QPDF::makeIndirectObject(QPDFObjectHandle oh)
{
    if (oh) {
        return m->objects.makeIndirectFromQPDFObject(oh.getObj());
    }
    throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");
}
1825
1826
std::shared_ptr<QPDFObject>
1827
Objects::getObjectForParser(int id, int gen, bool parse_pdf)
1828
282k
{
1829
    // This method is called by the parser and therefore must not resolve any objects.
1830
282k
    auto og = QPDFObjGen(id, gen);
1831
282k
    if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
1832
116k
        return iter->second.object;
1833
116k
    }
1834
165k
    if (m->xref_table.contains(og) || (!m->parsed && og.getObj() < m->xref_table_max_id)) {
1835
146k
        return m->obj_cache.insert({og, QPDFObject::create<QPDF_Unresolved>(&qpdf, og)})
1836
146k
            .first->second.object;
1837
146k
    }
1838
18.9k
    if (parse_pdf) {
1839
18.9k
        return QPDFObject::create<QPDF_Null>();
1840
18.9k
    }
1841
0
    return m->obj_cache.insert({og, QPDFObject::create<QPDF_Null>(&qpdf, og)}).first->second.object;
1842
18.9k
}
1843
1844
std::shared_ptr<QPDFObject>
1845
Objects::getObjectForJSON(int id, int gen)
1846
0
{
1847
0
    auto og = QPDFObjGen(id, gen);
1848
0
    auto [it, inserted] = m->obj_cache.try_emplace(og);
1849
0
    auto& obj = it->second.object;
1850
0
    if (inserted) {
1851
0
        obj = (m->parsed && !m->xref_table.contains(og))
1852
0
            ? QPDFObject::create<QPDF_Null>(&qpdf, og)
1853
0
            : QPDFObject::create<QPDF_Unresolved>(&qpdf, og);
1854
0
    }
1855
0
    return obj;
1856
0
}
1857
1858
// Return a handle for object `og` without resolving it. A cached object is returned as is. An
// object missing from the xref table of a fully parsed file yields a null that is not cached.
// Anything else gets a cached unresolved placeholder.
QPDFObjectHandle
QPDF::getObject(QPDFObjGen og)
{
    auto const cached = m->obj_cache.find(og);
    if (cached != m->obj_cache.end()) {
        return {cached->second.object};
    }

    if (m->parsed && !m->xref_table.contains(og)) {
        return QPDFObject::create<QPDF_Null>();
    }

    auto placed =
        m->obj_cache.try_emplace(og, QPDFObject::create<QPDF_Unresolved>(this, og), -1, -1);
    return {placed.first->second.object};
}
1871
1872
void
1873
QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
1874
0
{
1875
0
    replaceObject(QPDFObjGen(objid, generation), oh);
1876
0
}
1877
1878
void
1879
QPDF::replaceObject(QPDFObjGen og, QPDFObjectHandle oh)
1880
0
{
1881
0
    if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) {
1882
0
        throw std::logic_error("QPDF::replaceObject called with indirect object handle");
1883
0
    }
1884
0
    m->objects.updateCache(og, oh.getObj(), -1, -1, false);
1885
0
}
1886
1887
void
1888
QPDF::removeObject(QPDFObjGen og)
1889
21.5k
{
1890
21.5k
    m->xref_table.erase(og);
1891
21.5k
    if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {
1892
        // Take care of any object handles that may be floating around.
1893
867
        cached->second.object->assign_null();
1894
867
        cached->second.object->setObjGen(nullptr, QPDFObjGen());
1895
867
        m->obj_cache.erase(cached);
1896
867
    }
1897
21.5k
}
1898
1899
void
1900
QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
1901
0
{
1902
0
    QTC::TC("qpdf", "QPDF replaceReserved");
1903
0
    auto tc = reserved.getTypeCode();
1904
0
    if (!(tc == ::ot_reserved || tc == ::ot_null)) {
1905
0
        throw std::logic_error("replaceReserved called with non-reserved object");
1906
0
    }
1907
0
    replaceObject(reserved.getObjGen(), replacement);
1908
0
}
1909
1910
void
1911
QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
1912
0
{
1913
0
    swapObjects(QPDFObjGen(objid1, generation1), QPDFObjGen(objid2, generation2));
1914
0
}
1915
1916
void
1917
QPDF::swapObjects(QPDFObjGen og1, QPDFObjGen og2)
1918
0
{
1919
    // Force objects to be read from the input source if needed, then swap them in the cache.
1920
0
    m->objects.resolve(og1);
1921
0
    m->objects.resolve(og2);
1922
0
    m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
1923
0
}
1924
1925
size_t
1926
Objects::tableSize()
1927
9.19k
{
1928
    // If obj_cache is dense, accommodate all object in tables,else accommodate only original
1929
    // objects.
1930
9.19k
    auto max_xref = !m->xref_table.empty() ? m->xref_table.crbegin()->first.getObj() : 0;
1931
9.19k
    auto max_obj = !m->obj_cache.empty() ? m->obj_cache.crbegin()->first.getObj() : 0;
1932
9.19k
    auto max_id = std::numeric_limits<int>::max() - 1;
1933
9.19k
    if (max_obj >= max_id || max_xref >= max_id) {
1934
        // Temporary fix. Long-term solution is
1935
        // - QPDFObjGen to enforce objgens are valid and sensible
1936
        // - xref table and obj cache to protect against insertion of impossibly large obj ids
1937
2
        qpdf.stopOnError("Impossibly large object id encountered.");
1938
2
    }
1939
9.19k
    if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {
1940
7.67k
        return toS(++max_obj);
1941
7.67k
    }
1942
1.52k
    return toS(++max_xref);
1943
9.19k
}
1944
1945
std::vector<QPDFObjGen>
1946
Objects::getCompressibleObjVector()
1947
0
{
1948
0
    return getCompressibleObjGens<QPDFObjGen>();
1949
0
}
1950
1951
std::vector<bool>
1952
Objects::getCompressibleObjSet()
1953
882
{
1954
882
    return getCompressibleObjGens<bool>();
1955
882
}
1956
1957
template <typename T>
1958
std::vector<T>
1959
Objects::getCompressibleObjGens()
1960
882
{
1961
    // Return a list of objects that are allowed to be in object streams.  Walk through the objects
1962
    // by traversing the document from the root, including a traversal of the pages tree.  This
1963
    // makes that objects that are on the same page are more likely to be in the same object stream,
1964
    // which is slightly more efficient, particularly with linearized files.  This is better than
1965
    // iterating through the xref table since it avoids preserving orphaned items.
1966
1967
    // Exclude encryption dictionary, if any
1968
882
    QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
1969
882
    QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
1970
1971
882
    const size_t max_obj = qpdf.getObjectCount();
1972
882
    std::vector<bool> visited(max_obj, false);
1973
882
    std::vector<QPDFObjectHandle> queue;
1974
882
    queue.reserve(512);
1975
882
    queue.emplace_back(m->trailer);
1976
882
    std::vector<T> result;
1977
882
    if constexpr (std::is_same_v<T, QPDFObjGen>) {
1978
0
        result.reserve(m->obj_cache.size());
1979
882
    } else if constexpr (std::is_same_v<T, bool>) {
1980
882
        result.resize(max_obj + 1U, false);
1981
    } else {
1982
        throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
1983
    }
1984
337k
    while (!queue.empty()) {
1985
336k
        auto obj = queue.back();
1986
336k
        queue.pop_back();
1987
336k
        if (obj.getObjectID() > 0) {
1988
40.7k
            QPDFObjGen og = obj.getObjGen();
1989
40.7k
            const size_t id = toS(og.getObj() - 1);
1990
40.7k
            if (id >= max_obj) {
1991
0
                throw std::logic_error(
1992
0
                    "unexpected object id encountered in getCompressibleObjGens");
1993
0
            }
1994
40.7k
            if (visited[id]) {
1995
12.8k
                continue;
1996
12.8k
            }
1997
1998
            // Check whether this is the current object. If not, remove it (which changes it into a
1999
            // direct null and therefore stops us from revisiting it) and move on to the next object
2000
            // in the queue.
2001
27.8k
            auto upper = m->obj_cache.upper_bound(og);
2002
27.8k
            if (upper != m->obj_cache.end() && upper->first.getObj() == og.getObj()) {
2003
600
                qpdf.removeObject(og);
2004
600
                continue;
2005
600
            }
2006
2007
27.2k
            visited[id] = true;
2008
2009
27.2k
            if (og == encryption_dict_og) {
2010
20
                QTC::TC("qpdf", "QPDF exclude encryption dictionary");
2011
27.2k
            } else if (!(obj.isStream() ||
2012
25.0k
                         (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
2013
25.0k
                          obj.hasKey("/Contents")))) {
2014
25.0k
                if constexpr (std::is_same_v<T, QPDFObjGen>) {
2015
0
                    result.push_back(og);
2016
25.0k
                } else if constexpr (std::is_same_v<T, bool>) {
2017
25.0k
                    result[id + 1U] = true;
2018
25.0k
                }
2019
25.0k
            }
2020
27.2k
        }
2021
323k
        if (obj.isStream()) {
2022
2.17k
            auto dict = obj.getDict().as_dictionary();
2023
2.17k
            auto end = dict.crend();
2024
16.4k
            for (auto iter = dict.crbegin(); iter != end; ++iter) {
2025
14.2k
                std::string const& key = iter->first;
2026
14.2k
                QPDFObjectHandle const& value = iter->second;
2027
14.2k
                if (!value.null()) {
2028
12.2k
                    if (key == "/Length") {
2029
                        // omit stream lengths
2030
1.86k
                        if (value.isIndirect()) {
2031
143
                            QTC::TC("qpdf", "QPDF exclude indirect length");
2032
143
                        }
2033
10.4k
                    } else {
2034
10.4k
                        queue.emplace_back(value);
2035
10.4k
                    }
2036
12.2k
                }
2037
14.2k
            }
2038
320k
        } else if (obj.isDictionary()) {
2039
18.2k
            auto dict = obj.as_dictionary();
2040
18.2k
            auto end = dict.crend();
2041
91.5k
            for (auto iter = dict.crbegin(); iter != end; ++iter) {
2042
73.2k
                if (!iter->second.null()) {
2043
63.0k
                    queue.emplace_back(iter->second);
2044
63.0k
                }
2045
73.2k
            }
2046
302k
        } else if (auto items = obj.as_array()) {
2047
302k
            queue.insert(queue.end(), items.crbegin(), items.crend());
2048
302k
        }
2049
323k
    }
2050
2051
882
    return result;
2052
882
}
Unexecuted instantiation: std::__1::vector<QPDFObjGen, std::__1::allocator<QPDFObjGen> > QPDF::Doc::Objects::getCompressibleObjGens<QPDFObjGen>()
std::__1::vector<bool, std::__1::allocator<bool> > QPDF::Doc::Objects::getCompressibleObjGens<bool>()
Line
Count
Source
1960
882
{
1961
    // Return a list of objects that are allowed to be in object streams.  Walk through the objects
1962
    // by traversing the document from the root, including a traversal of the pages tree.  This
1963
    // makes that objects that are on the same page are more likely to be in the same object stream,
1964
    // which is slightly more efficient, particularly with linearized files.  This is better than
1965
    // iterating through the xref table since it avoids preserving orphaned items.
1966
1967
    // Exclude encryption dictionary, if any
1968
882
    QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
1969
882
    QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
1970
1971
882
    const size_t max_obj = qpdf.getObjectCount();
1972
882
    std::vector<bool> visited(max_obj, false);
1973
882
    std::vector<QPDFObjectHandle> queue;
1974
882
    queue.reserve(512);
1975
882
    queue.emplace_back(m->trailer);
1976
882
    std::vector<T> result;
1977
    if constexpr (std::is_same_v<T, QPDFObjGen>) {
1978
        result.reserve(m->obj_cache.size());
1979
882
    } else if constexpr (std::is_same_v<T, bool>) {
1980
882
        result.resize(max_obj + 1U, false);
1981
    } else {
1982
        throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
1983
    }
1984
337k
    while (!queue.empty()) {
1985
336k
        auto obj = queue.back();
1986
336k
        queue.pop_back();
1987
336k
        if (obj.getObjectID() > 0) {
1988
40.7k
            QPDFObjGen og = obj.getObjGen();
1989
40.7k
            const size_t id = toS(og.getObj() - 1);
1990
40.7k
            if (id >= max_obj) {
1991
0
                throw std::logic_error(
1992
0
                    "unexpected object id encountered in getCompressibleObjGens");
1993
0
            }
1994
40.7k
            if (visited[id]) {
1995
12.8k
                continue;
1996
12.8k
            }
1997
1998
            // Check whether this is the current object. If not, remove it (which changes it into a
1999
            // direct null and therefore stops us from revisiting it) and move on to the next object
2000
            // in the queue.
2001
27.8k
            auto upper = m->obj_cache.upper_bound(og);
2002
27.8k
            if (upper != m->obj_cache.end() && upper->first.getObj() == og.getObj()) {
2003
600
                qpdf.removeObject(og);
2004
600
                continue;
2005
600
            }
2006
2007
27.2k
            visited[id] = true;
2008
2009
27.2k
            if (og == encryption_dict_og) {
2010
20
                QTC::TC("qpdf", "QPDF exclude encryption dictionary");
2011
27.2k
            } else if (!(obj.isStream() ||
2012
25.0k
                         (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
2013
25.0k
                          obj.hasKey("/Contents")))) {
2014
                if constexpr (std::is_same_v<T, QPDFObjGen>) {
2015
                    result.push_back(og);
2016
25.0k
                } else if constexpr (std::is_same_v<T, bool>) {
2017
25.0k
                    result[id + 1U] = true;
2018
25.0k
                }
2019
25.0k
            }
2020
27.2k
        }
2021
323k
        if (obj.isStream()) {
2022
2.17k
            auto dict = obj.getDict().as_dictionary();
2023
2.17k
            auto end = dict.crend();
2024
16.4k
            for (auto iter = dict.crbegin(); iter != end; ++iter) {
2025
14.2k
                std::string const& key = iter->first;
2026
14.2k
                QPDFObjectHandle const& value = iter->second;
2027
14.2k
                if (!value.null()) {
2028
12.2k
                    if (key == "/Length") {
2029
                        // omit stream lengths
2030
1.86k
                        if (value.isIndirect()) {
2031
143
                            QTC::TC("qpdf", "QPDF exclude indirect length");
2032
143
                        }
2033
10.4k
                    } else {
2034
10.4k
                        queue.emplace_back(value);
2035
10.4k
                    }
2036
12.2k
                }
2037
14.2k
            }
2038
320k
        } else if (obj.isDictionary()) {
2039
18.2k
            auto dict = obj.as_dictionary();
2040
18.2k
            auto end = dict.crend();
2041
91.5k
            for (auto iter = dict.crbegin(); iter != end; ++iter) {
2042
73.2k
                if (!iter->second.null()) {
2043
63.0k
                    queue.emplace_back(iter->second);
2044
63.0k
                }
2045
73.2k
            }
2046
302k
        } else if (auto items = obj.as_array()) {
2047
302k
            queue.insert(queue.end(), items.crbegin(), items.crend());
2048
302k
        }
2049
323k
    }
2050
2051
882
    return result;
2052
882
}