Coverage Report

Created: 2025-10-10 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
19
// special access to allow the parser to create unresolved objects and dangling references.
20
class QPDF::Doc::ParseGuard
21
{
22
  public:
23
    ParseGuard(QPDF* qpdf) :
24
1.95M
        objects(qpdf ? &qpdf->m->objects : nullptr)
25
1.95M
    {
26
1.95M
        if (objects) {
27
1.93M
            objects->inParse(true);
28
1.93M
        }
29
1.95M
    }
30
31
    static std::shared_ptr<QPDFObject>
32
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
33
496k
    {
34
496k
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
35
496k
    }
36
37
    ~ParseGuard()
38
1.95M
    {
39
1.95M
        if (objects) {
40
1.93M
            objects->inParse(false);
41
1.93M
        }
42
1.95M
    }
43
    QPDF::Doc::Objects* objects;
44
};
45
46
using ParseGuard = QPDF::Doc::ParseGuard;
47
48
// Parse one object from `input` with a locally created tokenizer and no string decrypter.
// The "empty" result reported by the parser is discarded.
QPDFObjectHandle
QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
{
    qpdf::Tokenizer local_tokenizer;
    bool unused_empty{false};
    // The parser is deliberately kept as a temporary so that it and all of its constructor
    // arguments share the lifetime of this single expression.
    return QPDFParser(
               input,
               make_description(input.getName(), object_description),
               object_description,
               local_tokenizer,
               nullptr, // no decrypter
               context,
               false)
        .parse(unused_empty, false);
}
63
64
// Parse the next object of a content stream. The parser is run in content-stream mode
// (second argument of the inner parse() is true) with "content" as object description and
// without a decrypter. The last constructor argument is true only when a context is given and
// its document reports a reconstructed xref table.
QPDFObjectHandle
QPDFParser::parse_content(
    InputSource& input,
    std::shared_ptr<QPDFObject::Description> sp_description,
    qpdf::Tokenizer& tokenizer,
    QPDF* context)
{
    bool unused_empty{false};
    // Keep the parser as a temporary: the string built from "content" must stay alive until
    // parse() has returned.
    return QPDFParser(
               input,
               std::move(sp_description),
               "content",
               tokenizer,
               nullptr, // no decrypter
               context,
               true,
               0,
               0,
               context && context->doc().reconstructed_xref())
        .parse(unused_empty, true);
}
85
86
// Parse one object from `input` using the implementation object (`m`) of a caller supplied
// QPDFTokenizer and an optional string decrypter. `empty` is passed through to the inner
// parse() call, which sets it.
QPDFObjectHandle
QPDFParser::parse(
    InputSource& input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF* context)
{
    // The parser stays a temporary so that its constructor arguments live for the whole call.
    return QPDFParser(
               input,
               make_description(input.getName(), object_description),
               object_description,
               *tokenizer.m, // unwrap the tokenizer implementation
               decrypter,
               context,
               false)
        .parse(empty, false);
}
105
106
std::pair<QPDFObjectHandle, bool>
107
QPDFParser::parse(
108
    InputSource& input,
109
    std::string const& object_description,
110
    qpdf::Tokenizer& tokenizer,
111
    QPDFObjectHandle::StringDecrypter* decrypter,
112
    QPDF& context,
113
    bool sanity_checks)
114
162k
{
115
162k
    bool empty{false};
116
162k
    auto result = QPDFParser(
117
162k
                      input,
118
162k
                      make_description(input.getName(), object_description),
119
162k
                      object_description,
120
162k
                      tokenizer,
121
162k
                      decrypter,
122
162k
                      &context,
123
162k
                      true,
124
162k
                      0,
125
162k
                      0,
126
162k
                      sanity_checks)
127
162k
                      .parse(empty, false);
128
162k
    return {result, empty};
129
162k
}
130
131
std::pair<QPDFObjectHandle, bool>
132
QPDFParser::parse(
133
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
134
25.9k
{
135
25.9k
    bool empty{false};
136
25.9k
    auto result = QPDFParser(
137
25.9k
                      input,
138
25.9k
                      std::make_shared<QPDFObject::Description>(
139
25.9k
                          QPDFObject::ObjStreamDescr(stream_id, obj_id)),
140
25.9k
                      "",
141
25.9k
                      tokenizer,
142
25.9k
                      nullptr,
143
25.9k
                      &context,
144
25.9k
                      true,
145
25.9k
                      stream_id,
146
25.9k
                      obj_id)
147
25.9k
                      .parse(empty, false);
148
25.9k
    return {result, empty};
149
25.9k
}
150
151
// Read the first token of an object and turn it into an object handle. Scalars are returned
// directly; an opening array or dictionary token pushes the first stack frame and hands over
// to parseRemainder(). `empty` is set to true only when the first token is the word "endobj"
// outside of a content stream. In content stream mode EOF yields an uninitialized handle.
QPDFObjectHandle
QPDFParser::parse(bool& empty, bool content_stream)
{
    // IMPORTANT: nothing in here may resolve an object. Never inspect the type of an object
    // unless it is known to be direct; resolving could read the object, move the file pointer
    // and make QPDF::inParse() throw a logic error.

    ParseGuard guard(context);
    empty = false;
    start = input.tell();

    if (!tokenizer.nextToken(input, object_description)) {
        warn(tokenizer.getErrorMessage());
    }

    auto const token_type = tokenizer.getType();
    switch (token_type) {
    case QPDFTokenizer::tt_eof:
        if (!content_stream) {
            QTC::TC("qpdf", "QPDFParser eof in parse");
            warn("unexpected EOF");
            return {QPDFObject::create<QPDF_Null>()};
        }
        // A content stream signals EOF with an uninitialized object.
        return {};

    case QPDFTokenizer::tt_bad:
        QTC::TC("qpdf", "QPDFParser bad token in parse");
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_brace_open:
    case QPDFTokenizer::tt_brace_close:
        QTC::TC("qpdf", "QPDFParser bad brace");
        warn("treating unexpected brace token as null");
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_array_close:
        QTC::TC("qpdf", "QPDFParser bad array close");
        warn("treating unexpected array close token as null");
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_dict_close:
        QTC::TC("qpdf", "QPDFParser bad dictionary close");
        warn("unexpected dictionary close token");
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_array_open:
    case QPDFTokenizer::tt_dict_open:
        // Start a container: reset the stack to a single frame and parse its contents.
        stack.clear();
        stack.emplace_back(
            input, (token_type == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
        frame = &stack.back();
        return parseRemainder(content_stream);

    case QPDFTokenizer::tt_bool:
        return withDescription<QPDF_Bool>(tokenizer.getValue() == "true");

    case QPDFTokenizer::tt_null:
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_integer:
        return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));

    case QPDFTokenizer::tt_real:
        return withDescription<QPDF_Real>(tokenizer.getValue());

    case QPDFTokenizer::tt_name:
        return withDescription<QPDF_Name>(tokenizer.getValue());

    case QPDFTokenizer::tt_word:
        {
            auto const& word = tokenizer.getValue();
            if (content_stream) {
                return withDescription<QPDF_Operator>(word);
            }
            if (word != "endobj") {
                QTC::TC("qpdf", "QPDFParser treat word as string");
                warn("unknown token while reading object; treating as string");
                return withDescription<QPDF_String>(word);
            }
            // "endobj" was seen before anything else was read. Report a null object and put
            // the input source back to where the token started.
            input.seek(input.getLastOffset(), SEEK_SET);
            empty = true;
            return {QPDFObject::create<QPDF_Null>()};
        }

    case QPDFTokenizer::tt_string:
        if (!decrypter) {
            return withDescription<QPDF_String>(tokenizer.getValue());
        }
        {
            // Decrypt a private copy of the token value.
            std::string decrypted{tokenizer.getValue()};
            decrypter->decryptString(decrypted);
            return withDescription<QPDF_String>(decrypted);
        }

    default:
        warn("treating unknown token type as null while reading object");
        return {QPDFObject::create<QPDF_Null>()};
    }
}
253
254
// Parse the contents of an array or dictionary whose opening token has already been consumed
// and whose frame is the current `frame`. Tokens are read in a loop; nested containers push
// and pop frames on `stack`. The method returns when the outermost container is closed, when
// parsing is abandoned (null object) or, in content stream mode, at EOF (uninitialized
// object).
QPDFObjectHandle
QPDFParser::parseRemainder(bool content_stream)
{
    // IMPORTANT: nothing in here may resolve an object. Never inspect the type of an object
    // unless it is known to be direct; resolving could read the object, move the file pointer
    // and make QPDF::inParse() throw a logic error.

    bad_count = 0;
    // True while the most recent dictionary key was "/Contents" and a decrypter is in use.
    bool contents_key_seen = false;

    for (;;) {
        if (!tokenizer.nextToken(input, object_description)) {
            warn(tokenizer.getErrorMessage());
        }
        ++good_count; // optimistically

        if (int_count != 0) {
            // Integers are buffered because they may turn out to be the id and generation of
            // an indirect reference ("id gen R").
            if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
                if (++int_count > 2) {
                    // More than two integers in a row: flush the oldest one.
                    addInt(int_count);
                }
                last_offset_buffer[int_count % 2] = input.getLastOffset();
                int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
                continue;
            }

            if (int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
                tokenizer.getValue() == "R") {
                if (!context) {
                    QTC::TC("qpdf", "QPDFParser indirect without context");
                    throw std::logic_error(
                        "QPDFParser::parse called without context on an object "
                        "with indirect references");
                }
                auto ref_id = QIntC::to_int(int_buffer[(int_count - 1) % 2]);
                auto ref_gen = QIntC::to_int(int_buffer[(int_count) % 2]);
                if (ref_id < 1 || ref_gen < 0 || ref_gen >= 65535) {
                    QTC::TC("qpdf", "QPDFParser invalid objgen");
                    addNull();
                } else {
                    add(ParseGuard::getObject(context, ref_id, ref_gen, parse_pdf));
                }
                int_count = 0;
                continue;
            }

            if (int_count > 0) {
                // Not a reference after all: emit the buffered integers, oldest first, and
                // then fall through to handle the current token.
                if (int_count > 1) {
                    addInt(int_count - 1);
                }
                addInt(int_count);
                int_count = 0;
            }
        }

        switch (tokenizer.getType()) {
        case QPDFTokenizer::tt_eof:
            warn("parse error while reading object");
            if (!content_stream) {
                QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
                warn("unexpected EOF");
                return {QPDFObject::create<QPDF_Null>()};
            }
            // A content stream signals EOF with an uninitialized object.
            return {};

        case QPDFTokenizer::tt_bad:
            QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
            if (tooManyBadTokens()) {
                return {QPDFObject::create<QPDF_Null>()};
            }
            addNull();
            continue;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
            warn("treating unexpected brace token as null");
            if (tooManyBadTokens()) {
                return {QPDFObject::create<QPDF_Null>()};
            }
            addNull();
            continue;

        case QPDFTokenizer::tt_array_close:
            if (!max_bad_count && (bad_count || sanity_checks)) {
                // Called only for the warning it triggers.
                (void)tooManyBadTokens();
                return {QPDFObject::create<QPDF_Null>()};
            }
            if (frame->state != st_array) {
                QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
                if (sanity_checks) {
                    // With sanity checks on, treat the container nesting as corrupt and the
                    // object as unusable.
                    warn("unexpected array close token; giving up on reading object");
                    return {QPDFObject::create<QPDF_Null>()};
                }
                warn("treating unexpected array close token as null");
                if (tooManyBadTokens()) {
                    return {QPDFObject::create<QPDF_Null>()};
                }
                addNull();
                continue;
            }
            {
                auto object = frame->null_count > 100
                    ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
                    : QPDFObject::create<QPDF_Array>(std::move(frame->olist));
                // `offset` is the position just after "[", so step back one character to
                // describe the array by the position of its "[". This has been explicitly
                // tested with whitespace around the array start delimiter. getLastOffset
                // refers to the array end token and is therefore of no use here.
                setDescription(object, frame->offset - 1);
                if (stack.size() <= 1) {
                    return object;
                }
                stack.pop_back();
                frame = &stack.back();
                add(std::move(object));
            }
            continue;

        case QPDFTokenizer::tt_dict_close:
            if (!max_bad_count && (bad_count || sanity_checks)) {
                // Called only for the warning it triggers.
                (void)tooManyBadTokens();
                return {QPDFObject::create<QPDF_Null>()};
            }
            if (frame->state > st_dictionary_value) {
                if (sanity_checks) {
                    // With sanity checks on, treat the container nesting as corrupt and the
                    // object as unusable.
                    warn("unexpected dictionary close token; giving up on reading object");
                    return {QPDFObject::create<QPDF_Null>()};
                }
                warn("unexpected dictionary close token");
                if (tooManyBadTokens()) {
                    return {QPDFObject::create<QPDF_Null>()};
                }
                addNull();
                continue;
            }
            {
                // Try to recover more or less gracefully from invalid dictionaries.
                auto& entries = frame->dict;

                if (frame->state == st_dictionary_value) {
                    QTC::TC("qpdf", "QPDFParser no val for last key");
                    warn(
                        frame->offset,
                        "dictionary ended prematurely; using null as value for last key");
                    entries[frame->key] = QPDFObject::create<QPDF_Null>();
                }
                if (!frame->olist.empty()) {
                    if (sanity_checks) {
                        warn(
                            frame->offset,
                            "expected dictionary keys but found non-name objects; ignoring");
                    } else {
                        fixMissingKeys();
                    }
                }

                // For a /Sig dictionary with a /ByteRange, replace the /Contents string with
                // the value that was recorded before decryption.
                if (!frame->contents_string.empty() && entries.contains("/Type") &&
                    entries["/Type"].isNameAndEquals("/Sig") && entries.contains("/ByteRange") &&
                    entries.contains("/Contents") && entries["/Contents"].isString()) {
                    entries["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
                    entries["/Contents"].setParsedOffset(frame->contents_offset);
                }
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(entries));
                // `offset` is the position just after "<<", so step back two characters to
                // describe the dictionary by the position of its "<<". This has been
                // explicitly tested with whitespace around the dictionary start delimiter.
                // getLastOffset refers to the dictionary end token and is therefore of no use
                // here.
                setDescription(object, frame->offset - 2);
                if (stack.size() <= 1) {
                    return object;
                }
                stack.pop_back();
                frame = &stack.back();
                add(std::move(object));
            }
            continue;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (stack.size() > 499) {
                QTC::TC("qpdf", "QPDFParser too deep");
                warn("ignoring excessively deeply nested data structure");
                return {QPDFObject::create<QPDF_Null>()};
            }
            contents_key_seen = false;
            stack.emplace_back(
                input,
                (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
                                                                      : st_dictionary_key);
            frame = &stack.back();
            continue;

        case QPDFTokenizer::tt_bool:
            addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
            continue;

        case QPDFTokenizer::tt_null:
            addNull();
            continue;

        case QPDFTokenizer::tt_integer:
            if (content_stream) {
                addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
            } else {
                // Hold the integer back; it may be the start of an indirect reference.
                last_offset_buffer[1] = input.getLastOffset();
                int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
                int_count = 1;
            }
            continue;

        case QPDFTokenizer::tt_real:
            addScalar<QPDF_Real>(tokenizer.getValue());
            continue;

        case QPDFTokenizer::tt_name:
            if (frame->state != st_dictionary_key) {
                addScalar<QPDF_Name>(tokenizer.getValue());
                continue;
            }
            // The name is a dictionary key; its value comes next.
            frame->key = tokenizer.getValue();
            frame->state = st_dictionary_value;
            contents_key_seen = decrypter && frame->key == "/Contents";
            continue;

        case QPDFTokenizer::tt_word:
            if (content_stream) {
                addScalar<QPDF_Operator>(tokenizer.getValue());
                continue;
            }

            if (!sanity_checks) {
                QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
                warn("unknown token while reading object; treating as string");
                if (tooManyBadTokens()) {
                    return {QPDFObject::create<QPDF_Null>()};
                }
                addScalar<QPDF_String>(tokenizer.getValue());
                continue;
            }

            if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") {
                // With sanity checks on, an unexpected endobj or endstream is taken to mean
                // that parsing has run past the end of the object.
                warn(
                    "unexpected 'endobj' or 'endstream' while reading object; giving up on "
                    "reading object");
                return {QPDFObject::create<QPDF_Null>()};
            }

            warn("unknown token while reading object; treating as null");
            if (tooManyBadTokens()) {
                return {QPDFObject::create<QPDF_Null>()};
            }
            addNull();
            continue;

        case QPDFTokenizer::tt_string:
            {
                auto const& raw = tokenizer.getValue();
                if (!decrypter) {
                    addScalar<QPDF_String>(raw);
                    continue;
                }
                if (contents_key_seen) {
                    // Remember the undecrypted /Contents value and where it was found.
                    frame->contents_string = raw;
                    frame->contents_offset = input.getLastOffset();
                    contents_key_seen = false;
                }
                std::string decrypted{raw};
                decrypter->decryptString(decrypted);
                addScalar<QPDF_String>(decrypted);
            }
            continue;

        default:
            warn("treating unknown token type as null while reading object");
            if (tooManyBadTokens()) {
                return {QPDFObject::create<QPDF_Null>()};
            }
            addNull();
            break;
        }
    }
}
549
550
void
551
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
552
4.41M
{
553
4.41M
    if (frame->state != st_dictionary_value) {
554
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
555
        // processing once the tt_dict_close token has been found.
556
3.36M
        frame->olist.emplace_back(std::move(obj));
557
3.36M
    } else {
558
1.05M
        if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
559
60.6k
            warnDuplicateKey();
560
60.6k
        }
561
1.05M
        frame->state = st_dictionary_key;
562
1.05M
    }
563
4.41M
}
564
565
void
566
QPDFParser::addNull()
567
322k
{
568
322k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
569
570
322k
    if (frame->state != st_dictionary_value) {
571
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
572
        // processing once the tt_dict_close token has been found.
573
283k
        frame->olist.emplace_back(null_obj);
574
283k
    } else {
575
39.1k
        if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
576
5.17k
            warnDuplicateKey();
577
5.17k
        }
578
39.1k
        frame->state = st_dictionary_key;
579
39.1k
    }
580
322k
    ++frame->null_count;
581
322k
}
582
583
void
584
QPDFParser::addInt(int count)
585
569k
{
586
569k
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
587
569k
    obj->setDescription(context, description, last_offset_buffer[count % 2]);
588
569k
    add(std::move(obj));
589
569k
}
590
591
template <typename T, typename... Args>
592
void
593
QPDFParser::addScalar(Args&&... args)
594
3.22M
{
595
3.22M
    if ((bad_count || sanity_checks) &&
596
3.13M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
128k
        max_bad_count = 0;
600
128k
        return;
601
128k
    }
602
3.09M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
3.09M
    obj->setDescription(context, description, input.getLastOffset());
604
3.09M
    add(std::move(obj));
605
3.09M
}
void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
594
20.2k
{
595
20.2k
    if ((bad_count || sanity_checks) &&
596
18.2k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
253
        max_bad_count = 0;
600
253
        return;
601
253
    }
602
19.9k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
19.9k
    obj->setDescription(context, description, input.getLastOffset());
604
19.9k
    add(std::move(obj));
605
19.9k
}
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
594
842k
{
595
842k
    if ((bad_count || sanity_checks) &&
596
842k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
30.7k
        max_bad_count = 0;
600
30.7k
        return;
601
30.7k
    }
602
812k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
812k
    obj->setDescription(context, description, input.getLastOffset());
604
812k
    add(std::move(obj));
605
812k
}
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
594
192k
{
595
192k
    if ((bad_count || sanity_checks) &&
596
163k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
587
        max_bad_count = 0;
600
587
        return;
601
587
    }
602
191k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
191k
    obj->setDescription(context, description, input.getLastOffset());
604
191k
    add(std::move(obj));
605
191k
}
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
594
1.05M
{
595
1.05M
    if ((bad_count || sanity_checks) &&
596
1.01M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
35.3k
        max_bad_count = 0;
600
35.3k
        return;
601
35.3k
    }
602
1.01M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
1.01M
    obj->setDescription(context, description, input.getLastOffset());
604
1.01M
    add(std::move(obj));
605
1.01M
}
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
594
204k
{
595
204k
    if ((bad_count || sanity_checks) &&
596
204k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
61.1k
        max_bad_count = 0;
600
61.1k
        return;
601
61.1k
    }
602
143k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
143k
    obj->setDescription(context, description, input.getLastOffset());
604
143k
    add(std::move(obj));
605
143k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
594
897k
{
595
897k
    if ((bad_count || sanity_checks) &&
596
876k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
221
        max_bad_count = 0;
600
221
        return;
601
221
    }
602
897k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
897k
    obj->setDescription(context, description, input.getLastOffset());
604
897k
    add(std::move(obj));
605
897k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
594
16.4k
{
595
16.4k
    if ((bad_count || sanity_checks) &&
596
16.3k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
597
        // Stop adding scalars. We are going to abort when the close token or a bad token is
598
        // encountered.
599
205
        max_bad_count = 0;
600
205
        return;
601
205
    }
602
16.2k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603
16.2k
    obj->setDescription(context, description, input.getLastOffset());
604
16.2k
    add(std::move(obj));
605
16.2k
}
606
607
// Create a QPDFObject of type T by forwarding `args` to QPDFObject::create<T>, tag it with the
// parser's `context` and `description` at offset `start`, and return it wrapped in a
// QPDFObjectHandle.
// NOTE(review): `start` is presumably the offset at which the current object began -- confirm
// against the class declaration.
template <typename T, typename... Args>
608
QPDFObjectHandle
609
QPDFParser::withDescription(Args&&... args)
610
1.72M
{
611
1.72M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
1.72M
    obj->setDescription(context, description, start);
613
1.72M
    return {obj};
614
1.72M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Bool, bool>(bool&&)
Line
Count
Source
610
1.79k
{
611
1.79k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
1.79k
    obj->setDescription(context, description, start);
613
1.79k
    return {obj};
614
1.79k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&)
Line
Count
Source
610
217k
{
611
217k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
217k
    obj->setDescription(context, description, start);
613
217k
    return {obj};
614
217k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
610
136k
{
611
136k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
136k
    obj->setDescription(context, description, start);
613
136k
    return {obj};
614
136k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
610
100k
{
611
100k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
100k
    obj->setDescription(context, description, start);
613
100k
    return {obj};
614
100k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
610
1.24M
{
611
1.24M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
1.24M
    obj->setDescription(context, description, start);
613
1.24M
    return {obj};
614
1.24M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
610
19.3k
{
611
19.3k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
19.3k
    obj->setDescription(context, description, start);
613
19.3k
    return {obj};
614
19.3k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
610
348
{
611
348
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
612
348
    obj->setDescription(context, description, start);
613
348
    return {obj};
614
348
}
615
616
// Attach the parser's `context` and `description` to `obj`, recording `parsed_offset` as the
// object's offset. A null `obj` is silently ignored.
void
617
QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
618
417k
{
619
417k
    if (obj) {
620
417k
        obj->setDescription(context, description, parsed_offset);
621
417k
    }
622
417k
}
623
624
// Store every object in the current frame's `olist` in the frame's `dict` under a synthetic key
// of the form "/QPDFFake<N>", emitting one warning (at `frame->offset`) per inserted key.
//
// A candidate key is rejected if it is already present in `frame->dict` or if it equals the
// string value of any name object found in `olist`. The counter N is shared across all items, so
// a candidate number is never tried twice within one call.
void
625
QPDFParser::fixMissingKeys()
626
6.63k
{
627
6.63k
    // String values of all name-typed objects in olist; fake keys must not collide with these.
    std::set<std::string> names;
628
17.8k
    for (auto& obj: frame->olist) {
629
17.8k
        if (obj.getObj()->getTypeCode() == ::ot_name) {
630
70
            names.insert(obj.getObj()->getStringValue());
631
70
        }
632
17.8k
    }
633
6.63k
    int next_fake_key = 1;
634
17.7k
    for (auto const& item: frame->olist) {
635
17.7k
        // Keep generating candidate keys until an unused one is found for this item.
        while (true) {
636
17.7k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
637
17.7k
            const bool found_fake = !frame->dict.contains(key) && !names.contains(key);
638
17.7k
            // Records which case occurred (0 = key usable, 1 = collision); presumably for
            // qpdf's test-coverage tracking -- confirm against QTC.
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
639
17.7k
            if (found_fake) {
640
17.7k
                warn(
641
17.7k
                    frame->offset,
642
17.7k
                    "expected dictionary key but found non-name object; inserting key " + key);
643
17.7k
                frame->dict[key] = item;
644
17.7k
                break;
645
17.7k
            }
646
17.7k
        }
647
17.7k
    }
648
6.63k
}
649
650
// Update the parser's error counters and decide whether parsing of the current object should be
// abandoned. Returns true (after emitting a warning) to give up, false to continue.
//
// Decision order:
//   1. If the current frame's `olist` or `dict` holds more than 5000 entries and `bad_count` is
//      non-zero, warn and return true. If `bad_count` is zero, only a warning is emitted and
//      evaluation continues with the steps below.
//      NOTE(review): that second warning says "giving up on reading object" but there is no
//      return after it -- confirm whether falling through is intended.
//   2. If `max_bad_count` is non-zero it is decremented; if it is still positive and
//      `good_count` exceeds 4, reset `good_count` to 0 and `bad_count` to 1 and return false.
//      (`good_count` is presumably the number of good tokens since the last bad one -- confirm.)
//   3. Otherwise increment `bad_count`; give up if it exceeds 5, or if the frame is not in the
//      `st_array` state and `olist` holds more entries than `max_bad_count`.
//   4. Otherwise reset `good_count` to 0 and return false.
bool
651
QPDFParser::tooManyBadTokens()
652
279k
{
653
279k
    if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
654
89
        if (bad_count) {
655
61
            warn(
656
61
                "encountered errors while parsing an array or dictionary with more than 5000 "
657
61
                "elements; giving up on reading object");
658
61
            return true;
659
61
        }
660
28
        warn(
661
28
            "encountered an array or dictionary with more than 5000 elements during xref recovery; "
662
28
            "giving up on reading object");
663
28
    }
664
279k
    // Note: --max_bad_count is only evaluated (and the budget only consumed) when max_bad_count
    // is non-zero, because of short-circuit evaluation.
    if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
665
106k
        good_count = 0;
666
106k
        bad_count = 1;
667
106k
        return false;
668
106k
    }
669
172k
    if (++bad_count > 5 ||
670
167k
        (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
671
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
672
        // exceeds the remaining number of allowable total errors.
673
8.02k
        warn("too many errors; giving up on reading object");
674
8.02k
        return true;
675
8.02k
    }
676
164k
    good_count = 0;
677
164k
    return false;
678
172k
}
679
680
// Report the problem described by `e`: forwarded to `context->warn(e)` when a context is set,
// otherwise thrown as an exception.
void
681
QPDFParser::warn(QPDFExc const& e) const
682
491k
{
683
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
684
    // object. If parsing for some other reason, such as an explicit creation of an object from a
685
    // string, then just throw the exception.
686
491k
    if (context) {
687
491k
        context->warn(e);
688
491k
    } else {
689
0
        throw e;
690
0
    }
691
491k
}
692
693
// Warn, at the current frame's offset, that the dictionary key held in `frame->key` occurs more
// than once. The message states that the last occurrence overrides earlier ones; this function
// itself only emits the warning (and records a QTC case).
void
694
QPDFParser::warnDuplicateKey()
695
65.8k
{
696
65.8k
    QTC::TC("qpdf", "QPDFParser duplicate dict key");
697
65.8k
    warn(
698
65.8k
        frame->offset,
699
65.8k
        "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
700
65.8k
}
701
702
// Build a qpdf_e_damaged_pdf QPDFExc for `msg` at `offset` and pass it to warn(QPDFExc const&).
//
// When `stream_id` is non-zero the exception is labelled with
// "<filename> object stream <stream_id>" and "object <obj_id> 0"; otherwise it is labelled with
// the input source's name and `object_description`.
// NOTE(review): the `stream_id` branch dereferences `context` without a null check -- presumably
// a non-zero `stream_id` implies a non-null context; confirm against callers.
void
703
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
704
491k
{
705
491k
    if (stream_id) {
706
15.3k
        std::string descr = "object "s + std::to_string(obj_id) + " 0";
707
15.3k
        std::string name = context->getFilename() + " object stream " + std::to_string(stream_id);
708
15.3k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
709
475k
    } else {
710
475k
        warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg));
711
475k
    }
712
491k
}
713
714
// Convenience overload: warn with `msg` at the offset reported by `input.getLastOffset()`.
void
715
QPDFParser::warn(std::string const& msg) const
716
349k
{
717
349k
    warn(input.getLastOffset(), msg);
718
349k
}