Coverage Report

Created: 2026-05-30 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
17.1M
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
17.1M
    {
28
17.1M
        if (objects) {
29
17.0M
            objects->inParse(true);
30
17.0M
        }
31
17.1M
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
4.37M
    {
36
4.37M
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
4.37M
    }
38
39
    ~ParseGuard()
40
17.1M
    {
41
17.1M
        if (objects) {
42
17.0M
            objects->inParse(false);
43
17.0M
        }
44
17.1M
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
// Parse a single object from `input` using a fresh tokenizer and no string decrypter. If the
// parser yields an empty handle, a null object is returned in its place.
QPDFObjectHandle
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
{
    qpdf::Tokenizer tokenizer;
    QPDFObjectHandle parsed = Parser(
                                  input,
                                  make_description(input.getName(), object_description),
                                  object_description,
                                  tokenizer,
                                  nullptr,
                                  context,
                                  false)
                                  .parse();
    if (!parsed) {
        // Nothing usable was parsed; substitute an explicit null object.
        return {QPDFObject::create<QPDF_Null>()};
    }
    return parsed;
}
68
69
std::pair<QPDFObjectHandle, bool>
70
Parser::parse_content(
71
    InputSource& input,
72
    std::shared_ptr<QPDFObject::Description> sp_description,
73
    qpdf::Tokenizer& tokenizer,
74
    QPDF* context)
75
15.0M
{
76
15.0M
    static const std::string content("content"); // GCC12 - make constexpr
77
15.0M
    auto p = Parser(
78
15.0M
        input,
79
15.0M
        std::move(sp_description),
80
15.0M
        content,
81
15.0M
        tokenizer,
82
15.0M
        nullptr,
83
15.0M
        context,
84
15.0M
        true,
85
15.0M
        0,
86
15.0M
        0,
87
15.0M
        context && context->doc().reconstructed_xref());
88
15.0M
    if (auto result = p.parse(true)) {
89
15.0M
        return {result, false};
90
15.0M
    }
91
16.0k
    return {{}, p.empty_};
92
15.0M
}
93
94
// Parse a single object on behalf of the deprecated QPDFObjectHandle::parse.
//
// `empty` receives the parser's `empty_` flag. An empty parse result is replaced by a null object.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF* context)
{
    // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
    // only user of the 'empty' member. When removing this overload also remove 'empty'.
    Parser parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        *tokenizer.m,
        decrypter,
        context,
        false);
    QPDFObjectHandle parsed = parser.parse();
    empty = parser.empty_;
    if (!parsed) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return parsed;
}
120
121
// Parse a single object belonging to `context`, optionally decrypting strings and optionally
// applying sanity checks. Unlike the context-less overloads, an empty result is returned as-is
// rather than being replaced by a null object.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    qpdf::Tokenizer& tokenizer,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF& context,
    bool sanity_checks)
{
    auto description = make_description(input.getName(), object_description);
    return Parser(
               input,
               std::move(description),
               object_description,
               tokenizer,
               decrypter,
               &context,
               true,
               0,
               0,
               sanity_checks)
        .parse();
}
143
144
// Parse object `obj_id` out of the buffer of object stream `stream_id`. The object's description
// records the (stream_id, obj_id) pair; no textual description and no decrypter are used.
QPDFObjectHandle
Parser::parse(
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
{
    auto description = std::make_shared<QPDFObject::Description>(
        QPDFObject::ObjStreamDescr(stream_id, obj_id));
    // The Parser temporary and the "" argument must stay within this single full expression.
    return Parser(
               input,
               std::move(description),
               "",
               tokenizer,
               nullptr,
               &context,
               true,
               stream_id,
               obj_id)
        .parse();
}
161
162
// Run the parser and translate failures into results:
//   - Error: give up on the object quietly and return an empty handle.
//   - QPDFExc and std::logic_error: propagate unchanged to the caller.
//   - any other std::exception: emit a warning and return an empty handle.
//
// The propagating handlers use a bare `throw;` so that the original exception object is rethrown
// with its dynamic type intact. `throw e;` would throw a copy sliced to the handler's static
// type, e.g. turning a std::length_error into a plain std::logic_error.
QPDFObjectHandle
Parser::parse(bool content_stream)
{
    try {
        return parse_first(content_stream);
    } catch (Error&) {
        return {};
    } catch (QPDFExc&) {
        throw;
    } catch (std::logic_error&) {
        throw;
    } catch (std::exception& e) {
        warn("treating object as null because of error during parsing: "s + e.what());
        return {};
    }
}
178
179
// Read the first token of an object and build the result.
//
// Scalars are returned directly, described with the offset at which parsing started. An array
// or dictionary open token resets the parse stack and hands over to parse_remainder. Unexpected
// or bad tokens produce an empty handle, most of them after a warning.
QPDFObjectHandle
Parser::parse_first(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    QPDF::Doc::ParseGuard pg(context_);
    start_ = input_.tell();
    if (!tokenizer_.nextToken(input_, object_description_)) {
        warn(tokenizer_.getErrorMessage());
    }

    switch (tokenizer_.getType()) {
    case QPDFTokenizer::tt_eof:
        if (content_stream) {
            // In content stream mode, leave object uninitialized to indicate EOF
            empty_ = true;
            return {};
        }
        warn("unexpected EOF");
        return {};

    case QPDFTokenizer::tt_bad:
        return {};

    case QPDFTokenizer::tt_brace_open:
    case QPDFTokenizer::tt_brace_close:
        warn("treating unexpected brace token as null");
        return {};

    case QPDFTokenizer::tt_array_close:
        warn("treating unexpected array close token as null");
        return {};

    case QPDFTokenizer::tt_dict_close:
        warn("unexpected dictionary close token");
        return {};

    case QPDFTokenizer::tt_array_open:
    case QPDFTokenizer::tt_dict_open:
        // Start a fresh container stack whose single frame is the container just opened.
        stack_.clear();
        stack_.emplace_back(
            input_,
            (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
        frame_ = &stack_.back();
        return parse_remainder(content_stream);

    case QPDFTokenizer::tt_bool:
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");

    case QPDFTokenizer::tt_null:
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_integer:
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));

    case QPDFTokenizer::tt_real:
        return with_description<QPDF_Real>(tokenizer_.getValue());

    case QPDFTokenizer::tt_name:
        return with_description<QPDF_Name>(tokenizer_.getValue());

    case QPDFTokenizer::tt_word:
        {
            auto const& value = tokenizer_.getValue();
            if (content_stream) {
                return with_description<QPDF_Operator>(value);
            }
            if (value == "endobj") {
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
                // to allow empty objects, but they have been encountered in actual PDF files and
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
                // input source's offset.
                empty_ = true;
                input_.seek(input_.getLastOffset(), SEEK_SET);
                // content_stream is known to be false here, so the warning is unconditional.
                warn("empty object treated as null");
                return {};
            }
            warn("unknown token while reading object; treating as string");
            return with_description<QPDF_String>(value);
        }

    case QPDFTokenizer::tt_string:
        if (decrypter_) {
            // Decrypt a private copy, then move it into the new object to avoid a second copy.
            std::string s{tokenizer_.getValue()};
            decrypter_->decryptString(s);
            return with_description<QPDF_String>(std::move(s));
        }
        return with_description<QPDF_String>(tokenizer_.getValue());

    default:
        warn("treating unknown token type as null while reading object");
        return {};
    }
}
279
280
// Parse the rest of an array or dictionary whose opening token parse_first has already consumed.
//
// Tokens are read in a loop and added to the innermost open container (`frame_`); nested
// containers push and pop frames on `stack_`. The finished outermost container is returned. An
// empty handle is returned on EOF and, when sanity checks are enabled, on evidence that the
// container nesting is corrupt or that parsing has run past the end of the object.
QPDFObjectHandle
Parser::parse_remainder(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    bad_count_ = 0;
    // True while the most recently read dictionary key is /Contents and a decrypter is in use.
    bool b_contents = false;

    while (true) {
        if (!tokenizer_.nextToken(input_, object_description_)) {
            warn(tokenizer_.getErrorMessage());
        }
        ++good_count_; // optimistically

        if (int_count_ != 0) {
            // Special handling of indirect references. Treat integer tokens as part of an indirect
            // reference until proven otherwise.
            if (tokenizer_.getType() == QPDFTokenizer::tt_integer) {
                if (++int_count_ > 2) {
                    // Process the oldest buffered integer.
                    add_int(int_count_);
                }
                last_offset_buffer_[int_count_ % 2] = input_.getLastOffset();
                int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                continue;

            } else if (
                int_count_ >= 2 && tokenizer_.getType() == QPDFTokenizer::tt_word &&
                tokenizer_.getValue() == "R") {
                if (!context_) {
                    throw std::logic_error(
                        "Parser::parse called without context on an object with indirect "
                        "references");
                }
                auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
                auto gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
                } else {
                    add_bad_null(
                        "treating bad indirect reference (" + std::to_string(id) + " " +
                        std::to_string(gen) + " R) as null");
                }
                int_count_ = 0;
                continue;

            } else if (int_count_ > 0) {
                // Process the buffered integers before processing the current token.
                if (int_count_ > 1) {
                    add_int(int_count_ - 1);
                }
                add_int(int_count_);
                int_count_ = 0;
            }
        }

        switch (tokenizer_.getType()) {
        case QPDFTokenizer::tt_eof:
            warn("parse error while reading object");
            if (content_stream) {
                // In content stream mode, leave object uninitialized to indicate EOF
                return {};
            }
            warn("unexpected EOF");
            return {};

        case QPDFTokenizer::tt_bad:
            check_too_many_bad_tokens();
            add_null();
            continue;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            add_bad_null("treating unexpected brace token as null");
            continue;

        case QPDFTokenizer::tt_array_close:
            if (frame_->state == st_array) {
                auto object = frame_->null_count > 100
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
                // `offset` points just past "[". Set the rewind offset to point to the beginning
                // of "[". This has been explicitly tested with whitespace surrounding the array
                // start delimiter. getLastOffset points to the array end token and therefore
                // can't be used here.
                set_description(object, frame_->offset - 1);
                if (stack_.size() <= 1) {
                    return object;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(object));
            } else {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected array close token; giving up on reading object");
                    return {};
                }
                add_bad_null("treating unexpected array close token as null");
            }
            continue;

        case QPDFTokenizer::tt_dict_close:
            if (frame_->state <= st_dictionary_value) {
                // Attempt to recover more or less gracefully from invalid dictionaries.
                auto& dict = frame_->dict;

                if (frame_->state == st_dictionary_value) {
                    warn(
                        frame_->offset,
                        "dictionary ended prematurely; using null as value for last key");
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
                }
                if (!frame_->olist.empty()) {
                    if (sanity_checks_) {
                        warn(
                            frame_->offset,
                            "expected dictionary keys but found non-name objects; ignoring");
                    } else {
                        fix_missing_keys();
                    }
                }

                // For a /Type /Sig dictionary with a /ByteRange and a string /Contents, store
                // the /Contents string exactly as it was read (before decryption) together with
                // its offset.
                if (!frame_->contents_string.empty() && dict.contains("/Type") &&
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
                }
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
                // `offset` points just past "<<". Set the rewind offset to point to the beginning
                // of "<<". This has been explicitly tested with whitespace surrounding the
                // dictionary start delimiter. getLastOffset points to the dictionary end token
                // and therefore can't be used here.
                set_description(object, frame_->offset - 2);
                if (stack_.size() <= 1) {
                    return object;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(object));
            } else {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected dictionary close token; giving up on reading object");
                    return {};
                }
                add_bad_null("unexpected dictionary close token");
            }
            continue;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (stack_.size() > max_nesting) {
                limits_error(
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
            }
            b_contents = false;
            stack_.emplace_back(
                input_,
                (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array
                                                                       : st_dictionary_key);
            frame_ = &stack_.back();
            continue;

        case QPDFTokenizer::tt_bool:
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
            continue;

        case QPDFTokenizer::tt_null:
            add_null();
            continue;

        case QPDFTokenizer::tt_integer:
            if (!content_stream) {
                // Buffer token in case it is part of an indirect reference.
                last_offset_buffer_[1] = input_.getLastOffset();
                int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                int_count_ = 1;
            } else {
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
            }
            continue;

        case QPDFTokenizer::tt_real:
            add_scalar<QPDF_Real>(tokenizer_.getValue());
            continue;

        case QPDFTokenizer::tt_name:
            if (frame_->state == st_dictionary_key) {
                // A name in key position becomes the pending key; its value is read next.
                frame_->key = tokenizer_.getValue();
                frame_->state = st_dictionary_value;
                b_contents = decrypter_ && frame_->key == "/Contents";
            } else {
                add_scalar<QPDF_Name>(tokenizer_.getValue());
            }
            continue;

        case QPDFTokenizer::tt_word:
            if (content_stream) {
                add_scalar<QPDF_Operator>(tokenizer_.getValue());
                continue;
            }

            if (sanity_checks_) {
                if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") {
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
                    // we are parsing past the end of the object.
                    warn(
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
                        "reading object");
                    return {};
                }

                add_bad_null("unknown token while reading object; treating as null");
                continue;
            }

            warn("unknown token while reading object; treating as string");
            check_too_many_bad_tokens();
            add_scalar<QPDF_String>(tokenizer_.getValue());
            continue;

        case QPDFTokenizer::tt_string:
            {
                auto const& val = tokenizer_.getValue();
                if (decrypter_) {
                    if (b_contents) {
                        // Remember the undecrypted /Contents string for the dictionary close
                        // handling above.
                        frame_->contents_string = val;
                        frame_->contents_offset = input_.getLastOffset();
                        b_contents = false;
                    }
                    // Decrypt a private copy, then move it into the new object to avoid a second
                    // copy.
                    std::string s{val};
                    decrypter_->decryptString(s);
                    add_scalar<QPDF_String>(std::move(s));
                } else {
                    add_scalar<QPDF_String>(val);
                }
            }
            continue;

        default:
            add_bad_null("treating unknown token type as null while reading object");
            continue;
        }
    }
}
533
534
void
535
Parser::add(std::shared_ptr<QPDFObject>&& obj)
536
84.1M
{
537
84.1M
    if (frame_->state != st_dictionary_value) {
538
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
539
        // processing once the tt_dict_close token has been found.
540
75.8M
        frame_->olist.emplace_back(std::move(obj));
541
75.8M
    } else {
542
8.22M
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
543
89.6k
            warn_duplicate_key();
544
89.6k
        }
545
8.22M
        frame_->state = st_dictionary_key;
546
8.22M
    }
547
84.1M
}
548
549
void
550
Parser::add_null()
551
3.44M
{
552
3.44M
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
553
554
3.44M
    if (frame_->state != st_dictionary_value) {
555
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
556
        // processing once the tt_dict_close token has been found.
557
3.08M
        frame_->olist.emplace_back(null_obj);
558
3.08M
    } else {
559
355k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
560
16.4k
            warn_duplicate_key();
561
16.4k
        }
562
355k
        frame_->state = st_dictionary_key;
563
355k
    }
564
3.44M
    ++frame_->null_count;
565
3.44M
}
566
567
void
568
Parser::add_bad_null(std::string const& msg)
569
2.26M
{
570
2.26M
    warn(msg);
571
2.26M
    check_too_many_bad_tokens();
572
2.26M
    add_null();
573
2.26M
}
574
575
void
576
Parser::add_int(int count)
577
26.4M
{
578
26.4M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
579
26.4M
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
580
26.4M
    add(std::move(obj));
581
26.4M
}
582
583
template <typename T, typename... Args>
584
void
585
Parser::add_scalar(Args&&... args)
586
51.3M
{
587
51.3M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
51.3M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
4.88k
        max_bad_count_ = 1;
592
4.88k
        check_too_many_bad_tokens(); // always throws Error()
593
4.88k
    }
594
51.3M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
51.3M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
51.3M
    add(std::move(obj));
597
51.3M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
586
114k
{
587
114k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
114k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
529
        max_bad_count_ = 1;
592
529
        check_too_many_bad_tokens(); // always throws Error()
593
529
    }
594
114k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
114k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
114k
    add(std::move(obj));
597
114k
}
void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
586
478k
{
587
478k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
478k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
43
        max_bad_count_ = 1;
592
43
        check_too_many_bad_tokens(); // always throws Error()
593
43
    }
594
478k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
478k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
478k
    add(std::move(obj));
597
478k
}
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
1.00M
{
587
1.00M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
1.00M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
915
        max_bad_count_ = 1;
592
915
        check_too_many_bad_tokens(); // always throws Error()
593
915
    }
594
1.00M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
1.00M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
1.00M
    add(std::move(obj));
597
1.00M
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
46.8M
{
587
46.8M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
46.8M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
2.45k
        max_bad_count_ = 1;
592
2.45k
        check_too_many_bad_tokens(); // always throws Error()
593
2.45k
    }
594
46.8M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
46.8M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
46.8M
    add(std::move(obj));
597
46.8M
}
void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
880k
{
587
880k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
880k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
119
        max_bad_count_ = 1;
592
119
        check_too_many_bad_tokens(); // always throws Error()
593
119
    }
594
880k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
880k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
880k
    add(std::move(obj));
597
880k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
1.78M
{
587
1.78M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
1.78M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
723
        max_bad_count_ = 1;
592
723
        check_too_many_bad_tokens(); // always throws Error()
593
723
    }
594
1.78M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
1.78M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
1.78M
    add(std::move(obj));
597
1.78M
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
586
255k
{
587
255k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
255k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
102
        max_bad_count_ = 1;
592
102
        check_too_many_bad_tokens(); // always throws Error()
593
102
    }
594
255k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
255k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
255k
    add(std::move(obj));
597
255k
}
598
599
template <typename T, typename... Args>
600
QPDFObjectHandle
601
Parser::with_description(Args&&... args)
602
15.3M
{
603
15.3M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
15.3M
    obj->setDescription(context_, description_, start_);
605
15.3M
    return {obj};
606
15.3M
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
602
6.33k
{
603
6.33k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
6.33k
    obj->setDescription(context_, description_, start_);
605
6.33k
    return {obj};
606
6.33k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
602
348k
{
603
348k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
348k
    obj->setDescription(context_, description_, start_);
605
348k
    return {obj};
606
348k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
124k
{
603
124k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
124k
    obj->setDescription(context_, description_, start_);
605
124k
    return {obj};
606
124k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
146k
{
603
146k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
146k
    obj->setDescription(context_, description_, start_);
605
146k
    return {obj};
606
146k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
14.5M
{
603
14.5M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
14.5M
    obj->setDescription(context_, description_, start_);
605
14.5M
    return {obj};
606
14.5M
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
99.2k
{
603
99.2k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
99.2k
    obj->setDescription(context_, description_, start_);
605
99.2k
    return {obj};
606
99.2k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
602
1.65k
{
603
1.65k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
1.65k
    obj->setDescription(context_, description_, start_);
605
1.65k
    return {obj};
606
1.65k
}
607
608
void
609
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
610
3.37M
{
611
3.37M
    if (obj) {
612
3.37M
        obj->setDescription(context_, description_, parsed_offset);
613
3.37M
    }
614
3.37M
}
615
616
void
617
Parser::fix_missing_keys()
618
33.2k
{
619
33.2k
    std::set<std::string> names;
620
134k
    for (auto& obj: frame_->olist) {
621
134k
        if (obj.raw_type_code() == ::ot_name) {
622
6.86k
            names.insert(obj.getName());
623
6.86k
        }
624
134k
    }
625
33.2k
    int next_fake_key = 1;
626
128k
    for (auto const& item: frame_->olist) {
627
129k
        while (true) {
628
129k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
629
129k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
630
129k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
631
129k
            if (found_fake) {
632
128k
                warn(
633
128k
                    frame_->offset,
634
128k
                    "expected dictionary key but found non-name object; inserting key " + key);
635
128k
                frame_->dict[key] = item;
636
128k
                break;
637
128k
            }
638
129k
        }
639
128k
    }
640
33.2k
}
641
642
void
643
Parser::check_too_many_bad_tokens()
644
2.80M
{
645
2.80M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
646
2.80M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
647
4.97k
        if (bad_count_) {
648
3.86k
            limits_error(
649
3.86k
                "parser-max-container-size-damaged",
650
3.86k
                "encountered errors while parsing an array or dictionary with more than " +
651
3.86k
                    std::to_string(limit) + " elements; giving up on reading object");
652
3.86k
        }
653
4.97k
        limits_error(
654
4.97k
            "parser-max-container-size",
655
4.97k
            "encountered an array or dictionary with more than " + std::to_string(limit) +
656
4.97k
                " elements during xref recovery; giving up on reading object");
657
4.97k
    }
658
2.80M
    if (max_bad_count_ && --max_bad_count_ == 0) {
659
17.5k
        limits_error(
660
17.5k
            "parser-max-errors", "too many errors during parsing; treating object as null");
661
17.5k
    }
662
2.80M
    if (good_count_ > 4) {
663
974k
        good_count_ = 0;
664
974k
        bad_count_ = 1;
665
974k
        return;
666
974k
    }
667
1.82M
    if (++bad_count_ > 5 ||
668
1.74M
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
669
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670
        // exceeds the remaining number of allowable total errors.
671
82.8k
        warn("too many errors; giving up on reading object");
672
82.8k
        throw Error();
673
82.8k
    }
674
1.74M
    good_count_ = 0;
675
1.74M
}
676
677
void
678
Parser::limits_error(std::string const& limit, std::string const& msg)
679
23.5k
{
680
23.5k
    Limits::error();
681
23.5k
    warn("limits error("s + limit + "): " + msg);
682
23.5k
    throw Error();
683
23.5k
}
684
685
void
686
Parser::warn(QPDFExc const& e) const
687
4.11M
{
688
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
689
    // object. If parsing for some other reason, such as an explicit creation of an object from a
690
    // string, then just throw the exception.
691
4.11M
    if (context_) {
692
4.11M
        context_->warn(e);
693
4.11M
    } else {
694
652
        throw e;
695
652
    }
696
4.11M
}
697
698
void
699
Parser::warn_duplicate_key()
700
106k
{
701
106k
    warn(
702
106k
        frame_->offset,
703
106k
        "dictionary has duplicated key " + frame_->key +
704
106k
            "; last occurrence overrides earlier ones");
705
106k
    check_too_many_bad_tokens();
706
106k
}
707
708
void
709
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710
4.11M
{
711
4.11M
    if (stream_id_) {
712
259k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
713
259k
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
714
259k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
715
3.85M
    } else {
716
3.85M
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
717
3.85M
    }
718
4.11M
}
719
720
void
721
Parser::warn(std::string const& msg) const
722
3.28M
{
723
3.28M
    warn(input_.getLastOffset(), msg);
724
3.28M
}