Coverage Report

Created: 2026-06-09 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    // Remembers the object table of `qpdf` (or nullptr when no QPDF is supplied) and, when one is
    // present, calls inParse(true) on it for the lifetime of the guard.
    ParseGuard(QPDF* qpdf) :
26
964k
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
964k
    {
28
964k
        if (objects) {
29
964k
            objects->inParse(true);
30
964k
        }
31
964k
    }
32
33
    // Forwards to Objects::getObjectForParser() on the object table of `qpdf`. Unlike the
    // constructor this dereferences `qpdf` unconditionally, so callers must pass a non-null QPDF.
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
574k
    {
36
574k
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
574k
    }
38
39
    // Calls inParse(false) on the object table captured by the constructor, if any.
    ~ParseGuard()
40
964k
    {
41
964k
        if (objects) {
42
964k
            objects->inParse(false);
43
964k
        }
44
964k
    }
45
    // Object table of the guarded QPDF; nullptr when the guard was built without a QPDF.
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
// Parses a single object from `input` using a freshly constructed local tokenizer and no string
// decrypter. The description is built from the input's name and `object_description`. `context`
// is passed through to the Parser and may be null. If parsing yields an uninitialized handle, a
// direct null object is returned instead, so the result is always initialized.
QPDFObjectHandle
52
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
53
0
{
54
0
    qpdf::Tokenizer tokenizer;
55
0
    if (auto result = Parser(
56
0
                          input,
57
0
                          make_description(input.getName(), object_description),
58
0
                          object_description,
59
0
                          tokenizer,
60
0
                          nullptr,
61
0
                          context,
62
0
                          false)
63
0
                          .parse()) {
64
0
        return result;
65
0
    }
66
0
    return {QPDFObject::create<QPDF_Null>()};
67
0
}
68
69
// Parses the next object of a content stream (parse() is invoked with content_stream == true).
// The object description is the fixed string "content" and no string decrypter is used. The last
// constructor argument is true only when `context` is non-null and its document reports a
// reconstructed xref table.
//
// Returns {object, false} when an object was read. Otherwise returns an uninitialized handle
// paired with the parser's `empty_` flag (parse_first sets it when EOF is the first token in
// content stream mode).
std::pair<QPDFObjectHandle, bool>
70
Parser::parse_content(
71
    InputSource& input,
72
    std::shared_ptr<QPDFObject::Description> sp_description,
73
    qpdf::Tokenizer& tokenizer,
74
    QPDF* context)
75
785k
{
76
785k
    static const std::string content("content"); // GCC12 - make constexpr
77
785k
    auto p = Parser(
78
785k
        input,
79
785k
        std::move(sp_description),
80
785k
        content,
81
785k
        tokenizer,
82
785k
        nullptr,
83
785k
        context,
84
785k
        true,
85
785k
        0,
86
785k
        0,
87
785k
        context && context->doc().reconstructed_xref());
88
785k
    if (auto result = p.parse(true)) {
89
778k
        return {result, false};
90
778k
    }
91
7.44k
    return {{}, p.empty_};
92
785k
}
93
94
// Parses a single object using the caller's public QPDFTokenizer (its private implementation
// `*tokenizer.m` is handed to the Parser) and an optional string decrypter. `empty` is an output
// parameter: it is always overwritten with the parser's `empty_` flag after parsing. If parsing
// yields an uninitialized handle, a direct null object is returned instead.
QPDFObjectHandle
95
Parser::parse(
96
    InputSource& input,
97
    std::string const& object_description,
98
    QPDFTokenizer& tokenizer,
99
    bool& empty,
100
    QPDFObjectHandle::StringDecrypter* decrypter,
101
    QPDF* context)
102
0
{
103
    // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
104
    // only user of the 'empty' member. When removing this overload also remove 'empty'.
105
0
    auto p = Parser(
106
0
        input,
107
0
        make_description(input.getName(), object_description),
108
0
        object_description,
109
0
        *tokenizer.m,
110
0
        decrypter,
111
0
        context,
112
0
        false);
113
0
    auto result = p.parse();
114
0
    empty = p.empty_;
115
0
    if (result) {
116
0
        return result;
117
0
    }
118
0
    return {QPDFObject::create<QPDF_Null>()};
119
0
}
120
121
// Parses a single object that belongs to `context`, using the caller's tokenizer and an optional
// string decrypter. `sanity_checks` is forwarded to the Parser as its last constructor argument.
// Unlike the overloads that substitute a null object, this one returns the result of
// Parser::parse() unchanged, so the returned handle may be uninitialized.
QPDFObjectHandle
122
Parser::parse(
123
    InputSource& input,
124
    std::string const& object_description,
125
    qpdf::Tokenizer& tokenizer,
126
    QPDFObjectHandle::StringDecrypter* decrypter,
127
    QPDF& context,
128
    bool sanity_checks)
129
155k
{
130
155k
    return Parser(
131
155k
               input,
132
155k
               make_description(input.getName(), object_description),
133
155k
               object_description,
134
155k
               tokenizer,
135
155k
               decrypter,
136
155k
               &context,
137
155k
               true,
138
155k
               0,
139
155k
               0,
140
155k
               sanity_checks)
141
155k
        .parse();
142
155k
}
143
144
// Parses a single object read from an offset buffer. The description is an ObjStreamDescr built
// from `stream_id` and `obj_id`, the object description string is empty, and no string decrypter
// is used. Both ids are also forwarded to the Parser constructor. The result of Parser::parse()
// is returned unchanged, so the returned handle may be uninitialized.
QPDFObjectHandle
145
Parser::parse(
146
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
147
22.5k
{
148
22.5k
    return Parser(
149
22.5k
               input,
150
22.5k
               std::make_shared<QPDFObject::Description>(
151
22.5k
                   QPDFObject::ObjStreamDescr(stream_id, obj_id)),
152
22.5k
               "",
153
22.5k
               tokenizer,
154
22.5k
               nullptr,
155
22.5k
               &context,
156
22.5k
               true,
157
22.5k
               stream_id,
158
22.5k
               obj_id)
159
22.5k
        .parse();
160
22.5k
}
161
162
// Parses one object by delegating to parse_first() and decides which failures reach the caller:
//  - Error: swallowed; an uninitialized handle is returned.
//  - QPDFExc and std::logic_error: propagated to the caller. They are rethrown with a bare
//    `throw;` so the exception object in flight is reused. `throw e;` would copy-construct a new
//    exception of the handler's static type, slicing exceptions derived from these classes.
//  - any other std::exception: reported through warn() and an uninitialized handle is returned.
//
// content_stream: forwarded unchanged to parse_first().
QPDFObjectHandle
163
Parser::parse(bool content_stream)
164
964k
{
165
964k
    try {
166
964k
        return parse_first(content_stream);
167
964k
    } catch (Error&) {
168
10.5k
        return {};
169
10.5k
    } catch (QPDFExc&) {
170
3.02k
        throw;
171
3.02k
    } catch (std::logic_error&) {
172
4
        throw;
173
2.24k
    } catch (std::exception& e) {
174
2.24k
        warn("treating object as null because of error during parsing: "s + e.what());
175
2.24k
        return {};
176
2.24k
    }
177
964k
}
178
179
// Reads the first token of an object and dispatches on its type.
//
// Scalar tokens (bool, integer, real, name, string, and words) are turned into an object whose
// description uses `start_`, the input offset recorded before the token was read. An array or
// dictionary open token resets the parse stack, pushes the first frame and hands over to
// parse_remainder(). EOF, bad tokens and unexpected close/brace tokens yield an uninitialized
// handle, in most cases after a warning.
//
// A ParseGuard is held for the whole call, including the nested parse_remainder() call.
QPDFObjectHandle
180
Parser::parse_first(bool content_stream)
181
964k
{
182
    // This method must take care not to resolve any objects. Don't check the type of any object
183
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
184
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
185
    // logic error to be thrown from QPDF::inParse().
186
187
964k
    QPDF::Doc::ParseGuard pg(context_);
188
964k
    start_ = input_.tell();
189
964k
    if (!tokenizer_.nextToken(input_, object_description_)) {
190
3.42k
        warn(tokenizer_.getErrorMessage());
191
3.42k
    }
192
193
964k
    switch (tokenizer_.getType()) {
194
4.15k
    case QPDFTokenizer::tt_eof:
195
4.15k
        if (content_stream) {
196
            // In content stream mode, leave object uninitialized to indicate EOF
197
3.48k
            empty_ = true;
198
3.48k
            return {};
199
3.48k
        }
200
674
        warn("unexpected EOF");
201
674
        return {};
202
203
3.32k
    case QPDFTokenizer::tt_bad:
204
3.32k
        return {};
205
206
252
    case QPDFTokenizer::tt_brace_open:
207
489
    case QPDFTokenizer::tt_brace_close:
208
489
        warn("treating unexpected brace token as null");
209
489
        return {};
210
211
828
    case QPDFTokenizer::tt_array_close:
212
828
        warn("treating unexpected array close token as null");
213
828
        return {};
214
215
685
    case QPDFTokenizer::tt_dict_close:
216
685
        warn("unexpected dictionary close token");
217
685
        return {};
218
219
23.7k
    case QPDFTokenizer::tt_array_open:
220
174k
    case QPDFTokenizer::tt_dict_open:
        // Start a fresh container stack; the remaining tokens are handled by parse_remainder().
221
174k
        stack_.clear();
222
174k
        stack_.emplace_back(
223
174k
            input_,
224
174k
            (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
225
174k
        frame_ = &stack_.back();
226
174k
        return parse_remainder(content_stream);
227
228
974
    case QPDFTokenizer::tt_bool:
229
974
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");
230
231
363
    case QPDFTokenizer::tt_null:
232
363
        return {QPDFObject::create<QPDF_Null>()};
233
234
163k
    case QPDFTokenizer::tt_integer:
235
163k
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
236
237
48.1k
    case QPDFTokenizer::tt_real:
238
48.1k
        return with_description<QPDF_Real>(tokenizer_.getValue());
239
240
31.1k
    case QPDFTokenizer::tt_name:
241
31.1k
        return with_description<QPDF_Name>(tokenizer_.getValue());
242
243
532k
    case QPDFTokenizer::tt_word:
244
532k
        {
245
532k
            auto const& value = tokenizer_.getValue();
246
532k
            if (content_stream) {
247
526k
                return with_description<QPDF_Operator>(value);
248
526k
            } else if (value == "endobj") {
249
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
250
                // to allow empty objects, but they have been encountered in actual PDF files and
251
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
252
                // input source's offset.
253
183
                empty_ = true;
254
183
                input_.seek(input_.getLastOffset(), SEEK_SET);
                // NOTE(review): this branch is only reached when content_stream is false, so the
                // check below is always true.
255
183
                if (!content_stream) {
256
183
                    warn("empty object treated as null");
257
183
                }
258
183
                return {};
259
5.42k
            } else {
260
5.42k
                warn("unknown token while reading object; treating as string");
261
5.42k
                return with_description<QPDF_String>(value);
262
5.42k
            }
263
532k
        }
264
265
2.89k
    case QPDFTokenizer::tt_string:
266
2.89k
        if (decrypter_) {
            // Decrypt a copy; the tokenizer's own value is left untouched.
267
232
            std::string s{tokenizer_.getValue()};
268
232
            decrypter_->decryptString(s);
269
232
            return with_description<QPDF_String>(s);
270
2.66k
        } else {
271
2.66k
            return with_description<QPDF_String>(tokenizer_.getValue());
272
2.66k
        }
273
274
0
    default:
275
0
        warn("treating unknown token type as null while reading object");
276
0
        return {};
277
964k
    }
278
964k
}
279
280
// Parses the contents of the array or dictionary whose open token was consumed by parse_first().
//
// Tokens are read in a loop. Nested containers push a new frame onto `stack_`; a matching close
// token builds the array/dictionary object from the current frame and either returns it (when it
// is the outermost frame) or pops the frame and adds the object to the parent frame.
//
// Outside content streams, integer tokens are buffered rather than added immediately because
// they may be the start of an indirect reference ("<id> <gen> R"). `int_count_` counts the
// integers seen in the current run and the two most recent ones are kept in `int_buffer_` /
// `last_offset_buffer_`, indexed by `int_count_ % 2`.
//
// Returns an uninitialized handle on EOF and, when `sanity_checks_` is set, on mismatched close
// tokens or an unexpected "endobj"/"endstream".
QPDFObjectHandle
281
Parser::parse_remainder(bool content_stream)
282
174k
{
283
    // This method must take care not to resolve any objects. Don't check the type of any object
284
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
285
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
286
    // logic error to be thrown from QPDF::inParse().
287
288
174k
    bad_count_ = 0;
    // True while the value about to be read belongs to a "/Contents" key and a decrypter is set.
289
174k
    bool b_contents = false;
290
291
8.42M
    while (true) {
292
8.41M
        if (!tokenizer_.nextToken(input_, object_description_)) {
293
50.6k
            warn(tokenizer_.getErrorMessage());
294
50.6k
        }
295
8.41M
        ++good_count_; // optimistically
296
297
8.41M
        if (int_count_ != 0) {
298
            // Special handling of indirect references. Treat integer tokens as part of an indirect
299
            // reference until proven otherwise.
300
3.99M
            if (tokenizer_.getType() == QPDFTokenizer::tt_integer) {
301
3.09M
                if (++int_count_ > 2) {
302
                    // Process the oldest buffered integer.
303
2.42M
                    add_int(int_count_);
304
2.42M
                }
305
3.09M
                last_offset_buffer_[int_count_ % 2] = input_.getLastOffset();
306
3.09M
                int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
307
3.09M
                continue;
308
309
3.09M
            } else if (
310
897k
                int_count_ >= 2 && tokenizer_.getType() == QPDFTokenizer::tt_word &&
311
610k
                tokenizer_.getValue() == "R") {
312
578k
                if (!context_) {
313
0
                    throw std::logic_error(
314
0
                        "Parser::parse called without context on an object with indirect "
315
0
                        "references");
316
0
                }
                // The older buffered integer is the object id, the newer one the generation.
317
578k
                auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
318
578k
                auto gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
319
578k
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
320
574k
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
321
574k
                } else {
322
3.55k
                    add_bad_null(
323
3.55k
                        "treating bad indirect reference (" + std::to_string(id) + " " +
324
3.55k
                        std::to_string(gen) + " R) as null");
325
3.55k
                }
326
578k
                int_count_ = 0;
327
578k
                continue;
328
329
578k
            } else if (int_count_ > 0) {
330
                // Process the buffered integers before processing the current token.
331
319k
                if (int_count_ > 1) {
332
94.0k
                    add_int(int_count_ - 1);
333
94.0k
                }
334
319k
                add_int(int_count_);
335
319k
                int_count_ = 0;
336
319k
            }
337
3.99M
        }
338
339
4.74M
        switch (tokenizer_.getType()) {
340
6.41k
        case QPDFTokenizer::tt_eof:
341
6.41k
            warn("parse error while reading object");
342
6.41k
            if (content_stream) {
343
                // In content stream mode, leave object uninitialized to indicate EOF
344
132
                return {};
345
132
            }
346
6.28k
            warn("unexpected EOF");
347
6.28k
            return {};
348
349
43.4k
        case QPDFTokenizer::tt_bad:
350
43.4k
            check_too_many_bad_tokens();
351
43.4k
            add_null();
352
43.4k
            continue;
353
354
3.24k
        case QPDFTokenizer::tt_brace_open:
355
5.28k
        case QPDFTokenizer::tt_brace_close:
356
5.28k
            add_bad_null("treating unexpected brace token as null");
357
5.28k
            continue;
358
359
179k
        case QPDFTokenizer::tt_array_close:
360
179k
            if (frame_->state == st_array) {
                // NOTE(review): the extra `true` argument is passed only when the frame holds more
                // than 100 nulls; its meaning is defined by QPDF_Array, not visible here.
361
176k
                auto object = frame_->null_count > 100
362
176k
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
363
176k
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
364
176k
                set_description(object, frame_->offset - 1);
365
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
366
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
367
                // array start delimiter. getLastOffset points to the array end token and therefore
368
                // can't be used here.
369
176k
                if (stack_.size() <= 1) {
370
17.3k
                    return object;
371
17.3k
                }
372
159k
                stack_.pop_back();
373
159k
                frame_ = &stack_.back();
374
159k
                add(std::move(object));
375
159k
            } else {
376
2.83k
                if (sanity_checks_) {
377
                    // During sanity checks, assume nesting of containers is corrupt and object is
378
                    // unusable.
379
2.38k
                    warn("unexpected array close token; giving up on reading object");
380
2.38k
                    return {};
381
2.38k
                }
382
459
                add_bad_null("treating unexpected array close token as null");
383
459
            }
384
159k
            continue;
385
386
229k
        case QPDFTokenizer::tt_dict_close:
387
229k
            if (frame_->state <= st_dictionary_value) {
388
                // Attempt to recover more or less gracefully from invalid dictionaries.
389
225k
                auto& dict = frame_->dict;
390
391
225k
                if (frame_->state == st_dictionary_value) {
392
7.76k
                    warn(
393
7.76k
                        frame_->offset,
394
7.76k
                        "dictionary ended prematurely; using null as value for last key");
395
7.76k
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
396
7.76k
                }
397
225k
                if (!frame_->olist.empty()) {
398
61.2k
                    if (sanity_checks_) {
399
59.4k
                        warn(
400
59.4k
                            frame_->offset,
401
59.4k
                            "expected dictionary keys but found non-name objects; ignoring");
402
59.4k
                    } else {
403
1.79k
                        fix_missing_keys();
404
1.79k
                    }
405
61.2k
                }
406
                // For a /Type /Sig dictionary with /ByteRange and a string /Contents, replace
                // /Contents with the string as it appeared before decryption (saved in the
                // tt_string case below).
407
225k
                if (!frame_->contents_string.empty() && dict.contains("/Type") &&
408
163
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
409
33
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
410
22
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
411
22
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
412
22
                }
413
225k
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
414
225k
                set_description(object, frame_->offset - 2);
415
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
416
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
417
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
418
                // and therefore can't be used here.
419
225k
                if (stack_.size() <= 1) {
420
125k
                    return object;
421
125k
                }
422
100k
                stack_.pop_back();
423
100k
                frame_ = &stack_.back();
424
100k
                add(std::move(object));
425
100k
            } else {
426
3.61k
                if (sanity_checks_) {
427
                    // During sanity checks, assume nesting of containers is corrupt and object is
428
                    // unusable.
429
3.19k
                    warn("unexpected dictionary close token; giving up on reading object");
430
3.19k
                    return {};
431
3.19k
                }
432
415
                add_bad_null("unexpected dictionary close token");
433
415
            }
434
100k
            continue;
435
436
221k
        case QPDFTokenizer::tt_array_open:
437
379k
        case QPDFTokenizer::tt_dict_open:
438
379k
            if (stack_.size() > max_nesting) {
439
111
                limits_error(
440
111
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
441
111
            }
442
379k
            b_contents = false;
443
379k
            stack_.emplace_back(
444
379k
                input_,
445
379k
                (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array
446
379k
                                                                       : st_dictionary_key);
447
379k
            frame_ = &stack_.back();
448
379k
            continue;
449
450
16.8k
        case QPDFTokenizer::tt_bool:
451
16.8k
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
452
16.8k
            continue;
453
454
86.0k
        case QPDFTokenizer::tt_null:
455
86.0k
            add_null();
456
86.0k
            continue;
457
458
1.08M
        case QPDFTokenizer::tt_integer:
459
1.08M
            if (!content_stream) {
460
                // Buffer token in case it is part of an indirect reference.
461
898k
                last_offset_buffer_[1] = input_.getLastOffset();
462
898k
                int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
463
898k
                int_count_ = 1;
464
898k
            } else {
465
188k
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
466
188k
            }
467
1.08M
            continue;
468
469
205k
        case QPDFTokenizer::tt_real:
470
205k
            add_scalar<QPDF_Real>(tokenizer_.getValue());
471
205k
            continue;
472
473
1.73M
        case QPDFTokenizer::tt_name:
474
1.73M
            if (frame_->state == st_dictionary_key) {
                // A name in key position becomes the pending key; the next object is its value.
475
980k
                frame_->key = tokenizer_.getValue();
476
980k
                frame_->state = st_dictionary_value;
477
980k
                b_contents = decrypter_ && frame_->key == "/Contents";
478
980k
                continue;
479
980k
            } else {
480
755k
                add_scalar<QPDF_Name>(tokenizer_.getValue());
481
755k
            }
482
755k
            continue;
483
484
755k
        case QPDFTokenizer::tt_word:
485
495k
            if (content_stream) {
486
227k
                add_scalar<QPDF_Operator>(tokenizer_.getValue());
487
227k
                continue;
488
227k
            }
489
490
268k
            if (sanity_checks_) {
491
263k
                if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") {
492
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
493
                    // we are parsing past the end of the object.
494
6.03k
                    warn(
495
6.03k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
496
6.03k
                        "reading object");
497
6.03k
                    return {};
498
6.03k
                }
499
500
257k
                add_bad_null("unknown token while reading object; treating as null");
501
257k
                continue;
502
263k
            }
503
504
4.34k
            warn("unknown token while reading object; treating as string");
505
4.34k
            check_too_many_bad_tokens();
506
4.34k
            add_scalar<QPDF_String>(tokenizer_.getValue());
507
508
4.34k
            continue;
509
510
270k
        case QPDFTokenizer::tt_string:
511
270k
            {
512
270k
                auto const& val = tokenizer_.getValue();
513
270k
                if (decrypter_) {
514
64.9k
                    if (b_contents) {
                        // Keep the undecrypted /Contents string and its offset; they are used when
                        // the enclosing dictionary is closed.
515
339
                        frame_->contents_string = val;
516
339
                        frame_->contents_offset = input_.getLastOffset();
517
339
                        b_contents = false;
518
339
                    }
519
64.9k
                    std::string s{val};
520
64.9k
                    decrypter_->decryptString(s);
521
64.9k
                    add_scalar<QPDF_String>(s);
522
205k
                } else {
523
205k
                    add_scalar<QPDF_String>(val);
524
205k
                }
525
270k
            }
526
270k
            continue;
527
528
0
        default:
529
0
            add_bad_null("treating unknown token type as null while reading object");
530
4.74M
        }
531
4.74M
    }
532
174k
}
533
534
// Adds `obj` to the current frame. When the frame is waiting for a dictionary value, the object
// is stored under the pending key (replacing any existing entry and warning about the duplicate
// key) and the frame goes back to expecting a key. In every other state the object is appended
// to the frame's object list.
void
535
Parser::add(std::shared_ptr<QPDFObject>&& obj)
536
5.33M
{
537
5.33M
    if (frame_->state != st_dictionary_value) {
538
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
539
        // processing once the tt_dict_close token has been found.
540
4.41M
        frame_->olist.emplace_back(std::move(obj));
541
4.41M
    } else {
        // insert_or_assign reports `second == false` when the key was already present.
542
918k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
543
9.45k
            warn_duplicate_key();
544
9.45k
        }
545
918k
        frame_->state = st_dictionary_key;
546
918k
    }
547
5.33M
}
548
549
// Adds a null object to the current frame, following the same placement rules as add(), and
// increments the frame's null counter. All nulls added this way share one function-local static
// QPDF_Null object.
void
550
Parser::add_null()
551
385k
{
552
385k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
553
554
385k
    if (frame_->state != st_dictionary_value) {
555
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
556
        // processing once the tt_dict_close token has been found.
557
344k
        frame_->olist.emplace_back(null_obj);
558
344k
    } else {
559
40.9k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
560
1.92k
            warn_duplicate_key();
561
1.92k
        }
562
40.9k
        frame_->state = st_dictionary_key;
563
40.9k
    }
564
385k
    ++frame_->null_count;
565
385k
}
566
567
// Handles a token that cannot be parsed as an object: emits `msg` as a warning, runs the
// bad-token check, and then adds a null in place of the token. The null is only added if
// check_too_many_bad_tokens() returns normally.
void
568
Parser::add_bad_null(std::string const& msg)
569
266k
{
570
266k
    warn(msg);
571
266k
    check_too_many_bad_tokens();
572
266k
    add_null();
573
266k
}
574
575
// Turns one buffered integer into a QPDF_Integer object and adds it to the current frame.
// `count` selects the buffer slot (`count % 2`) for both the value and the input offset that is
// recorded in the object's description.
void
576
Parser::add_int(int count)
577
2.83M
{
578
2.83M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
579
2.83M
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
580
2.83M
    add(std::move(obj));
581
2.83M
}
582
583
// Creates an object of type T from `args`, describes it with the offset of the most recently read
// token, and adds it to the current frame.
//
// Before creating the object, the size of the frame's object list and dictionary is compared with
// Limits::parser_max_container_size(); the argument passed to that function is true when bad
// tokens have been seen or sanity checks are enabled. When either container has reached the
// limit, max_bad_count_ is forced to 1 and check_too_many_bad_tokens() is called to abort.
template <typename T, typename... Args>
584
void
585
Parser::add_scalar(Args&&... args)
586
1.66M
{
587
1.66M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
1.66M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
401
        max_bad_count_ = 1;
592
401
        check_too_many_bad_tokens(); // always throws Error()
593
401
    }
594
1.66M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
1.66M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
1.66M
    add(std::move(obj));
597
1.66M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
586
16.8k
{
587
16.8k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
16.8k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
95
        max_bad_count_ = 1;
592
95
        check_too_many_bad_tokens(); // always throws Error()
593
95
    }
594
16.8k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
16.8k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
16.8k
    add(std::move(obj));
597
16.8k
}
void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
586
188k
{
587
188k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
188k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
24
        max_bad_count_ = 1;
592
24
        check_too_many_bad_tokens(); // always throws Error()
593
24
    }
594
188k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
188k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
188k
    add(std::move(obj));
597
188k
}
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
205k
{
587
205k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
205k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
73
        max_bad_count_ = 1;
592
73
        check_too_many_bad_tokens(); // always throws Error()
593
73
    }
594
205k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
205k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
205k
    add(std::move(obj));
597
205k
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
755k
{
587
755k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
755k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
88
        max_bad_count_ = 1;
592
88
        check_too_many_bad_tokens(); // always throws Error()
593
88
    }
594
755k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
755k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
755k
    add(std::move(obj));
597
755k
}
void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
227k
{
587
227k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
227k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
27
        max_bad_count_ = 1;
592
27
        check_too_many_bad_tokens(); // always throws Error()
593
27
    }
594
227k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
227k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
227k
    add(std::move(obj));
597
227k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
210k
{
587
210k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
210k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
84
        max_bad_count_ = 1;
592
84
        check_too_many_bad_tokens(); // always throws Error()
593
84
    }
594
210k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
210k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
210k
    add(std::move(obj));
597
210k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
586
64.9k
{
587
64.9k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
64.9k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
10
        max_bad_count_ = 1;
592
10
        check_too_many_bad_tokens(); // always throws Error()
593
10
    }
594
64.9k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
64.9k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
64.9k
    add(std::move(obj));
597
64.9k
}
598
599
template <typename T, typename... Args>
600
QPDFObjectHandle
601
Parser::with_description(Args&&... args)
602
777k
{
603
777k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
777k
    obj->setDescription(context_, description_, start_);
605
777k
    return {obj};
606
777k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
602
974
{
603
974
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
974
    obj->setDescription(context_, description_, start_);
605
974
    return {obj};
606
974
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
602
162k
{
603
162k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
162k
    obj->setDescription(context_, description_, start_);
605
162k
    return {obj};
606
162k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
48.1k
{
603
48.1k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
48.1k
    obj->setDescription(context_, description_, start_);
605
48.1k
    return {obj};
606
48.1k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
31.1k
{
603
31.1k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
31.1k
    obj->setDescription(context_, description_, start_);
605
31.1k
    return {obj};
606
31.1k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
526k
{
603
526k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
526k
    obj->setDescription(context_, description_, start_);
605
526k
    return {obj};
606
526k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
7.75k
{
603
7.75k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
7.75k
    obj->setDescription(context_, description_, start_);
605
7.75k
    return {obj};
606
7.75k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
602
232
{
603
232
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
232
    obj->setDescription(context_, description_, start_);
605
232
    return {obj};
606
232
}
607
608
void
609
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
610
401k
{
611
401k
    if (obj) {
612
401k
        obj->setDescription(context_, description_, parsed_offset);
613
401k
    }
614
401k
}
615
616
void
617
Parser::fix_missing_keys()
618
1.79k
{
619
1.79k
    std::set<std::string> names;
620
5.53k
    for (auto& obj: frame_->olist) {
621
5.53k
        if (obj.raw_type_code() == ::ot_name) {
622
154
            names.insert(obj.getName());
623
154
        }
624
5.53k
    }
625
1.79k
    int next_fake_key = 1;
626
5.24k
    for (auto const& item: frame_->olist) {
627
5.26k
        while (true) {
628
5.26k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
629
5.26k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
630
5.26k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
631
5.26k
            if (found_fake) {
632
5.24k
                warn(
633
5.24k
                    frame_->offset,
634
5.24k
                    "expected dictionary key but found non-name object; inserting key " + key);
635
5.24k
                frame_->dict[key] = item;
636
5.24k
                break;
637
5.24k
            }
638
5.26k
        }
639
5.24k
    }
640
1.79k
}
641
642
// Record one more parsing problem and decide whether parsing of the current object may go on.
// Returns normally if parsing may continue. Otherwise it leaves by exception: limits_error()
// throws Error() when a container size limit or the total error budget is hit, and this function
// throws Error() itself when errors are too frequent.
void
643
Parser::check_too_many_bad_tokens()
644
324k
{
645
324k
    // The container size limit is selected by whether a bad token has already been counted or
    // sanity checks are enabled.
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
646
324k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
647
437
        // limits_error() always throws, so the second call below is reached only when
        // bad_count_ is zero.
        if (bad_count_) {
648
390
            limits_error(
649
390
                "parser-max-container-size-damaged",
650
390
                "encountered errors while parsing an array or dictionary with more than " +
651
390
                    std::to_string(limit) + " elements; giving up on reading object");
652
390
        }
653
437
        limits_error(
654
437
            "parser-max-container-size",
655
437
            "encountered an array or dictionary with more than " + std::to_string(limit) +
656
437
                " elements during xref recovery; giving up on reading object");
657
437
    }
658
324k
    // A max_bad_count_ of zero is left untouched. A non-zero value is decremented, and reaching
    // zero aborts via limits_error().
    if (max_bad_count_ && --max_bad_count_ == 0) {
659
1.42k
        limits_error(
660
1.42k
            "parser-max-errors", "too many errors during parsing; treating object as null");
661
1.42k
    }
662
324k
    // If good_count_ exceeds 4 when this problem is reported, restart the bad count at 1.
    if (good_count_ > 4) {
663
128k
        good_count_ = 0;
664
128k
        bad_count_ = 1;
665
128k
        return;
666
128k
    }
667
195k
    if (++bad_count_ > 5 ||
668
187k
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
669
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670
        // exceeds the remaining number of allowable total errors.
671
8.58k
        warn("too many errors; giving up on reading object");
672
8.58k
        throw Error();
673
8.58k
    }
674
187k
    good_count_ = 0;
675
187k
}
676
677
void
678
Parser::limits_error(std::string const& limit, std::string const& msg)
679
1.97k
{
680
1.97k
    Limits::error();
681
1.97k
    warn("limits error("s + limit + "): " + msg);
682
1.97k
    throw Error();
683
1.97k
}
684
685
void
686
Parser::warn(QPDFExc const& e) const
687
454k
{
688
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
689
    // object. If parsing for some other reason, such as an explicit creation of an object from a
690
    // string, then just throw the exception.
691
454k
    if (context_) {
692
454k
        context_->warn(e);
693
454k
    } else {
694
0
        throw e;
695
0
    }
696
454k
}
697
698
void
699
Parser::warn_duplicate_key()
700
11.3k
{
701
11.3k
    warn(
702
11.3k
        frame_->offset,
703
11.3k
        "dictionary has duplicated key " + frame_->key +
704
11.3k
            "; last occurrence overrides earlier ones");
705
11.3k
    check_too_many_bad_tokens();
706
11.3k
}
707
708
void
709
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710
454k
{
711
454k
    if (stream_id_) {
712
12.6k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
713
12.6k
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
714
12.6k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
715
441k
    } else {
716
441k
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
717
441k
    }
718
454k
}
719
720
void
721
Parser::warn(std::string const& msg) const
722
370k
{
723
370k
    warn(input_.getLastOffset(), msg);
724
370k
}