Coverage Report

Created: 2026-06-16 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
120k
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
120k
    {
28
120k
        if (objects) {
29
120k
            objects->inParse(true);
30
120k
        }
31
120k
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
234k
    {
36
234k
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
234k
    }
38
39
    ~ParseGuard()
40
120k
    {
41
120k
        if (objects) {
42
120k
            objects->inParse(false);
43
120k
        }
44
120k
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
// Parse a single object from input using a fresh tokenizer and no string decrypter. If parsing
// yields no object, a null object is returned instead.
QPDFObjectHandle
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
{
    qpdf::Tokenizer tokenizer;
    Parser parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        tokenizer,
        nullptr,
        context,
        false);
    auto result = parser.parse();
    if (!result) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return result;
}
68
69
std::pair<QPDFObjectHandle, bool>
70
Parser::parse_content(
71
    InputSource& input,
72
    std::shared_ptr<QPDFObject::Description> sp_description,
73
    qpdf::Tokenizer& tokenizer,
74
    QPDF* context)
75
0
{
76
0
    static const std::string content("content"); // GCC12 - make constexpr
77
0
    auto p = Parser(
78
0
        input,
79
0
        std::move(sp_description),
80
0
        content,
81
0
        tokenizer,
82
0
        nullptr,
83
0
        context,
84
0
        true,
85
0
        0,
86
0
        0,
87
0
        context && context->doc().reconstructed_xref());
88
0
    if (auto result = p.parse(true)) {
89
0
        return {result, false};
90
0
    }
91
0
    return {{}, p.empty_};
92
0
}
93
94
// ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
// only user of the 'empty' member. When removing this overload also remove 'empty'.
//
// On return, empty holds the parser's empty_ flag. If parsing yields no object, a null object is
// returned instead.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF* context)
{
    Parser parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        *tokenizer.m,
        decrypter,
        context,
        false);
    auto parsed = parser.parse();
    empty = parser.empty_;
    if (!parsed) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return parsed;
}
120
121
// Parse a single object belonging to context, optionally decrypting strings and optionally
// applying sanity checks. The result of the parse is returned as is; it may be an uninitialized
// handle if no object could be read.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    qpdf::Tokenizer& tokenizer,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF& context,
    bool sanity_checks)
{
    Parser parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        tokenizer,
        decrypter,
        &context,
        true,
        0,
        0,
        sanity_checks);
    return parser.parse();
}
143
144
// Parse object obj_id out of the buffer of object stream stream_id. The description records the
// stream and object ids; no string decrypter is used.
QPDFObjectHandle
Parser::parse(
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
{
    auto description = std::make_shared<QPDFObject::Description>(
        QPDFObject::ObjStreamDescr(stream_id, obj_id));
    // Keep construction and parse() in one expression so that the temporary created for the empty
    // object description outlives the parse.
    return Parser(
               input,
               std::move(description),
               "",
               tokenizer,
               nullptr,
               &context,
               true,
               stream_id,
               obj_id)
        .parse();
}
161
162
// Parse one object, translating exceptions raised during parsing:
//   - Error: the object is abandoned and an uninitialized handle is returned.
//   - QPDFExc and std::logic_error: propagated unchanged to the caller.
//   - any other std::exception: reported as a warning and an uninitialized handle is returned.
QPDFObjectHandle
Parser::parse(bool content_stream)
{
    try {
        return parse_first(content_stream);
    } catch (Error&) {
        return {};
    } catch (QPDFExc&) {
        // Rethrow the in-flight exception. 'throw e' would copy it and slice derived types.
        throw;
    } catch (std::logic_error&) {
        throw;
    } catch (std::exception& e) {
        warn("treating object as null because of error during parsing: "s + e.what());
        return {};
    }
}
178
179
// Read the first token of an object. Scalars are returned directly with their description set;
// an array or dictionary open token starts a new parse stack and hands over to parse_remainder.
// An uninitialized handle is returned when no usable object could be read.
QPDFObjectHandle
Parser::parse_first(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    QPDF::Doc::ParseGuard pg(context_);
    start_ = input_.tell();
    if (!tokenizer_.nextToken(input_, object_description_)) {
        warn(tokenizer_.getErrorMessage());
    }

    switch (tokenizer_.getType()) {
    case QPDFTokenizer::tt_eof:
        if (content_stream) {
            // In content stream mode, leave object uninitialized to indicate EOF
            empty_ = true;
            return {};
        }
        warn("unexpected EOF");
        return {};

    case QPDFTokenizer::tt_bad:
        return {};

    case QPDFTokenizer::tt_brace_open:
    case QPDFTokenizer::tt_brace_close:
        warn("treating unexpected brace token as null");
        return {};

    case QPDFTokenizer::tt_array_close:
        warn("treating unexpected array close token as null");
        return {};

    case QPDFTokenizer::tt_dict_close:
        warn("unexpected dictionary close token");
        return {};

    case QPDFTokenizer::tt_array_open:
    case QPDFTokenizer::tt_dict_open:
        // Start a fresh stack whose only frame is the container that was just opened.
        stack_.clear();
        stack_.emplace_back(
            input_,
            (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
        frame_ = &stack_.back();
        return parse_remainder(content_stream);

    case QPDFTokenizer::tt_bool:
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");

    case QPDFTokenizer::tt_null:
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_integer:
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));

    case QPDFTokenizer::tt_real:
        return with_description<QPDF_Real>(tokenizer_.getValue());

    case QPDFTokenizer::tt_name:
        return with_description<QPDF_Name>(tokenizer_.getValue());

    case QPDFTokenizer::tt_word:
        {
            auto const& value = tokenizer_.getValue();
            if (content_stream) {
                return with_description<QPDF_Operator>(value);
            } else if (value == "endobj") {
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
                // to allow empty objects, but they have been encountered in actual PDF files and
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
                // input source's offset.
                empty_ = true;
                input_.seek(input_.getLastOffset(), SEEK_SET);
                // content_stream is known to be false here, so the warning is unconditional.
                warn("empty object treated as null");
                return {};
            } else {
                warn("unknown token while reading object; treating as string");
                return with_description<QPDF_String>(value);
            }
        }

    case QPDFTokenizer::tt_string:
        if (decrypter_) {
            // Decrypt a copy; the tokenizer's value is left untouched.
            std::string s{tokenizer_.getValue()};
            decrypter_->decryptString(s);
            return with_description<QPDF_String>(s);
        } else {
            return with_description<QPDF_String>(tokenizer_.getValue());
        }

    default:
        warn("treating unknown token type as null while reading object");
        return {};
    }
}
279
280
// Parse the tokens following the opening token of an array or dictionary. Nested containers are
// tracked on stack_, with frame_ pointing at the innermost one. The completed outermost container
// is returned; an uninitialized handle is returned when parsing is abandoned.
QPDFObjectHandle
Parser::parse_remainder(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    bad_count_ = 0;
    // Set when the key just read is "/Contents" and a decrypter is in use, so that the undecrypted
    // string value can be remembered in the frame.
    bool pending_contents = false;

    for (;;) {
        if (!tokenizer_.nextToken(input_, object_description_)) {
            warn(tokenizer_.getErrorMessage());
        }
        ++good_count_; // optimistically

        auto const type = tokenizer_.getType();

        if (int_count_ != 0) {
            // Special handling of indirect references. Treat integer tokens as part of an indirect
            // reference until proven otherwise.
            if (type == QPDFTokenizer::tt_integer) {
                ++int_count_;
                if (int_count_ > 2) {
                    // Process the oldest buffered integer.
                    add_int(int_count_);
                }
                last_offset_buffer_[int_count_ % 2] = input_.getLastOffset();
                int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                continue;
            }

            if (int_count_ >= 2 && type == QPDFTokenizer::tt_word &&
                tokenizer_.getValue() == "R") {
                if (!context_) {
                    throw std::logic_error(
                        "Parser::parse called without context on an object with indirect "
                        "references");
                }
                auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
                auto gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
                bool const valid_reference = id >= 1 && gen >= 0 && gen < 65535;
                if (valid_reference) {
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
                } else {
                    add_bad_null(
                        "treating bad indirect reference (" + std::to_string(id) + " " +
                        std::to_string(gen) + " R) as null");
                }
                int_count_ = 0;
                continue;
            }

            if (int_count_ > 0) {
                // Process the buffered integers before processing the current token.
                if (int_count_ > 1) {
                    add_int(int_count_ - 1);
                }
                add_int(int_count_);
                int_count_ = 0;
            }
        }

        switch (type) {
        case QPDFTokenizer::tt_eof:
            warn("parse error while reading object");
            if (content_stream) {
                // In content stream mode, leave object uninitialized to indicate EOF
                return {};
            }
            warn("unexpected EOF");
            return {};

        case QPDFTokenizer::tt_bad:
            check_too_many_bad_tokens();
            add_null();
            continue;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            add_bad_null("treating unexpected brace token as null");
            continue;

        case QPDFTokenizer::tt_array_close:
            if (frame_->state != st_array) {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected array close token; giving up on reading object");
                    return {};
                }
                add_bad_null("treating unexpected array close token as null");
                continue;
            }
            {
                auto object = frame_->null_count > 100
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
                // array start delimiter. getLastOffset points to the array end token and therefore
                // can't be used here.
                set_description(object, frame_->offset - 1);
                if (stack_.size() <= 1) {
                    return object;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(object));
            }
            continue;

        case QPDFTokenizer::tt_dict_close:
            if (frame_->state > st_dictionary_value) {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected dictionary close token; giving up on reading object");
                    return {};
                }
                add_bad_null("unexpected dictionary close token");
                continue;
            }
            {
                // Attempt to recover more or less gracefully from invalid dictionaries.
                auto& dict = frame_->dict;

                if (frame_->state == st_dictionary_value) {
                    warn(
                        frame_->offset,
                        "dictionary ended prematurely; using null as value for last key");
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
                }
                if (!frame_->olist.empty()) {
                    if (sanity_checks_) {
                        warn(
                            frame_->offset,
                            "expected dictionary keys but found non-name objects; ignoring");
                    } else {
                        fix_missing_keys();
                    }
                }

                // For a /Sig dictionary with /ByteRange, put back the /Contents string that was
                // remembered before decryption.
                if (!frame_->contents_string.empty() && dict.contains("/Type") &&
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
                }
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
                // and therefore can't be used here.
                set_description(object, frame_->offset - 2);
                if (stack_.size() <= 1) {
                    return object;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(object));
            }
            continue;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (stack_.size() > max_nesting) {
                limits_error(
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
            }
            pending_contents = false;
            stack_.emplace_back(
                input_, (type == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
            frame_ = &stack_.back();
            continue;

        case QPDFTokenizer::tt_bool:
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
            continue;

        case QPDFTokenizer::tt_null:
            add_null();
            continue;

        case QPDFTokenizer::tt_integer:
            if (content_stream) {
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
                continue;
            }
            // Buffer token in case it is part of an indirect reference.
            last_offset_buffer_[1] = input_.getLastOffset();
            int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
            int_count_ = 1;
            continue;

        case QPDFTokenizer::tt_real:
            add_scalar<QPDF_Real>(tokenizer_.getValue());
            continue;

        case QPDFTokenizer::tt_name:
            if (frame_->state != st_dictionary_key) {
                add_scalar<QPDF_Name>(tokenizer_.getValue());
                continue;
            }
            // A name in key position becomes the pending key of the current dictionary.
            frame_->key = tokenizer_.getValue();
            frame_->state = st_dictionary_value;
            pending_contents = decrypter_ && frame_->key == "/Contents";
            continue;

        case QPDFTokenizer::tt_word:
            if (content_stream) {
                add_scalar<QPDF_Operator>(tokenizer_.getValue());
                continue;
            }
            if (!sanity_checks_) {
                warn("unknown token while reading object; treating as string");
                check_too_many_bad_tokens();
                add_scalar<QPDF_String>(tokenizer_.getValue());
                continue;
            }
            if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") {
                // During sanity checks, assume an unexpected endobj or endstream indicates that
                // we are parsing past the end of the object.
                warn(
                    "unexpected 'endobj' or 'endstream' while reading object; giving up on "
                    "reading object");
                return {};
            }
            add_bad_null("unknown token while reading object; treating as null");
            continue;

        case QPDFTokenizer::tt_string:
            {
                auto const& val = tokenizer_.getValue();
                if (!decrypter_) {
                    add_scalar<QPDF_String>(val);
                    continue;
                }
                if (pending_contents) {
                    frame_->contents_string = val;
                    frame_->contents_offset = input_.getLastOffset();
                    pending_contents = false;
                }
                // Decrypt a copy so that the tokenizer's value is left untouched.
                std::string s{val};
                decrypter_->decryptString(s);
                add_scalar<QPDF_String>(s);
            }
            continue;

        default:
            add_bad_null("treating unknown token type as null while reading object");
            continue;
        }
    }
}
533
534
void
535
Parser::add(std::shared_ptr<QPDFObject>&& obj)
536
3.30M
{
537
3.30M
    if (frame_->state != st_dictionary_value) {
538
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
539
        // processing once the tt_dict_close token has been found.
540
2.94M
        frame_->olist.emplace_back(std::move(obj));
541
2.94M
    } else {
542
355k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
543
6.51k
            warn_duplicate_key();
544
6.51k
        }
545
355k
        frame_->state = st_dictionary_key;
546
355k
    }
547
3.30M
}
548
549
void
550
Parser::add_null()
551
209k
{
552
209k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
553
554
209k
    if (frame_->state != st_dictionary_value) {
555
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
556
        // processing once the tt_dict_close token has been found.
557
189k
        frame_->olist.emplace_back(null_obj);
558
189k
    } else {
559
20.5k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
560
872
            warn_duplicate_key();
561
872
        }
562
20.5k
        frame_->state = st_dictionary_key;
563
20.5k
    }
564
209k
    ++frame_->null_count;
565
209k
}
566
567
void
568
Parser::add_bad_null(std::string const& msg)
569
159k
{
570
159k
    warn(msg);
571
159k
    check_too_many_bad_tokens();
572
159k
    add_null();
573
159k
}
574
575
void
576
Parser::add_int(int count)
577
787k
{
578
787k
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
579
787k
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
580
787k
    add(std::move(obj));
581
787k
}
582
583
template <typename T, typename... Args>
584
void
585
Parser::add_scalar(Args&&... args)
586
2.18M
{
587
2.18M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
2.18M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
331
        max_bad_count_ = 1;
592
331
        check_too_many_bad_tokens(); // always throws Error()
593
331
    }
594
2.18M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
2.18M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
2.18M
    add(std::move(obj));
597
2.18M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
586
2.53k
{
587
2.53k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
2.53k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
25
        max_bad_count_ = 1;
592
25
        check_too_many_bad_tokens(); // always throws Error()
593
25
    }
594
2.53k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
2.53k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
2.53k
    add(std::move(obj));
597
2.53k
}
Unexecuted instantiation: void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
36.6k
{
587
36.6k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
36.6k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
85
        max_bad_count_ = 1;
592
85
        check_too_many_bad_tokens(); // always throws Error()
593
85
    }
594
36.6k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
36.6k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
36.6k
    add(std::move(obj));
597
36.6k
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
2.00M
{
587
2.00M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
2.00M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
164
        max_bad_count_ = 1;
592
164
        check_too_many_bad_tokens(); // always throws Error()
593
164
    }
594
2.00M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
2.00M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
2.00M
    add(std::move(obj));
597
2.00M
}
Unexecuted instantiation: void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
92.5k
{
587
92.5k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
92.5k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
46
        max_bad_count_ = 1;
592
46
        check_too_many_bad_tokens(); // always throws Error()
593
46
    }
594
92.5k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
92.5k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
92.5k
    add(std::move(obj));
597
92.5k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
586
47.1k
{
587
47.1k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
47.1k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
11
        max_bad_count_ = 1;
592
11
        check_too_many_bad_tokens(); // always throws Error()
593
11
    }
594
47.1k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
47.1k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
47.1k
    add(std::move(obj));
597
47.1k
}
598
599
// Create an object of type T by forwarding `args` to QPDFObject::create<T>, set its description
// from context_, description_ and start_, and return it wrapped in a QPDFObjectHandle.
template <typename T, typename... Args>
600
QPDFObjectHandle
601
Parser::with_description(Args&&... args)
602
18.3k
{
603
18.3k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
18.3k
    obj->setDescription(context_, description_, start_);
605
18.3k
    return {obj};
606
18.3k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
602
540
{
603
540
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
540
    obj->setDescription(context_, description_, start_);
605
540
    return {obj};
606
540
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
602
7.79k
{
603
7.79k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
7.79k
    obj->setDescription(context_, description_, start_);
605
7.79k
    return {obj};
606
7.79k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
351
{
603
351
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
351
    obj->setDescription(context_, description_, start_);
605
351
    return {obj};
606
351
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
1.30k
{
603
1.30k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
1.30k
    obj->setDescription(context_, description_, start_);
605
1.30k
    return {obj};
606
1.30k
}
Unexecuted instantiation: QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
7.95k
{
603
7.95k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
7.95k
    obj->setDescription(context_, description_, start_);
605
7.95k
    return {obj};
606
7.95k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
602
425
{
603
425
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
425
    obj->setDescription(context_, description_, start_);
605
425
    return {obj};
606
425
}
607
608
// Set the description of `obj` from context_, description_ and `parsed_offset`. Does nothing when
// `obj` is null.
void
609
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
610
163k
{
611
163k
    if (obj) {
612
163k
        obj->setDescription(context_, description_, parsed_offset);
613
163k
    }
614
163k
}
615
616
// Insert every element of frame_->olist into frame_->dict under a generated key of the form
// "/QPDFFake<N>", issuing a warning for each insertion. A generated key is only used if it is
// neither already present in frame_->dict nor equal to the name of any name object in
// frame_->olist.
// NOTE(review): judging by the warning text, this is presumably invoked when a non-name object was
// found where a dictionary key was expected -- confirm against the caller.
void
617
Parser::fix_missing_keys()
618
2.92k
{
619
2.92k
    std::set<std::string> names;
620
11.0k
    // Collect the names of all name objects in olist so that fake keys never collide with them.
    for (auto& obj: frame_->olist) {
621
11.0k
        if (obj.raw_type_code() == ::ot_name) {
622
663
            names.insert(obj.getName());
623
663
        }
624
11.0k
    }
625
2.92k
    // The counter is shared across all items, so each candidate number is tried at most once.
    int next_fake_key = 1;
626
10.8k
    for (auto const& item: frame_->olist) {
627
10.9k
        // Keep generating candidate keys until one is unused, then insert the item and move on.
        while (true) {
628
10.9k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
629
10.9k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
630
10.9k
            // Third argument is 0 when the candidate key is usable and 1 when it collides.
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
631
10.9k
            if (found_fake) {
632
10.8k
                warn(
633
10.8k
                    frame_->offset,
634
10.8k
                    "expected dictionary key but found non-name object; inserting key " + key);
635
10.8k
                frame_->dict[key] = item;
636
10.8k
                break;
637
10.8k
            }
638
10.9k
        }
639
10.8k
    }
640
2.92k
}
641
642
// Update the error counters and decide whether parsing of the current object should be abandoned.
// Returns normally if parsing may continue; otherwise exits by exception. The checks run in this
// order:
//   1. If the current frame's olist or dict has reached the container size limit, report a limits
//      error ("parser-max-container-size-damaged" when bad_count_ is non-zero, otherwise
//      "parser-max-container-size").
//   2. If max_bad_count_ is non-zero it is decremented; reaching zero reports a
//      "parser-max-errors" limits error.
//   3. If good_count_ exceeds 4, good_count_ is reset to 0, bad_count_ is set to 1 and the
//      function returns.
//   4. Otherwise bad_count_ is incremented; if it exceeds 5, or the frame is not an array and
//      max_bad_count_ is less than the size of olist, a warning is issued and Error is thrown.
//   5. Otherwise good_count_ is reset to 0.
// NOTE(review): good_count_ presumably counts tokens parsed successfully since the last error --
// confirm where it is incremented.
void
643
Parser::check_too_many_bad_tokens()
644
201k
{
645
201k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
646
201k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
647
334
        if (bad_count_) {
648
264
            limits_error(
649
264
                "parser-max-container-size-damaged",
650
264
                "encountered errors while parsing an array or dictionary with more than " +
651
264
                    std::to_string(limit) + " elements; giving up on reading object");
652
264
        }
653
334
        // limits_error() throws, so this point is only reached when bad_count_ is zero.
        limits_error(
654
334
            "parser-max-container-size",
655
334
            "encountered an array or dictionary with more than " + std::to_string(limit) +
656
334
                " elements during xref recovery; giving up on reading object");
657
334
    }
658
201k
    // A max_bad_count_ of zero skips this check entirely; otherwise it counts down to zero.
    if (max_bad_count_ && --max_bad_count_ == 0) {
659
1.93k
        limits_error(
660
1.93k
            "parser-max-errors", "too many errors during parsing; treating object as null");
661
1.93k
    }
662
201k
    if (good_count_ > 4) {
663
60.4k
        good_count_ = 0;
664
60.4k
        bad_count_ = 1;
665
60.4k
        return;
666
60.4k
    }
667
140k
    if (++bad_count_ > 5 ||
668
133k
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
669
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670
        // exceeds the remaining number of allowable total errors.
671
6.78k
        warn("too many errors; giving up on reading object");
672
6.78k
        throw Error();
673
6.78k
    }
674
133k
    good_count_ = 0;
675
133k
}
676
677
// Report that a parser limit was hit: call Limits::error(), issue a warning of the form
// "limits error(<limit>): <msg>" and throw Error. This function never returns normally.
// NOTE(review): Limits::error() presumably records globally that a limit was exceeded -- confirm.
void
678
Parser::limits_error(std::string const& limit, std::string const& msg)
679
2.39k
{
680
2.39k
    Limits::error();
681
2.39k
    warn("limits error("s + limit + "): " + msg);
682
2.39k
    throw Error();
683
2.39k
}
684
685
// Deliver the warning `e`: pass it to context_->warn() when a context is set, otherwise throw it.
void
686
Parser::warn(QPDFExc const& e) const
687
294k
{
688
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
689
    // object. If parsing for some other reason, such as an explicit creation of an object from a
690
    // string, then just throw the exception.
691
294k
    if (context_) {
692
294k
        context_->warn(e);
693
294k
    } else {
694
0
        throw e;
695
0
    }
696
294k
}
697
698
// Warn, at frame_->offset, that frame_->key occurs more than once in the current dictionary, then
// count the problem via check_too_many_bad_tokens(), which may abort parsing by throwing.
void
699
Parser::warn_duplicate_key()
700
7.38k
{
701
7.38k
    warn(
702
7.38k
        frame_->offset,
703
7.38k
        "dictionary has duplicated key " + frame_->key +
704
7.38k
            "; last occurrence overrides earlier ones");
705
7.38k
    check_too_many_bad_tokens();
706
7.38k
}
707
708
// Build a qpdf_e_damaged_pdf QPDFExc for `msg` at `offset` and hand it to warn(QPDFExc const&).
// When stream_id_ is non-zero the exception names "<filename> object stream <stream_id_>" and
// describes the object as "object <obj_id_> 0"; otherwise it uses input_.getName() and
// object_description_.
void
709
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710
294k
{
711
294k
    if (stream_id_) {
712
18.4k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
713
18.4k
        // NOTE(review): context_ is dereferenced without a null check on this branch; presumably
        // stream_id_ is only ever set when a context exists -- confirm.
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
714
18.4k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
715
276k
    } else {
716
276k
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
717
276k
    }
718
294k
}
719
720
// Issue the warning `msg` at the offset reported by input_.getLastOffset().
void
721
Parser::warn(std::string const& msg) const
722
240k
{
723
240k
    warn(input_.getLastOffset(), msg);
724
240k
}