Coverage Report

Created: 2026-06-09 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
// Brings the standard literal suffixes into scope; the `s` std::string suffix is used when
// composing a warning message in Parser::parse(bool).
using namespace std::literals;
14
using namespace qpdf;
15
16
// Shorthand for a shared-ownership pointer to a parsed object.
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
// Reference bound once, at static-initialization time, to whatever
// global::Limits::parser_max_nesting() returns. parse_remainder compares the depth of the
// container stack against it before opening another nested array or dictionary.
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
149k
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
149k
    {
28
149k
        if (objects) {
29
149k
            objects->inParse(true);
30
149k
        }
31
149k
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
247k
    {
36
247k
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
247k
    }
38
39
    ~ParseGuard()
40
149k
    {
41
149k
        if (objects) {
42
149k
            objects->inParse(false);
43
149k
        }
44
149k
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
// Short file-local names for the guard class defined above and for the parser class whose member
// functions are defined in the rest of this file.
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
// Parse one object from `input` using a locally constructed tokenizer and no string decrypter.
// Returns the parsed object; if the parser hands back an uninitialized handle, a freshly created
// null object is returned instead.
QPDFObjectHandle
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
{
    qpdf::Tokenizer tokenizer;
    auto description = make_description(input.getName(), object_description);
    Parser parser(
        input, std::move(description), object_description, tokenizer, nullptr, context, false);

    auto result = parser.parse();
    if (!result) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return result;
}
68
69
std::pair<QPDFObjectHandle, bool>
70
Parser::parse_content(
71
    InputSource& input,
72
    std::shared_ptr<QPDFObject::Description> sp_description,
73
    qpdf::Tokenizer& tokenizer,
74
    QPDF* context)
75
0
{
76
0
    static const std::string content("content"); // GCC12 - make constexpr
77
0
    auto p = Parser(
78
0
        input,
79
0
        std::move(sp_description),
80
0
        content,
81
0
        tokenizer,
82
0
        nullptr,
83
0
        context,
84
0
        true,
85
0
        0,
86
0
        0,
87
0
        context && context->doc().reconstructed_xref());
88
0
    if (auto result = p.parse(true)) {
89
0
        return {result, false};
90
0
    }
91
0
    return {{}, p.empty_};
92
0
}
93
94
// Parse one object from `input` with a caller-supplied QPDFTokenizer and optional decrypter.
// `empty` receives the parser's `empty_` flag after parsing. Returns the parsed object, or a
// freshly created null object if the parser hands back an uninitialized handle.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF* context)
{
    // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
    // only user of the 'empty' member. When removing this overload also remove 'empty'.
    auto description = make_description(input.getName(), object_description);
    Parser parser(
        input, std::move(description), object_description, *tokenizer.m, decrypter, context, false);

    auto result = parser.parse();
    empty = parser.empty_;
    if (!result) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return result;
}
120
121
// Parse one object from `input` on behalf of `context`, forwarding `decrypter` and
// `sanity_checks` to the parser. The result of the instance-level parse() is returned unchanged,
// so it may be an uninitialized handle.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    qpdf::Tokenizer& tokenizer,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF& context,
    bool sanity_checks)
{
    auto description = make_description(input.getName(), object_description);
    Parser parser(
        input,
        std::move(description),
        object_description,
        tokenizer,
        decrypter,
        &context,
        true,
        0,
        0,
        sanity_checks);
    return parser.parse();
}
143
144
// Parse one object from an in-memory buffer. The object description is built from `stream_id`
// and `obj_id` via QPDFObject::ObjStreamDescr, and both ids are also passed to the parser. No
// decrypter is used. The result of the instance-level parse() is returned unchanged.
QPDFObjectHandle
Parser::parse(
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
{
    // Empty textual description, kept as a named local so that it stays alive for the whole
    // parse() call below.
    std::string const no_description;
    auto description = std::make_shared<QPDFObject::Description>(
        QPDFObject::ObjStreamDescr(stream_id, obj_id));

    Parser parser(
        input,
        std::move(description),
        no_description,
        tokenizer,
        nullptr,
        &context,
        true,
        stream_id,
        obj_id);
    return parser.parse();
}
161
162
// Run parse_first and translate its failures:
//   - Error: swallowed; an uninitialized handle is returned.
//   - QPDFExc and std::logic_error: propagated to the caller unchanged.
//   - any other std::exception: reported as a warning; an uninitialized handle is returned.
QPDFObjectHandle
Parser::parse(bool content_stream)
{
    try {
        return parse_first(content_stream);
    } catch (Error&) {
        return {};
    } catch (QPDFExc&) {
        // Re-throw the active exception rather than a copy so that its dynamic type is kept.
        throw;
    } catch (std::logic_error&) {
        // As above: `throw;` avoids slicing exceptions derived from std::logic_error.
        throw;
    } catch (std::exception& e) {
        warn("treating object as null because of error during parsing: "s + e.what());
        return {};
    }
}
178
179
// Read the first token of an object. Scalar tokens are turned into an object and returned
// directly; an array or dictionary open token starts a fresh container stack and hands the rest
// of the work to parse_remainder. Tokens that cannot start an object produce a warning (except
// tt_bad, which is silent here) and an uninitialized handle.
QPDFObjectHandle
Parser::parse_first(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    QPDF::Doc::ParseGuard pg(context_);
    start_ = input_.tell();
    if (!tokenizer_.nextToken(input_, object_description_)) {
        warn(tokenizer_.getErrorMessage());
    }

    switch (tokenizer_.getType()) {
    case QPDFTokenizer::tt_eof:
        if (content_stream) {
            // In content stream mode, leave object uninitialized to indicate EOF
            empty_ = true;
            return {};
        }
        warn("unexpected EOF");
        return {};

    case QPDFTokenizer::tt_bad:
        return {};

    case QPDFTokenizer::tt_brace_open:
    case QPDFTokenizer::tt_brace_close:
        warn("treating unexpected brace token as null");
        return {};

    case QPDFTokenizer::tt_array_close:
        warn("treating unexpected array close token as null");
        return {};

    case QPDFTokenizer::tt_dict_close:
        warn("unexpected dictionary close token");
        return {};

    case QPDFTokenizer::tt_array_open:
    case QPDFTokenizer::tt_dict_open:
        // Start a new container stack whose single frame matches the opening token.
        stack_.clear();
        stack_.emplace_back(
            input_,
            (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
        frame_ = &stack_.back();
        return parse_remainder(content_stream);

    case QPDFTokenizer::tt_bool:
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");

    case QPDFTokenizer::tt_null:
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_integer:
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));

    case QPDFTokenizer::tt_real:
        return with_description<QPDF_Real>(tokenizer_.getValue());

    case QPDFTokenizer::tt_name:
        return with_description<QPDF_Name>(tokenizer_.getValue());

    case QPDFTokenizer::tt_word:
        {
            auto const& value = tokenizer_.getValue();
            if (content_stream) {
                return with_description<QPDF_Operator>(value);
            }
            // From here on we are not in content stream mode.
            if (value == "endobj") {
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
                // to allow empty objects, but they have been encountered in actual PDF files and
                // Adobe Reader appears to ignore them. Treat this as a null and seek back to the
                // start of the endobj token so that it is not consumed.
                empty_ = true;
                input_.seek(input_.getLastOffset(), SEEK_SET);
                warn("empty object treated as null");
                return {};
            }
            warn("unknown token while reading object; treating as string");
            return with_description<QPDF_String>(value);
        }

    case QPDFTokenizer::tt_string:
        if (decrypter_) {
            // Decrypt a private copy; the tokenizer's value is left untouched.
            std::string s{tokenizer_.getValue()};
            decrypter_->decryptString(s);
            return with_description<QPDF_String>(s);
        } else {
            return with_description<QPDF_String>(tokenizer_.getValue());
        }

    default:
        warn("treating unknown token type as null while reading object");
        return {};
    }
}
279
280
// Parse the tokens that follow an already consumed array or dictionary open token. Tokens are
// read in a loop and added to the current stack frame until the outermost container is closed,
// at which point that container is returned. EOF, and (with sanity checks enabled) misplaced
// close tokens or a stray endobj/endstream, end parsing with an uninitialized handle.
QPDFObjectHandle
Parser::parse_remainder(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    bad_count_ = 0;
    // Set when a decrypter is present and the most recent dictionary key was "/Contents".
    bool expect_contents = false;

    for (;;) {
        if (!tokenizer_.nextToken(input_, object_description_)) {
            warn(tokenizer_.getErrorMessage());
        }
        ++good_count_; // optimistically

        auto const token_type = tokenizer_.getType();

        if (int_count_ != 0) {
            // Special handling of indirect references. Treat integer tokens as part of an indirect
            // reference until proven otherwise.
            if (token_type == QPDFTokenizer::tt_integer) {
                ++int_count_;
                if (int_count_ > 2) {
                    // Process the oldest buffered integer before its slot is overwritten.
                    add_int(int_count_);
                }
                auto const slot = int_count_ % 2;
                last_offset_buffer_[slot] = input_.getLastOffset();
                int_buffer_[slot] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                continue;
            }

            if (int_count_ >= 2 && token_type == QPDFTokenizer::tt_word &&
                tokenizer_.getValue() == "R") {
                if (!context_) {
                    throw std::logic_error(
                        "Parser::parse called without context on an object with indirect "
                        "references");
                }
                auto const id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
                auto const gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
                if (id >= 1 && gen >= 0 && gen < 65535) {
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
                } else {
                    add_bad_null(
                        "treating bad indirect reference (" + std::to_string(id) + " " +
                        std::to_string(gen) + " R) as null");
                }
                int_count_ = 0;
                continue;
            }

            if (int_count_ > 0) {
                // Process the buffered integers before processing the current token.
                if (int_count_ > 1) {
                    add_int(int_count_ - 1);
                }
                add_int(int_count_);
                int_count_ = 0;
            }
        }

        switch (token_type) {
        case QPDFTokenizer::tt_eof:
            warn("parse error while reading object");
            if (!content_stream) {
                warn("unexpected EOF");
            }
            // In content stream mode, leave object uninitialized to indicate EOF
            return {};

        case QPDFTokenizer::tt_bad:
            check_too_many_bad_tokens();
            add_null();
            continue;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            add_bad_null("treating unexpected brace token as null");
            continue;

        case QPDFTokenizer::tt_array_close:
            if (frame_->state != st_array) {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected array close token; giving up on reading object");
                    return {};
                }
                add_bad_null("treating unexpected array close token as null");
                continue;
            }
            {
                auto array = frame_->null_count > 100
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
                // `offset` points just past "[", so step back one to describe the array at the
                // start of "[". This has been explicitly tested with whitespace surrounding the
                // array start delimiter. getLastOffset points to the array end token and
                // therefore can't be used here.
                set_description(array, frame_->offset - 1);
                if (stack_.size() <= 1) {
                    return array;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(array));
            }
            continue;

        case QPDFTokenizer::tt_dict_close:
            if (frame_->state > st_dictionary_value) {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected dictionary close token; giving up on reading object");
                    return {};
                }
                add_bad_null("unexpected dictionary close token");
                continue;
            }
            {
                // Attempt to recover more or less gracefully from invalid dictionaries.
                auto& dict = frame_->dict;

                if (frame_->state == st_dictionary_value) {
                    warn(
                        frame_->offset,
                        "dictionary ended prematurely; using null as value for last key");
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
                }
                if (!frame_->olist.empty()) {
                    if (sanity_checks_) {
                        warn(
                            frame_->offset,
                            "expected dictionary keys but found non-name objects; ignoring");
                    } else {
                        fix_missing_keys();
                    }
                }

                // For a /Sig dictionary with a /ByteRange and a string /Contents, replace
                // /Contents with the string captured before decryption.
                bool const restore_contents = !frame_->contents_string.empty() &&
                    dict.contains("/Type") && dict["/Type"].isNameAndEquals("/Sig") &&
                    dict.contains("/ByteRange") && dict.contains("/Contents") &&
                    dict["/Contents"].isString();
                if (restore_contents) {
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
                }

                auto dictionary = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
                // `offset` points just past "<<", so step back two to describe the dictionary at
                // the start of "<<". This has been explicitly tested with whitespace surrounding
                // the dictionary start delimiter. getLastOffset points to the dictionary end
                // token and therefore can't be used here.
                set_description(dictionary, frame_->offset - 2);
                if (stack_.size() <= 1) {
                    return dictionary;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(dictionary));
            }
            continue;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (stack_.size() > max_nesting) {
                limits_error(
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
            }
            expect_contents = false;
            stack_.emplace_back(
                input_,
                (token_type == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
            frame_ = &stack_.back();
            continue;

        case QPDFTokenizer::tt_bool:
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
            continue;

        case QPDFTokenizer::tt_null:
            add_null();
            continue;

        case QPDFTokenizer::tt_integer:
            if (content_stream) {
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
            } else {
                // Buffer token in case it is part of an indirect reference.
                last_offset_buffer_[1] = input_.getLastOffset();
                int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                int_count_ = 1;
            }
            continue;

        case QPDFTokenizer::tt_real:
            add_scalar<QPDF_Real>(tokenizer_.getValue());
            continue;

        case QPDFTokenizer::tt_name:
            if (frame_->state != st_dictionary_key) {
                add_scalar<QPDF_Name>(tokenizer_.getValue());
                continue;
            }
            // A name in key position becomes the pending key of the current dictionary.
            frame_->key = tokenizer_.getValue();
            frame_->state = st_dictionary_value;
            expect_contents = decrypter_ && frame_->key == "/Contents";
            continue;

        case QPDFTokenizer::tt_word:
            if (content_stream) {
                add_scalar<QPDF_Operator>(tokenizer_.getValue());
                continue;
            }

            if (sanity_checks_) {
                if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") {
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
                    // we are parsing past the end of the object.
                    warn(
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
                        "reading object");
                    return {};
                }

                add_bad_null("unknown token while reading object; treating as null");
                continue;
            }

            warn("unknown token while reading object; treating as string");
            check_too_many_bad_tokens();
            add_scalar<QPDF_String>(tokenizer_.getValue());
            continue;

        case QPDFTokenizer::tt_string:
            {
                auto const& raw = tokenizer_.getValue();
                if (!decrypter_) {
                    add_scalar<QPDF_String>(raw);
                    continue;
                }
                if (expect_contents) {
                    // Remember the undecrypted string and where it was found.
                    frame_->contents_string = raw;
                    frame_->contents_offset = input_.getLastOffset();
                    expect_contents = false;
                }
                std::string s{raw};
                decrypter_->decryptString(s);
                add_scalar<QPDF_String>(s);
            }
            continue;

        default:
            add_bad_null("treating unknown token type as null while reading object");
            continue;
        }
    }
}
533
534
void
535
Parser::add(std::shared_ptr<QPDFObject>&& obj)
536
8.33M
{
537
8.33M
    if (frame_->state != st_dictionary_value) {
538
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
539
        // processing once the tt_dict_close token has been found.
540
7.80M
        frame_->olist.emplace_back(std::move(obj));
541
7.80M
    } else {
542
535k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
543
5.92k
            warn_duplicate_key();
544
5.92k
        }
545
535k
        frame_->state = st_dictionary_key;
546
535k
    }
547
8.33M
}
548
549
void
550
Parser::add_null()
551
250k
{
552
250k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
553
554
250k
    if (frame_->state != st_dictionary_value) {
555
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
556
        // processing once the tt_dict_close token has been found.
557
224k
        frame_->olist.emplace_back(null_obj);
558
224k
    } else {
559
26.2k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
560
937
            warn_duplicate_key();
561
937
        }
562
26.2k
        frame_->state = st_dictionary_key;
563
26.2k
    }
564
250k
    ++frame_->null_count;
565
250k
}
566
567
void
568
Parser::add_bad_null(std::string const& msg)
569
178k
{
570
178k
    warn(msg);
571
178k
    check_too_many_bad_tokens();
572
178k
    add_null();
573
178k
}
574
575
void
576
Parser::add_int(int count)
577
2.88M
{
578
2.88M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
579
2.88M
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
580
2.88M
    add(std::move(obj));
581
2.88M
}
582
583
template <typename T, typename... Args>
584
void
585
Parser::add_scalar(Args&&... args)
586
5.07M
{
587
5.07M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
5.07M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
311
        max_bad_count_ = 1;
592
311
        check_too_many_bad_tokens(); // always throws Error()
593
311
    }
594
5.07M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
5.07M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
5.07M
    add(std::move(obj));
597
5.07M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
586
8.07k
{
587
8.07k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
8.07k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
19
        max_bad_count_ = 1;
592
19
        check_too_many_bad_tokens(); // always throws Error()
593
19
    }
594
8.07k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
8.07k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
8.07k
    add(std::move(obj));
597
8.07k
}
Unexecuted instantiation: void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
42.4k
{
587
42.4k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
42.4k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
40
        max_bad_count_ = 1;
592
40
        check_too_many_bad_tokens(); // always throws Error()
593
40
    }
594
42.4k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
42.4k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
42.4k
    add(std::move(obj));
597
42.4k
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
4.95M
{
587
4.95M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
4.95M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
185
        max_bad_count_ = 1;
592
185
        check_too_many_bad_tokens(); // always throws Error()
593
185
    }
594
4.95M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
4.95M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
4.95M
    add(std::move(obj));
597
4.95M
}
Unexecuted instantiation: void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
37.6k
{
587
37.6k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
37.6k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
53
        max_bad_count_ = 1;
592
53
        check_too_many_bad_tokens(); // always throws Error()
593
53
    }
594
37.6k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
37.6k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
37.6k
    add(std::move(obj));
597
37.6k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
586
25.8k
{
587
25.8k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
25.8k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
14
        max_bad_count_ = 1;
592
14
        check_too_many_bad_tokens(); // always throws Error()
593
14
    }
594
25.8k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
25.8k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
25.8k
    add(std::move(obj));
597
25.8k
}
598
599
template <typename T, typename... Args>
600
QPDFObjectHandle
601
Parser::with_description(Args&&... args)
602
17.0k
{
603
17.0k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
17.0k
    obj->setDescription(context_, description_, start_);
605
17.0k
    return {obj};
606
17.0k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
602
577
{
603
577
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
577
    obj->setDescription(context_, description_, start_);
605
577
    return {obj};
606
577
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
602
8.18k
{
603
8.18k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
8.18k
    obj->setDescription(context_, description_, start_);
605
8.18k
    return {obj};
606
8.18k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
965
{
603
965
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
965
    obj->setDescription(context_, description_, start_);
605
965
    return {obj};
606
965
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
1.69k
{
603
1.69k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
1.69k
    obj->setDescription(context_, description_, start_);
605
1.69k
    return {obj};
606
1.69k
}
Unexecuted instantiation: QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
5.41k
{
603
5.41k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
5.41k
    obj->setDescription(context_, description_, start_);
605
5.41k
    return {obj};
606
5.41k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
602
190
{
603
190
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
190
    obj->setDescription(context_, description_, start_);
605
190
    return {obj};
606
190
}
607
608
void
609
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
610
233k
{
611
233k
    if (obj) {
612
233k
        obj->setDescription(context_, description_, parsed_offset);
613
233k
    }
614
233k
}
615
616
void
617
Parser::fix_missing_keys()
618
2.00k
{
619
2.00k
    std::set<std::string> names;
620
15.0k
    for (auto& obj: frame_->olist) {
621
15.0k
        if (obj.raw_type_code() == ::ot_name) {
622
314
            names.insert(obj.getName());
623
314
        }
624
15.0k
    }
625
2.00k
    int next_fake_key = 1;
626
15.0k
    for (auto const& item: frame_->olist) {
627
15.1k
        while (true) {
628
15.1k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
629
15.1k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
630
15.1k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
631
15.1k
            if (found_fake) {
632
15.0k
                warn(
633
15.0k
                    frame_->offset,
634
15.0k
                    "expected dictionary key but found non-name object; inserting key " + key);
635
15.0k
                frame_->dict[key] = item;
636
15.0k
                break;
637
15.0k
            }
638
15.1k
        }
639
15.0k
    }
640
2.00k
}
641
642
void
643
Parser::check_too_many_bad_tokens()
644
214k
{
645
214k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
646
214k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
647
313
        if (bad_count_) {
648
217
            limits_error(
649
217
                "parser-max-container-size-damaged",
650
217
                "encountered errors while parsing an array or dictionary with more than " +
651
217
                    std::to_string(limit) + " elements; giving up on reading object");
652
217
        }
653
313
        limits_error(
654
313
            "parser-max-container-size",
655
313
            "encountered an array or dictionary with more than " + std::to_string(limit) +
656
313
                " elements during xref recovery; giving up on reading object");
657
313
    }
658
214k
    if (max_bad_count_ && --max_bad_count_ == 0) {
659
1.85k
        limits_error(
660
1.85k
            "parser-max-errors", "too many errors during parsing; treating object as null");
661
1.85k
    }
662
214k
    if (good_count_ > 4) {
663
79.5k
        good_count_ = 0;
664
79.5k
        bad_count_ = 1;
665
79.5k
        return;
666
79.5k
    }
667
134k
    if (++bad_count_ > 5 ||
668
128k
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
669
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670
        // exceeds the remaining number of allowable total errors.
671
5.34k
        warn("too many errors; giving up on reading object");
672
5.34k
        throw Error();
673
5.34k
    }
674
129k
    good_count_ = 0;
675
129k
}
676
677
void
678
Parser::limits_error(std::string const& limit, std::string const& msg)
679
2.26k
{
680
2.26k
    Limits::error();
681
2.26k
    warn("limits error("s + limit + "): " + msg);
682
2.26k
    throw Error();
683
2.26k
}
684
685
void
686
Parser::warn(QPDFExc const& e) const
687
327k
{
688
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
689
    // object. If parsing for some other reason, such as an explicit creation of an object from a
690
    // string, then just throw the exception.
691
327k
    if (context_) {
692
327k
        context_->warn(e);
693
327k
    } else {
694
0
        throw e;
695
0
    }
696
327k
}
697
698
void
699
Parser::warn_duplicate_key()
700
6.86k
{
701
6.86k
    warn(
702
6.86k
        frame_->offset,
703
6.86k
        "dictionary has duplicated key " + frame_->key +
704
6.86k
            "; last occurrence overrides earlier ones");
705
6.86k
    check_too_many_bad_tokens();
706
6.86k
}
707
708
void
709
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710
327k
{
711
327k
    if (stream_id_) {
712
27.7k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
713
27.7k
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
714
27.7k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
715
299k
    } else {
716
299k
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
717
299k
    }
718
327k
}
719
720
void
721
Parser::warn(std::string const& msg) const
722
251k
{
723
251k
    warn(input_.getLastOffset(), msg);
724
251k
}