Coverage Report

Created: 2026-05-30 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
174k
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
174k
    {
28
174k
        if (objects) {
29
174k
            objects->inParse(true);
30
174k
        }
31
174k
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
378k
    {
36
378k
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
378k
    }
38
39
    ~ParseGuard()
40
174k
    {
41
174k
        if (objects) {
42
174k
            objects->inParse(false);
43
174k
        }
44
174k
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
// Parse one object from input using a freshly constructed tokenizer and no string decrypter.
// context may be null. If the parser yields an empty handle, a null object is returned in its
// place, so the result is always initialized.
QPDFObjectHandle
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
{
    qpdf::Tokenizer tokenizer;
    auto description = make_description(input.getName(), object_description);
    Parser parser(
        input, std::move(description), object_description, tokenizer, nullptr, context, false);

    auto parsed = parser.parse();
    if (!parsed) {
        // Nothing usable was parsed: substitute an explicit null.
        return {QPDFObject::create<QPDF_Null>()};
    }
    return parsed;
}
68
69
std::pair<QPDFObjectHandle, bool>
70
Parser::parse_content(
71
    InputSource& input,
72
    std::shared_ptr<QPDFObject::Description> sp_description,
73
    qpdf::Tokenizer& tokenizer,
74
    QPDF* context)
75
0
{
76
0
    static const std::string content("content"); // GCC12 - make constexpr
77
0
    auto p = Parser(
78
0
        input,
79
0
        std::move(sp_description),
80
0
        content,
81
0
        tokenizer,
82
0
        nullptr,
83
0
        context,
84
0
        true,
85
0
        0,
86
0
        0,
87
0
        context && context->doc().reconstructed_xref());
88
0
    if (auto result = p.parse(true)) {
89
0
        return {result, false};
90
0
    }
91
0
    return {{}, p.empty_};
92
0
}
93
94
// ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
// only user of the 'empty' member. When removing this overload also remove 'empty'.
//
// Parses one object from input, reports the parser's empty_ flag through 'empty', and returns
// the parsed object, or a null object if the parser produced an uninitialized handle.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF* context)
{
    Parser parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        *tokenizer.m,
        decrypter,
        context,
        false);

    auto parsed = parser.parse();
    // The flag is reported whether or not an object was produced.
    empty = parser.empty_;

    if (!parsed) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return parsed;
}
120
121
// Parse one object that belongs to the document 'context', optionally decrypting strings and
// optionally applying the stricter sanity checks. Unlike the overloads that substitute a null
// object, this one returns the parser's result unchanged, so the handle may be uninitialized.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    qpdf::Tokenizer& tokenizer,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF& context,
    bool sanity_checks)
{
    auto description = make_description(input.getName(), object_description);
    Parser parser(
        input,
        std::move(description),
        object_description,
        tokenizer,
        decrypter,
        &context,
        true,
        0,
        0,
        sanity_checks);
    return parser.parse();
}
143
144
// Parse object obj_id out of the object stream stream_id, whose data is supplied in input. The
// object is described by its position in the object stream rather than by a textual
// description, and no string decrypter is used. The parser's result is returned unchanged.
QPDFObjectHandle
Parser::parse(
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
{
    // Named local so that the (empty) textual description outlives the parser.
    std::string const no_description;
    auto description =
        std::make_shared<QPDFObject::Description>(QPDFObject::ObjStreamDescr(stream_id, obj_id));

    Parser parser(
        input,
        std::move(description),
        no_description,
        tokenizer,
        nullptr,
        &context,
        true,
        stream_id,
        obj_id);
    return parser.parse();
}
161
162
// Parse one object and translate parser failures into a result.
//
// - Error: the parser gave up on the object; an uninitialized handle is returned.
// - QPDFExc and std::logic_error: propagated to the caller unchanged.
// - Any other std::exception: reported as a warning; an uninitialized handle is returned.
//
// Exceptions are propagated with a bare `throw;` so that the original exception object, including
// its dynamic type, reaches the caller. `throw e;` would throw a sliced copy of the static type.
QPDFObjectHandle
Parser::parse(bool content_stream)
{
    try {
        return parse_first(content_stream);
    } catch (Error&) {
        return {};
    } catch (QPDFExc&) {
        throw;
    } catch (std::logic_error&) {
        throw;
    } catch (std::exception& e) {
        warn("treating object as null because of error during parsing: "s + e.what());
        return {};
    }
}
178
179
// Read the first token of an object.
//
// Scalars are turned into objects directly. An array or dictionary open token pushes the first
// frame onto stack_ and hands over to parse_remainder(). An uninitialized handle is returned for
// EOF, bad tokens, unexpected brace or close tokens, and empty objects ("endobj" seen first); in
// all of these cases except a bad token and EOF in content stream mode a warning is issued here.
QPDFObjectHandle
Parser::parse_first(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    QPDF::Doc::ParseGuard pg(context_);
    start_ = input_.tell();
    if (!tokenizer_.nextToken(input_, object_description_)) {
        warn(tokenizer_.getErrorMessage());
    }

    switch (tokenizer_.getType()) {
    case QPDFTokenizer::tt_eof:
        if (content_stream) {
            // In content stream mode, leave object uninitialized to indicate EOF
            empty_ = true;
            return {};
        }
        warn("unexpected EOF");
        return {};

    case QPDFTokenizer::tt_bad:
        return {};

    case QPDFTokenizer::tt_brace_open:
    case QPDFTokenizer::tt_brace_close:
        warn("treating unexpected brace token as null");
        return {};

    case QPDFTokenizer::tt_array_close:
        warn("treating unexpected array close token as null");
        return {};

    case QPDFTokenizer::tt_dict_close:
        warn("unexpected dictionary close token");
        return {};

    case QPDFTokenizer::tt_array_open:
    case QPDFTokenizer::tt_dict_open:
        // Start a fresh frame stack for the container and parse its contents.
        stack_.clear();
        stack_.emplace_back(
            input_,
            (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
        frame_ = &stack_.back();
        return parse_remainder(content_stream);

    case QPDFTokenizer::tt_bool:
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");

    case QPDFTokenizer::tt_null:
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_integer:
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));

    case QPDFTokenizer::tt_real:
        return with_description<QPDF_Real>(tokenizer_.getValue());

    case QPDFTokenizer::tt_name:
        return with_description<QPDF_Name>(tokenizer_.getValue());

    case QPDFTokenizer::tt_word:
        {
            auto const& value = tokenizer_.getValue();
            if (content_stream) {
                return with_description<QPDF_Operator>(value);
            }
            if (value == "endobj") {
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
                // to allow empty objects, but they have been encountered in actual PDF files and
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
                // input source's offset.
                empty_ = true;
                input_.seek(input_.getLastOffset(), SEEK_SET);
                // content_stream is known to be false here, so the warning is unconditional.
                warn("empty object treated as null");
                return {};
            }
            warn("unknown token while reading object; treating as string");
            return with_description<QPDF_String>(value);
        }

    case QPDFTokenizer::tt_string:
        if (decrypter_) {
            // Decrypt a copy; the tokenizer's value is left untouched.
            std::string s{tokenizer_.getValue()};
            decrypter_->decryptString(s);
            return with_description<QPDF_String>(s);
        } else {
            return with_description<QPDF_String>(tokenizer_.getValue());
        }

    default:
        warn("treating unknown token type as null while reading object");
        return {};
    }
}
279
280
// Parse the remainder of an array or dictionary after parse_first() has consumed its opening
// token and pushed the first frame onto stack_.
//
// Tokens are read in a loop. Scalars are added to the current frame, nested containers push and
// pop frames, and the loop ends by returning the finished object when the outermost container is
// closed. An uninitialized handle is returned on EOF and, when sanity_checks_ is set, on a
// mismatched close token or an unexpected 'endobj'/'endstream'.
//
// Outside content streams integers are not added immediately: up to two are buffered so that
// "id gen R" can be recognised as an indirect reference.
QPDFObjectHandle
281
Parser::parse_remainder(bool content_stream)
282
147k
{
283
    // This method must take care not to resolve any objects. Don't check the type of any object
284
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
285
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
286
    // logic error to be thrown from QPDF::inParse().
287
288
147k
    bad_count_ = 0;
289
147k
    // True while the value of a "/Contents" dictionary key is pending and a decrypter is in use
    // (set in the tt_name case, consumed in the tt_string case).
    bool b_contents = false;
290
291
23.0M
    while (true) {
292
23.0M
        if (!tokenizer_.nextToken(input_, object_description_)) {
293
34.2k
            warn(tokenizer_.getErrorMessage());
294
34.2k
        }
295
23.0M
        ++good_count_; // optimistically
296
297
23.0M
        if (int_count_ != 0) {
298
            // Special handling of indirect references. Treat integer tokens as part of an indirect
299
            // reference until proven otherwise.
300
1.51M
            if (tokenizer_.getType() == QPDFTokenizer::tt_integer) {
301
896k
                if (++int_count_ > 2) {
302
                    // Process the oldest buffered integer.
303
452k
                    add_int(int_count_);
304
452k
                }
305
896k
                // The two buffer slots are used alternately, selected by int_count_ % 2.
                last_offset_buffer_[int_count_ % 2] = input_.getLastOffset();
306
896k
                int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
307
896k
                continue;
308
309
896k
            } else if (
310
616k
                int_count_ >= 2 && tokenizer_.getType() == QPDFTokenizer::tt_word &&
311
403k
                tokenizer_.getValue() == "R") {
312
381k
                if (!context_) {
313
0
                    throw std::logic_error(
314
0
                        "Parser::parse called without context on an object with indirect "
315
0
                        "references");
316
0
                }
317
381k
                // The two most recent integers are the object id followed by the generation.
                auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
318
381k
                auto gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
319
381k
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
320
378k
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
321
378k
                } else {
322
2.44k
                    add_bad_null(
323
2.44k
                        "treating bad indirect reference (" + std::to_string(id) + " " +
324
2.44k
                        std::to_string(gen) + " R) as null");
325
2.44k
                }
326
381k
                int_count_ = 0;
327
381k
                continue;
328
329
381k
            } else if (int_count_ > 0) {
330
                // Process the buffered integers before processing the current token.
331
235k
                if (int_count_ > 1) {
332
62.3k
                    add_int(int_count_ - 1);
333
62.3k
                }
334
235k
                add_int(int_count_);
335
235k
                int_count_ = 0;
336
235k
            }
337
1.51M
        }
338
339
21.8M
        switch (tokenizer_.getType()) {
340
6.58k
        case QPDFTokenizer::tt_eof:
341
6.58k
            warn("parse error while reading object");
342
6.58k
            if (content_stream) {
343
                // In content stream mode, leave object uninitialized to indicate EOF
344
0
                return {};
345
0
            }
346
6.58k
            warn("unexpected EOF");
347
6.58k
            return {};
348
349
28.3k
        case QPDFTokenizer::tt_bad:
350
28.3k
            check_too_many_bad_tokens();
351
28.3k
            add_null();
352
28.3k
            continue;
353
354
2.96k
        case QPDFTokenizer::tt_brace_open:
355
4.26k
        case QPDFTokenizer::tt_brace_close:
356
4.26k
            add_bad_null("treating unexpected brace token as null");
357
4.26k
            continue;
358
359
99.7k
        case QPDFTokenizer::tt_array_close:
360
99.7k
            if (frame_->state == st_array) {
361
97.8k
                // NOTE(review): the extra 'true' argument is passed only when the frame recorded
                // more than 100 nulls; presumably it selects a sparse array representation -
                // confirm against QPDF_Array.
                auto object = frame_->null_count > 100
362
97.8k
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
363
97.8k
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
364
97.8k
                set_description(object, frame_->offset - 1);
365
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
366
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
367
                // array start delimiter. getLastOffset points to the array end token and therefore
368
                // can't be used here.
369
97.8k
                // The outermost container has been closed: the object is complete.
                if (stack_.size() <= 1) {
370
1.88k
                    return object;
371
1.88k
                }
372
95.9k
                stack_.pop_back();
373
95.9k
                frame_ = &stack_.back();
374
95.9k
                add(std::move(object));
375
95.9k
            } else {
376
1.92k
                if (sanity_checks_) {
377
                    // During sanity checks, assume nesting of containers is corrupt and object is
378
                    // unusable.
379
1.09k
                    warn("unexpected array close token; giving up on reading object");
380
1.09k
                    return {};
381
1.09k
                }
382
828
                add_bad_null("treating unexpected array close token as null");
383
828
            }
384
96.7k
            continue;
385
386
183k
        case QPDFTokenizer::tt_dict_close:
387
183k
            // NOTE(review): this comparison relies on the dictionary states ordering at or below
            // st_dictionary_value in the state enum, with st_array above it - confirm.
            if (frame_->state <= st_dictionary_value) {
388
                // Attempt to recover more or less gracefully from invalid dictionaries.
389
181k
                auto& dict = frame_->dict;
390
391
181k
                if (frame_->state == st_dictionary_value) {
392
14.8k
                    warn(
393
14.8k
                        frame_->offset,
394
14.8k
                        "dictionary ended prematurely; using null as value for last key");
395
14.8k
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
396
14.8k
                }
397
181k
                if (!frame_->olist.empty()) {
398
42.9k
                    if (sanity_checks_) {
399
39.6k
                        warn(
400
39.6k
                            frame_->offset,
401
39.6k
                            "expected dictionary keys but found non-name objects; ignoring");
402
39.6k
                    } else {
403
3.30k
                        fix_missing_keys();
404
3.30k
                    }
405
42.9k
                }
406
407
181k
                // For a /Type /Sig dictionary with /ByteRange, replace the decrypted /Contents
                // string with the raw string captured in the tt_string case below.
                if (!frame_->contents_string.empty() && dict.contains("/Type") &&
408
93
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
409
34
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
410
34
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
411
34
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
412
34
                }
413
181k
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
414
181k
                set_description(object, frame_->offset - 2);
415
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
416
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
417
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
418
                // and therefore can't be used here.
419
181k
                // The outermost container has been closed: the object is complete.
                if (stack_.size() <= 1) {
420
123k
                    return object;
421
123k
                }
422
58.2k
                stack_.pop_back();
423
58.2k
                frame_ = &stack_.back();
424
58.2k
                add(std::move(object));
425
58.2k
            } else {
426
1.58k
                if (sanity_checks_) {
427
                    // During sanity checks, assume nesting of containers is corrupt and object is
428
                    // unusable.
429
1.06k
                    warn("unexpected dictionary close token; giving up on reading object");
430
1.06k
                    return {};
431
1.06k
                }
432
515
                add_bad_null("unexpected dictionary close token");
433
515
            }
434
58.7k
            continue;
435
436
129k
        case QPDFTokenizer::tt_array_open:
437
266k
        case QPDFTokenizer::tt_dict_open:
438
266k
            if (stack_.size() > max_nesting) {
439
101
                limits_error(
440
101
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
441
101
            }
442
266k
            b_contents = false;
443
266k
            stack_.emplace_back(
444
266k
                input_,
445
266k
                (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array
446
266k
                                                                       : st_dictionary_key);
447
266k
            frame_ = &stack_.back();
448
266k
            continue;
449
450
9.72k
        case QPDFTokenizer::tt_bool:
451
9.72k
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
452
9.72k
            continue;
453
454
224k
        case QPDFTokenizer::tt_null:
455
224k
            add_null();
456
224k
            continue;
457
458
617k
        case QPDFTokenizer::tt_integer:
459
617k
            if (!content_stream) {
460
                // Buffer token in case it is part of an indirect reference.
461
617k
                last_offset_buffer_[1] = input_.getLastOffset();
462
617k
                int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
463
617k
                int_count_ = 1;
464
617k
            } else {
465
0
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
466
0
            }
467
617k
            continue;
468
469
81.7k
        case QPDFTokenizer::tt_real:
470
81.7k
            add_scalar<QPDF_Real>(tokenizer_.getValue());
471
81.7k
            continue;
472
473
20.0M
        case QPDFTokenizer::tt_name:
474
20.0M
            // A name is a key when the dictionary frame expects one; otherwise it is a value.
            if (frame_->state == st_dictionary_key) {
475
827k
                frame_->key = tokenizer_.getValue();
476
827k
                frame_->state = st_dictionary_value;
477
827k
                b_contents = decrypter_ && frame_->key == "/Contents";
478
827k
                continue;
479
19.1M
            } else {
480
19.1M
                add_scalar<QPDF_Name>(tokenizer_.getValue());
481
19.1M
            }
482
19.1M
            continue;
483
484
19.1M
        case QPDFTokenizer::tt_word:
485
174k
            if (content_stream) {
486
0
                add_scalar<QPDF_Operator>(tokenizer_.getValue());
487
0
                continue;
488
0
            }
489
490
174k
            if (sanity_checks_) {
491
166k
                if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") {
492
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
493
                    // we are parsing past the end of the object.
494
2.06k
                    warn(
495
2.06k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
496
2.06k
                        "reading object");
497
2.06k
                    return {};
498
2.06k
                }
499
500
164k
                add_bad_null("unknown token while reading object; treating as null");
501
164k
                continue;
502
166k
            }
503
504
8.79k
            warn("unknown token while reading object; treating as string");
505
8.79k
            check_too_many_bad_tokens();
506
8.79k
            add_scalar<QPDF_String>(tokenizer_.getValue());
507
508
8.79k
            continue;
509
510
89.7k
        case QPDFTokenizer::tt_string:
511
89.7k
            {
512
89.7k
                auto const& val = tokenizer_.getValue();
513
89.7k
                if (decrypter_) {
514
20.3k
                    if (b_contents) {
515
1.09k
                        // Keep the undecrypted value and its offset for the dictionary close
                        // handling above.
                        frame_->contents_string = val;
516
1.09k
                        frame_->contents_offset = input_.getLastOffset();
517
1.09k
                        b_contents = false;
518
1.09k
                    }
519
20.3k
                    std::string s{val};
520
20.3k
                    decrypter_->decryptString(s);
521
20.3k
                    add_scalar<QPDF_String>(s);
522
69.4k
                } else {
523
69.4k
                    add_scalar<QPDF_String>(val);
524
69.4k
                }
525
89.7k
            }
526
89.7k
            continue;
527
528
0
        default:
529
0
            // No 'continue' needed: this is the last statement of the loop body.
            add_bad_null("treating unknown token type as null while reading object");
530
21.8M
        }
531
21.8M
    }
532
147k
}
533
534
void
535
Parser::add(std::shared_ptr<QPDFObject>&& obj)
536
20.6M
{
537
20.6M
    if (frame_->state != st_dictionary_value) {
538
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
539
        // processing once the tt_dict_close token has been found.
540
19.8M
        frame_->olist.emplace_back(std::move(obj));
541
19.8M
    } else {
542
773k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
543
8.27k
            warn_duplicate_key();
544
8.27k
        }
545
773k
        frame_->state = st_dictionary_key;
546
773k
    }
547
20.6M
}
548
549
void
550
Parser::add_null()
551
415k
{
552
415k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
553
554
415k
    if (frame_->state != st_dictionary_value) {
555
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
556
        // processing once the tt_dict_close token has been found.
557
384k
        frame_->olist.emplace_back(null_obj);
558
384k
    } else {
559
31.1k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
560
2.35k
            warn_duplicate_key();
561
2.35k
        }
562
31.1k
        frame_->state = st_dictionary_key;
563
31.1k
    }
564
415k
    ++frame_->null_count;
565
415k
}
566
567
void
568
Parser::add_bad_null(std::string const& msg)
569
171k
{
570
171k
    warn(msg);
571
171k
    check_too_many_bad_tokens();
572
171k
    add_null();
573
171k
}
574
575
void
576
Parser::add_int(int count)
577
750k
{
578
750k
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
579
750k
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
580
750k
    add(std::move(obj));
581
750k
}
582
583
template <typename T, typename... Args>
584
void
585
Parser::add_scalar(Args&&... args)
586
19.3M
{
587
19.3M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
19.3M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
364
        max_bad_count_ = 1;
592
364
        check_too_many_bad_tokens(); // always throws Error()
593
364
    }
594
19.3M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
19.3M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
19.3M
    add(std::move(obj));
597
19.3M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
586
9.72k
{
587
9.72k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
9.72k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
34
        max_bad_count_ = 1;
592
34
        check_too_many_bad_tokens(); // always throws Error()
593
34
    }
594
9.72k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
9.72k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
9.72k
    add(std::move(obj));
597
9.72k
}
Unexecuted instantiation: void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
81.7k
{
587
81.7k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
81.7k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
44
        max_bad_count_ = 1;
592
44
        check_too_many_bad_tokens(); // always throws Error()
593
44
    }
594
81.7k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
81.7k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
81.7k
    add(std::move(obj));
597
81.7k
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
19.1M
{
587
19.1M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
19.1M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
221
        max_bad_count_ = 1;
592
221
        check_too_many_bad_tokens(); // always throws Error()
593
221
    }
594
19.1M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
19.1M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
19.1M
    add(std::move(obj));
597
19.1M
}
Unexecuted instantiation: void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
77.9k
{
587
77.9k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
77.9k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
54
        max_bad_count_ = 1;
592
54
        check_too_many_bad_tokens(); // always throws Error()
593
54
    }
594
77.9k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
77.9k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
77.9k
    add(std::move(obj));
597
77.9k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
586
20.3k
{
587
20.3k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
20.3k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
11
        max_bad_count_ = 1;
592
11
        check_too_many_bad_tokens(); // always throws Error()
593
11
    }
594
20.3k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
20.3k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
20.3k
    add(std::move(obj));
597
20.3k
}
598
599
template <typename T, typename... Args>
600
QPDFObjectHandle
601
Parser::with_description(Args&&... args)
602
22.0k
{
603
22.0k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
22.0k
    obj->setDescription(context_, description_, start_);
605
22.0k
    return {obj};
606
22.0k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
602
524
{
603
524
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
524
    obj->setDescription(context_, description_, start_);
605
524
    return {obj};
606
524
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
602
10.0k
{
603
10.0k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
10.0k
    obj->setDescription(context_, description_, start_);
605
10.0k
    return {obj};
606
10.0k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
498
{
603
498
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
498
    obj->setDescription(context_, description_, start_);
605
498
    return {obj};
606
498
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
2.25k
{
603
2.25k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
2.25k
    obj->setDescription(context_, description_, start_);
605
2.25k
    return {obj};
606
2.25k
}
Unexecuted instantiation: QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
8.57k
{
603
8.57k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
8.57k
    obj->setDescription(context_, description_, start_);
605
8.57k
    return {obj};
606
8.57k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
602
152
{
603
152
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
152
    obj->setDescription(context_, description_, start_);
605
152
    return {obj};
606
152
}
607
608
void
609
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
610
279k
{
611
279k
    if (obj) {
612
279k
        obj->setDescription(context_, description_, parsed_offset);
613
279k
    }
614
279k
}
615
616
void
617
Parser::fix_missing_keys()
618
3.30k
{
619
3.30k
    std::set<std::string> names;
620
15.2k
    for (auto& obj: frame_->olist) {
621
15.2k
        if (obj.raw_type_code() == ::ot_name) {
622
400
            names.insert(obj.getName());
623
400
        }
624
15.2k
    }
625
3.30k
    int next_fake_key = 1;
626
15.1k
    for (auto const& item: frame_->olist) {
627
15.1k
        while (true) {
628
15.1k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
629
15.1k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
630
15.1k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
631
15.1k
            if (found_fake) {
632
15.1k
                warn(
633
15.1k
                    frame_->offset,
634
15.1k
                    "expected dictionary key but found non-name object; inserting key " + key);
635
15.1k
                frame_->dict[key] = item;
636
15.1k
                break;
637
15.1k
            }
638
15.1k
        }
639
15.1k
    }
640
3.30k
}
641
642
void
643
Parser::check_too_many_bad_tokens()
644
218k
{
645
218k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
646
218k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
647
368
        if (bad_count_) {
648
289
            limits_error(
649
289
                "parser-max-container-size-damaged",
650
289
                "encountered errors while parsing an array or dictionary with more than " +
651
289
                    std::to_string(limit) + " elements; giving up on reading object");
652
289
        }
653
368
        limits_error(
654
368
            "parser-max-container-size",
655
368
            "encountered an array or dictionary with more than " + std::to_string(limit) +
656
368
                " elements during xref recovery; giving up on reading object");
657
368
    }
658
218k
    if (max_bad_count_ && --max_bad_count_ == 0) {
659
1.68k
        limits_error(
660
1.68k
            "parser-max-errors", "too many errors during parsing; treating object as null");
661
1.68k
    }
662
218k
    if (good_count_ > 4) {
663
78.8k
        good_count_ = 0;
664
78.8k
        bad_count_ = 1;
665
78.8k
        return;
666
78.8k
    }
667
139k
    if (++bad_count_ > 5 ||
668
132k
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
669
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670
        // exceeds the remaining number of allowable total errors.
671
6.35k
        warn("too many errors; giving up on reading object");
672
6.35k
        throw Error();
673
6.35k
    }
674
133k
    good_count_ = 0;
675
133k
}
676
677
void
678
Parser::limits_error(std::string const& limit, std::string const& msg)
679
2.15k
{
680
2.15k
    Limits::error();
681
2.15k
    warn("limits error("s + limit + "): " + msg);
682
2.15k
    throw Error();
683
2.15k
}
684
685
void
686
Parser::warn(QPDFExc const& e) const
687
334k
{
688
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
689
    // object. If parsing for some other reason, such as an explicit creation of an object from a
690
    // string, then just throw the exception.
691
334k
    if (context_) {
692
334k
        context_->warn(e);
693
334k
    } else {
694
0
        throw e;
695
0
    }
696
334k
}
697
698
void
699
Parser::warn_duplicate_key()
700
10.6k
{
701
10.6k
    warn(
702
10.6k
        frame_->offset,
703
10.6k
        "dictionary has duplicated key " + frame_->key +
704
10.6k
            "; last occurrence overrides earlier ones");
705
10.6k
    check_too_many_bad_tokens();
706
10.6k
}
707
708
void
709
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710
334k
{
711
334k
    if (stream_id_) {
712
26.6k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
713
26.6k
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
714
26.6k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
715
307k
    } else {
716
307k
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
717
307k
    }
718
334k
}
719
720
void
721
Parser::warn(std::string const& msg) const
722
254k
{
723
254k
    warn(input_.getLastOffset(), msg);
724
254k
}