Coverage Report

Created: 2026-06-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
19.0M
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
19.0M
    {
28
19.0M
        if (objects) {
29
19.0M
            objects->inParse(true);
30
19.0M
        }
31
19.0M
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
4.66M
    {
36
4.66M
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
4.66M
    }
38
39
    ~ParseGuard()
40
19.0M
    {
41
19.0M
        if (objects) {
42
19.0M
            objects->inParse(false);
43
19.0M
        }
44
19.0M
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
QPDFObjectHandle
52
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
53
46.9k
{
54
46.9k
    qpdf::Tokenizer tokenizer;
55
46.9k
    if (auto result = Parser(
56
46.9k
                          input,
57
46.9k
                          make_description(input.getName(), object_description),
58
46.9k
                          object_description,
59
46.9k
                          tokenizer,
60
46.9k
                          nullptr,
61
46.9k
                          context,
62
46.9k
                          false)
63
46.9k
                          .parse()) {
64
46.3k
        return result;
65
46.3k
    }
66
646
    return {QPDFObject::create<QPDF_Null>()};
67
46.9k
}
68
69
std::pair<QPDFObjectHandle, bool>
70
Parser::parse_content(
71
    InputSource& input,
72
    std::shared_ptr<QPDFObject::Description> sp_description,
73
    qpdf::Tokenizer& tokenizer,
74
    QPDF* context)
75
16.9M
{
76
16.9M
    static const std::string content("content"); // GCC12 - make constexpr
77
16.9M
    auto p = Parser(
78
16.9M
        input,
79
16.9M
        std::move(sp_description),
80
16.9M
        content,
81
16.9M
        tokenizer,
82
16.9M
        nullptr,
83
16.9M
        context,
84
16.9M
        true,
85
16.9M
        0,
86
16.9M
        0,
87
16.9M
        context && context->doc().reconstructed_xref());
88
16.9M
    if (auto result = p.parse(true)) {
89
16.9M
        return {result, false};
90
16.9M
    }
91
16.1k
    return {{}, p.empty_};
92
16.9M
}
93
94
QPDFObjectHandle
95
Parser::parse(
96
    InputSource& input,
97
    std::string const& object_description,
98
    QPDFTokenizer& tokenizer,
99
    bool& empty,
100
    QPDFObjectHandle::StringDecrypter* decrypter,
101
    QPDF* context)
102
0
{
103
    // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
104
    // only user of the 'empty' member. When removing this overload also remove 'empty'.
105
0
    auto p = Parser(
106
0
        input,
107
0
        make_description(input.getName(), object_description),
108
0
        object_description,
109
0
        *tokenizer.m,
110
0
        decrypter,
111
0
        context,
112
0
        false);
113
0
    auto result = p.parse();
114
0
    empty = p.empty_;
115
0
    if (result) {
116
0
        return result;
117
0
    }
118
0
    return {QPDFObject::create<QPDF_Null>()};
119
0
}
120
121
QPDFObjectHandle
122
Parser::parse(
123
    InputSource& input,
124
    std::string const& object_description,
125
    qpdf::Tokenizer& tokenizer,
126
    QPDFObjectHandle::StringDecrypter* decrypter,
127
    QPDF& context,
128
    bool sanity_checks)
129
1.61M
{
130
1.61M
    return Parser(
131
1.61M
               input,
132
1.61M
               make_description(input.getName(), object_description),
133
1.61M
               object_description,
134
1.61M
               tokenizer,
135
1.61M
               decrypter,
136
1.61M
               &context,
137
1.61M
               true,
138
1.61M
               0,
139
1.61M
               0,
140
1.61M
               sanity_checks)
141
1.61M
        .parse();
142
1.61M
}
143
144
QPDFObjectHandle
145
Parser::parse(
146
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
147
409k
{
148
409k
    return Parser(
149
409k
               input,
150
409k
               std::make_shared<QPDFObject::Description>(
151
409k
                   QPDFObject::ObjStreamDescr(stream_id, obj_id)),
152
409k
               "",
153
409k
               tokenizer,
154
409k
               nullptr,
155
409k
               &context,
156
409k
               true,
157
409k
               stream_id,
158
409k
               obj_id)
159
409k
        .parse();
160
409k
}
161
162
QPDFObjectHandle
163
Parser::parse(bool content_stream)
164
19.0M
{
165
19.0M
    try {
166
19.0M
        return parse_first(content_stream);
167
19.0M
    } catch (Error&) {
168
107k
        return {};
169
107k
    } catch (QPDFExc& e) {
170
51.0k
        throw e;
171
51.0k
    } catch (std::logic_error& e) {
172
23
        throw e;
173
17.3k
    } catch (std::exception& e) {
174
17.3k
        warn("treating object as null because of error during parsing: "s + e.what());
175
17.3k
        return {};
176
17.3k
    }
177
19.0M
}
178
179
QPDFObjectHandle
180
Parser::parse_first(bool content_stream)
181
19.0M
{
182
    // This method must take care not to resolve any objects. Don't check the type of any object
183
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
184
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
185
    // logic error to be thrown from QPDF::inParse().
186
187
19.0M
    QPDF::Doc::ParseGuard pg(context_);
188
19.0M
    start_ = input_.tell();
189
19.0M
    if (!tokenizer_.nextToken(input_, object_description_)) {
190
26.9k
        warn(tokenizer_.getErrorMessage());
191
26.9k
    }
192
193
19.0M
    switch (tokenizer_.getType()) {
194
14.2k
    case QPDFTokenizer::tt_eof:
195
14.2k
        if (content_stream) {
196
            // In content stream mode, leave object uninitialized to indicate EOF
197
7.64k
            empty_ = true;
198
7.64k
            return {};
199
7.64k
        }
200
6.64k
        warn("unexpected EOF");
201
6.64k
        return {};
202
203
24.7k
    case QPDFTokenizer::tt_bad:
204
24.7k
        return {};
205
206
2.31k
    case QPDFTokenizer::tt_brace_open:
207
4.39k
    case QPDFTokenizer::tt_brace_close:
208
4.39k
        warn("treating unexpected brace token as null");
209
4.39k
        return {};
210
211
6.94k
    case QPDFTokenizer::tt_array_close:
212
6.94k
        warn("treating unexpected array close token as null");
213
6.94k
        return {};
214
215
6.12k
    case QPDFTokenizer::tt_dict_close:
216
6.12k
        warn("unexpected dictionary close token");
217
6.12k
        return {};
218
219
141k
    case QPDFTokenizer::tt_array_open:
220
1.77M
    case QPDFTokenizer::tt_dict_open:
221
1.77M
        stack_.clear();
222
1.77M
        stack_.emplace_back(
223
1.77M
            input_,
224
1.77M
            (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
225
1.77M
        frame_ = &stack_.back();
226
1.77M
        return parse_remainder(content_stream);
227
228
6.90k
    case QPDFTokenizer::tt_bool:
229
6.90k
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");
230
231
2.71k
    case QPDFTokenizer::tt_null:
232
2.71k
        return {QPDFObject::create<QPDF_Null>()};
233
234
400k
    case QPDFTokenizer::tt_integer:
235
400k
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
236
237
125k
    case QPDFTokenizer::tt_real:
238
125k
        return with_description<QPDF_Real>(tokenizer_.getValue());
239
240
133k
    case QPDFTokenizer::tt_name:
241
133k
        return with_description<QPDF_Name>(tokenizer_.getValue());
242
243
16.5M
    case QPDFTokenizer::tt_word:
244
16.5M
        {
245
16.5M
            auto const& value = tokenizer_.getValue();
246
16.5M
            if (content_stream) {
247
16.4M
                return with_description<QPDF_Operator>(value);
248
16.4M
            } else if (value == "endobj") {
249
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
250
                // to allow empty objects, but they have been encountered in actual PDF files and
251
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
252
                // input source's offset.
253
2.85k
                empty_ = true;
254
2.85k
                input_.seek(input_.getLastOffset(), SEEK_SET);
255
2.85k
                if (!content_stream) {
256
2.85k
                    warn("empty object treated as null");
257
2.85k
                }
258
2.85k
                return {};
259
93.8k
            } else {
260
93.8k
                warn("unknown token while reading object; treating as string");
261
93.8k
                return with_description<QPDF_String>(value);
262
93.8k
            }
263
16.5M
        }
264
265
11.4k
    case QPDFTokenizer::tt_string:
266
11.4k
        if (decrypter_) {
267
1.60k
            std::string s{tokenizer_.getValue()};
268
1.60k
            decrypter_->decryptString(s);
269
1.60k
            return with_description<QPDF_String>(s);
270
9.81k
        } else {
271
9.81k
            return with_description<QPDF_String>(tokenizer_.getValue());
272
9.81k
        }
273
274
0
    default:
275
0
        warn("treating unknown token type as null while reading object");
276
0
        return {};
277
19.0M
    }
278
19.0M
}
279
280
QPDFObjectHandle
281
Parser::parse_remainder(bool content_stream)
282
1.77M
{
283
    // This method must take care not to resolve any objects. Don't check the type of any object
284
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
285
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
286
    // logic error to be thrown from QPDF::inParse().
287
288
1.77M
    bad_count_ = 0;
289
1.77M
    bool b_contents = false;
290
291
119M
    while (true) {
292
119M
        if (!tokenizer_.nextToken(input_, object_description_)) {
293
450k
            warn(tokenizer_.getErrorMessage());
294
450k
        }
295
119M
        ++good_count_; // optimistically
296
297
119M
        if (int_count_ != 0) {
298
            // Special handling of indirect references. Treat integer tokens as part of an indirect
299
            // reference until proven otherwise.
300
38.3M
            if (tokenizer_.getType() == QPDFTokenizer::tt_integer) {
301
30.7M
                if (++int_count_ > 2) {
302
                    // Process the oldest buffered integer.
303
25.2M
                    add_int(int_count_);
304
25.2M
                }
305
30.7M
                last_offset_buffer_[int_count_ % 2] = input_.getLastOffset();
306
30.7M
                int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
307
30.7M
                continue;
308
309
30.7M
            } else if (
310
7.66M
                int_count_ >= 2 && tokenizer_.getType() == QPDFTokenizer::tt_word &&
311
4.94M
                tokenizer_.getValue() == "R") {
312
4.69M
                if (!context_) {
313
0
                    throw std::logic_error(
314
0
                        "Parser::parse called without context on an object with indirect "
315
0
                        "references");
316
0
                }
317
4.69M
                auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
318
4.69M
                auto gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
319
4.69M
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
320
4.66M
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
321
4.66M
                } else {
322
32.7k
                    add_bad_null(
323
32.7k
                        "treating bad indirect reference (" + std::to_string(id) + " " +
324
32.7k
                        std::to_string(gen) + " R) as null");
325
32.7k
                }
326
4.69M
                int_count_ = 0;
327
4.69M
                continue;
328
329
4.69M
            } else if (int_count_ > 0) {
330
                // Process the buffered integers before processing the current token.
331
2.96M
                if (int_count_ > 1) {
332
793k
                    add_int(int_count_ - 1);
333
793k
                }
334
2.96M
                add_int(int_count_);
335
2.96M
                int_count_ = 0;
336
2.96M
            }
337
38.3M
        }
338
339
83.9M
        switch (tokenizer_.getType()) {
340
78.8k
        case QPDFTokenizer::tt_eof:
341
78.8k
            warn("parse error while reading object");
342
78.8k
            if (content_stream) {
343
                // In content stream mode, leave object uninitialized to indicate EOF
344
339
                return {};
345
339
            }
346
78.4k
            warn("unexpected EOF");
347
78.4k
            return {};
348
349
370k
        case QPDFTokenizer::tt_bad:
350
370k
            check_too_many_bad_tokens();
351
370k
            add_null();
352
370k
            continue;
353
354
29.1k
        case QPDFTokenizer::tt_brace_open:
355
52.0k
        case QPDFTokenizer::tt_brace_close:
356
52.0k
            add_bad_null("treating unexpected brace token as null");
357
52.0k
            continue;
358
359
1.32M
        case QPDFTokenizer::tt_array_close:
360
1.32M
            if (frame_->state == st_array) {
361
1.29M
                auto object = frame_->null_count > 100
362
1.29M
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
363
1.29M
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
364
1.29M
                set_description(object, frame_->offset - 1);
365
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
366
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
367
                // array start delimiter. getLastOffset points to the array end token and therefore
368
                // can't be used here.
369
1.29M
                if (stack_.size() <= 1) {
370
47.8k
                    return object;
371
47.8k
                }
372
1.24M
                stack_.pop_back();
373
1.24M
                frame_ = &stack_.back();
374
1.24M
                add(std::move(object));
375
1.24M
            } else {
376
27.4k
                if (sanity_checks_) {
377
                    // During sanity checks, assume nesting of containers is corrupt and object is
378
                    // unusable.
379
19.0k
                    warn("unexpected array close token; giving up on reading object");
380
19.0k
                    return {};
381
19.0k
                }
382
8.41k
                add_bad_null("treating unexpected array close token as null");
383
8.41k
            }
384
1.25M
            continue;
385
386
2.21M
        case QPDFTokenizer::tt_dict_close:
387
2.21M
            if (frame_->state <= st_dictionary_value) {
388
                // Attempt to recover more or less gracefully from invalid dictionaries.
389
2.18M
                auto& dict = frame_->dict;
390
391
2.18M
                if (frame_->state == st_dictionary_value) {
392
109k
                    warn(
393
109k
                        frame_->offset,
394
109k
                        "dictionary ended prematurely; using null as value for last key");
395
109k
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
396
109k
                }
397
2.18M
                if (!frame_->olist.empty()) {
398
523k
                    if (sanity_checks_) {
399
489k
                        warn(
400
489k
                            frame_->offset,
401
489k
                            "expected dictionary keys but found non-name objects; ignoring");
402
489k
                    } else {
403
33.8k
                        fix_missing_keys();
404
33.8k
                    }
405
523k
                }
406
407
2.18M
                if (!frame_->contents_string.empty() && dict.contains("/Type") &&
408
1.25k
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
409
398
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
410
340
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
411
340
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
412
340
                }
413
2.18M
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
414
2.18M
                set_description(object, frame_->offset - 2);
415
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
416
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
417
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
418
                // and therefore can't be used here.
419
2.18M
                if (stack_.size() <= 1) {
420
1.42M
                    return object;
421
1.42M
                }
422
763k
                stack_.pop_back();
423
763k
                frame_ = &stack_.back();
424
763k
                add(std::move(object));
425
763k
            } else {
426
22.3k
                if (sanity_checks_) {
427
                    // During sanity checks, assume nesting of containers is corrupt and object is
428
                    // unusable.
429
15.6k
                    warn("unexpected dictionary close token; giving up on reading object");
430
15.6k
                    return {};
431
15.6k
                }
432
6.67k
                add_bad_null("unexpected dictionary close token");
433
6.67k
            }
434
770k
            continue;
435
436
2.21M
        case QPDFTokenizer::tt_array_open:
437
3.36M
        case QPDFTokenizer::tt_dict_open:
438
3.36M
            if (stack_.size() > max_nesting) {
439
1.00k
                limits_error(
440
1.00k
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
441
1.00k
            }
442
3.36M
            b_contents = false;
443
3.36M
            stack_.emplace_back(
444
3.36M
                input_,
445
3.36M
                (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array
446
3.36M
                                                                       : st_dictionary_key);
447
3.36M
            frame_ = &stack_.back();
448
3.36M
            continue;
449
450
119k
        case QPDFTokenizer::tt_bool:
451
119k
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
452
119k
            continue;
453
454
936k
        case QPDFTokenizer::tt_null:
455
936k
            add_null();
456
936k
            continue;
457
458
8.13M
        case QPDFTokenizer::tt_integer:
459
8.13M
            if (!content_stream) {
460
                // Buffer token in case it is part of an indirect reference.
461
7.66M
                last_offset_buffer_[1] = input_.getLastOffset();
462
7.66M
                int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
463
7.66M
                int_count_ = 1;
464
7.66M
            } else {
465
471k
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
466
471k
            }
467
8.13M
            continue;
468
469
1.06M
        case QPDFTokenizer::tt_real:
470
1.06M
            add_scalar<QPDF_Real>(tokenizer_.getValue());
471
1.06M
            continue;
472
473
61.3M
        case QPDFTokenizer::tt_name:
474
61.3M
            if (frame_->state == st_dictionary_key) {
475
9.07M
                frame_->key = tokenizer_.getValue();
476
9.07M
                frame_->state = st_dictionary_value;
477
9.07M
                b_contents = decrypter_ && frame_->key == "/Contents";
478
9.07M
                continue;
479
52.2M
            } else {
480
52.2M
                add_scalar<QPDF_Name>(tokenizer_.getValue());
481
52.2M
            }
482
52.2M
            continue;
483
484
52.2M
        case QPDFTokenizer::tt_word:
485
3.20M
            if (content_stream) {
486
867k
                add_scalar<QPDF_Operator>(tokenizer_.getValue());
487
867k
                continue;
488
867k
            }
489
490
2.33M
            if (sanity_checks_) {
491
2.24M
                if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") {
492
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
493
                    // we are parsing past the end of the object.
494
29.5k
                    warn(
495
29.5k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
496
29.5k
                        "reading object");
497
29.5k
                    return {};
498
29.5k
                }
499
500
2.21M
                add_bad_null("unknown token while reading object; treating as null");
501
2.21M
                continue;
502
2.24M
            }
503
504
92.2k
            warn("unknown token while reading object; treating as string");
505
92.2k
            check_too_many_bad_tokens();
506
92.2k
            add_scalar<QPDF_String>(tokenizer_.getValue());
507
508
92.2k
            continue;
509
510
1.76M
        case QPDFTokenizer::tt_string:
511
1.76M
            {
512
1.76M
                auto const& val = tokenizer_.getValue();
513
1.76M
                if (decrypter_) {
514
277k
                    if (b_contents) {
515
5.86k
                        frame_->contents_string = val;
516
5.86k
                        frame_->contents_offset = input_.getLastOffset();
517
5.86k
                        b_contents = false;
518
5.86k
                    }
519
277k
                    std::string s{val};
520
277k
                    decrypter_->decryptString(s);
521
277k
                    add_scalar<QPDF_String>(s);
522
1.48M
                } else {
523
1.48M
                    add_scalar<QPDF_String>(val);
524
1.48M
                }
525
1.76M
            }
526
1.76M
            continue;
527
528
0
        default:
529
0
            add_bad_null("treating unknown token type as null while reading object");
530
83.9M
        }
531
83.9M
    }
532
1.77M
}
533
534
void
535
Parser::add(std::shared_ptr<QPDFObject>&& obj)
536
92.2M
{
537
92.2M
    if (frame_->state != st_dictionary_value) {
538
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
539
        // processing once the tt_dict_close token has been found.
540
83.7M
        frame_->olist.emplace_back(std::move(obj));
541
83.7M
    } else {
542
8.48M
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
543
89.6k
            warn_duplicate_key();
544
89.6k
        }
545
8.48M
        frame_->state = st_dictionary_key;
546
8.48M
    }
547
92.2M
}
548
549
void
550
Parser::add_null()
551
3.49M
{
552
3.49M
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
553
554
3.49M
    if (frame_->state != st_dictionary_value) {
555
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
556
        // processing once the tt_dict_close token has been found.
557
3.13M
        frame_->olist.emplace_back(null_obj);
558
3.13M
    } else {
559
360k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
560
16.8k
            warn_duplicate_key();
561
16.8k
        }
562
360k
        frame_->state = st_dictionary_key;
563
360k
    }
564
3.49M
    ++frame_->null_count;
565
3.49M
}
566
567
void
568
Parser::add_bad_null(std::string const& msg)
569
2.30M
{
570
2.30M
    warn(msg);
571
2.30M
    check_too_many_bad_tokens();
572
2.30M
    add_null();
573
2.30M
}
574
575
void
576
Parser::add_int(int count)
577
28.9M
{
578
28.9M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
579
28.9M
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
580
28.9M
    add(std::move(obj));
581
28.9M
}
582
583
template <typename T, typename... Args>
584
void
585
Parser::add_scalar(Args&&... args)
586
56.6M
{
587
56.6M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
56.6M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
4.53k
        max_bad_count_ = 1;
592
4.53k
        check_too_many_bad_tokens(); // always throws Error()
593
4.53k
    }
594
56.6M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
56.6M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
56.6M
    add(std::move(obj));
597
56.6M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
586
119k
{
587
119k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
119k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
494
        max_bad_count_ = 1;
592
494
        check_too_many_bad_tokens(); // always throws Error()
593
494
    }
594
119k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
119k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
119k
    add(std::move(obj));
597
119k
}
void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
586
471k
{
587
471k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
471k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
41
        max_bad_count_ = 1;
592
41
        check_too_many_bad_tokens(); // always throws Error()
593
41
    }
594
471k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
471k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
471k
    add(std::move(obj));
597
471k
}
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
1.06M
{
587
1.06M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
1.06M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
921
        max_bad_count_ = 1;
592
921
        check_too_many_bad_tokens(); // always throws Error()
593
921
    }
594
1.06M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
1.06M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
1.06M
    add(std::move(obj));
597
1.06M
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
52.2M
{
587
52.2M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
52.2M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
2.11k
        max_bad_count_ = 1;
592
2.11k
        check_too_many_bad_tokens(); // always throws Error()
593
2.11k
    }
594
52.2M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
52.2M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
52.2M
    add(std::move(obj));
597
52.2M
}
void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
867k
{
587
867k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
867k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
134
        max_bad_count_ = 1;
592
134
        check_too_many_bad_tokens(); // always throws Error()
593
134
    }
594
867k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
867k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
867k
    add(std::move(obj));
597
867k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
586
1.57M
{
587
1.57M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
1.57M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
721
        max_bad_count_ = 1;
592
721
        check_too_many_bad_tokens(); // always throws Error()
593
721
    }
594
1.57M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
1.57M
    obj->setDescription(context_, description_, input_.getLastOffset());
596
1.57M
    add(std::move(obj));
597
1.57M
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
586
277k
{
587
277k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
588
277k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
589
        // Stop adding scalars. We are going to abort when the close token or a bad token is
590
        // encountered.
591
111
        max_bad_count_ = 1;
592
111
        check_too_many_bad_tokens(); // always throws Error()
593
111
    }
594
277k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
595
277k
    obj->setDescription(context_, description_, input_.getLastOffset());
596
277k
    add(std::move(obj));
597
277k
}
598
599
template <typename T, typename... Args>
600
QPDFObjectHandle
601
Parser::with_description(Args&&... args)
602
17.2M
{
603
17.2M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
17.2M
    obj->setDescription(context_, description_, start_);
605
17.2M
    return {obj};
606
17.2M
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
602
6.90k
{
603
6.90k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
6.90k
    obj->setDescription(context_, description_, start_);
605
6.90k
    return {obj};
606
6.90k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
602
393k
{
603
393k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
393k
    obj->setDescription(context_, description_, start_);
605
393k
    return {obj};
606
393k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
125k
{
603
125k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
125k
    obj->setDescription(context_, description_, start_);
605
125k
    return {obj};
606
125k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
133k
{
603
133k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
133k
    obj->setDescription(context_, description_, start_);
605
133k
    return {obj};
606
133k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
16.4M
{
603
16.4M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
16.4M
    obj->setDescription(context_, description_, start_);
605
16.4M
    return {obj};
606
16.4M
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
602
99.6k
{
603
99.6k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
99.6k
    obj->setDescription(context_, description_, start_);
605
99.6k
    return {obj};
606
99.6k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
602
1.59k
{
603
1.59k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
604
1.59k
    obj->setDescription(context_, description_, start_);
605
1.59k
    return {obj};
606
1.59k
}
607
608
void
609
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
610
3.47M
{
611
3.47M
    if (obj) {
612
3.47M
        obj->setDescription(context_, description_, parsed_offset);
613
3.47M
    }
614
3.47M
}
615
616
void
617
Parser::fix_missing_keys()
618
33.8k
{
619
33.8k
    std::set<std::string> names;
620
140k
    for (auto& obj: frame_->olist) {
621
140k
        if (obj.raw_type_code() == ::ot_name) {
622
6.35k
            names.insert(obj.getName());
623
6.35k
        }
624
140k
    }
625
33.8k
    int next_fake_key = 1;
626
134k
    for (auto const& item: frame_->olist) {
627
135k
        while (true) {
628
135k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
629
135k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
630
135k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
631
135k
            if (found_fake) {
632
134k
                warn(
633
134k
                    frame_->offset,
634
134k
                    "expected dictionary key but found non-name object; inserting key " + key);
635
134k
                frame_->dict[key] = item;
636
134k
                break;
637
134k
            }
638
135k
        }
639
134k
    }
640
33.8k
}
641
642
void
643
Parser::check_too_many_bad_tokens()
644
2.85M
{
645
2.85M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
646
2.85M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
647
4.63k
        if (bad_count_) {
648
3.68k
            limits_error(
649
3.68k
                "parser-max-container-size-damaged",
650
3.68k
                "encountered errors while parsing an array or dictionary with more than " +
651
3.68k
                    std::to_string(limit) + " elements; giving up on reading object");
652
3.68k
        }
653
4.63k
        limits_error(
654
4.63k
            "parser-max-container-size",
655
4.63k
            "encountered an array or dictionary with more than " + std::to_string(limit) +
656
4.63k
                " elements during xref recovery; giving up on reading object");
657
4.63k
    }
658
2.85M
    if (max_bad_count_ && --max_bad_count_ == 0) {
659
17.7k
        limits_error(
660
17.7k
            "parser-max-errors", "too many errors during parsing; treating object as null");
661
17.7k
    }
662
2.85M
    if (good_count_ > 4) {
663
1.00M
        good_count_ = 0;
664
1.00M
        bad_count_ = 1;
665
1.00M
        return;
666
1.00M
    }
667
1.85M
    if (++bad_count_ > 5 ||
668
1.76M
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
669
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670
        // exceeds the remaining number of allowable total errors.
671
84.9k
        warn("too many errors; giving up on reading object");
672
84.9k
        throw Error();
673
84.9k
    }
674
1.76M
    good_count_ = 0;
675
1.76M
}
676
677
void
678
Parser::limits_error(std::string const& limit, std::string const& msg)
679
23.4k
{
680
23.4k
    Limits::error();
681
23.4k
    warn("limits error("s + limit + "): " + msg);
682
23.4k
    throw Error();
683
23.4k
}
684
685
void
686
Parser::warn(QPDFExc const& e) const
687
4.18M
{
688
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
689
    // object. If parsing for some other reason, such as an explicit creation of an object from a
690
    // string, then just throw the exception.
691
4.18M
    if (context_) {
692
4.18M
        context_->warn(e);
693
4.18M
    } else {
694
646
        throw e;
695
646
    }
696
4.18M
}
697
698
void
699
Parser::warn_duplicate_key()
700
106k
{
701
106k
    warn(
702
106k
        frame_->offset,
703
106k
        "dictionary has duplicated key " + frame_->key +
704
106k
            "; last occurrence overrides earlier ones");
705
106k
    check_too_many_bad_tokens();
706
106k
}
707
708
void
709
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710
4.18M
{
711
4.18M
    if (stream_id_) {
712
267k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
713
267k
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
714
267k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
715
3.91M
    } else {
716
3.91M
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
717
3.91M
    }
718
4.18M
}
719
720
void
721
Parser::warn(std::string const& msg) const
722
3.34M
{
723
3.34M
    warn(input_.getLastOffset(), msg);
724
3.34M
}