Coverage Report

Created: 2026-03-07 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
// File-local reference bound to the value returned by global::Limits::parser_max_nesting().
// parse_remainder() compares the container stack depth against it before opening another
// array or dictionary.
// NOTE(review): presumably bound by reference so that later changes to the global limit are
// observed here -- confirm against global::Limits.
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
3.73M
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
3.73M
    {
28
3.73M
        if (objects) {
29
3.69M
            objects->inParse(true);
30
3.69M
        }
31
3.73M
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
3.94M
    {
36
3.94M
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
3.94M
    }
38
39
    ~ParseGuard()
40
3.73M
    {
41
3.73M
        if (objects) {
42
3.69M
            objects->inParse(false);
43
3.69M
        }
44
3.73M
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
using Parser = qpdf::impl::Parser;
50
51
// Parse a single object from input using a freshly constructed tokenizer and no string
// decrypter. context may be null. If the underlying parse yields an uninitialized handle, a
// null object is returned in its place.
QPDFObjectHandle
Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
{
    qpdf::Tokenizer tokenizer;
    auto parser = Parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        tokenizer,
        nullptr,
        context,
        false);

    auto result = parser.parse();
    if (!result) {
        // Nothing usable was parsed; substitute a null object.
        return {QPDFObject::create<QPDF_Null>()};
    }
    return result;
}
68
69
// Parse the next object from a content stream. The final constructor argument is true only when
// context is non-null and its doc() reports a reconstructed xref table.
//
// Returns the parsed object. An uninitialized handle is returned when the parser reported
// `empty_` (EOF in content stream mode); any other failure is turned into a null object.
QPDFObjectHandle
Parser::parse_content(
    InputSource& input,
    std::shared_ptr<QPDFObject::Description> sp_description,
    qpdf::Tokenizer& tokenizer,
    QPDF* context)
{
    static const std::string content("content"); // GCC12 - make constexpr

    bool const reconstructed = context && context->doc().reconstructed_xref();
    auto parser = Parser(
        input, std::move(sp_description), content, tokenizer, nullptr, context, true, 0, 0, reconstructed);

    auto result = parser.parse(true);
    if (!result && !parser.empty_) {
        // The parse failed for a reason other than EOF.
        return {QPDFObject::create<QPDF_Null>()};
    }
    // In content stream mode, leave object uninitialized to indicate EOF
    return result;
}
95
96
// ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
// only user of the 'empty' member. When removing this overload also remove 'empty'.
//
// Parses one object with the caller's tokenizer and optional decrypter. `empty` receives the
// parser's `empty_` flag. An uninitialized parse result is replaced by a null object.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF* context)
{
    auto parser = Parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        *tokenizer.m,
        decrypter,
        context,
        false);

    auto result = parser.parse();
    empty = parser.empty_;
    if (!result) {
        return {QPDFObject::create<QPDF_Null>()};
    }
    return result;
}
122
123
// Parse one object on behalf of `context`, optionally decrypting strings and optionally with
// sanity checks enabled. Unlike the QPDF*-taking overloads, the result of the underlying parse
// is returned unchanged, so it may be an uninitialized handle.
QPDFObjectHandle
Parser::parse(
    InputSource& input,
    std::string const& object_description,
    qpdf::Tokenizer& tokenizer,
    QPDFObjectHandle::StringDecrypter* decrypter,
    QPDF& context,
    bool sanity_checks)
{
    auto parser = Parser(
        input,
        make_description(input.getName(), object_description),
        object_description,
        tokenizer,
        decrypter,
        &context,
        true,
        0,
        0,
        sanity_checks);
    return parser.parse();
}
145
146
// Parse object `obj_id` out of the buffer of object stream `stream_id`. The description is built
// from the (stream_id, obj_id) pair, the object description string is empty and no decrypter
// is used. The result of the underlying parse is returned unchanged.
QPDFObjectHandle
Parser::parse(
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
{
    auto description =
        std::make_shared<QPDFObject::Description>(QPDFObject::ObjStreamDescr(stream_id, obj_id));
    auto parser =
        Parser(input, description, "", tokenizer, nullptr, &context, true, stream_id, obj_id);
    return parser.parse();
}
163
164
// Run parse_first() and translate exceptions:
//   - Error: swallowed; an uninitialized handle is returned.
//   - QPDFExc and std::logic_error: propagated to the caller.
//   - any other std::exception: reported through warn() and an uninitialized handle is returned.
QPDFObjectHandle
Parser::parse(bool content_stream)
{
    try {
        return parse_first(content_stream);
    } catch (Error&) {
        return {};
    } catch (QPDFExc&) {
        // Rethrow the exception in flight. `throw e;` would copy it using the static handler
        // type and slice off any derived part.
        throw;
    } catch (std::logic_error&) {
        // Same as above: keep the dynamic type (e.g. std::out_of_range) intact.
        throw;
    } catch (std::exception& e) {
        warn("treating object as null because of error during parsing: "s + e.what());
        return {};
    }
}
180
181
// Read the first token of an object and build the result. Scalars are created directly with
// their description set to the starting offset; an array or dictionary open token starts a new
// container stack and hands over to parse_remainder(). Unexpected or bad tokens produce an
// uninitialized handle, in most cases after a warning.
QPDFObjectHandle
Parser::parse_first(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    QPDF::Doc::ParseGuard pg(context_);
    start_ = input_.tell();
    if (!tokenizer_.nextToken(input_, object_description_)) {
        warn(tokenizer_.getErrorMessage());
    }

    auto const type = tokenizer_.getType();
    switch (type) {
    case QPDFTokenizer::tt_eof:
        if (content_stream) {
            // In content stream mode, leave object uninitialized to indicate EOF
            empty_ = true;
            return {};
        }
        warn("unexpected EOF");
        return {};

    case QPDFTokenizer::tt_bad:
        return {};

    case QPDFTokenizer::tt_brace_open:
    case QPDFTokenizer::tt_brace_close:
        warn("treating unexpected brace token as null");
        return {};

    case QPDFTokenizer::tt_array_close:
        warn("treating unexpected array close token as null");
        return {};

    case QPDFTokenizer::tt_dict_close:
        warn("unexpected dictionary close token");
        return {};

    case QPDFTokenizer::tt_array_open:
    case QPDFTokenizer::tt_dict_open:
        // Start a fresh container stack with a single frame for the outermost container.
        stack_.clear();
        stack_.emplace_back(
            input_, (type == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
        frame_ = &stack_.back();
        return parse_remainder(content_stream);

    case QPDFTokenizer::tt_bool:
        return with_description<QPDF_Bool>(tokenizer_.getValue() == "true");

    case QPDFTokenizer::tt_null:
        return {QPDFObject::create<QPDF_Null>()};

    case QPDFTokenizer::tt_integer:
        return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));

    case QPDFTokenizer::tt_real:
        return with_description<QPDF_Real>(tokenizer_.getValue());

    case QPDFTokenizer::tt_name:
        return with_description<QPDF_Name>(tokenizer_.getValue());

    case QPDFTokenizer::tt_word:
        {
            auto const& value = tokenizer_.getValue();
            if (content_stream) {
                return with_description<QPDF_Operator>(value);
            }
            // From here on content_stream is known to be false.
            if (value == "endobj") {
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
                // to allow empty objects, but they have been encountered in actual PDF files and
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
                // input source's offset.
                empty_ = true;
                input_.seek(input_.getLastOffset(), SEEK_SET);
                warn("empty object treated as null");
                return {};
            }
            warn("unknown token while reading object; treating as string");
            return with_description<QPDF_String>(value);
        }

    case QPDFTokenizer::tt_string:
        if (decrypter_) {
            // Decrypt a private copy; the tokenizer's value is left untouched.
            std::string s{tokenizer_.getValue()};
            decrypter_->decryptString(s);
            return with_description<QPDF_String>(s);
        } else {
            return with_description<QPDF_String>(tokenizer_.getValue());
        }

    default:
        warn("treating unknown token type as null while reading object");
        return {};
    }
}
281
282
// Parse the rest of a container after parse_first() has consumed its opening token and pushed
// the first stack frame. Tokens are consumed until the outermost container is closed (the
// finished array or dictionary is returned), or until EOF or an unrecoverable condition is hit
// (an uninitialized handle is returned).
QPDFObjectHandle
Parser::parse_remainder(bool content_stream)
{
    // This method must take care not to resolve any objects. Don't check the type of any object
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
    // logic error to be thrown from QPDF::inParse().

    bad_count_ = 0;
    // True between reading a "/Contents" key (with a decrypter present) and its string value.
    bool expect_contents = false;

    for (;;) {
        if (!tokenizer_.nextToken(input_, object_description_)) {
            warn(tokenizer_.getErrorMessage());
        }
        ++good_count_; // optimistically

        auto const type = tokenizer_.getType();

        if (int_count_ != 0) {
            // Special handling of indirect references. Treat integer tokens as part of an indirect
            // reference until proven otherwise.
            if (type == QPDFTokenizer::tt_integer) {
                if (++int_count_ > 2) {
                    // Process the oldest buffered integer.
                    add_int(int_count_);
                }
                last_offset_buffer_[int_count_ % 2] = input_.getLastOffset();
                int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                continue;
            }

            if (int_count_ >= 2 && type == QPDFTokenizer::tt_word &&
                tokenizer_.getValue() == "R") {
                if (!context_) {
                    throw std::logic_error(
                        "Parser::parse called without context on an object with indirect "
                        "references");
                }
                // The two most recently buffered integers are the object id and generation.
                auto const id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
                auto const gen = QIntC::to_int(int_buffer_[(int_count_) % 2]);
                if (id >= 1 && gen >= 0 && gen < 65535) {
                    add(ParseGuard::getObject(context_, id, gen, parse_pdf_));
                } else {
                    add_bad_null(
                        "treating bad indirect reference (" + std::to_string(id) + " " +
                        std::to_string(gen) + " R) as null");
                }
                int_count_ = 0;
                continue;
            }

            if (int_count_ > 0) {
                // Process the buffered integers before processing the current token.
                if (int_count_ > 1) {
                    add_int(int_count_ - 1);
                }
                add_int(int_count_);
                int_count_ = 0;
            }
        }

        switch (type) {
        case QPDFTokenizer::tt_eof:
            warn("parse error while reading object");
            if (!content_stream) {
                warn("unexpected EOF");
            }
            // In content stream mode, leave object uninitialized to indicate EOF
            return {};

        case QPDFTokenizer::tt_bad:
            check_too_many_bad_tokens();
            add_null();
            continue;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            add_bad_null("treating unexpected brace token as null");
            continue;

        case QPDFTokenizer::tt_array_close:
            if (frame_->state == st_array) {
                auto object = frame_->null_count > 100
                    ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true)
                    : QPDFObject::create<QPDF_Array>(std::move(frame_->olist));
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
                // array start delimiter. getLastOffset points to the array end token and therefore
                // can't be used here.
                set_description(object, frame_->offset - 1);
                if (stack_.size() <= 1) {
                    // The outermost container is complete.
                    return object;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(object));
            } else {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected array close token; giving up on reading object");
                    return {};
                }
                add_bad_null("treating unexpected array close token as null");
            }
            continue;

        case QPDFTokenizer::tt_dict_close:
            if (frame_->state <= st_dictionary_value) {
                // Attempt to recover more or less gracefully from invalid dictionaries.
                auto& dict = frame_->dict;

                if (frame_->state == st_dictionary_value) {
                    warn(
                        frame_->offset,
                        "dictionary ended prematurely; using null as value for last key");
                    dict[frame_->key] = QPDFObject::create<QPDF_Null>();
                }
                if (!frame_->olist.empty()) {
                    if (sanity_checks_) {
                        warn(
                            frame_->offset,
                            "expected dictionary keys but found non-name objects; ignoring");
                    } else {
                        fix_missing_keys();
                    }
                }

                // For a /Sig dictionary with /ByteRange and a string /Contents, replace
                // /Contents with the string captured before decryption.
                if (!frame_->contents_string.empty() && dict.contains("/Type") &&
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
                    dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string);
                    dict["/Contents"].setParsedOffset(frame_->contents_offset);
                }
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
                // and therefore can't be used here.
                set_description(object, frame_->offset - 2);
                if (stack_.size() <= 1) {
                    // The outermost container is complete.
                    return object;
                }
                stack_.pop_back();
                frame_ = &stack_.back();
                add(std::move(object));
            } else {
                if (sanity_checks_) {
                    // During sanity checks, assume nesting of containers is corrupt and object is
                    // unusable.
                    warn("unexpected dictionary close token; giving up on reading object");
                    return {};
                }
                add_bad_null("unexpected dictionary close token");
            }
            continue;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (stack_.size() > max_nesting) {
                limits_error(
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
            }
            expect_contents = false;
            stack_.emplace_back(
                input_, (type == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
            frame_ = &stack_.back();
            continue;

        case QPDFTokenizer::tt_bool:
            add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true");
            continue;

        case QPDFTokenizer::tt_null:
            add_null();
            continue;

        case QPDFTokenizer::tt_integer:
            if (content_stream) {
                add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str()));
            } else {
                // Buffer token in case it is part of an indirect reference.
                last_offset_buffer_[1] = input_.getLastOffset();
                int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str());
                int_count_ = 1;
            }
            continue;

        case QPDFTokenizer::tt_real:
            add_scalar<QPDF_Real>(tokenizer_.getValue());
            continue;

        case QPDFTokenizer::tt_name:
            if (frame_->state == st_dictionary_key) {
                // A name in key position becomes the pending dictionary key.
                frame_->key = tokenizer_.getValue();
                frame_->state = st_dictionary_value;
                expect_contents = decrypter_ && frame_->key == "/Contents";
            } else {
                add_scalar<QPDF_Name>(tokenizer_.getValue());
            }
            continue;

        case QPDFTokenizer::tt_word:
            {
                auto const& word = tokenizer_.getValue();
                if (content_stream) {
                    add_scalar<QPDF_Operator>(word);
                    continue;
                }

                if (sanity_checks_) {
                    if (word == "endobj" || word == "endstream") {
                        // During sanity checks, assume an unexpected endobj or endstream indicates
                        // that we are parsing past the end of the object.
                        warn(
                            "unexpected 'endobj' or 'endstream' while reading object; giving up on "
                            "reading object");
                        return {};
                    }

                    add_bad_null("unknown token while reading object; treating as null");
                    continue;
                }

                warn("unknown token while reading object; treating as string");
                check_too_many_bad_tokens();
                add_scalar<QPDF_String>(word);
            }
            continue;

        case QPDFTokenizer::tt_string:
            {
                auto const& val = tokenizer_.getValue();
                if (!decrypter_) {
                    add_scalar<QPDF_String>(val);
                    continue;
                }
                if (expect_contents) {
                    // Remember the undecrypted /Contents string and where it was read.
                    frame_->contents_string = val;
                    frame_->contents_offset = input_.getLastOffset();
                    expect_contents = false;
                }
                std::string s{val};
                decrypter_->decryptString(s);
                add_scalar<QPDF_String>(s);
            }
            continue;

        default:
            add_bad_null("treating unknown token type as null while reading object");
            continue;
        }
    }
}
535
536
void
537
Parser::add(std::shared_ptr<QPDFObject>&& obj)
538
72.1M
{
539
72.1M
    if (frame_->state != st_dictionary_value) {
540
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
541
        // processing once the tt_dict_close token has been found.
542
64.0M
        frame_->olist.emplace_back(std::move(obj));
543
64.0M
    } else {
544
8.09M
        if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) {
545
547k
            warn_duplicate_key();
546
547k
        }
547
8.09M
        frame_->state = st_dictionary_key;
548
8.09M
    }
549
72.1M
}
550
551
void
552
Parser::add_null()
553
3.24M
{
554
3.24M
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
555
556
3.24M
    if (frame_->state != st_dictionary_value) {
557
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
558
        // processing once the tt_dict_close token has been found.
559
2.87M
        frame_->olist.emplace_back(null_obj);
560
2.87M
    } else {
561
367k
        if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) {
562
44.3k
            warn_duplicate_key();
563
44.3k
        }
564
367k
        frame_->state = st_dictionary_key;
565
367k
    }
566
3.24M
    ++frame_->null_count;
567
3.24M
}
568
569
void
570
Parser::add_bad_null(std::string const& msg)
571
2.10M
{
572
2.10M
    warn(msg);
573
2.10M
    check_too_many_bad_tokens();
574
2.10M
    add_null();
575
2.10M
}
576
577
void
578
Parser::add_int(int count)
579
23.8M
{
580
23.8M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
581
23.8M
    obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
582
23.8M
    add(std::move(obj));
583
23.8M
}
584
585
template <typename T, typename... Args>
586
void
587
Parser::add_scalar(Args&&... args)
588
42.5M
{
589
42.5M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
42.5M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
4.06k
        max_bad_count_ = 1;
594
4.06k
        check_too_many_bad_tokens(); // always throws Error()
595
4.06k
    }
596
42.5M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
42.5M
    obj->setDescription(context_, description_, input_.getLastOffset());
598
42.5M
    add(std::move(obj));
599
42.5M
}
void qpdf::impl::Parser::add_scalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
588
115k
{
589
115k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
115k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
437
        max_bad_count_ = 1;
594
437
        check_too_many_bad_tokens(); // always throws Error()
595
437
    }
596
115k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
115k
    obj->setDescription(context_, description_, input_.getLastOffset());
598
115k
    add(std::move(obj));
599
115k
}
void qpdf::impl::Parser::add_scalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
588
612k
{
589
612k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
612k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
51
        max_bad_count_ = 1;
594
51
        check_too_many_bad_tokens(); // always throws Error()
595
51
    }
596
612k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
612k
    obj->setDescription(context_, description_, input_.getLastOffset());
598
612k
    add(std::move(obj));
599
612k
}
void qpdf::impl::Parser::add_scalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
866k
{
589
866k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
866k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
726
        max_bad_count_ = 1;
594
726
        check_too_many_bad_tokens(); // always throws Error()
595
726
    }
596
866k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
866k
    obj->setDescription(context_, description_, input_.getLastOffset());
598
866k
    add(std::move(obj));
599
866k
}
void qpdf::impl::Parser::add_scalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
38.8M
{
589
38.8M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
38.8M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
2.01k
        max_bad_count_ = 1;
594
2.01k
        check_too_many_bad_tokens(); // always throws Error()
595
2.01k
    }
596
38.8M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
38.8M
    obj->setDescription(context_, description_, input_.getLastOffset());
598
38.8M
    add(std::move(obj));
599
38.8M
}
void qpdf::impl::Parser::add_scalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
397k
{
589
397k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
397k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
50
        max_bad_count_ = 1;
594
50
        check_too_many_bad_tokens(); // always throws Error()
595
50
    }
596
397k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
397k
    obj->setDescription(context_, description_, input_.getLastOffset());
598
397k
    add(std::move(obj));
599
397k
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
1.41M
{
589
1.41M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
1.41M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
623
        max_bad_count_ = 1;
594
623
        check_too_many_bad_tokens(); // always throws Error()
595
623
    }
596
1.41M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
1.41M
    obj->setDescription(context_, description_, input_.getLastOffset());
598
1.41M
    add(std::move(obj));
599
1.41M
}
void qpdf::impl::Parser::add_scalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
588
306k
{
589
306k
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
590
306k
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
170
        max_bad_count_ = 1;
594
170
        check_too_many_bad_tokens(); // always throws Error()
595
170
    }
596
306k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
306k
    obj->setDescription(context_, description_, input_.getLastOffset());
598
306k
    add(std::move(obj));
599
306k
}
600
601
template <typename T, typename... Args>
602
QPDFObjectHandle
603
Parser::with_description(Args&&... args)
604
2.00M
{
605
2.00M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
2.00M
    obj->setDescription(context_, description_, start_);
607
2.00M
    return {obj};
608
2.00M
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Bool, bool>(bool&&)
Line
Count
Source
604
6.09k
{
605
6.09k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
6.09k
    obj->setDescription(context_, description_, start_);
607
6.09k
    return {obj};
608
6.09k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Integer, long long>(long long&&)
Line
Count
Source
604
376k
{
605
376k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
376k
    obj->setDescription(context_, description_, start_);
607
376k
    return {obj};
608
376k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
122k
{
605
122k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
122k
    obj->setDescription(context_, description_, start_);
607
122k
    return {obj};
608
122k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
289k
{
605
289k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
289k
    obj->setDescription(context_, description_, start_);
607
289k
    return {obj};
608
289k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
1.11M
{
605
1.11M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
1.11M
    obj->setDescription(context_, description_, start_);
607
1.11M
    return {obj};
608
1.11M
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
99.3k
{
605
99.3k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
99.3k
    obj->setDescription(context_, description_, start_);
607
99.3k
    return {obj};
608
99.3k
}
QPDFObjectHandle qpdf::impl::Parser::with_description<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
604
1.70k
{
605
1.70k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
1.70k
    obj->setDescription(context_, description_, start_);
607
1.70k
    return {obj};
608
1.70k
}
609
610
void
611
Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
612
3.17M
{
613
3.17M
    if (obj) {
614
3.17M
        obj->setDescription(context_, description_, parsed_offset);
615
3.17M
    }
616
3.17M
}
617
618
void
619
Parser::fix_missing_keys()
620
32.8k
{
621
32.8k
    std::set<std::string> names;
622
125k
    for (auto& obj: frame_->olist) {
623
125k
        if (obj.raw_type_code() == ::ot_name) {
624
5.79k
            names.insert(obj.obj_sp()->getStringValue());
625
5.79k
        }
626
125k
    }
627
32.8k
    int next_fake_key = 1;
628
120k
    for (auto const& item: frame_->olist) {
629
121k
        while (true) {
630
121k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
631
121k
            const bool found_fake = !frame_->dict.contains(key) && !names.contains(key);
632
121k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
633
121k
            if (found_fake) {
634
120k
                warn(
635
120k
                    frame_->offset,
636
120k
                    "expected dictionary key but found non-name object; inserting key " + key);
637
120k
                frame_->dict[key] = item;
638
120k
                break;
639
120k
            }
640
121k
        }
641
120k
    }
642
32.8k
}
643
644
void
645
Parser::check_too_many_bad_tokens()
646
2.51M
{
647
2.51M
    auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
648
2.51M
    if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
649
4.11k
        if (bad_count_) {
650
3.19k
            limits_error(
651
3.19k
                "parser-max-container-size-damaged",
652
3.19k
                "encountered errors while parsing an array or dictionary with more than " +
653
3.19k
                    std::to_string(limit) + " elements; giving up on reading object");
654
3.19k
        }
655
4.11k
        limits_error(
656
4.11k
            "parser-max-container-size",
657
4.11k
            "encountered an array or dictionary with more than " + std::to_string(limit) +
658
4.11k
                " elements during xref recovery; giving up on reading object");
659
4.11k
    }
660
2.51M
    if (max_bad_count_ && --max_bad_count_ == 0) {
661
12.3k
        limits_error(
662
12.3k
            "parser-max-errors", "too many errors during parsing; treating object as null");
663
12.3k
    }
664
2.51M
    if (good_count_ > 4) {
665
919k
        good_count_ = 0;
666
919k
        bad_count_ = 1;
667
919k
        return;
668
919k
    }
669
1.59M
    if (++bad_count_ > 5 ||
670
1.54M
        (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) {
671
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
672
        // exceeds the remaining number of allowable total errors.
673
59.6k
        warn("too many errors; giving up on reading object");
674
59.6k
        throw Error();
675
59.6k
    }
676
1.53M
    good_count_ = 0;
677
1.53M
}
678
679
void
680
Parser::limits_error(std::string const& limit, std::string const& msg)
681
17.8k
{
682
17.8k
    Limits::error();
683
17.8k
    warn("limits error("s + limit + "): " + msg);
684
17.8k
    throw Error();
685
17.8k
}
686
687
void
688
Parser::warn(QPDFExc const& e) const
689
4.38M
{
690
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
691
    // object. If parsing for some other reason, such as an explicit creation of an object from a
692
    // string, then just throw the exception.
693
4.38M
    if (context_) {
694
4.38M
        context_->warn(e);
695
4.38M
    } else {
696
620
        throw e;
697
620
    }
698
4.38M
}
699
700
void
701
Parser::warn_duplicate_key()
702
592k
{
703
592k
    warn(
704
592k
        frame_->offset,
705
592k
        "dictionary has duplicated key " + frame_->key +
706
592k
            "; last occurrence overrides earlier ones");
707
592k
}
708
709
void
710
Parser::warn(qpdf_offset_t offset, std::string const& msg) const
711
4.38M
{
712
4.38M
    if (stream_id_) {
713
208k
        std::string descr = "object "s + std::to_string(obj_id_) + " 0";
714
208k
        std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_);
715
208k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
716
4.17M
    } else {
717
4.17M
        warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg));
718
4.17M
    }
719
4.38M
}
720
721
void
722
Parser::warn(std::string const& msg) const
723
3.07M
{
724
3.07M
    warn(input_.getLastOffset(), msg);
725
3.07M
}