Coverage Report

Created: 2026-01-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::parser_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
1.06M
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
1.06M
    {
28
1.06M
        if (objects) {
29
1.06M
            objects->inParse(true);
30
1.06M
        }
31
1.06M
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
390k
    {
36
390k
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
390k
    }
38
39
    ~ParseGuard()
40
1.06M
    {
41
1.06M
        if (objects) {
42
1.06M
            objects->inParse(false);
43
1.06M
        }
44
1.06M
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
50
QPDFObjectHandle
51
QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
52
0
{
53
0
    qpdf::Tokenizer tokenizer;
54
0
    if (auto result = QPDFParser(
55
0
                          input,
56
0
                          make_description(input.getName(), object_description),
57
0
                          object_description,
58
0
                          tokenizer,
59
0
                          nullptr,
60
0
                          context,
61
0
                          false)
62
0
                          .parse()) {
63
0
        return result;
64
0
    }
65
0
    return {QPDFObject::create<QPDF_Null>()};
66
0
}
67
68
QPDFObjectHandle
69
QPDFParser::parse_content(
70
    InputSource& input,
71
    std::shared_ptr<QPDFObject::Description> sp_description,
72
    qpdf::Tokenizer& tokenizer,
73
    QPDF* context)
74
878k
{
75
878k
    static const std::string content("content"); // GCC12 - make constexpr
76
878k
    auto p = QPDFParser(
77
878k
        input,
78
878k
        std::move(sp_description),
79
878k
        content,
80
878k
        tokenizer,
81
878k
        nullptr,
82
878k
        context,
83
878k
        true,
84
878k
        0,
85
878k
        0,
86
878k
        context && context->doc().reconstructed_xref());
87
878k
    auto result = p.parse(true);
88
878k
    if (result || p.empty_) {
89
        // In content stream mode, leave object uninitialized to indicate EOF
90
858k
        return result;
91
858k
    }
92
19.5k
    return {QPDFObject::create<QPDF_Null>()};
93
878k
}
94
95
QPDFObjectHandle
96
QPDFParser::parse(
97
    InputSource& input,
98
    std::string const& object_description,
99
    QPDFTokenizer& tokenizer,
100
    bool& empty,
101
    QPDFObjectHandle::StringDecrypter* decrypter,
102
    QPDF* context)
103
0
{
104
    // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
105
    // only user of the 'empty' member. When removing this overload also remove 'empty'.
106
0
    auto p = QPDFParser(
107
0
        input,
108
0
        make_description(input.getName(), object_description),
109
0
        object_description,
110
0
        *tokenizer.m,
111
0
        decrypter,
112
0
        context,
113
0
        false);
114
0
    auto result = p.parse();
115
0
    empty = p.empty_;
116
0
    if (result) {
117
0
        return result;
118
0
    }
119
0
    return {QPDFObject::create<QPDF_Null>()};
120
0
}
121
122
QPDFObjectHandle
123
QPDFParser::parse(
124
    InputSource& input,
125
    std::string const& object_description,
126
    qpdf::Tokenizer& tokenizer,
127
    QPDFObjectHandle::StringDecrypter* decrypter,
128
    QPDF& context,
129
    bool sanity_checks)
130
158k
{
131
158k
    return QPDFParser(
132
158k
               input,
133
158k
               make_description(input.getName(), object_description),
134
158k
               object_description,
135
158k
               tokenizer,
136
158k
               decrypter,
137
158k
               &context,
138
158k
               true,
139
158k
               0,
140
158k
               0,
141
158k
               sanity_checks)
142
158k
        .parse();
143
158k
}
144
145
QPDFObjectHandle
146
QPDFParser::parse(
147
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
148
24.4k
{
149
24.4k
    return QPDFParser(
150
24.4k
               input,
151
24.4k
               std::make_shared<QPDFObject::Description>(
152
24.4k
                   QPDFObject::ObjStreamDescr(stream_id, obj_id)),
153
24.4k
               "",
154
24.4k
               tokenizer,
155
24.4k
               nullptr,
156
24.4k
               &context,
157
24.4k
               true,
158
24.4k
               stream_id,
159
24.4k
               obj_id)
160
24.4k
        .parse();
161
24.4k
}
162
163
QPDFObjectHandle
164
QPDFParser::parse(bool content_stream)
165
1.06M
{
166
1.06M
    try {
167
1.06M
        return parse_first(content_stream);
168
1.06M
    } catch (Error&) {
169
7.97k
        return {};
170
7.97k
    } catch (QPDFExc& e) {
171
4.93k
        throw e;
172
4.93k
    } catch (std::logic_error& e) {
173
1
        throw e;
174
1.70k
    } catch (std::exception& e) {
175
1.70k
        warn("treating object as null because of error during parsing: "s + e.what());
176
1.70k
        return {};
177
1.70k
    }
178
1.06M
}
179
180
QPDFObjectHandle
181
QPDFParser::parse_first(bool content_stream)
182
1.06M
{
183
    // This method must take care not to resolve any objects. Don't check the type of any object
184
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
185
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
186
    // logic error to be thrown from QPDF::inParse().
187
188
1.06M
    QPDF::Doc::ParseGuard pg(context);
189
1.06M
    start = input.tell();
190
1.06M
    if (!tokenizer.nextToken(input, object_description)) {
191
15.8k
        warn(tokenizer.getErrorMessage());
192
15.8k
    }
193
194
1.06M
    switch (tokenizer.getType()) {
195
4.74k
    case QPDFTokenizer::tt_eof:
196
4.74k
        if (content_stream) {
197
            // In content stream mode, leave object uninitialized to indicate EOF
198
4.12k
            empty_ = true;
199
4.12k
            return {};
200
4.12k
        }
201
620
        warn("unexpected EOF");
202
620
        return {};
203
204
15.5k
    case QPDFTokenizer::tt_bad:
205
15.5k
        return {};
206
207
1.30k
    case QPDFTokenizer::tt_brace_open:
208
1.92k
    case QPDFTokenizer::tt_brace_close:
209
1.92k
        warn("treating unexpected brace token as null");
210
1.92k
        return {};
211
212
2.03k
    case QPDFTokenizer::tt_array_close:
213
2.03k
        warn("treating unexpected array close token as null");
214
2.03k
        return {};
215
216
824
    case QPDFTokenizer::tt_dict_close:
217
824
        warn("unexpected dictionary close token");
218
824
        return {};
219
220
14.6k
    case QPDFTokenizer::tt_array_open:
221
165k
    case QPDFTokenizer::tt_dict_open:
222
165k
        stack.clear();
223
165k
        stack.emplace_back(
224
165k
            input,
225
165k
            (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
226
165k
        frame = &stack.back();
227
165k
        return parseRemainder(content_stream);
228
229
779
    case QPDFTokenizer::tt_bool:
230
779
        return withDescription<QPDF_Bool>(tokenizer.getValue() == "true");
231
232
414
    case QPDFTokenizer::tt_null:
233
414
        return {QPDFObject::create<QPDF_Null>()};
234
235
109k
    case QPDFTokenizer::tt_integer:
236
109k
        return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
237
238
49.5k
    case QPDFTokenizer::tt_real:
239
49.5k
        return withDescription<QPDF_Real>(tokenizer.getValue());
240
241
80.2k
    case QPDFTokenizer::tt_name:
242
80.2k
        return withDescription<QPDF_Name>(tokenizer.getValue());
243
244
624k
    case QPDFTokenizer::tt_word:
245
624k
        {
246
624k
            auto const& value = tokenizer.getValue();
247
624k
            if (content_stream) {
248
617k
                return withDescription<QPDF_Operator>(value);
249
617k
            } else if (value == "endobj") {
250
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
251
                // to allow empty objects, but they have been encountered in actual PDF files and
252
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
253
                // input source's offset.
254
236
                empty_ = true;
255
236
                input.seek(input.getLastOffset(), SEEK_SET);
256
236
                if (!content_stream) {
257
236
                    warn("empty object treated as null");
258
236
                }
259
236
                return {};
260
7.55k
            } else {
261
7.55k
                warn("unknown token while reading object; treating as string");
262
7.55k
                return withDescription<QPDF_String>(value);
263
7.55k
            }
264
624k
        }
265
266
4.80k
    case QPDFTokenizer::tt_string:
267
4.80k
        if (decrypter) {
268
405
            std::string s{tokenizer.getValue()};
269
405
            decrypter->decryptString(s);
270
405
            return withDescription<QPDF_String>(s);
271
4.40k
        } else {
272
4.40k
            return withDescription<QPDF_String>(tokenizer.getValue());
273
4.40k
        }
274
275
0
    default:
276
0
        warn("treating unknown token type as null while reading object");
277
0
        return {};
278
1.06M
    }
279
1.06M
}
280
281
QPDFObjectHandle
282
QPDFParser::parseRemainder(bool content_stream)
283
165k
{
284
    // This method must take care not to resolve any objects. Don't check the type of any object
285
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
286
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
287
    // logic error to be thrown from QPDF::inParse().
288
289
165k
    bad_count = 0;
290
165k
    bool b_contents = false;
291
292
7.22M
    while (true) {
293
7.21M
        if (!tokenizer.nextToken(input, object_description)) {
294
48.1k
            warn(tokenizer.getErrorMessage());
295
48.1k
        }
296
7.21M
        ++good_count; // optimistically
297
298
7.21M
        if (int_count != 0) {
299
            // Special handling of indirect references. Treat integer tokens as part of an indirect
300
            // reference until proven otherwise.
301
1.99M
            if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
302
1.31M
                if (++int_count > 2) {
303
                    // Process the oldest buffered integer.
304
844k
                    addInt(int_count);
305
844k
                }
306
1.31M
                last_offset_buffer[int_count % 2] = input.getLastOffset();
307
1.31M
                int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
308
1.31M
                continue;
309
310
1.31M
            } else if (
311
684k
                int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
312
417k
                tokenizer.getValue() == "R") {
313
393k
                if (!context) {
314
0
                    throw std::logic_error(
315
0
                        "QPDFParser::parse called without context on an object with indirect "
316
0
                        "references");
317
0
                }
318
393k
                auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]);
319
393k
                auto gen = QIntC::to_int(int_buffer[(int_count) % 2]);
320
393k
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
321
390k
                    add(ParseGuard::getObject(context, id, gen, parse_pdf));
322
390k
                } else {
323
2.77k
                    add_bad_null(
324
2.77k
                        "treating bad indirect reference (" + std::to_string(id) + " " +
325
2.77k
                        std::to_string(gen) + " R) as null");
326
2.77k
                }
327
393k
                int_count = 0;
328
393k
                continue;
329
330
393k
            } else if (int_count > 0) {
331
                // Process the buffered integers before processing the current token.
332
291k
                if (int_count > 1) {
333
77.1k
                    addInt(int_count - 1);
334
77.1k
                }
335
291k
                addInt(int_count);
336
291k
                int_count = 0;
337
291k
            }
338
1.99M
        }
339
340
5.50M
        switch (tokenizer.getType()) {
341
8.63k
        case QPDFTokenizer::tt_eof:
342
8.63k
            warn("parse error while reading object");
343
8.63k
            if (content_stream) {
344
                // In content stream mode, leave object uninitialized to indicate EOF
345
220
                return {};
346
220
            }
347
8.41k
            warn("unexpected EOF");
348
8.41k
            return {};
349
350
40.7k
        case QPDFTokenizer::tt_bad:
351
40.7k
            check_too_many_bad_tokens();
352
40.7k
            addNull();
353
40.7k
            continue;
354
355
2.03k
        case QPDFTokenizer::tt_brace_open:
356
8.05k
        case QPDFTokenizer::tt_brace_close:
357
8.05k
            add_bad_null("treating unexpected brace token as null");
358
8.05k
            continue;
359
360
132k
        case QPDFTokenizer::tt_array_close:
361
132k
            if (frame->state == st_array) {
362
130k
                auto object = frame->null_count > 100
363
130k
                    ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
364
130k
                    : QPDFObject::create<QPDF_Array>(std::move(frame->olist));
365
130k
                setDescription(object, frame->offset - 1);
366
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
367
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
368
                // array start delimiter. getLastOffset points to the array end token and therefore
369
                // can't be used here.
370
130k
                if (stack.size() <= 1) {
371
6.42k
                    return object;
372
6.42k
                }
373
123k
                stack.pop_back();
374
123k
                frame = &stack.back();
375
123k
                add(std::move(object));
376
123k
            } else {
377
2.99k
                if (sanity_checks) {
378
                    // During sanity checks, assume nesting of containers is corrupt and object is
379
                    // unusable.
380
2.27k
                    warn("unexpected array close token; giving up on reading object");
381
2.27k
                    return {};
382
2.27k
                }
383
715
                add_bad_null("treating unexpected array close token as null");
384
715
            }
385
124k
            continue;
386
387
221k
        case QPDFTokenizer::tt_dict_close:
388
221k
            if (frame->state <= st_dictionary_value) {
389
                // Attempt to recover more or less gracefully from invalid dictionaries.
390
219k
                auto& dict = frame->dict;
391
392
219k
                if (frame->state == st_dictionary_value) {
393
11.6k
                    warn(
394
11.6k
                        frame->offset,
395
11.6k
                        "dictionary ended prematurely; using null as value for last key");
396
11.6k
                    dict[frame->key] = QPDFObject::create<QPDF_Null>();
397
11.6k
                }
398
219k
                if (!frame->olist.empty()) {
399
65.5k
                    if (sanity_checks) {
400
59.2k
                        warn(
401
59.2k
                            frame->offset,
402
59.2k
                            "expected dictionary keys but found non-name objects; ignoring");
403
59.2k
                    } else {
404
6.33k
                        fixMissingKeys();
405
6.33k
                    }
406
65.5k
                }
407
408
219k
                if (!frame->contents_string.empty() && dict.contains("/Type") &&
409
61
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
410
11
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
411
11
                    dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
412
11
                    dict["/Contents"].setParsedOffset(frame->contents_offset);
413
11
                }
414
219k
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
415
219k
                setDescription(object, frame->offset - 2);
416
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
417
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
418
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
419
                // and therefore can't be used here.
420
219k
                if (stack.size() <= 1) {
421
132k
                    return object;
422
132k
                }
423
87.7k
                stack.pop_back();
424
87.7k
                frame = &stack.back();
425
87.7k
                add(std::move(object));
426
87.7k
            } else {
427
1.92k
                if (sanity_checks) {
428
                    // During sanity checks, assume nesting of containers is corrupt and object is
429
                    // unusable.
430
1.49k
                    warn("unexpected dictionary close token; giving up on reading object");
431
1.49k
                    return {};
432
1.49k
                }
433
429
                add_bad_null("unexpected dictionary close token");
434
429
            }
435
88.2k
            continue;
436
437
269k
        case QPDFTokenizer::tt_array_open:
438
408k
        case QPDFTokenizer::tt_dict_open:
439
408k
            if (stack.size() > max_nesting) {
440
262
                limits_error(
441
262
                    "parser-max-nesting", "ignoring excessively deeply nested data structure");
442
262
            }
443
408k
            b_contents = false;
444
408k
            stack.emplace_back(
445
408k
                input,
446
408k
                (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
447
408k
                                                                      : st_dictionary_key);
448
408k
            frame = &stack.back();
449
408k
            continue;
450
451
14.2k
        case QPDFTokenizer::tt_bool:
452
14.2k
            addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
453
14.2k
            continue;
454
455
53.4k
        case QPDFTokenizer::tt_null:
456
53.4k
            addNull();
457
53.4k
            continue;
458
459
963k
        case QPDFTokenizer::tt_integer:
460
963k
            if (!content_stream) {
461
                // Buffer token in case it is part of an indirect reference.
462
685k
                last_offset_buffer[1] = input.getLastOffset();
463
685k
                int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
464
685k
                int_count = 1;
465
685k
            } else {
466
278k
                addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
467
278k
            }
468
963k
            continue;
469
470
149k
        case QPDFTokenizer::tt_real:
471
149k
            addScalar<QPDF_Real>(tokenizer.getValue());
472
149k
            continue;
473
474
2.83M
        case QPDFTokenizer::tt_name:
475
2.83M
            if (frame->state == st_dictionary_key) {
476
951k
                frame->key = tokenizer.getValue();
477
951k
                frame->state = st_dictionary_value;
478
951k
                b_contents = decrypter && frame->key == "/Contents";
479
951k
                continue;
480
1.87M
            } else {
481
1.87M
                addScalar<QPDF_Name>(tokenizer.getValue());
482
1.87M
            }
483
1.87M
            continue;
484
485
1.87M
        case QPDFTokenizer::tt_word:
486
372k
            if (content_stream) {
487
127k
                addScalar<QPDF_Operator>(tokenizer.getValue());
488
127k
                continue;
489
127k
            }
490
491
244k
            if (sanity_checks) {
492
233k
                if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") {
493
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
494
                    // we are parsing past the end of the object.
495
2.88k
                    warn(
496
2.88k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
497
2.88k
                        "reading object");
498
2.88k
                    return {};
499
2.88k
                }
500
501
231k
                add_bad_null("unknown token while reading object; treating as null");
502
231k
                continue;
503
233k
            }
504
505
10.7k
            warn("unknown token while reading object; treating as string");
506
10.7k
            check_too_many_bad_tokens();
507
10.7k
            addScalar<QPDF_String>(tokenizer.getValue());
508
509
10.7k
            continue;
510
511
303k
        case QPDFTokenizer::tt_string:
512
303k
            {
513
303k
                auto const& val = tokenizer.getValue();
514
303k
                if (decrypter) {
515
13.8k
                    if (b_contents) {
516
861
                        frame->contents_string = val;
517
861
                        frame->contents_offset = input.getLastOffset();
518
861
                        b_contents = false;
519
861
                    }
520
13.8k
                    std::string s{val};
521
13.8k
                    decrypter->decryptString(s);
522
13.8k
                    addScalar<QPDF_String>(s);
523
289k
                } else {
524
289k
                    addScalar<QPDF_String>(val);
525
289k
                }
526
303k
            }
527
303k
            continue;
528
529
0
        default:
530
0
            add_bad_null("treating unknown token type as null while reading object");
531
5.50M
        }
532
5.50M
    }
533
165k
}
534
535
void
536
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
537
4.57M
{
538
4.57M
    if (frame->state != st_dictionary_value) {
539
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
540
        // processing once the tt_dict_close token has been found.
541
3.68M
        frame->olist.emplace_back(std::move(obj));
542
3.68M
    } else {
543
887k
        if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
544
70.0k
            warnDuplicateKey();
545
70.0k
        }
546
887k
        frame->state = st_dictionary_key;
547
887k
    }
548
4.57M
}
549
550
void
551
QPDFParser::addNull()
552
327k
{
553
327k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
554
555
327k
    if (frame->state != st_dictionary_value) {
556
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
557
        // processing once the tt_dict_close token has been found.
558
285k
        frame->olist.emplace_back(null_obj);
559
285k
    } else {
560
42.3k
        if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
561
6.79k
            warnDuplicateKey();
562
6.79k
        }
563
42.3k
        frame->state = st_dictionary_key;
564
42.3k
    }
565
327k
    ++frame->null_count;
566
327k
}
567
568
void
569
QPDFParser::add_bad_null(std::string const& msg)
570
242k
{
571
242k
    warn(msg);
572
242k
    check_too_many_bad_tokens();
573
242k
    addNull();
574
242k
}
575
576
void
577
QPDFParser::addInt(int count)
578
1.21M
{
579
1.21M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
580
1.21M
    obj->setDescription(context, description, last_offset_buffer[count % 2]);
581
1.21M
    add(std::move(obj));
582
1.21M
}
583
584
template <typename T, typename... Args>
585
void
586
QPDFParser::addScalar(Args&&... args)
587
2.76M
{
588
2.76M
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
2.76M
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
315
        max_bad_count = 1;
593
315
        check_too_many_bad_tokens(); // always throws Error()
594
315
    }
595
2.76M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
2.76M
    obj->setDescription(context, description, input.getLastOffset());
597
2.76M
    add(std::move(obj));
598
2.76M
}
void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
587
14.2k
{
588
14.2k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
14.2k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
39
        max_bad_count = 1;
593
39
        check_too_many_bad_tokens(); // always throws Error()
594
39
    }
595
14.2k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
14.2k
    obj->setDescription(context, description, input.getLastOffset());
597
14.2k
    add(std::move(obj));
598
14.2k
}
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
587
277k
{
588
277k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
277k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
20
        max_bad_count = 1;
593
20
        check_too_many_bad_tokens(); // always throws Error()
594
20
    }
595
277k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
277k
    obj->setDescription(context, description, input.getLastOffset());
597
277k
    add(std::move(obj));
598
277k
}
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
587
149k
{
588
149k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
149k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
54
        max_bad_count = 1;
593
54
        check_too_many_bad_tokens(); // always throws Error()
594
54
    }
595
149k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
149k
    obj->setDescription(context, description, input.getLastOffset());
597
149k
    add(std::move(obj));
598
149k
}
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
587
1.87M
{
588
1.87M
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
1.87M
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
132
        max_bad_count = 1;
593
132
        check_too_many_bad_tokens(); // always throws Error()
594
132
    }
595
1.87M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
1.87M
    obj->setDescription(context, description, input.getLastOffset());
597
1.87M
    add(std::move(obj));
598
1.87M
}
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
587
127k
{
588
127k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
127k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
18
        max_bad_count = 1;
593
18
        check_too_many_bad_tokens(); // always throws Error()
594
18
    }
595
127k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
127k
    obj->setDescription(context, description, input.getLastOffset());
597
127k
    add(std::move(obj));
598
127k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
587
299k
{
588
299k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
299k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
46
        max_bad_count = 1;
593
46
        check_too_many_bad_tokens(); // always throws Error()
594
46
    }
595
299k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
299k
    obj->setDescription(context, description, input.getLastOffset());
597
299k
    add(std::move(obj));
598
299k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
587
13.8k
{
588
13.8k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
589
13.8k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
590
        // Stop adding scalars. We are going to abort when the close token or a bad token is
591
        // encountered.
592
6
        max_bad_count = 1;
593
6
        check_too_many_bad_tokens(); // always throws Error()
594
6
    }
595
13.8k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
596
13.8k
    obj->setDescription(context, description, input.getLastOffset());
597
13.8k
    add(std::move(obj));
598
13.8k
}
599
600
template <typename T, typename... Args>
601
QPDFObjectHandle
602
QPDFParser::withDescription(Args&&... args)
603
867k
{
604
867k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
867k
    obj->setDescription(context, description, start);
606
867k
    return {obj};
607
867k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Bool, bool>(bool&&)
Line
Count
Source
603
779
{
604
779
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
779
    obj->setDescription(context, description, start);
606
779
    return {obj};
607
779
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&)
Line
Count
Source
603
108k
{
604
108k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
108k
    obj->setDescription(context, description, start);
606
108k
    return {obj};
607
108k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
603
49.5k
{
604
49.5k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
49.5k
    obj->setDescription(context, description, start);
606
49.5k
    return {obj};
607
49.5k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
603
80.2k
{
604
80.2k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
80.2k
    obj->setDescription(context, description, start);
606
80.2k
    return {obj};
607
80.2k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
603
617k
{
604
617k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
617k
    obj->setDescription(context, description, start);
606
617k
    return {obj};
607
617k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
603
11.3k
{
604
11.3k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
11.3k
    obj->setDescription(context, description, start);
606
11.3k
    return {obj};
607
11.3k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
603
352
{
604
352
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605
352
    obj->setDescription(context, description, start);
606
352
    return {obj};
607
352
}
608
609
void
610
QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
611
349k
{
612
349k
    if (obj) {
613
349k
        obj->setDescription(context, description, parsed_offset);
614
349k
    }
615
349k
}
616
617
void
618
QPDFParser::fixMissingKeys()
619
6.33k
{
620
6.33k
    std::set<std::string> names;
621
15.1k
    for (auto& obj: frame->olist) {
622
15.1k
        if (obj.raw_type_code() == ::ot_name) {
623
257
            names.insert(obj.obj_sp()->getStringValue());
624
257
        }
625
15.1k
    }
626
6.33k
    int next_fake_key = 1;
627
15.0k
    for (auto const& item: frame->olist) {
628
15.1k
        while (true) {
629
15.1k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
630
15.1k
            const bool found_fake = !frame->dict.contains(key) && !names.contains(key);
631
15.1k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
632
15.1k
            if (found_fake) {
633
15.0k
                warn(
634
15.0k
                    frame->offset,
635
15.0k
                    "expected dictionary key but found non-name object; inserting key " + key);
636
15.0k
                frame->dict[key] = item;
637
15.0k
                break;
638
15.0k
            }
639
15.1k
        }
640
15.0k
    }
641
6.33k
}
642
643
void
644
QPDFParser::check_too_many_bad_tokens()
645
292k
{
646
292k
    auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
647
292k
    if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
648
320
        if (bad_count) {
649
199
            limits_error(
650
199
                "parser-max-container-size-damaged",
651
199
                "encountered errors while parsing an array or dictionary with more than " +
652
199
                    std::to_string(limit) + " elements; giving up on reading object");
653
199
        }
654
320
        limits_error(
655
320
            "parser-max-container-size",
656
320
            "encountered an array or dictionary with more than " + std::to_string(limit) +
657
320
                " elements during xref recovery; giving up on reading object");
658
320
    }
659
292k
    if (max_bad_count && --max_bad_count == 0) {
660
1.10k
        limits_error(
661
1.10k
            "parser-max-errors", "too many errors during parsing; treating object as null");
662
1.10k
    }
663
292k
    if (good_count > 4) {
664
112k
        good_count = 0;
665
112k
        bad_count = 1;
666
112k
        return;
667
112k
    }
668
179k
    if (++bad_count > 5 ||
669
174k
        (frame->state != st_array && std::cmp_less(max_bad_count, frame->olist.size()))) {
670
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
671
        // exceeds the remaining number of allowable total errors.
672
6.40k
        warn("too many errors; giving up on reading object");
673
6.40k
        throw Error();
674
6.40k
    }
675
173k
    good_count = 0;
676
173k
}
677
678
void
679
QPDFParser::limits_error(std::string const& limit, std::string const& msg)
680
1.68k
{
681
1.68k
    Limits::error();
682
1.68k
    warn("limits error("s + limit + "): " + msg);
683
1.68k
    throw Error();
684
1.68k
}
685
686
void
687
QPDFParser::warn(QPDFExc const& e) const
688
526k
{
689
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
690
    // object. If parsing for some other reason, such as an explicit creation of an object from a
691
    // string, then just throw the exception.
692
526k
    if (context) {
693
526k
        context->warn(e);
694
526k
    } else {
695
0
        throw e;
696
0
    }
697
526k
}
698
699
void
700
QPDFParser::warnDuplicateKey()
701
76.8k
{
702
76.8k
    warn(
703
76.8k
        frame->offset,
704
76.8k
        "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
705
76.8k
}
706
707
void
708
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
709
526k
{
710
526k
    if (stream_id) {
711
14.3k
        std::string descr = "object "s + std::to_string(obj_id) + " 0";
712
14.3k
        std::string name = context->getFilename() + " object stream " + std::to_string(stream_id);
713
14.3k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
714
512k
    } else {
715
512k
        warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg));
716
512k
    }
717
526k
}
718
719
void
720
QPDFParser::warn(std::string const& msg) const
721
363k
{
722
363k
    warn(input.getLastOffset(), msg);
723
363k
}