Coverage Report

Created: 2025-08-29 06:54

/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
QPDFObjectHandle
19
QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
20
17.4k
{
21
17.4k
    qpdf::Tokenizer tokenizer;
22
17.4k
    bool empty = false;
23
17.4k
    return QPDFParser(
24
17.4k
               input,
25
17.4k
               make_description(input.getName(), object_description),
26
17.4k
               object_description,
27
17.4k
               tokenizer,
28
17.4k
               nullptr,
29
17.4k
               context,
30
17.4k
               false)
31
17.4k
        .parse(empty, false);
32
17.4k
}
33
34
QPDFObjectHandle
35
QPDFParser::parse_content(
36
    InputSource& input,
37
    std::shared_ptr<QPDFObject::Description> sp_description,
38
    qpdf::Tokenizer& tokenizer,
39
    QPDF* context)
40
1.19M
{
41
1.19M
    bool empty = false;
42
1.19M
    return QPDFParser(
43
1.19M
               input,
44
1.19M
               std::move(sp_description),
45
1.19M
               "content",
46
1.19M
               tokenizer,
47
1.19M
               nullptr,
48
1.19M
               context,
49
1.19M
               true,
50
1.19M
               0,
51
1.19M
               0,
52
1.19M
               context && context->reconstructed_xref())
53
1.19M
        .parse(empty, true);
54
1.19M
}
55
56
QPDFObjectHandle
57
QPDFParser::parse(
58
    InputSource& input,
59
    std::string const& object_description,
60
    QPDFTokenizer& tokenizer,
61
    bool& empty,
62
    QPDFObjectHandle::StringDecrypter* decrypter,
63
    QPDF* context)
64
0
{
65
0
    return QPDFParser(
66
0
               input,
67
0
               make_description(input.getName(), object_description),
68
0
               object_description,
69
0
               *tokenizer.m,
70
0
               decrypter,
71
0
               context,
72
0
               false)
73
0
        .parse(empty, false);
74
0
}
75
76
std::pair<QPDFObjectHandle, bool>
77
QPDFParser::parse(
78
    InputSource& input,
79
    std::string const& object_description,
80
    qpdf::Tokenizer& tokenizer,
81
    QPDFObjectHandle::StringDecrypter* decrypter,
82
    QPDF& context,
83
    bool sanity_checks)
84
161k
{
85
161k
    bool empty{false};
86
161k
    auto result = QPDFParser(
87
161k
                      input,
88
161k
                      make_description(input.getName(), object_description),
89
161k
                      object_description,
90
161k
                      tokenizer,
91
161k
                      decrypter,
92
161k
                      &context,
93
161k
                      true,
94
161k
                      0,
95
161k
                      0,
96
161k
                      sanity_checks)
97
161k
                      .parse(empty, false);
98
161k
    return {result, empty};
99
161k
}
100
101
std::pair<QPDFObjectHandle, bool>
102
QPDFParser::parse(
103
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
104
28.3k
{
105
28.3k
    bool empty{false};
106
28.3k
    auto result = QPDFParser(
107
28.3k
                      input,
108
28.3k
                      std::make_shared<QPDFObject::Description>(
109
28.3k
                          QPDFObject::ObjStreamDescr(stream_id, obj_id)),
110
28.3k
                      "",
111
28.3k
                      tokenizer,
112
28.3k
                      nullptr,
113
28.3k
                      &context,
114
28.3k
                      true,
115
28.3k
                      stream_id,
116
28.3k
                      obj_id)
117
28.3k
                      .parse(empty, false);
118
28.3k
    return {result, empty};
119
28.3k
}
120
121
QPDFObjectHandle
122
QPDFParser::parse(bool& empty, bool content_stream)
123
1.40M
{
124
    // This method must take care not to resolve any objects. Don't check the type of any object
125
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
126
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
127
    // logic error to be thrown from QPDF::inParse().
128
129
1.40M
    QPDF::ParseGuard pg(context);
130
1.40M
    empty = false;
131
1.40M
    start = input.tell();
132
133
1.40M
    if (!tokenizer.nextToken(input, object_description)) {
134
11.1k
        warn(tokenizer.getErrorMessage());
135
11.1k
    }
136
137
1.40M
    switch (tokenizer.getType()) {
138
4.00k
    case QPDFTokenizer::tt_eof:
139
4.00k
        if (content_stream) {
140
            // In content stream mode, leave object uninitialized to indicate EOF
141
3.70k
            return {};
142
3.70k
        }
143
302
        QTC::TC("qpdf", "QPDFParser eof in parse");
144
302
        warn("unexpected EOF");
145
302
        return {QPDFObject::create<QPDF_Null>()};
146
147
10.9k
    case QPDFTokenizer::tt_bad:
148
10.9k
        QTC::TC("qpdf", "QPDFParser bad token in parse");
149
10.9k
        return {QPDFObject::create<QPDF_Null>()};
150
151
993
    case QPDFTokenizer::tt_brace_open:
152
1.95k
    case QPDFTokenizer::tt_brace_close:
153
1.95k
        QTC::TC("qpdf", "QPDFParser bad brace");
154
1.95k
        warn("treating unexpected brace token as null");
155
1.95k
        return {QPDFObject::create<QPDF_Null>()};
156
157
2.77k
    case QPDFTokenizer::tt_array_close:
158
2.77k
        QTC::TC("qpdf", "QPDFParser bad array close");
159
2.77k
        warn("treating unexpected array close token as null");
160
2.77k
        return {QPDFObject::create<QPDF_Null>()};
161
162
1.19k
    case QPDFTokenizer::tt_dict_close:
163
1.19k
        QTC::TC("qpdf", "QPDFParser bad dictionary close");
164
1.19k
        warn("unexpected dictionary close token");
165
1.19k
        return {QPDFObject::create<QPDF_Null>()};
166
167
15.0k
    case QPDFTokenizer::tt_array_open:
168
179k
    case QPDFTokenizer::tt_dict_open:
169
179k
        stack.clear();
170
179k
        stack.emplace_back(
171
179k
            input,
172
179k
            (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
173
179k
        frame = &stack.back();
174
179k
        return parseRemainder(content_stream);
175
176
2.14k
    case QPDFTokenizer::tt_bool:
177
2.14k
        return withDescription<QPDF_Bool>(tokenizer.getValue() == "true");
178
179
254
    case QPDFTokenizer::tt_null:
180
254
        return {QPDFObject::create<QPDF_Null>()};
181
182
135k
    case QPDFTokenizer::tt_integer:
183
135k
        return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
184
185
64.6k
    case QPDFTokenizer::tt_real:
186
64.6k
        return withDescription<QPDF_Real>(tokenizer.getValue());
187
188
63.4k
    case QPDFTokenizer::tt_name:
189
63.4k
        return withDescription<QPDF_Name>(tokenizer.getValue());
190
191
933k
    case QPDFTokenizer::tt_word:
192
933k
        {
193
933k
            auto const& value = tokenizer.getValue();
194
933k
            if (content_stream) {
195
926k
                return withDescription<QPDF_Operator>(value);
196
926k
            } else if (value == "endobj") {
197
                // We just saw endobj without having read anything.  Treat this as a null and do
198
                // not move the input source's offset.
199
305
                input.seek(input.getLastOffset(), SEEK_SET);
200
305
                empty = true;
201
305
                return {QPDFObject::create<QPDF_Null>()};
202
6.98k
            } else {
203
6.98k
                QTC::TC("qpdf", "QPDFParser treat word as string");
204
6.98k
                warn("unknown token while reading object; treating as string");
205
6.98k
                return withDescription<QPDF_String>(value);
206
6.98k
            }
207
933k
        }
208
209
5.17k
    case QPDFTokenizer::tt_string:
210
5.17k
        if (decrypter) {
211
440
            std::string s{tokenizer.getValue()};
212
440
            decrypter->decryptString(s);
213
440
            return withDescription<QPDF_String>(s);
214
4.73k
        } else {
215
4.73k
            return withDescription<QPDF_String>(tokenizer.getValue());
216
4.73k
        }
217
218
0
    default:
219
0
        warn("treating unknown token type as null while reading object");
220
0
        return {QPDFObject::create<QPDF_Null>()};
221
1.40M
    }
222
1.40M
}
223
224
QPDFObjectHandle
225
QPDFParser::parseRemainder(bool content_stream)
226
179k
{
227
    // This method must take care not to resolve any objects. Don't check the type of any object
228
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
229
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
230
    // logic error to be thrown from QPDF::inParse().
231
232
179k
    bad_count = 0;
233
179k
    bool b_contents = false;
234
235
6.40M
    while (true) {
236
6.40M
        if (!tokenizer.nextToken(input, object_description)) {
237
42.7k
            warn(tokenizer.getErrorMessage());
238
42.7k
        }
239
6.40M
        ++good_count; // optimistically
240
241
6.40M
        if (int_count != 0) {
242
            // Special handling of indirect references. Treat integer tokens as part of an indirect
243
            // reference until proven otherwise.
244
1.81M
            if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
245
1.00M
                if (++int_count > 2) {
246
                    // Process the oldest buffered integer.
247
405k
                    addInt(int_count);
248
405k
                }
249
1.00M
                last_offset_buffer[int_count % 2] = input.getLastOffset();
250
1.00M
                int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
251
1.00M
                continue;
252
253
1.00M
            } else if (
254
806k
                int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
255
806k
                tokenizer.getValue() == "R") {
256
525k
                if (context == nullptr) {
257
0
                    QTC::TC("qpdf", "QPDFParser indirect without context");
258
0
                    throw std::logic_error(
259
0
                        "QPDFParser::parse called without context on an object "
260
0
                        "with indirect references");
261
0
                }
262
525k
                auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]);
263
525k
                auto gen = QIntC::to_int(int_buffer[(int_count) % 2]);
264
525k
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
265
521k
                    add(QPDF::ParseGuard::getObject(context, id, gen, parse_pdf));
266
521k
                } else {
267
3.37k
                    QTC::TC("qpdf", "QPDFParser invalid objgen");
268
3.37k
                    addNull();
269
3.37k
                }
270
525k
                int_count = 0;
271
525k
                continue;
272
273
525k
            } else if (int_count > 0) {
274
                // Process the buffered integers before processing the current token.
275
281k
                if (int_count > 1) {
276
73.5k
                    addInt(int_count - 1);
277
73.5k
                }
278
281k
                addInt(int_count);
279
281k
                int_count = 0;
280
281k
            }
281
1.81M
        }
282
283
4.87M
        switch (tokenizer.getType()) {
284
9.32k
        case QPDFTokenizer::tt_eof:
285
9.32k
            warn("parse error while reading object");
286
9.32k
            if (content_stream) {
287
                // In content stream mode, leave object uninitialized to indicate EOF
288
321
                return {};
289
321
            }
290
9.00k
            QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
291
9.00k
            warn("unexpected EOF");
292
9.00k
            return {QPDFObject::create<QPDF_Null>()};
293
294
40.3k
        case QPDFTokenizer::tt_bad:
295
40.3k
            QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
296
40.3k
            if (tooManyBadTokens()) {
297
1.31k
                return {QPDFObject::create<QPDF_Null>()};
298
1.31k
            }
299
39.0k
            addNull();
300
39.0k
            continue;
301
302
2.29k
        case QPDFTokenizer::tt_brace_open:
303
7.70k
        case QPDFTokenizer::tt_brace_close:
304
7.70k
            QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
305
7.70k
            warn("treating unexpected brace token as null");
306
7.70k
            if (tooManyBadTokens()) {
307
557
                return {QPDFObject::create<QPDF_Null>()};
308
557
            }
309
7.15k
            addNull();
310
7.15k
            continue;
311
312
152k
        case QPDFTokenizer::tt_array_close:
313
152k
            if ((bad_count || sanity_checks) && !max_bad_count) {
314
                // Trigger warning.
315
233
                (void)tooManyBadTokens();
316
233
                return {QPDFObject::create<QPDF_Null>()};
317
233
            }
318
152k
            if (frame->state == st_array) {
319
147k
                auto object = frame->null_count > 100
320
147k
                    ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
321
147k
                    : QPDFObject::create<QPDF_Array>(std::move(frame->olist));
322
147k
                setDescription(object, frame->offset - 1);
323
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
324
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
325
                // array start delimiter. getLastOffset points to the array end token and therefore
326
                // can't be used here.
327
147k
                if (stack.size() <= 1) {
328
8.22k
                    return object;
329
8.22k
                }
330
139k
                stack.pop_back();
331
139k
                frame = &stack.back();
332
139k
                add(std::move(object));
333
139k
            } else {
334
4.24k
                QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
335
4.24k
                if (sanity_checks) {
336
                    // During sanity checks, assume nesting of containers is corrupt and object is
337
                    // unusable.
338
3.76k
                    warn("unexpected array close token; giving up on reading object");
339
3.76k
                    return {QPDFObject::create<QPDF_Null>()};
340
3.76k
                }
341
472
                warn("treating unexpected array close token as null");
342
472
                if (tooManyBadTokens()) {
343
36
                    return {QPDFObject::create<QPDF_Null>()};
344
36
                }
345
436
                addNull();
346
436
            }
347
140k
            continue;
348
349
259k
        case QPDFTokenizer::tt_dict_close:
350
259k
            if ((bad_count || sanity_checks) && !max_bad_count) {
351
                // Trigger warning.
352
155
                (void)tooManyBadTokens();
353
155
                return {QPDFObject::create<QPDF_Null>()};
354
155
            }
355
259k
            if (frame->state <= st_dictionary_value) {
356
                // Attempt to recover more or less gracefully from invalid dictionaries.
357
256k
                auto& dict = frame->dict;
358
359
256k
                if (frame->state == st_dictionary_value) {
360
7.73k
                    QTC::TC("qpdf", "QPDFParser no val for last key");
361
7.73k
                    warn(
362
7.73k
                        frame->offset,
363
7.73k
                        "dictionary ended prematurely; using null as value for last key");
364
7.73k
                    dict[frame->key] = QPDFObject::create<QPDF_Null>();
365
7.73k
                }
366
256k
                if (!frame->olist.empty()) {
367
48.0k
                    if (sanity_checks) {
368
45.4k
                        warn(
369
45.4k
                            frame->offset,
370
45.4k
                            "expected dictionary keys but found non-name objects; ignoring");
371
45.4k
                    } else {
372
2.59k
                        fixMissingKeys();
373
2.59k
                    }
374
48.0k
                }
375
376
256k
                if (!frame->contents_string.empty() && dict.contains("/Type") &&
377
256k
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
378
256k
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
379
6
                    dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
380
6
                    dict["/Contents"].setParsedOffset(frame->contents_offset);
381
6
                }
382
256k
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
383
256k
                setDescription(object, frame->offset - 2);
384
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
385
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
386
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
387
                // and therefore can't be used here.
388
256k
                if (stack.size() <= 1) {
389
140k
                    return object;
390
140k
                }
391
116k
                stack.pop_back();
392
116k
                frame = &stack.back();
393
116k
                add(std::move(object));
394
116k
            } else {
395
3.07k
                QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder");
396
3.07k
                if (sanity_checks) {
397
                    // During sanity checks, assume nesting of containers is corrupt and object is
398
                    // unusable.
399
2.39k
                    warn("unexpected dictionary close token; giving up on reading object");
400
2.39k
                    return {QPDFObject::create<QPDF_Null>()};
401
2.39k
                }
402
679
                warn("unexpected dictionary close token");
403
679
                if (tooManyBadTokens()) {
404
44
                    return {QPDFObject::create<QPDF_Null>()};
405
44
                }
406
635
                addNull();
407
635
            }
408
116k
            continue;
409
410
284k
        case QPDFTokenizer::tt_array_open:
411
459k
        case QPDFTokenizer::tt_dict_open:
412
459k
            if (stack.size() > 499) {
413
263
                QTC::TC("qpdf", "QPDFParser too deep");
414
263
                warn("ignoring excessively deeply nested data structure");
415
263
                return {QPDFObject::create<QPDF_Null>()};
416
459k
            } else {
417
459k
                b_contents = false;
418
459k
                stack.emplace_back(
419
459k
                    input,
420
459k
                    (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
421
459k
                                                                          : st_dictionary_key);
422
459k
                frame = &stack.back();
423
459k
                continue;
424
459k
            }
425
426
16.7k
        case QPDFTokenizer::tt_bool:
427
16.7k
            addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
428
16.7k
            continue;
429
430
58.8k
        case QPDFTokenizer::tt_null:
431
58.8k
            addNull();
432
58.8k
            continue;
433
434
969k
        case QPDFTokenizer::tt_integer:
435
969k
            if (!content_stream) {
436
                // Buffer token in case it is part of an indirect reference.
437
806k
                last_offset_buffer[1] = input.getLastOffset();
438
806k
                int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
439
806k
                int_count = 1;
440
806k
            } else {
441
162k
                addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
442
162k
            }
443
969k
            continue;
444
445
207k
        case QPDFTokenizer::tt_real:
446
207k
            addScalar<QPDF_Real>(tokenizer.getValue());
447
207k
            continue;
448
449
2.17M
        case QPDFTokenizer::tt_name:
450
2.17M
            if (frame->state == st_dictionary_key) {
451
1.11M
                frame->key = tokenizer.getValue();
452
1.11M
                frame->state = st_dictionary_value;
453
1.11M
                b_contents = decrypter && frame->key == "/Contents";
454
1.11M
                continue;
455
1.11M
            } else {
456
1.06M
                addScalar<QPDF_Name>(tokenizer.getValue());
457
1.06M
            }
458
1.06M
            continue;
459
460
1.06M
        case QPDFTokenizer::tt_word:
461
364k
            if (content_stream) {
462
139k
                addScalar<QPDF_Operator>(tokenizer.getValue());
463
139k
                continue;
464
139k
            }
465
466
224k
            if (sanity_checks) {
467
216k
                if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") {
468
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
469
                    // we are parsing past the end of the object.
470
3.99k
                    warn(
471
3.99k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
472
3.99k
                        "reading object");
473
3.99k
                    return {QPDFObject::create<QPDF_Null>()};
474
3.99k
                }
475
476
212k
                warn("unknown token while reading object; treating as null");
477
212k
                if (tooManyBadTokens()) {
478
5.99k
                    return {QPDFObject::create<QPDF_Null>()};
479
5.99k
                }
480
206k
                addNull();
481
206k
                continue;
482
212k
            }
483
484
8.52k
            QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
485
8.52k
            warn("unknown token while reading object; treating as string");
486
8.52k
            if (tooManyBadTokens()) {
487
175
                return {QPDFObject::create<QPDF_Null>()};
488
175
            }
489
8.34k
            addScalar<QPDF_String>(tokenizer.getValue());
490
491
8.34k
            continue;
492
493
153k
        case QPDFTokenizer::tt_string:
494
153k
            {
495
153k
                auto const& val = tokenizer.getValue();
496
153k
                if (decrypter) {
497
13.6k
                    if (b_contents) {
498
792
                        frame->contents_string = val;
499
792
                        frame->contents_offset = input.getLastOffset();
500
792
                        b_contents = false;
501
792
                    }
502
13.6k
                    std::string s{val};
503
13.6k
                    decrypter->decryptString(s);
504
13.6k
                    addScalar<QPDF_String>(s);
505
139k
                } else {
506
139k
                    addScalar<QPDF_String>(val);
507
139k
                }
508
153k
            }
509
153k
            continue;
510
511
0
        default:
512
0
            warn("treating unknown token type as null while reading object");
513
0
            if (tooManyBadTokens()) {
514
0
                return {QPDFObject::create<QPDF_Null>()};
515
0
            }
516
0
            addNull();
517
4.87M
        }
518
4.87M
    }
519
179k
}
520
521
void
522
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
523
3.17M
{
524
3.17M
    if (frame->state != st_dictionary_value) {
525
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
526
        // processing once the tt_dict_close token has been found.
527
2.12M
        frame->olist.emplace_back(std::move(obj));
528
2.12M
    } else {
529
1.05M
        if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
530
61.2k
            warnDuplicateKey();
531
61.2k
        }
532
1.05M
        frame->state = st_dictionary_key;
533
1.05M
    }
534
3.17M
}
535
536
void
537
QPDFParser::addNull()
538
314k
{
539
314k
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
540
541
314k
    if (frame->state != st_dictionary_value) {
542
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
543
        // processing once the tt_dict_close token has been found.
544
275k
        frame->olist.emplace_back(null_obj);
545
275k
    } else {
546
38.9k
        if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
547
5.10k
            warnDuplicateKey();
548
5.10k
        }
549
38.9k
        frame->state = st_dictionary_key;
550
38.9k
    }
551
314k
    ++frame->null_count;
552
314k
}
553
554
void
555
QPDFParser::addInt(int count)
556
760k
{
557
760k
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
558
760k
    obj->setDescription(context, description, last_offset_buffer[count % 2]);
559
760k
    add(std::move(obj));
560
760k
}
561
562
template <typename T, typename... Args>
563
void
564
QPDFParser::addScalar(Args&&... args)
565
1.74M
{
566
1.74M
    if ((bad_count || sanity_checks) &&
567
1.74M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
106k
        max_bad_count = 0;
571
106k
        return;
572
106k
    }
573
1.64M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
1.64M
    obj->setDescription(context, description, input.getLastOffset());
575
1.64M
    add(std::move(obj));
576
1.64M
}
void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
565
16.7k
{
566
16.7k
    if ((bad_count || sanity_checks) &&
567
16.7k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
822
        max_bad_count = 0;
571
822
        return;
572
822
    }
573
15.9k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
15.9k
    obj->setDescription(context, description, input.getLastOffset());
575
15.9k
    add(std::move(obj));
576
15.9k
}
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
565
162k
{
566
162k
    if ((bad_count || sanity_checks) &&
567
162k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
12.2k
        max_bad_count = 0;
571
12.2k
        return;
572
12.2k
    }
573
150k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
150k
    obj->setDescription(context, description, input.getLastOffset());
575
150k
    add(std::move(obj));
576
150k
}
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
565
207k
{
566
207k
    if ((bad_count || sanity_checks) &&
567
207k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
5.59k
        max_bad_count = 0;
571
5.59k
        return;
572
5.59k
    }
573
202k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
202k
    obj->setDescription(context, description, input.getLastOffset());
575
202k
    add(std::move(obj));
576
202k
}
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
565
1.06M
{
566
1.06M
    if ((bad_count || sanity_checks) &&
567
1.06M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
54.4k
        max_bad_count = 0;
571
54.4k
        return;
572
54.4k
    }
573
1.00M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
1.00M
    obj->setDescription(context, description, input.getLastOffset());
575
1.00M
    add(std::move(obj));
576
1.00M
}
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
565
139k
{
566
139k
    if ((bad_count || sanity_checks) &&
567
139k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
33.5k
        max_bad_count = 0;
571
33.5k
        return;
572
33.5k
    }
573
105k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
105k
    obj->setDescription(context, description, input.getLastOffset());
575
105k
    add(std::move(obj));
576
105k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
565
148k
{
566
148k
    if ((bad_count || sanity_checks) &&
567
148k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
220
        max_bad_count = 0;
571
220
        return;
572
220
    }
573
147k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
147k
    obj->setDescription(context, description, input.getLastOffset());
575
147k
    add(std::move(obj));
576
147k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
565
13.5k
{
566
13.5k
    if ((bad_count || sanity_checks) &&
567
13.5k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
568
        // Stop adding scalars. We are going to abort when the close token or a bad token is
569
        // encountered.
570
28
        max_bad_count = 0;
571
28
        return;
572
28
    }
573
13.5k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
574
13.5k
    obj->setDescription(context, description, input.getLastOffset());
575
13.5k
    add(std::move(obj));
576
13.5k
}
577
578
template <typename T, typename... Args>
579
QPDFObjectHandle
580
QPDFParser::withDescription(Args&&... args)
581
1.20M
{
582
1.20M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
1.20M
    obj->setDescription(context, description, start);
584
1.20M
    return {obj};
585
1.20M
}
// Coverage listing for the withDescription instantiation with T = QPDF_Bool, taking a bool
// rvalue. The body shown is the generic template body (source lines 581-585); the Count column
// reports about 2.14k executions.
QPDFObjectHandle QPDFParser::withDescription<QPDF_Bool, bool>(bool&&)
Line
Count
Source
581
2.14k
{
582
2.14k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
2.14k
    obj->setDescription(context, description, start);
584
2.14k
    return {obj};
585
2.14k
}
// Coverage listing for the withDescription instantiation with T = QPDF_Integer, taking a
// long long rvalue. The body shown is the generic template body (source lines 581-585); the
// Count column reports about 135k executions.
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&)
Line
Count
Source
581
135k
{
582
135k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
135k
    obj->setDescription(context, description, start);
584
135k
    return {obj};
585
135k
}
// Coverage listing for the withDescription instantiation with T = QPDF_Real, taking a const
// reference to std::string (libc++ std::__1::basic_string<char>). The body shown is the generic
// template body (source lines 581-585); the Count column reports about 64.6k executions.
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
581
64.6k
{
582
64.6k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
64.6k
    obj->setDescription(context, description, start);
584
64.6k
    return {obj};
585
64.6k
}
// Coverage listing for the withDescription instantiation with T = QPDF_Name, taking a const
// reference to std::string (libc++ std::__1::basic_string<char>). The body shown is the generic
// template body (source lines 581-585); the Count column reports about 63.4k executions.
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
581
63.4k
{
582
63.4k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
63.4k
    obj->setDescription(context, description, start);
584
63.4k
    return {obj};
585
63.4k
}
// Coverage listing for the withDescription instantiation with T = QPDF_Operator, taking a const
// reference to std::string (libc++ std::__1::basic_string<char>). The body shown is the generic
// template body (source lines 581-585); the Count column reports about 926k executions, the
// largest share of the 1.20M total shown for the template.
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
581
926k
{
582
926k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
926k
    obj->setDescription(context, description, start);
584
926k
    return {obj};
585
926k
}
// Coverage listing for the withDescription instantiation with T = QPDF_String, taking a const
// reference to std::string (libc++ std::__1::basic_string<char>). The body shown is the generic
// template body (source lines 581-585); the Count column reports about 11.2k executions.
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
581
11.2k
{
582
11.2k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
11.2k
    obj->setDescription(context, description, start);
584
11.2k
    return {obj};
585
11.2k
}
// Coverage listing for the withDescription instantiation with T = QPDF_String, taking a
// non-const lvalue reference to std::string (libc++ std::__1::basic_string<char>). The body
// shown is the generic template body (source lines 581-585); the Count column reports 382
// executions.
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
581
382
{
582
382
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
583
382
    obj->setDescription(context, description, start);
584
382
    return {obj};
585
382
}
586
587
void
588
QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
589
404k
{
590
404k
    if (obj) {
591
404k
        obj->setDescription(context, description, parsed_offset);
592
404k
    }
593
404k
}
594
595
void
596
QPDFParser::fixMissingKeys()
597
2.59k
{
598
2.59k
    std::set<std::string> names;
599
10.1k
    for (auto& obj: frame->olist) {
600
10.1k
        if (obj.getObj()->getTypeCode() == ::ot_name) {
601
43
            names.insert(obj.getObj()->getStringValue());
602
43
        }
603
10.1k
    }
604
2.59k
    int next_fake_key = 1;
605
10.1k
    for (auto const& item: frame->olist) {
606
10.1k
        while (true) {
607
10.1k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
608
10.1k
            const bool found_fake = !frame->dict.contains(key) && !names.contains(key);
609
10.1k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
610
10.1k
            if (found_fake) {
611
10.1k
                warn(
612
10.1k
                    frame->offset,
613
10.1k
                    "expected dictionary key but found non-name object; inserting key " + key);
614
10.1k
                frame->dict[key] = item;
615
10.1k
                break;
616
10.1k
            }
617
10.1k
        }
618
10.1k
    }
619
2.59k
}
620
621
bool
622
QPDFParser::tooManyBadTokens()
623
269k
{
624
269k
    if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
625
96
        if (bad_count) {
626
71
            warn(
627
71
                "encountered errors while parsing an array or dictionary with more than 5000 "
628
71
                "elements; giving up on reading object");
629
71
            return true;
630
71
        }
631
25
        warn(
632
25
            "encountered an array or dictionary with more than 5000 elements during xref recovery; "
633
25
            "giving up on reading object");
634
25
    }
635
269k
    if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
636
103k
        good_count = 0;
637
103k
        bad_count = 1;
638
103k
        return false;
639
103k
    }
640
165k
    if (++bad_count > 5 ||
641
165k
        (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
642
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
643
        // exceeds the remaining number of allowable total errors.
644
8.21k
        warn("too many errors; giving up on reading object");
645
8.21k
        return true;
646
8.21k
    }
647
157k
    good_count = 0;
648
157k
    return false;
649
165k
}
650
651
void
652
QPDFParser::warn(QPDFExc const& e) const
653
463k
{
654
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
655
    // object. If parsing for some other reason, such as an explicit creation of an object from a
656
    // string, then just throw the exception.
657
463k
    if (context) {
658
463k
        context->warn(e);
659
463k
    } else {
660
0
        throw e;
661
0
    }
662
463k
}
663
664
void
665
QPDFParser::warnDuplicateKey()
666
66.3k
{
667
66.3k
    QTC::TC("qpdf", "QPDFParser duplicate dict key");
668
66.3k
    warn(
669
66.3k
        frame->offset,
670
66.3k
        "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
671
66.3k
}
672
673
void
674
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
675
463k
{
676
463k
    if (stream_id) {
677
16.6k
        std::string descr = "object "s + std::to_string(obj_id) + " 0";
678
16.6k
        std::string name = context->getFilename() + " object stream " + std::to_string(stream_id);
679
16.6k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
680
446k
    } else {
681
446k
        warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg));
682
446k
    }
683
463k
}
684
685
void
686
QPDFParser::warn(std::string const& msg) const
687
333k
{
688
333k
    warn(input.getLastOffset(), msg);
689
333k
}