Coverage Report

Created: 2025-07-11 07:03

/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/BufferInputSource.hh>
4
#include <qpdf/QPDF.hh>
5
#include <qpdf/QPDFObjGen.hh>
6
#include <qpdf/QPDFObjectHandle.hh>
7
#include <qpdf/QPDFObject_private.hh>
8
#include <qpdf/QPDFTokenizer_private.hh>
9
#include <qpdf/QTC.hh>
10
#include <qpdf/QUtil.hh>
11
12
#include <memory>
13
14
using namespace std::literals;
15
using namespace qpdf;
16
17
using ObjectPtr = std::shared_ptr<QPDFObject>;
18
19
QPDFObjectHandle
20
QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
21
226k
{
22
226k
    qpdf::Tokenizer tokenizer;
23
226k
    bool empty = false;
24
226k
    return QPDFParser(
25
226k
               input,
26
226k
               make_description(input.getName(), object_description),
27
226k
               object_description,
28
226k
               tokenizer,
29
226k
               nullptr,
30
226k
               context,
31
226k
               false)
32
226k
        .parse(empty, false);
33
226k
}
34
35
QPDFObjectHandle
36
QPDFParser::parse_content(
37
    InputSource& input,
38
    std::shared_ptr<QPDFObject::Description> sp_description,
39
    qpdf::Tokenizer& tokenizer,
40
    QPDF* context)
41
5.58M
{
42
5.58M
    bool empty = false;
43
5.58M
    return QPDFParser(
44
5.58M
               input, std::move(sp_description), "content", tokenizer, nullptr, context, true)
45
5.58M
        .parse(empty, true);
46
5.58M
}
47
48
QPDFObjectHandle
49
QPDFParser::parse(
50
    InputSource& input,
51
    std::string const& object_description,
52
    QPDFTokenizer& tokenizer,
53
    bool& empty,
54
    QPDFObjectHandle::StringDecrypter* decrypter,
55
    QPDF* context)
56
0
{
57
0
    return QPDFParser(
58
0
               input,
59
0
               make_description(input.getName(), object_description),
60
0
               object_description,
61
0
               *tokenizer.m,
62
0
               decrypter,
63
0
               context,
64
0
               false)
65
0
        .parse(empty, false);
66
0
}
67
68
std::pair<QPDFObjectHandle, bool>
69
QPDFParser::parse(
70
    InputSource& input,
71
    std::string const& object_description,
72
    qpdf::Tokenizer& tokenizer,
73
    QPDFObjectHandle::StringDecrypter* decrypter,
74
    QPDF& context,
75
    bool sanity_checks)
76
1.77M
{
77
1.77M
    bool empty{false};
78
1.77M
    auto result = QPDFParser(
79
1.77M
                      input,
80
1.77M
                      make_description(input.getName(), object_description),
81
1.77M
                      object_description,
82
1.77M
                      tokenizer,
83
1.77M
                      decrypter,
84
1.77M
                      &context,
85
1.77M
                      true,
86
1.77M
                      0,
87
1.77M
                      0,
88
1.77M
                      sanity_checks)
89
1.77M
                      .parse(empty, false);
90
1.77M
    return {result, empty};
91
1.77M
}
92
93
std::pair<QPDFObjectHandle, bool>
94
QPDFParser::parse(
95
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
96
479k
{
97
479k
    bool empty{false};
98
479k
    auto result = QPDFParser(
99
479k
                      input,
100
479k
                      std::make_shared<QPDFObject::Description>(
101
479k
                          QPDFObject::ObjStreamDescr(stream_id, obj_id)),
102
479k
                      "",
103
479k
                      tokenizer,
104
479k
                      nullptr,
105
479k
                      &context,
106
479k
                      true,
107
479k
                      stream_id,
108
479k
                      obj_id)
109
479k
                      .parse(empty, false);
110
479k
    return {result, empty};
111
479k
}
112
113
QPDFObjectHandle
114
QPDFParser::parse(bool& empty, bool content_stream)
115
8.05M
{
116
    // This method must take care not to resolve any objects. Don't check the type of any object
117
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
118
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
119
    // logic error to be thrown from QPDF::inParse().
120
121
8.05M
    QPDF::ParseGuard pg(context);
122
8.05M
    empty = false;
123
8.05M
    start = input.tell();
124
125
8.05M
    if (!tokenizer.nextToken(input, object_description)) {
126
52.7k
        warn(tokenizer.getErrorMessage());
127
52.7k
    }
128
129
8.05M
    switch (tokenizer.getType()) {
130
18.1k
    case QPDFTokenizer::tt_eof:
131
18.1k
        if (content_stream) {
132
            // In content stream mode, leave object uninitialized to indicate EOF
133
7.29k
            return {};
134
7.29k
        }
135
10.8k
        QTC::TC("qpdf", "QPDFParser eof in parse");
136
10.8k
        warn("unexpected EOF");
137
10.8k
        return {QPDFObject::create<QPDF_Null>()};
138
139
51.7k
    case QPDFTokenizer::tt_bad:
140
51.7k
        QTC::TC("qpdf", "QPDFParser bad token in parse");
141
51.7k
        return {QPDFObject::create<QPDF_Null>()};
142
143
4.02k
    case QPDFTokenizer::tt_brace_open:
144
11.5k
    case QPDFTokenizer::tt_brace_close:
145
11.5k
        QTC::TC("qpdf", "QPDFParser bad brace");
146
11.5k
        warn("treating unexpected brace token as null");
147
11.5k
        return {QPDFObject::create<QPDF_Null>()};
148
149
13.8k
    case QPDFTokenizer::tt_array_close:
150
13.8k
        QTC::TC("qpdf", "QPDFParser bad array close");
151
13.8k
        warn("treating unexpected array close token as null");
152
13.8k
        return {QPDFObject::create<QPDF_Null>()};
153
154
7.58k
    case QPDFTokenizer::tt_dict_close:
155
7.58k
        QTC::TC("qpdf", "QPDFParser bad dictionary close");
156
7.58k
        warn("unexpected dictionary close token");
157
7.58k
        return {QPDFObject::create<QPDF_Null>()};
158
159
134k
    case QPDFTokenizer::tt_array_open:
160
1.94M
    case QPDFTokenizer::tt_dict_open:
161
1.94M
        stack.clear();
162
1.94M
        stack.emplace_back(
163
1.94M
            input,
164
1.94M
            (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
165
1.94M
        frame = &stack.back();
166
1.94M
        return parseRemainder(content_stream);
167
168
13.2k
    case QPDFTokenizer::tt_bool:
169
13.2k
        return withDescription<QPDF_Bool>(tokenizer.getValue() == "true");
170
171
8.21k
    case QPDFTokenizer::tt_null:
172
8.21k
        return {QPDFObject::create<QPDF_Null>()};
173
174
1.03M
    case QPDFTokenizer::tt_integer:
175
1.03M
        return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
176
177
889k
    case QPDFTokenizer::tt_real:
178
889k
        return withDescription<QPDF_Real>(tokenizer.getValue());
179
180
1.07M
    case QPDFTokenizer::tt_name:
181
1.07M
        return withDescription<QPDF_Name>(tokenizer.getValue());
182
183
2.92M
    case QPDFTokenizer::tt_word:
184
2.92M
        {
185
2.92M
            auto const& value = tokenizer.getValue();
186
2.92M
            if (content_stream) {
187
2.81M
                return withDescription<QPDF_Operator>(value);
188
2.81M
            } else if (value == "endobj") {
189
                // We just saw endobj without having read anything.  Treat this as a null and do
190
                // not move the input source's offset.
191
4.36k
                input.seek(input.getLastOffset(), SEEK_SET);
192
4.36k
                empty = true;
193
4.36k
                return {QPDFObject::create<QPDF_Null>()};
194
104k
            } else {
195
104k
                QTC::TC("qpdf", "QPDFParser treat word as string");
196
104k
                warn("unknown token while reading object; treating as string");
197
104k
                return withDescription<QPDF_String>(value);
198
104k
            }
199
2.92M
        }
200
201
52.4k
    case QPDFTokenizer::tt_string:
202
52.4k
        if (decrypter) {
203
1.22k
            std::string s{tokenizer.getValue()};
204
1.22k
            decrypter->decryptString(s);
205
1.22k
            return withDescription<QPDF_String>(s);
206
51.2k
        } else {
207
51.2k
            return withDescription<QPDF_String>(tokenizer.getValue());
208
51.2k
        }
209
210
0
    default:
211
0
        warn("treating unknown token type as null while reading object");
212
0
        return {QPDFObject::create<QPDF_Null>()};
213
8.05M
    }
214
8.05M
}
215
216
QPDFObjectHandle
217
QPDFParser::parseRemainder(bool content_stream)
218
1.94M
{
219
    // This method must take care not to resolve any objects. Don't check the type of any object
220
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
221
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
222
    // logic error to be thrown from QPDF::inParse().
223
224
1.94M
    bad_count = 0;
225
1.94M
    bool b_contents = false;
226
227
91.5M
    while (true) {
228
91.5M
        if (!tokenizer.nextToken(input, object_description)) {
229
424k
            warn(tokenizer.getErrorMessage());
230
424k
        }
231
91.5M
        ++good_count; // optimistically
232
233
91.5M
        if (int_count != 0) {
234
            // Special handling of indirect references. Treat integer tokens as part of an indirect
235
            // reference until proven otherwise.
236
19.4M
            if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
237
10.8M
                if (++int_count > 2) {
238
                    // Process the oldest buffered integer.
239
4.68M
                    addInt(int_count);
240
4.68M
                }
241
10.8M
                last_offset_buffer[int_count % 2] = input.getLastOffset();
242
10.8M
                int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
243
10.8M
                continue;
244
245
10.8M
            } else if (
246
8.53M
                int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
247
8.53M
                tokenizer.getValue() == "R") {
248
5.34M
                if (context == nullptr) {
249
0
                    QTC::TC("qpdf", "QPDFParser indirect without context");
250
0
                    throw std::logic_error(
251
0
                        "QPDFParser::parse called without context on an object "
252
0
                        "with indirect references");
253
0
                }
254
5.34M
                auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]);
255
5.34M
                auto gen = QIntC::to_int(int_buffer[(int_count) % 2]);
256
5.34M
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
257
5.29M
                    add(QPDF::ParseGuard::getObject(context, id, gen, parse_pdf));
258
5.29M
                } else {
259
53.8k
                    QTC::TC("qpdf", "QPDFParser invalid objgen");
260
53.8k
                    addNull();
261
53.8k
                }
262
5.34M
                int_count = 0;
263
5.34M
                continue;
264
265
5.34M
            } else if (int_count > 0) {
266
                // Process the buffered integers before processing the current token.
267
3.18M
                if (int_count > 1) {
268
839k
                    addInt(int_count - 1);
269
839k
                }
270
3.18M
                addInt(int_count);
271
3.18M
                int_count = 0;
272
3.18M
            }
273
19.4M
        }
274
275
75.3M
        switch (tokenizer.getType()) {
276
92.0k
        case QPDFTokenizer::tt_eof:
277
92.0k
            warn("parse error while reading object");
278
92.0k
            if (content_stream) {
279
                // In content stream mode, leave object uninitialized to indicate EOF
280
724
                return {};
281
724
            }
282
91.3k
            QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
283
91.3k
            warn("unexpected EOF");
284
91.3k
            return {QPDFObject::create<QPDF_Null>()};
285
286
391k
        case QPDFTokenizer::tt_bad:
287
391k
            QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
288
391k
            if (tooManyBadTokens()) {
289
15.5k
                return {QPDFObject::create<QPDF_Null>()};
290
15.5k
            }
291
375k
            addNull();
292
375k
            continue;
293
294
28.0k
        case QPDFTokenizer::tt_brace_open:
295
50.0k
        case QPDFTokenizer::tt_brace_close:
296
50.0k
            QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
297
50.0k
            warn("treating unexpected brace token as null");
298
50.0k
            if (tooManyBadTokens()) {
299
3.79k
                return {QPDFObject::create<QPDF_Null>()};
300
3.79k
            }
301
46.2k
            addNull();
302
46.2k
            continue;
303
304
1.43M
        case QPDFTokenizer::tt_array_close:
305
1.43M
            if ((bad_count || sanity_checks) && !max_bad_count) {
306
                // Trigger warning.
307
1.32k
                (void)tooManyBadTokens();
308
1.32k
                return {QPDFObject::create<QPDF_Null>()};
309
1.32k
            }
310
1.42M
            if (frame->state == st_array) {
311
1.39M
                auto object = frame->null_count > 100
312
1.39M
                    ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
313
1.39M
                    : QPDFObject::create<QPDF_Array>(std::move(frame->olist));
314
1.39M
                setDescription(object, frame->offset - 1);
315
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
316
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
317
                // array start delimiter. getLastOffset points to the array end token and therefore
318
                // can't be used here.
319
1.39M
                if (stack.size() <= 1) {
320
60.3k
                    return object;
321
60.3k
                }
322
1.33M
                stack.pop_back();
323
1.33M
                frame = &stack.back();
324
1.33M
                add(std::move(object));
325
1.33M
            } else {
326
31.0k
                QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
327
31.0k
                if (sanity_checks) {
328
                    // During sanity checks, assume nesting of containers is corrupt and object is
329
                    // unusable.
330
23.5k
                    warn("unexpected array close token; giving up on reading object");
331
23.5k
                    return {QPDFObject::create<QPDF_Null>()};
332
23.5k
                }
333
7.49k
                warn("treating unexpected array close token as null");
334
7.49k
                if (tooManyBadTokens()) {
335
691
                    return {QPDFObject::create<QPDF_Null>()};
336
691
                }
337
6.80k
                addNull();
338
6.80k
            }
339
1.34M
            continue;
340
341
2.57M
        case QPDFTokenizer::tt_dict_close:
342
2.57M
            if ((bad_count || sanity_checks) && !max_bad_count) {
343
                // Trigger warning.
344
1.16k
                (void)tooManyBadTokens();
345
1.16k
                return {QPDFObject::create<QPDF_Null>()};
346
1.16k
            }
347
2.57M
            if (frame->state <= st_dictionary_value) {
348
                // Attempt to recover more or less gracefully from invalid dictionaries.
349
2.55M
                auto& dict = frame->dict;
350
351
2.55M
                if (frame->state == st_dictionary_value) {
352
82.9k
                    QTC::TC("qpdf", "QPDFParser no val for last key");
353
82.9k
                    warn(
354
82.9k
                        frame->offset,
355
82.9k
                        "dictionary ended prematurely; using null as value for last key");
356
82.9k
                    dict[frame->key] = QPDFObject::create<QPDF_Null>();
357
82.9k
                }
358
2.55M
                if (!frame->olist.empty()) {
359
485k
                    if (sanity_checks) {
360
460k
                        warn(
361
460k
                            frame->offset,
362
460k
                            "expected dictionary keys but found non-name objects; ignoring");
363
460k
                    } else {
364
25.4k
                        fixMissingKeys();
365
25.4k
                    }
366
485k
                }
367
368
2.55M
                if (!frame->contents_string.empty() && dict.contains("/Type") &&
369
2.55M
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
370
2.55M
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
371
81
                    dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
372
81
                    dict["/Contents"].setParsedOffset(frame->contents_offset);
373
81
                }
374
2.55M
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
375
2.55M
                setDescription(object, frame->offset - 2);
376
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
377
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
378
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
379
                // and therefore can't be used here.
380
2.55M
                if (stack.size() <= 1) {
381
1.60M
                    return object;
382
1.60M
                }
383
942k
                stack.pop_back();
384
942k
                frame = &stack.back();
385
942k
                add(std::move(object));
386
942k
            } else {
387
23.5k
                QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder");
388
23.5k
                if (sanity_checks) {
389
                    // During sanity checks, assume nesting of containers is corrupt and object is
390
                    // unusable.
391
16.2k
                    warn("unexpected dictionary close token; giving up on reading object");
392
16.2k
                    return {QPDFObject::create<QPDF_Null>()};
393
16.2k
                }
394
7.35k
                warn("unexpected dictionary close token");
395
7.35k
                if (tooManyBadTokens()) {
396
670
                    return {QPDFObject::create<QPDF_Null>()};
397
670
                }
398
6.68k
                addNull();
399
6.68k
            }
400
949k
            continue;
401
402
2.27M
        case QPDFTokenizer::tt_array_open:
403
3.78M
        case QPDFTokenizer::tt_dict_open:
404
3.78M
            if (stack.size() > 499) {
405
1.37k
                QTC::TC("qpdf", "QPDFParser too deep");
406
1.37k
                warn("ignoring excessively deeply nested data structure");
407
1.37k
                return {QPDFObject::create<QPDF_Null>()};
408
3.77M
            } else {
409
3.77M
                b_contents = false;
410
3.77M
                stack.emplace_back(
411
3.77M
                    input,
412
3.77M
                    (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
413
3.77M
                                                                          : st_dictionary_key);
414
3.77M
                frame = &stack.back();
415
3.77M
                continue;
416
3.77M
            }
417
418
124k
        case QPDFTokenizer::tt_bool:
419
124k
            addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
420
124k
            continue;
421
422
1.19M
        case QPDFTokenizer::tt_null:
423
1.19M
            addNull();
424
1.19M
            continue;
425
426
9.09M
        case QPDFTokenizer::tt_integer:
427
9.09M
            if (!content_stream) {
428
                // Buffer token in case it is part of an indirect reference.
429
8.53M
                last_offset_buffer[1] = input.getLastOffset();
430
8.53M
                int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
431
8.53M
                int_count = 1;
432
8.53M
            } else {
433
562k
                addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
434
562k
            }
435
9.09M
            continue;
436
437
1.04M
        case QPDFTokenizer::tt_real:
438
1.04M
            addScalar<QPDF_Real>(tokenizer.getValue());
439
1.04M
            continue;
440
441
51.6M
        case QPDFTokenizer::tt_name:
442
51.6M
            if (frame->state == st_dictionary_key) {
443
10.6M
                frame->key = tokenizer.getValue();
444
10.6M
                frame->state = st_dictionary_value;
445
10.6M
                b_contents = decrypter && frame->key == "/Contents";
446
10.6M
                continue;
447
40.9M
            } else {
448
40.9M
                addScalar<QPDF_Name>(tokenizer.getValue());
449
40.9M
            }
450
40.9M
            continue;
451
452
40.9M
        case QPDFTokenizer::tt_word:
453
2.82M
            if (content_stream) {
454
588k
                addScalar<QPDF_Operator>(tokenizer.getValue());
455
588k
                continue;
456
588k
            }
457
458
2.23M
            if (sanity_checks) {
459
2.16M
                if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") {
460
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
461
                    // we are parsing past the end of the object.
462
33.6k
                    warn(
463
33.6k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
464
33.6k
                        "reading object");
465
33.6k
                    return {QPDFObject::create<QPDF_Null>()};
466
33.6k
                }
467
468
2.12M
                warn("unknown token while reading object; treating as null");
469
2.12M
                if (tooManyBadTokens()) {
470
61.3k
                    return {QPDFObject::create<QPDF_Null>()};
471
61.3k
                }
472
2.06M
                addNull();
473
2.06M
                continue;
474
2.12M
            }
475
476
71.4k
            QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
477
71.4k
            warn("unknown token while reading object; treating as string");
478
71.4k
            if (tooManyBadTokens()) {
479
2.17k
                return {QPDFObject::create<QPDF_Null>()};
480
2.17k
            }
481
69.2k
            addScalar<QPDF_String>(tokenizer.getValue());
482
483
69.2k
            continue;
484
485
1.10M
        case QPDFTokenizer::tt_string:
486
1.10M
            {
487
1.10M
                auto const& val = tokenizer.getValue();
488
1.10M
                if (decrypter) {
489
188k
                    if (b_contents) {
490
6.90k
                        frame->contents_string = val;
491
6.90k
                        frame->contents_offset = input.getLastOffset();
492
6.90k
                        b_contents = false;
493
6.90k
                    }
494
188k
                    std::string s{val};
495
188k
                    decrypter->decryptString(s);
496
188k
                    addScalar<QPDF_String>(s);
497
920k
                } else {
498
920k
                    addScalar<QPDF_String>(val);
499
920k
                }
500
1.10M
            }
501
1.10M
            continue;
502
503
0
        default:
504
0
            warn("treating unknown token type as null while reading object");
505
0
            if (tooManyBadTokens()) {
506
0
                return {QPDFObject::create<QPDF_Null>()};
507
0
            }
508
0
            addNull();
509
75.3M
        }
510
75.3M
    }
511
1.94M
}
512
513
void
514
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
515
51.6M
{
516
51.6M
    if (frame->state != st_dictionary_value) {
517
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
518
        // processing once the tt_dict_close token has been found.
519
41.5M
        frame->olist.emplace_back(std::move(obj));
520
41.5M
    } else {
521
10.0M
        if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
522
531k
            warnDuplicateKey();
523
531k
        }
524
10.0M
        frame->state = st_dictionary_key;
525
10.0M
    }
526
51.6M
}
527
528
void
529
QPDFParser::addNull()
530
3.73M
{
531
3.73M
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
532
533
3.73M
    if (frame->state != st_dictionary_value) {
534
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
535
        // processing once the tt_dict_close token has been found.
536
3.35M
        frame->olist.emplace_back(null_obj);
537
3.35M
    } else {
538
385k
        if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
539
33.4k
            warnDuplicateKey();
540
33.4k
        }
541
385k
        frame->state = st_dictionary_key;
542
385k
    }
543
3.73M
    ++frame->null_count;
544
3.73M
}
545
546
void
547
QPDFParser::addInt(int count)
548
8.71M
{
549
8.71M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
550
8.71M
    obj->setDescription(context, description, last_offset_buffer[count % 2]);
551
8.71M
    add(std::move(obj));
552
8.71M
}
553
554
template <typename T, typename... Args>
555
void
556
QPDFParser::addScalar(Args&&... args)
557
44.4M
{
558
44.4M
    if ((bad_count || sanity_checks) &&
559
44.4M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
9.11M
        max_bad_count = 0;
563
9.11M
        return;
564
9.11M
    }
565
35.3M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
35.3M
    obj->setDescription(context, description, input.getLastOffset());
567
35.3M
    add(std::move(obj));
568
35.3M
}
void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
557
124k
{
558
124k
    if ((bad_count || sanity_checks) &&
559
124k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
2.56k
        max_bad_count = 0;
563
2.56k
        return;
564
2.56k
    }
565
122k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
122k
    obj->setDescription(context, description, input.getLastOffset());
567
122k
    add(std::move(obj));
568
122k
}
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
557
562k
{
558
562k
    if ((bad_count || sanity_checks) &&
559
562k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
30.7k
        max_bad_count = 0;
563
30.7k
        return;
564
30.7k
    }
565
531k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
531k
    obj->setDescription(context, description, input.getLastOffset());
567
531k
    add(std::move(obj));
568
531k
}
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
557
1.04M
{
558
1.04M
    if ((bad_count || sanity_checks) &&
559
1.04M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
5.44k
        max_bad_count = 0;
563
5.44k
        return;
564
5.44k
    }
565
1.03M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
1.03M
    obj->setDescription(context, description, input.getLastOffset());
567
1.03M
    add(std::move(obj));
568
1.03M
}
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
557
40.9M
{
558
40.9M
    if ((bad_count || sanity_checks) &&
559
40.9M
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
9.02M
        max_bad_count = 0;
563
9.02M
        return;
564
9.02M
    }
565
31.9M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
31.9M
    obj->setDescription(context, description, input.getLastOffset());
567
31.9M
    add(std::move(obj));
568
31.9M
}
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
557
588k
{
558
588k
    if ((bad_count || sanity_checks) &&
559
588k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
41.9k
        max_bad_count = 0;
563
41.9k
        return;
564
41.9k
    }
565
546k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
546k
    obj->setDescription(context, description, input.getLastOffset());
567
546k
    add(std::move(obj));
568
546k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
557
989k
{
558
989k
    if ((bad_count || sanity_checks) &&
559
989k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
4.44k
        max_bad_count = 0;
563
4.44k
        return;
564
4.44k
    }
565
985k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
985k
    obj->setDescription(context, description, input.getLastOffset());
567
985k
    add(std::move(obj));
568
985k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
557
188k
{
558
188k
    if ((bad_count || sanity_checks) &&
559
188k
        (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
560
        // Stop adding scalars. We are going to abort when the close token or a bad token is
561
        // encountered.
562
2.31k
        max_bad_count = 0;
563
2.31k
        return;
564
2.31k
    }
565
185k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
566
185k
    obj->setDescription(context, description, input.getLastOffset());
567
185k
    add(std::move(obj));
568
185k
}
569
570
// Create an object of type T and return it as a QPDFObjectHandle.
//
// The arguments are perfectly forwarded to QPDFObject::create<T>. Before the object is returned,
// its description is set from this parser's `context` and `description` members, using the
// member `start` as the offset (presumably the offset at which the current object began --
// TODO confirm against where `start` is assigned).
template <typename T, typename... Args>
571
QPDFObjectHandle
572
QPDFParser::withDescription(Args&&... args)
573
5.98M
{
574
5.98M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
5.98M
    obj->setDescription(context, description, start);
576
5.98M
    return {obj};
577
5.98M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Bool, bool>(bool&&)
Line
Count
Source
573
13.2k
{
574
13.2k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
13.2k
    obj->setDescription(context, description, start);
576
13.2k
    return {obj};
577
13.2k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&)
Line
Count
Source
573
1.03M
{
574
1.03M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
1.03M
    obj->setDescription(context, description, start);
576
1.03M
    return {obj};
577
1.03M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
573
889k
{
574
889k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
889k
    obj->setDescription(context, description, start);
576
889k
    return {obj};
577
889k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
573
1.07M
{
574
1.07M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
1.07M
    obj->setDescription(context, description, start);
576
1.07M
    return {obj};
577
1.07M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
573
2.81M
{
574
2.81M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
2.81M
    obj->setDescription(context, description, start);
576
2.81M
    return {obj};
577
2.81M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
573
150k
{
574
150k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
150k
    obj->setDescription(context, description, start);
576
150k
    return {obj};
577
150k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
573
1.15k
{
574
1.15k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
575
1.15k
    obj->setDescription(context, description, start);
576
1.15k
    return {obj};
577
1.15k
}
578
579
// Set the description of `obj` from this parser's `context` and `description` members, recording
// `parsed_offset` as the object's offset. A null `obj` is silently ignored.
void
580
QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
581
3.94M
{
582
3.94M
    if (obj) {
583
3.94M
        obj->setDescription(context, description, parsed_offset);
584
3.94M
    }
585
3.94M
}
586
587
// Insert every object in the current frame's object list into the frame's dictionary under a
// synthesized key of the form "/QPDFFake<N>", issuing one warning per inserted object.
//
// A synthesized key is only used if it is not already present in the frame's dictionary and does
// not equal the value of any name object in the object list. The counter N is shared across all
// items, so the generated keys are strictly increasing within one call.
// NOTE(review): presumably called when a dictionary is closed with objects that could not be
// paired with keys -- confirm against the call site.
void
588
QPDFParser::fixMissingKeys()
589
25.4k
{
590
25.4k
    // Collect the values of all name objects in the object list so that a fake key never
    // collides with one of them.
    std::set<std::string> names;
591
93.7k
    for (auto& obj: frame->olist) {
592
93.7k
        if (obj.getObj()->getTypeCode() == ::ot_name) {
593
2.49k
            names.insert(obj.getObj()->getStringValue());
594
2.49k
        }
595
93.7k
    }
596
25.4k
    int next_fake_key = 1;
597
85.0k
    // Note that every item in the list receives a fake key, including the name objects
    // collected above.
    for (auto const& item: frame->olist) {
598
85.2k
        // Keep generating candidate keys until an unused one is found.
        while (true) {
599
85.2k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
600
85.2k
            // found_fake is true when the candidate key is free to use.
            const bool found_fake = !frame->dict.contains(key) && !names.contains(key);
601
85.2k
            // Record which case was hit: 0 = candidate usable, 1 = candidate collided.
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
602
85.2k
            if (found_fake) {
603
85.0k
                warn(
604
85.0k
                    frame->offset,
605
85.0k
                    "expected dictionary key but found non-name object; inserting key " + key);
606
85.0k
                frame->dict[key] = item;
607
85.0k
                break;
608
85.0k
            }
609
85.2k
        }
610
85.0k
    }
611
25.4k
}
612
613
// Account for one more bad token and decide whether parsing of the current object should be
// abandoned.
//
// Returns true when the caller should give up on the object (a warning explaining why has been
// issued), false when parsing may continue. Each call that gets past the container-size check
// decrements max_bad_count, and updates bad_count and good_count as described inline.
bool
614
QPDFParser::tooManyBadTokens()
615
2.64M
{
616
2.64M
    // Oversized container: give up immediately if errors have already been seen.
    if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
617
3.40k
        if (bad_count) {
618
2.72k
            warn(
619
2.72k
                "encountered errors while parsing an array or dictionary with more than 5000 "
620
2.72k
                "elements; giving up on reading object");
621
2.72k
            return true;
622
2.72k
        }
623
676
        // NOTE(review): this warning says "giving up on reading object" but, unlike the branch
        // above, control falls through to the checks below instead of returning true. Confirm
        // whether the fall-through is intentional.
        warn(
624
676
            "encountered an array or dictionary with more than 5000 elements during xref recovery; "
625
676
            "giving up on reading object");
626
676
    }
627
2.64M
    // While error budget remains and more than four good tokens have been counted (presumably
    // since the previous bad token -- confirm where good_count is incremented), treat this as
    // the first error of a new cluster and keep going.
    if (--max_bad_count > 0 && good_count > 4) {
628
960k
        good_count = 0;
629
960k
        bad_count = 1;
630
960k
        return false;
631
960k
    }
632
1.68M
    // NOTE(review): max_bad_count can be zero or negative at this point (it has just been
    // decremented and failed the "> 0" test). Confirm that QIntC::to_size behaves as intended
    // for a negative argument when the frame is not an array.
    if (++bad_count > 5 ||
633
1.68M
        (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
634
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
635
        // exceeds the remaining number of allowable total errors.
636
82.9k
        warn("too many errors; giving up on reading object");
637
82.9k
        return true;
638
82.9k
    }
639
1.60M
    good_count = 0;
640
1.60M
    return false;
641
1.68M
}
642
643
// Report the problem described by `e`: forwarded to `context->warn(e)` when the parser has a
// context, otherwise thrown as an exception.
void
644
QPDFParser::warn(QPDFExc const& e) const
645
4.42M
{
646
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
647
    // object. If parsing for some other reason, such as an explicit creation of an object from a
648
    // string, then just throw the exception.
649
4.42M
    if (context) {
650
4.42M
        context->warn(e);
651
4.42M
    } else {
652
443
        throw e;
653
443
    }
654
4.42M
}
655
656
// Warn, at the current frame's offset, that the dictionary key held in `frame->key` occurs more
// than once. The message states that the last occurrence overrides earlier ones.
void
657
QPDFParser::warnDuplicateKey()
658
565k
{
659
565k
    QTC::TC("qpdf", "QPDFParser duplicate dict key");
660
565k
    warn(
661
565k
        frame->offset,
662
565k
        "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
663
565k
}
664
665
// Build a qpdf_e_damaged_pdf exception for `msg` at `offset` and pass it to warn(QPDFExc const&).
//
// When `stream_id` is non-zero, the exception names "<filename> object stream <stream_id>" as
// its source and "object <obj_id> 0" as the object being parsed. Otherwise it uses the input
// source's name and the parser's `object_description`.
void
666
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
667
4.42M
{
668
4.42M
    if (stream_id) {
669
236k
        std::string descr = "object "s + std::to_string(obj_id) + " 0";
670
236k
        // NOTE(review): `context` is dereferenced here without a null check; presumably
        // stream_id is only ever non-zero when a context is present -- confirm.
        std::string name = context->getFilename() + " object stream " + std::to_string(stream_id);
671
236k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
672
4.19M
    } else {
673
4.19M
        warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg));
674
4.19M
    }
675
4.42M
}
676
677
// Convenience overload: issue warning `msg` at the offset reported by input.getLastOffset().
void
678
QPDFParser::warn(std::string const& msg) const
679
3.23M
{
680
3.23M
    warn(input.getLastOffset(), msg);
681
3.23M
}