Coverage Report

Created: 2025-11-11 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/QPDFParser.cc
Line
Count
Source
1
#include <qpdf/QPDFParser.hh>
2
3
#include <qpdf/QPDF.hh>
4
#include <qpdf/QPDFObjGen.hh>
5
#include <qpdf/QPDFObjectHandle.hh>
6
#include <qpdf/QPDFObject_private.hh>
7
#include <qpdf/QPDFTokenizer_private.hh>
8
#include <qpdf/QTC.hh>
9
#include <qpdf/QUtil.hh>
10
11
#include <memory>
12
13
using namespace std::literals;
14
using namespace qpdf;
15
16
using ObjectPtr = std::shared_ptr<QPDFObject>;
17
18
static uint32_t const& max_nesting{global::Limits::objects_max_nesting()};
19
20
// The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
21
// special access to allow the parser to create unresolved objects and dangling references.
22
class QPDF::Doc::ParseGuard
23
{
24
  public:
25
    ParseGuard(QPDF* qpdf) :
26
4.45M
        objects(qpdf ? &qpdf->m->objects : nullptr)
27
4.45M
    {
28
4.45M
        if (objects) {
29
4.40M
            objects->inParse(true);
30
4.40M
        }
31
4.45M
    }
32
33
    static std::shared_ptr<QPDFObject>
34
    getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
35
4.11M
    {
36
4.11M
        return qpdf->m->objects.getObjectForParser(id, gen, parse_pdf);
37
4.11M
    }
38
39
    ~ParseGuard()
40
4.45M
    {
41
4.45M
        if (objects) {
42
4.40M
            objects->inParse(false);
43
4.40M
        }
44
4.45M
    }
45
    QPDF::Doc::Objects* objects;
46
};
47
48
using ParseGuard = QPDF::Doc::ParseGuard;
49
50
QPDFObjectHandle
51
QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
52
51.8k
{
53
51.8k
    qpdf::Tokenizer tokenizer;
54
51.8k
    if (auto result = QPDFParser(
55
51.8k
                          input,
56
51.8k
                          make_description(input.getName(), object_description),
57
51.8k
                          object_description,
58
51.8k
                          tokenizer,
59
51.8k
                          nullptr,
60
51.8k
                          context,
61
51.8k
                          false)
62
51.8k
                          .parse()) {
63
51.2k
        return result;
64
51.2k
    }
65
621
    return {QPDFObject::create<QPDF_Null>()};
66
51.8k
}
67
68
QPDFObjectHandle
69
QPDFParser::parse_content(
70
    InputSource& input,
71
    std::shared_ptr<QPDFObject::Description> sp_description,
72
    qpdf::Tokenizer& tokenizer,
73
    QPDF* context)
74
2.48M
{
75
2.48M
    static const std::string content("content"); // GCC12 - make constexpr
76
2.48M
    auto p = QPDFParser(
77
2.48M
        input,
78
2.48M
        std::move(sp_description),
79
2.48M
        content,
80
2.48M
        tokenizer,
81
2.48M
        nullptr,
82
2.48M
        context,
83
2.48M
        true,
84
2.48M
        0,
85
2.48M
        0,
86
2.48M
        context && context->doc().reconstructed_xref());
87
2.48M
    auto result = p.parse(true);
88
2.48M
    if (result || p.empty_) {
89
        // In content stream mode, leave object uninitialized to indicate EOF
90
2.44M
        return result;
91
2.44M
    }
92
41.6k
    return {QPDFObject::create<QPDF_Null>()};
93
2.48M
}
94
95
QPDFObjectHandle
96
QPDFParser::parse(
97
    InputSource& input,
98
    std::string const& object_description,
99
    QPDFTokenizer& tokenizer,
100
    bool& empty,
101
    QPDFObjectHandle::StringDecrypter* decrypter,
102
    QPDF* context)
103
0
{
104
    // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
105
    // only user of the 'empty' member. When removing this overload also remove 'empty'.
106
0
    auto p = QPDFParser(
107
0
        input,
108
0
        make_description(input.getName(), object_description),
109
0
        object_description,
110
0
        *tokenizer.m,
111
0
        decrypter,
112
0
        context,
113
0
        false);
114
0
    auto result = p.parse();
115
0
    empty = p.empty_;
116
0
    if (result) {
117
0
        return result;
118
0
    }
119
0
    return {QPDFObject::create<QPDF_Null>()};
120
0
}
121
122
QPDFObjectHandle
123
QPDFParser::parse(
124
    InputSource& input,
125
    std::string const& object_description,
126
    qpdf::Tokenizer& tokenizer,
127
    QPDFObjectHandle::StringDecrypter* decrypter,
128
    QPDF& context,
129
    bool sanity_checks)
130
1.53M
{
131
1.53M
    return QPDFParser(
132
1.53M
               input,
133
1.53M
               make_description(input.getName(), object_description),
134
1.53M
               object_description,
135
1.53M
               tokenizer,
136
1.53M
               decrypter,
137
1.53M
               &context,
138
1.53M
               true,
139
1.53M
               0,
140
1.53M
               0,
141
1.53M
               sanity_checks)
142
1.53M
        .parse();
143
1.53M
}
144
145
QPDFObjectHandle
146
QPDFParser::parse(
147
    is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
148
383k
{
149
383k
    return QPDFParser(
150
383k
               input,
151
383k
               std::make_shared<QPDFObject::Description>(
152
383k
                   QPDFObject::ObjStreamDescr(stream_id, obj_id)),
153
383k
               "",
154
383k
               tokenizer,
155
383k
               nullptr,
156
383k
               &context,
157
383k
               true,
158
383k
               stream_id,
159
383k
               obj_id)
160
383k
        .parse();
161
383k
}
162
163
QPDFObjectHandle
164
QPDFParser::parse(bool content_stream)
165
4.45M
{
166
4.45M
    try {
167
4.45M
        return parse_first(content_stream);
168
4.45M
    } catch (Error&) {
169
73.6k
        return {};
170
73.6k
    } catch (QPDFExc& e) {
171
33.0k
        throw e;
172
33.0k
    } catch (std::logic_error& e) {
173
7
        throw e;
174
9.14k
    } catch (std::exception& e) {
175
9.14k
        warn("treating object as null because of error during parsing : "s + e.what());
176
9.14k
        return {};
177
9.14k
    }
178
4.45M
}
179
180
QPDFObjectHandle
181
QPDFParser::parse_first(bool content_stream)
182
4.45M
{
183
    // This method must take care not to resolve any objects. Don't check the type of any object
184
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
185
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
186
    // logic error to be thrown from QPDF::inParse().
187
188
4.45M
    QPDF::Doc::ParseGuard pg(context);
189
4.45M
    start = input.tell();
190
4.45M
    if (!tokenizer.nextToken(input, object_description)) {
191
46.1k
        warn(tokenizer.getErrorMessage());
192
46.1k
    }
193
194
4.45M
    switch (tokenizer.getType()) {
195
14.6k
    case QPDFTokenizer::tt_eof:
196
14.6k
        if (content_stream) {
197
            // In content stream mode, leave object uninitialized to indicate EOF
198
7.31k
            empty_ = true;
199
7.31k
            return {};
200
7.31k
        }
201
7.28k
        warn("unexpected EOF");
202
7.28k
        return {};
203
204
44.7k
    case QPDFTokenizer::tt_bad:
205
44.7k
        return {};
206
207
3.47k
    case QPDFTokenizer::tt_brace_open:
208
7.21k
    case QPDFTokenizer::tt_brace_close:
209
7.21k
        warn("treating unexpected brace token as null");
210
7.21k
        return {};
211
212
13.0k
    case QPDFTokenizer::tt_array_close:
213
13.0k
        warn("treating unexpected array close token as null");
214
13.0k
        return {};
215
216
5.73k
    case QPDFTokenizer::tt_dict_close:
217
5.73k
        warn("unexpected dictionary close token");
218
5.73k
        return {};
219
220
119k
    case QPDFTokenizer::tt_array_open:
221
1.67M
    case QPDFTokenizer::tt_dict_open:
222
1.67M
        stack.clear();
223
1.67M
        stack.emplace_back(
224
1.67M
            input,
225
1.67M
            (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
226
1.67M
        frame = &stack.back();
227
1.67M
        return parseRemainder(content_stream);
228
229
6.38k
    case QPDFTokenizer::tt_bool:
230
6.38k
        return withDescription<QPDF_Bool>(tokenizer.getValue() == "true");
231
232
387k
    case QPDFTokenizer::tt_null:
233
387k
        return {QPDFObject::create<QPDF_Null>()};
234
235
490k
    case QPDFTokenizer::tt_integer:
236
490k
        return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
237
238
203k
    case QPDFTokenizer::tt_real:
239
203k
        return withDescription<QPDF_Real>(tokenizer.getValue());
240
241
206k
    case QPDFTokenizer::tt_name:
242
206k
        return withDescription<QPDF_Name>(tokenizer.getValue());
243
244
1.37M
    case QPDFTokenizer::tt_word:
245
1.37M
        {
246
1.37M
            auto const& value = tokenizer.getValue();
247
1.37M
            if (content_stream) {
248
1.28M
                return withDescription<QPDF_Operator>(value);
249
1.28M
            } else if (value == "endobj") {
250
                // We just saw endobj without having read anything. Nothing in the PDF spec appears
251
                // to allow empty objects, but they have been encountered in actual PDF files and
252
                // Adobe Reader appears to ignore them. Treat this as a null and do not move the
253
                // input source's offset.
254
2.86k
                empty_ = true;
255
2.86k
                input.seek(input.getLastOffset(), SEEK_SET);
256
2.86k
                if (!content_stream) {
257
2.86k
                    warn("empty object treated as null");
258
2.86k
                }
259
2.86k
                return {};
260
89.8k
            } else {
261
89.8k
                warn("unknown token while reading object; treating as string");
262
89.8k
                return withDescription<QPDF_String>(value);
263
89.8k
            }
264
1.37M
        }
265
266
24.3k
    case QPDFTokenizer::tt_string:
267
24.3k
        if (decrypter) {
268
1.58k
            std::string s{tokenizer.getValue()};
269
1.58k
            decrypter->decryptString(s);
270
1.58k
            return withDescription<QPDF_String>(s);
271
22.7k
        } else {
272
22.7k
            return withDescription<QPDF_String>(tokenizer.getValue());
273
22.7k
        }
274
275
0
    default:
276
0
        warn("treating unknown token type as null while reading object");
277
0
        return {};
278
4.45M
    }
279
4.45M
}
280
281
QPDFObjectHandle
282
QPDFParser::parseRemainder(bool content_stream)
283
1.67M
{
284
    // This method must take care not to resolve any objects. Don't check the type of any object
285
    // without first ensuring that it is a direct object. Otherwise, doing so may have the side
286
    // effect of reading the object and changing the file pointer. If you do this, it will cause a
287
    // logic error to be thrown from QPDF::inParse().
288
289
1.67M
    bad_count = 0;
290
1.67M
    bool b_contents = false;
291
292
80.9M
    while (true) {
293
80.8M
        if (!tokenizer.nextToken(input, object_description)) {
294
428k
            warn(tokenizer.getErrorMessage());
295
428k
        }
296
80.8M
        ++good_count; // optimistically
297
298
80.8M
        if (int_count != 0) {
299
            // Special handling of indirect references. Treat integer tokens as part of an indirect
300
            // reference until proven otherwise.
301
24.1M
            if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
302
17.1M
                if (++int_count > 2) {
303
                    // Process the oldest buffered integer.
304
12.2M
                    addInt(int_count);
305
12.2M
                }
306
17.1M
                last_offset_buffer[int_count % 2] = input.getLastOffset();
307
17.1M
                int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
308
17.1M
                continue;
309
310
17.1M
            } else if (
311
6.90M
                int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
312
4.37M
                tokenizer.getValue() == "R") {
313
4.14M
                if (!context) {
314
0
                    throw std::logic_error(
315
0
                        "QPDFParser::parse called without context on an object with indirect "
316
0
                        "references");
317
0
                }
318
4.14M
                auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]);
319
4.14M
                auto gen = QIntC::to_int(int_buffer[(int_count) % 2]);
320
4.14M
                if (!(id < 1 || gen < 0 || gen >= 65535)) {
321
4.11M
                    add(ParseGuard::getObject(context, id, gen, parse_pdf));
322
4.11M
                } else {
323
27.5k
                    add_bad_null(
324
27.5k
                        "treating bad indirect reference (" + std::to_string(id) + " " +
325
27.5k
                        std::to_string(gen) + " R) as null");
326
27.5k
                }
327
4.14M
                int_count = 0;
328
4.14M
                continue;
329
330
4.14M
            } else if (int_count > 0) {
331
                // Process the buffered integers before processing the current token.
332
2.76M
                if (int_count > 1) {
333
747k
                    addInt(int_count - 1);
334
747k
                }
335
2.76M
                addInt(int_count);
336
2.76M
                int_count = 0;
337
2.76M
            }
338
24.1M
        }
339
340
59.5M
        switch (tokenizer.getType()) {
341
93.6k
        case QPDFTokenizer::tt_eof:
342
93.6k
            warn("parse error while reading object");
343
93.6k
            if (content_stream) {
344
                // In content stream mode, leave object uninitialized to indicate EOF
345
494
                return {};
346
494
            }
347
93.1k
            warn("unexpected EOF");
348
93.1k
            return {};
349
350
360k
        case QPDFTokenizer::tt_bad:
351
360k
            check_too_many_bad_tokens();
352
360k
            addNull();
353
360k
            continue;
354
355
28.7k
        case QPDFTokenizer::tt_brace_open:
356
53.7k
        case QPDFTokenizer::tt_brace_close:
357
53.7k
            add_bad_null("treating unexpected brace token as null");
358
53.7k
            continue;
359
360
1.21M
        case QPDFTokenizer::tt_array_close:
361
1.21M
            if (frame->state == st_array) {
362
1.19M
                auto object = frame->null_count > 100
363
1.19M
                    ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
364
1.19M
                    : QPDFObject::create<QPDF_Array>(std::move(frame->olist));
365
1.19M
                setDescription(object, frame->offset - 1);
366
                // The `offset` points to the next of "[".  Set the rewind offset to point to the
367
                // beginning of "[". This has been explicitly tested with whitespace surrounding the
368
                // array start delimiter. getLastOffset points to the array end token and therefore
369
                // can't be used here.
370
1.19M
                if (stack.size() <= 1) {
371
35.5k
                    return object;
372
35.5k
                }
373
1.15M
                stack.pop_back();
374
1.15M
                frame = &stack.back();
375
1.15M
                add(std::move(object));
376
1.15M
            } else {
377
22.7k
                if (sanity_checks) {
378
                    // During sanity checks, assume nesting of containers is corrupt and object is
379
                    // unusable.
380
15.8k
                    warn("unexpected array close token; giving up on reading object");
381
15.8k
                    return {};
382
15.8k
                }
383
6.89k
                add_bad_null("treating unexpected array close token as null");
384
6.89k
            }
385
1.16M
            continue;
386
387
2.13M
        case QPDFTokenizer::tt_dict_close:
388
2.13M
            if (frame->state <= st_dictionary_value) {
389
                // Attempt to recover more or less gracefully from invalid dictionaries.
390
2.11M
                auto& dict = frame->dict;
391
392
2.11M
                if (frame->state == st_dictionary_value) {
393
103k
                    warn(
394
103k
                        frame->offset,
395
103k
                        "dictionary ended prematurely; using null as value for last key");
396
103k
                    dict[frame->key] = QPDFObject::create<QPDF_Null>();
397
103k
                }
398
2.11M
                if (!frame->olist.empty()) {
399
518k
                    if (sanity_checks) {
400
485k
                        warn(
401
485k
                            frame->offset,
402
485k
                            "expected dictionary keys but found non-name objects; ignoring");
403
485k
                    } else {
404
33.2k
                        fixMissingKeys();
405
33.2k
                    }
406
518k
                }
407
408
2.11M
                if (!frame->contents_string.empty() && dict.contains("/Type") &&
409
1.00k
                    dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") &&
410
315
                    dict.contains("/Contents") && dict["/Contents"].isString()) {
411
308
                    dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
412
308
                    dict["/Contents"].setParsedOffset(frame->contents_offset);
413
308
                }
414
2.11M
                auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict));
415
2.11M
                setDescription(object, frame->offset - 2);
416
                // The `offset` points to the next of "<<". Set the rewind offset to point to the
417
                // beginning of "<<". This has been explicitly tested with whitespace surrounding
418
                // the dictionary start delimiter. getLastOffset points to the dictionary end token
419
                // and therefore can't be used here.
420
2.11M
                if (stack.size() <= 1) {
421
1.38M
                    return object;
422
1.38M
                }
423
726k
                stack.pop_back();
424
726k
                frame = &stack.back();
425
726k
                add(std::move(object));
426
726k
            } else {
427
17.7k
                if (sanity_checks) {
428
                    // During sanity checks, assume nesting of containers is corrupt and object is
429
                    // unusable.
430
12.3k
                    warn("unexpected dictionary close token; giving up on reading object");
431
12.3k
                    return {};
432
12.3k
                }
433
5.39k
                add_bad_null("unexpected dictionary close token");
434
5.39k
            }
435
731k
            continue;
436
437
2.07M
        case QPDFTokenizer::tt_array_open:
438
3.31M
        case QPDFTokenizer::tt_dict_open:
439
3.31M
            if (stack.size() > max_nesting) {
440
1.38k
                warn("ignoring excessively deeply nested data structure");
441
1.38k
                return {};
442
3.30M
            } else {
443
3.30M
                b_contents = false;
444
3.30M
                stack.emplace_back(
445
3.30M
                    input,
446
3.30M
                    (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
447
3.30M
                                                                          : st_dictionary_key);
448
3.30M
                frame = &stack.back();
449
3.30M
                continue;
450
3.30M
            }
451
452
120k
        case QPDFTokenizer::tt_bool:
453
120k
            addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
454
120k
            continue;
455
456
1.06M
        case QPDFTokenizer::tt_null:
457
1.06M
            addNull();
458
1.06M
            continue;
459
460
7.63M
        case QPDFTokenizer::tt_integer:
461
7.63M
            if (!content_stream) {
462
                // Buffer token in case it is part of an indirect reference.
463
6.91M
                last_offset_buffer[1] = input.getLastOffset();
464
6.91M
                int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
465
6.91M
                int_count = 1;
466
6.91M
            } else {
467
722k
                addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
468
722k
            }
469
7.63M
            continue;
470
471
877k
        case QPDFTokenizer::tt_real:
472
877k
            addScalar<QPDF_Real>(tokenizer.getValue());
473
877k
            continue;
474
475
38.3M
        case QPDFTokenizer::tt_name:
476
38.3M
            if (frame->state == st_dictionary_key) {
477
8.89M
                frame->key = tokenizer.getValue();
478
8.89M
                frame->state = st_dictionary_value;
479
8.89M
                b_contents = decrypter && frame->key == "/Contents";
480
8.89M
                continue;
481
29.4M
            } else {
482
29.4M
                addScalar<QPDF_Name>(tokenizer.getValue());
483
29.4M
            }
484
29.4M
            continue;
485
486
29.4M
        case QPDFTokenizer::tt_word:
487
2.48M
            if (content_stream) {
488
317k
                addScalar<QPDF_Operator>(tokenizer.getValue());
489
317k
                continue;
490
317k
            }
491
492
2.17M
            if (sanity_checks) {
493
2.09M
                if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") {
494
                    // During sanity checks, assume an unexpected endobj or endstream indicates that
495
                    // we are parsing past the end of the object.
496
25.2k
                    warn(
497
25.2k
                        "unexpected 'endobj' or 'endstream' while reading object; giving up on "
498
25.2k
                        "reading object");
499
25.2k
                    return {};
500
25.2k
                }
501
502
2.06M
                add_bad_null("unknown token while reading object; treating as null");
503
2.06M
                continue;
504
2.09M
            }
505
506
79.3k
            warn("unknown token while reading object; treating as string");
507
79.3k
            check_too_many_bad_tokens();
508
79.3k
            addScalar<QPDF_String>(tokenizer.getValue());
509
510
79.3k
            continue;
511
512
1.83M
        case QPDFTokenizer::tt_string:
513
1.83M
            {
514
1.83M
                auto const& val = tokenizer.getValue();
515
1.83M
                if (decrypter) {
516
184k
                    if (b_contents) {
517
10.7k
                        frame->contents_string = val;
518
10.7k
                        frame->contents_offset = input.getLastOffset();
519
10.7k
                        b_contents = false;
520
10.7k
                    }
521
184k
                    std::string s{val};
522
184k
                    decrypter->decryptString(s);
523
184k
                    addScalar<QPDF_String>(s);
524
1.64M
                } else {
525
1.64M
                    addScalar<QPDF_String>(val);
526
1.64M
                }
527
1.83M
            }
528
1.83M
            continue;
529
530
0
        default:
531
0
            add_bad_null("treating unknown token type as null while reading object");
532
59.5M
        }
533
59.5M
    }
534
1.67M
}
535
536
void
537
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
538
55.1M
{
539
55.1M
    if (frame->state != st_dictionary_value) {
540
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
541
        // processing once the tt_dict_close token has been found.
542
46.8M
        frame->olist.emplace_back(std::move(obj));
543
46.8M
    } else {
544
8.31M
        if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
545
479k
            warnDuplicateKey();
546
479k
        }
547
8.31M
        frame->state = st_dictionary_key;
548
8.31M
    }
549
55.1M
}
550
551
void
552
QPDFParser::addNull()
553
3.49M
{
554
3.49M
    const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
555
556
3.49M
    if (frame->state != st_dictionary_value) {
557
        // If state is st_dictionary_key then there is a missing key. Push onto olist for
558
        // processing once the tt_dict_close token has been found.
559
3.11M
        frame->olist.emplace_back(null_obj);
560
3.11M
    } else {
561
381k
        if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
562
39.6k
            warnDuplicateKey();
563
39.6k
        }
564
381k
        frame->state = st_dictionary_key;
565
381k
    }
566
3.49M
    ++frame->null_count;
567
3.49M
}
568
569
void
570
QPDFParser::add_bad_null(std::string const& msg)
571
2.15M
{
572
2.15M
    warn(msg);
573
2.15M
    check_too_many_bad_tokens();
574
2.15M
    addNull();
575
2.15M
}
576
577
void
578
QPDFParser::addInt(int count)
579
15.8M
{
580
15.8M
    auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
581
15.8M
    obj->setDescription(context, description, last_offset_buffer[count % 2]);
582
15.8M
    add(std::move(obj));
583
15.8M
}
584
585
template <typename T, typename... Args>
586
void
587
QPDFParser::addScalar(Args&&... args)
588
33.3M
{
589
33.3M
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
33.3M
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
4.28k
        max_bad_count = 0;
594
4.28k
        check_too_many_bad_tokens(); // always throws Error()
595
4.28k
    }
596
33.3M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
33.3M
    obj->setDescription(context, description, input.getLastOffset());
598
33.3M
    add(std::move(obj));
599
33.3M
}
void QPDFParser::addScalar<QPDF_Bool, bool>(bool&&)
Line
Count
Source
588
120k
{
589
120k
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
120k
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
140
        max_bad_count = 0;
594
140
        check_too_many_bad_tokens(); // always throws Error()
595
140
    }
596
120k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
120k
    obj->setDescription(context, description, input.getLastOffset());
598
120k
    add(std::move(obj));
599
120k
}
void QPDFParser::addScalar<QPDF_Integer, long long>(long long&&)
Line
Count
Source
588
722k
{
589
722k
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
722k
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
74
        max_bad_count = 0;
594
74
        check_too_many_bad_tokens(); // always throws Error()
595
74
    }
596
722k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
722k
    obj->setDescription(context, description, input.getLastOffset());
598
722k
    add(std::move(obj));
599
722k
}
void QPDFParser::addScalar<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
877k
{
589
877k
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
877k
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
304
        max_bad_count = 0;
594
304
        check_too_many_bad_tokens(); // always throws Error()
595
304
    }
596
877k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
877k
    obj->setDescription(context, description, input.getLastOffset());
598
877k
    add(std::move(obj));
599
877k
}
void QPDFParser::addScalar<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
29.4M
{
589
29.4M
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
29.4M
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
3.27k
        max_bad_count = 0;
594
3.27k
        check_too_many_bad_tokens(); // always throws Error()
595
3.27k
    }
596
29.4M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
29.4M
    obj->setDescription(context, description, input.getLastOffset());
598
29.4M
    add(std::move(obj));
599
29.4M
}
void QPDFParser::addScalar<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
317k
{
589
317k
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
317k
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
122
        max_bad_count = 0;
594
122
        check_too_many_bad_tokens(); // always throws Error()
595
122
    }
596
317k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
317k
    obj->setDescription(context, description, input.getLastOffset());
598
317k
    add(std::move(obj));
599
317k
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
588
1.72M
{
589
1.72M
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
1.72M
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
332
        max_bad_count = 0;
594
332
        check_too_many_bad_tokens(); // always throws Error()
595
332
    }
596
1.72M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
1.72M
    obj->setDescription(context, description, input.getLastOffset());
598
1.72M
    add(std::move(obj));
599
1.72M
}
void QPDFParser::addScalar<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
588
184k
{
589
184k
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
590
184k
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
591
        // Stop adding scalars. We are going to abort when the close token or a bad token is
592
        // encountered.
593
41
        max_bad_count = 0;
594
41
        check_too_many_bad_tokens(); // always throws Error()
595
41
    }
596
184k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
597
184k
    obj->setDescription(context, description, input.getLastOffset());
598
184k
    add(std::move(obj));
599
184k
}
600
601
template <typename T, typename... Args>
602
QPDFObjectHandle
603
QPDFParser::withDescription(Args&&... args)
604
2.29M
{
605
2.29M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
2.29M
    obj->setDescription(context, description, start);
607
2.29M
    return {obj};
608
2.29M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Bool, bool>(bool&&)
Line
Count
Source
604
6.38k
{
605
6.38k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
6.38k
    obj->setDescription(context, description, start);
607
6.38k
    return {obj};
608
6.38k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Integer, long long>(long long&&)
Line
Count
Source
604
487k
{
605
487k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
487k
    obj->setDescription(context, description, start);
607
487k
    return {obj};
608
487k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Real, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
203k
{
605
203k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
203k
    obj->setDescription(context, description, start);
607
203k
    return {obj};
608
203k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Name, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
206k
{
605
206k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
206k
    obj->setDescription(context, description, start);
607
206k
    return {obj};
608
206k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_Operator, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
1.28M
{
605
1.28M
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
1.28M
    obj->setDescription(context, description, start);
607
1.28M
    return {obj};
608
1.28M
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&)
Line
Count
Source
604
107k
{
605
107k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
107k
    obj->setDescription(context, description, start);
607
107k
    return {obj};
608
107k
}
QPDFObjectHandle QPDFParser::withDescription<QPDF_String, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)
Line
Count
Source
604
1.49k
{
605
1.49k
    auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
606
1.49k
    obj->setDescription(context, description, start);
607
1.49k
    return {obj};
608
1.49k
}
609
610
void
611
QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
612
3.30M
{
613
3.30M
    if (obj) {
614
3.30M
        obj->setDescription(context, description, parsed_offset);
615
3.30M
    }
616
3.30M
}
617
618
void
619
QPDFParser::fixMissingKeys()
620
33.2k
{
621
33.2k
    std::set<std::string> names;
622
123k
    for (auto& obj: frame->olist) {
623
123k
        if (obj.raw_type_code() == ::ot_name) {
624
3.78k
            names.insert(obj.obj_sp()->getStringValue());
625
3.78k
        }
626
123k
    }
627
33.2k
    int next_fake_key = 1;
628
120k
    for (auto const& item: frame->olist) {
629
121k
        while (true) {
630
121k
            const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
631
121k
            const bool found_fake = !frame->dict.contains(key) && !names.contains(key);
632
121k
            QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
633
121k
            if (found_fake) {
634
120k
                warn(
635
120k
                    frame->offset,
636
120k
                    "expected dictionary key but found non-name object; inserting key " + key);
637
120k
                frame->dict[key] = item;
638
120k
                break;
639
120k
            }
640
121k
        }
641
120k
    }
642
33.2k
}
643
644
void
645
QPDFParser::check_too_many_bad_tokens()
646
2.59M
{
647
2.59M
    auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
648
2.59M
    if (frame->olist.size() > limit || frame->dict.size() > limit) {
649
4.45k
        if (bad_count) {
650
3.15k
            warn(
651
3.15k
                "encountered errors while parsing an array or dictionary with more than " +
652
3.15k
                std::to_string(limit) + " elements; giving up on reading object");
653
3.15k
            throw Error();
654
3.15k
        }
655
1.29k
        warn(
656
1.29k
            "encountered an array or dictionary with more than " + std::to_string(limit) +
657
1.29k
            " elements during xref recovery; giving up on reading object");
658
1.29k
    }
659
2.58M
    if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
660
934k
        good_count = 0;
661
934k
        bad_count = 1;
662
934k
        return;
663
934k
    }
664
1.65M
    if (++bad_count > 5 ||
665
1.60M
        (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
666
        // Give up after 5 errors in close proximity or if the number of missing dictionary keys
667
        // exceeds the remaining number of allowable total errors.
668
70.7k
        warn("too many errors; giving up on reading object");
669
70.7k
        throw Error();
670
70.7k
    }
671
1.58M
    good_count = 0;
672
1.58M
}
673
674
void
675
QPDFParser::warn(QPDFExc const& e) const
676
4.39M
{
677
    // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
678
    // object. If parsing for some other reason, such as an explicit creation of an object from a
679
    // string, then just throw the exception.
680
4.39M
    if (context) {
681
4.39M
        context->warn(e);
682
4.39M
    } else {
683
621
        throw e;
684
621
    }
685
4.39M
}
686
687
void
688
QPDFParser::warnDuplicateKey()
689
519k
{
690
519k
    warn(
691
519k
        frame->offset,
692
519k
        "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
693
519k
}
694
695
void
696
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
697
4.39M
{
698
4.39M
    if (stream_id) {
699
207k
        std::string descr = "object "s + std::to_string(obj_id) + " 0";
700
207k
        std::string name = context->getFilename() + " object stream " + std::to_string(stream_id);
701
207k
        warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg));
702
4.18M
    } else {
703
4.18M
        warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg));
704
4.18M
    }
705
4.39M
}
706
707
void
708
QPDFParser::warn(std::string const& msg) const
709
3.16M
{
710
3.16M
    warn(input.getLastOffset(), msg);
711
3.16M
}