Coverage Report

Created: 2025-07-01 06:10

/src/qpdf/libqpdf/JSON.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/JSON.hh>
2
3
#include <qpdf/JSON_writer.hh>
4
5
#include <qpdf/BufferInputSource.hh>
6
#include <qpdf/Pl_Base64.hh>
7
#include <qpdf/Pl_Concatenate.hh>
8
#include <qpdf/Pl_String.hh>
9
#include <qpdf/QTC.hh>
10
#include <qpdf/QUtil.hh>
11
#include <qpdf/Util.hh>
12
13
#include <cstring>
14
#include <stdexcept>
15
16
using namespace qpdf;
17
18
JSON::Members::Members(std::unique_ptr<JSON_value> value) :
19
0
    value(std::move(value))
20
0
{
21
0
}
22
23
JSON::JSON(std::unique_ptr<JSON_value> value) :
24
0
    m(new Members(std::move(value)))
25
0
{
26
0
}
27
28
void
29
JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter)
30
0
{
31
0
    if (first) {
32
0
        *p << delimiter;
33
0
    } else {
34
0
        std::string s{"\n"};
35
0
        s.append(2 * depth, ' ');
36
0
        *p << s + delimiter;
37
0
    }
38
0
}
39
40
void
41
JSON::writeNext(Pipeline* p, bool& first, size_t depth)
42
0
{
43
0
    if (first) {
44
0
        first = false;
45
0
        std::string s{"\n"};
46
0
        s.append(2 * depth, ' ');
47
0
        *p << s;
48
0
    } else {
49
0
        std::string s{",\n"};
50
0
        s.append(2 * depth, ' ');
51
0
        *p << s;
52
0
    }
53
0
}
54
55
void
56
JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth)
57
0
{
58
0
    *p << "{";
59
0
    first = true;
60
0
}
61
62
void
63
JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth)
64
0
{
65
0
    *p << "[";
66
0
    first = true;
67
0
}
68
69
void
70
JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth)
71
0
{
72
0
    writeClose(p, first, depth, "}");
73
0
}
74
75
void
76
JSON::writeArrayClose(Pipeline* p, bool first, size_t depth)
77
0
{
78
0
    writeClose(p, first, depth, "]");
79
0
}
80
81
void
82
JSON::writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth)
83
0
{
84
0
    writeNext(p, first, depth);
85
0
    *p << std::string("\"") + key + "\": ";
86
0
}
87
88
void
89
JSON::writeDictionaryItem(
90
    Pipeline* p, bool& first, std::string const& key, JSON const& value, size_t depth)
91
0
{
92
0
    writeDictionaryKey(p, first, key, depth);
93
0
    value.write(p, depth);
94
0
}
95
96
void
97
JSON::writeArrayItem(Pipeline* p, bool& first, JSON const& element, size_t depth)
98
0
{
99
0
    writeNext(p, first, depth);
100
0
    element.write(p, depth);
101
0
}
102
103
void
104
JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const
105
0
{
106
0
    bool first = true;
107
0
    writeDictionaryOpen(p, first, depth);
108
0
    for (auto const& iter: members) {
109
0
        writeDictionaryItem(p, first, iter.first, iter.second, 1 + depth);
110
0
    }
111
0
    writeDictionaryClose(p, first, depth);
112
0
}
113
114
void
115
JSON::JSON_array::write(Pipeline* p, size_t depth) const
116
0
{
117
0
    bool first = true;
118
0
    writeArrayOpen(p, first, depth);
119
0
    for (auto const& element: elements) {
120
0
        writeArrayItem(p, first, element, 1 + depth);
121
0
    }
122
0
    writeArrayClose(p, first, depth);
123
0
}
124
125
// Create a string value holding a copy of 'utf8'. The text is stored as given; encoding happens
// when the value is written.
JSON::JSON_string::JSON_string(std::string const& utf8) :
    JSON_value(vt_string),
    utf8(utf8)
{
}
130
131
void
132
JSON::JSON_string::write(Pipeline* p, size_t) const
133
0
{
134
0
    *p << std::string("\"") + Writer::encode_string(utf8) + "\"";
135
0
}
136
137
JSON::JSON_number::JSON_number(long long value) :
138
0
    JSON_value(vt_number),
139
0
    encoded(std::to_string(value))
140
0
{
141
0
}
142
143
JSON::JSON_number::JSON_number(double value) :
144
0
    JSON_value(vt_number),
145
0
    encoded(QUtil::double_to_string(value, 6))
146
0
{
147
0
}
148
149
// Create a number from text that is already encoded. The text is stored verbatim; nothing here
// checks that it is a valid numeric literal.
JSON::JSON_number::JSON_number(std::string const& value) :
    JSON_value(vt_number),
    encoded(value)
{
}
154
155
void
156
JSON::JSON_number::write(Pipeline* p, size_t) const
157
0
{
158
0
    *p << encoded;
159
0
}
160
161
JSON::JSON_bool::JSON_bool(bool val) :
162
0
    JSON_value(vt_bool),
163
0
    value(val)
164
0
{
165
0
}
166
167
void
168
JSON::JSON_bool::write(Pipeline* p, size_t) const
169
0
{
170
0
    *p << (value ? "true" : "false");
171
0
}
172
173
void
174
JSON::JSON_null::write(Pipeline* p, size_t) const
175
0
{
176
0
    *p << "null";
177
0
}
178
179
// Create a blob value. 'fn' is the callback that is later invoked by write() to produce the
// blob's bytes. It is received by value, so it is moved into the member rather than copied a
// second time.
JSON::JSON_blob::JSON_blob(std::function<void(Pipeline*)> fn) :
    JSON_value(vt_blob),
    fn(std::move(fn))
{
}
184
185
void
186
JSON::JSON_blob::write(Pipeline* p, size_t) const
187
0
{
188
0
    *p << "\"";
189
0
    Pl_Concatenate cat("blob concatenate", p);
190
0
    Pl_Base64 base64("blob base64", &cat, Pl_Base64::a_encode);
191
0
    fn(&base64);
192
0
    base64.finish();
193
0
    *p << "\"";
194
0
}
195
196
void
197
JSON::write(Pipeline* p, size_t depth) const
198
0
{
199
0
    if (!m) {
200
0
        *p << "null";
201
0
    } else {
202
0
        m->value->write(p, depth);
203
0
    }
204
0
}
205
206
std::string
207
JSON::unparse() const
208
0
{
209
0
    if (!m) {
210
0
        return "null";
211
0
    }
212
0
    std::string s;
213
0
    Pl_String p("unparse", nullptr, s);
214
0
    write(&p, 0);
215
0
    return s;
216
0
}
217
218
std::string
219
JSON::Writer::encode_string(std::string const& str)
220
0
{
221
0
    static auto constexpr hexchars = "0123456789abcdef";
222
223
0
    auto begin = str.cbegin();
224
0
    auto end = str.cend();
225
0
    auto iter = begin;
226
0
    while (iter != end) {
227
0
        auto c = static_cast<unsigned char>(*iter);
228
0
        if ((c > 34 && c != '\\') || c == ' ' || c == 33) {
229
            // Optimistically check that no char in str requires escaping. Hopefully we can just
230
            // return the input str.
231
0
            ++iter;
232
0
        } else {
233
            // We found a char that requires escaping. Initialize result to the chars scanned so
234
            // far, append/replace the rest of str one char at a time, and return the result.
235
0
            std::string result{begin, iter};
236
237
0
            for (; iter != end; ++iter) {
238
0
                auto ch = static_cast<unsigned char>(*iter);
239
0
                if ((ch > 34 && ch != '\\') || ch == ' ' || ch == 33) {
240
                    // Check for most common case first.
241
0
                    result += *iter;
242
0
                } else {
243
0
                    switch (ch) {
244
0
                    case '\\':
245
0
                        result += "\\\\";
246
0
                        break;
247
0
                    case '\"':
248
0
                        result += "\\\"";
249
0
                        break;
250
0
                    case '\b':
251
0
                        result += "\\b";
252
0
                        break;
253
0
                    case '\f':
254
0
                        result += "\\f";
255
0
                        break;
256
0
                    case '\n':
257
0
                        result += "\\n";
258
0
                        break;
259
0
                    case '\r':
260
0
                        result += "\\r";
261
0
                        break;
262
0
                    case '\t':
263
0
                        result += "\\t";
264
0
                        break;
265
0
                    default:
266
0
                        result += ch < 16 ? "\\u000" : "\\u001";
267
0
                        result += hexchars[ch % 16];
268
0
                    }
269
0
                }
270
0
            }
271
0
            return result;
272
0
        }
273
0
    }
274
0
    return str;
275
0
}
276
277
// Create a JSON holding an empty dictionary.
JSON
JSON::makeDictionary()
{
    return JSON(std::make_unique<JSON_dictionary>());
}
282
283
// Add or replace a dictionary member and return the stored value. The key is stored in the form
// produced by Writer::encode_string. A 'val' that holds no value is stored as a JSON null.
// Throws std::runtime_error if this JSON is not a dictionary.
JSON
JSON::addDictionaryMember(std::string const& key, JSON const& val)
{
    auto* dict = m ? dynamic_cast<JSON_dictionary*>(m->value.get()) : nullptr;
    if (!dict) {
        throw std::runtime_error("JSON::addDictionaryMember called on non-dictionary");
    }
    return dict->members[Writer::encode_string(key)] = val.m ? val : makeNull();
}
292
293
// Create a JSON holding an empty array.
JSON
JSON::makeArray()
{
    return JSON(std::make_unique<JSON_array>());
}
298
299
// Append an element to the array and return the stored element. A 'val' that holds no value is
// stored as a JSON null. Throws std::runtime_error if this JSON is not an array.
JSON
JSON::addArrayElement(JSON const& val)
{
    auto* array = m ? dynamic_cast<JSON_array*>(m->value.get()) : nullptr;
    if (!array) {
        throw std::runtime_error("JSON::addArrayElement called on non-array");
    }
    if (val.m) {
        array->elements.push_back(val);
    } else {
        array->elements.push_back(makeNull());
    }
    return array->elements.back();
}
313
314
// Create a JSON string value from 'utf8'.
JSON
JSON::makeString(std::string const& utf8)
{
    return JSON(std::make_unique<JSON_string>(utf8));
}
319
320
// Create a JSON number from an integer.
JSON
JSON::makeInt(long long int value)
{
    return JSON(std::make_unique<JSON_number>(value));
}
325
326
// Create a JSON number from a double.
JSON
JSON::makeReal(double value)
{
    return JSON(std::make_unique<JSON_number>(value));
}
331
332
// Create a JSON number from text that is already encoded; the text is stored as given.
JSON
JSON::makeNumber(std::string const& encoded)
{
    return JSON(std::make_unique<JSON_number>(encoded));
}
337
338
// Create a JSON boolean.
JSON
JSON::makeBool(bool value)
{
    return JSON(std::make_unique<JSON_bool>(value));
}
343
344
// Create a JSON null.
JSON
JSON::makeNull()
{
    return JSON(std::make_unique<JSON_null>());
}
349
350
// Create a JSON blob whose content is produced by 'fn' when the value is written. 'fn' is
// received by value, so it is moved on to the JSON_blob instead of being copied again.
JSON
JSON::makeBlob(std::function<void(Pipeline*)> fn)
{
    return {std::make_unique<JSON_blob>(std::move(fn))};
}
355
356
bool
357
JSON::isArray() const
358
0
{
359
0
    return m ? m->value->type_code == vt_array : false;
360
0
}
361
362
bool
363
JSON::isDictionary() const
364
0
{
365
0
    return m && m->value->type_code == vt_dictionary;
366
0
}
367
368
bool
369
JSON::getString(std::string& utf8) const
370
0
{
371
0
    if (m && m->value->type_code == vt_string) {
372
0
        auto v = dynamic_cast<JSON_string const*>(m->value.get());
373
0
        utf8 = v->utf8;
374
0
        return true;
375
0
    }
376
0
    return false;
377
0
}
378
379
bool
380
JSON::getNumber(std::string& value) const
381
0
{
382
0
    if (m && m->value->type_code == vt_number) {
383
0
        auto v = dynamic_cast<JSON_number const*>(m->value.get());
384
0
        value = v->encoded;
385
0
        return true;
386
0
    }
387
0
    return false;
388
0
}
389
390
bool
391
JSON::getBool(bool& value) const
392
0
{
393
0
    if (m && m->value->type_code == vt_bool) {
394
0
        auto v = dynamic_cast<JSON_bool const*>(m->value.get());
395
0
        value = v->value;
396
0
        return true;
397
0
    }
398
0
    return false;
399
0
}
400
401
bool
402
JSON::isNull() const
403
0
{
404
0
    return m && m->value->type_code == vt_null;
405
0
}
406
407
// Return the dictionary member stored under 'key'. A JSON null is returned if this JSON is not a
// dictionary or has no such member.
// NOTE(review): the lookup uses 'key' exactly as given, while addDictionaryMember stores members
// under Writer::encode_string(key); keys that need escaping may therefore not be found -- confirm
// whether callers are expected to pass the encoded form.
JSON
JSON::getDictItem(std::string const& key) const
{
    auto const* dict = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr;
    if (dict) {
        auto const found = dict->members.find(key);
        if (found != dict->members.end()) {
            return found->second;
        }
    }
    return makeNull();
}
417
418
bool
419
JSON::forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const
420
0
{
421
0
    if (auto v = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr) {
422
0
        for (auto const& [key, value]: v->members) {
423
0
            fn(key, value);
424
0
        }
425
0
        return true;
426
0
    }
427
0
    return false;
428
0
}
429
430
bool
431
JSON::forEachArrayItem(std::function<void(JSON value)> fn) const
432
0
{
433
0
    if (auto v = m ? dynamic_cast<JSON_array const*>(m->value.get()) : nullptr) {
434
0
        for (auto const& i: v->elements) {
435
0
            fn(JSON(i));
436
0
        }
437
0
        return true;
438
0
    }
439
0
    return false;
440
0
}
441
442
bool
443
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
444
0
{
445
0
    return m && checkSchemaInternal(m->value.get(), schema.m->value.get(), 0, errors, "");
446
0
}
447
448
bool
449
JSON::checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors)
450
0
{
451
0
    return m && checkSchemaInternal(m->value.get(), schema.m->value.get(), flags, errors, "");
452
0
}
453
454
bool
455
JSON::checkSchemaInternal(
456
    JSON_value* this_v,
457
    JSON_value* sch_v,
458
    unsigned long flags,
459
    std::list<std::string>& errors,
460
    std::string prefix)
461
0
{
462
0
    auto* this_arr = dynamic_cast<JSON_array*>(this_v);
463
0
    auto* this_dict = dynamic_cast<JSON_dictionary*>(this_v);
464
465
0
    auto* sch_arr = dynamic_cast<JSON_array*>(sch_v);
466
0
    auto* sch_dict = dynamic_cast<JSON_dictionary*>(sch_v);
467
468
0
    auto* sch_str = dynamic_cast<JSON_string*>(sch_v);
469
470
0
    std::string err_prefix;
471
0
    if (prefix.empty()) {
472
0
        err_prefix = "top-level object";
473
0
    } else {
474
0
        err_prefix = "json key \"" + prefix + "\"";
475
0
    }
476
477
0
    std::string pattern_key;
478
0
    if (sch_dict) {
479
0
        if (!this_dict) {
480
0
            QTC::TC("libtests", "JSON wanted dictionary");
481
0
            errors.push_back(err_prefix + " is supposed to be a dictionary");
482
0
            return false;
483
0
        }
484
0
        auto members = sch_dict->members;
485
0
        std::string key;
486
0
        if ((members.size() == 1) &&
487
0
            ((key = members.begin()->first, key.length() > 2) && (key.at(0) == '<') &&
488
0
             (key.at(key.length() - 1) == '>'))) {
489
0
            pattern_key = key;
490
0
        }
491
0
    }
492
493
0
    if (sch_dict && !pattern_key.empty()) {
494
0
        auto pattern_schema = sch_dict->members[pattern_key].m->value.get();
495
0
        for (auto const& [key, val]: this_dict->members) {
496
0
            checkSchemaInternal(
497
0
                val.m->value.get(), pattern_schema, flags, errors, prefix + "." + key);
498
0
        }
499
0
    } else if (sch_dict) {
500
0
        for (auto& [key, val]: sch_dict->members) {
501
0
            if (this_dict->members.contains(key)) {
502
0
                checkSchemaInternal(
503
0
                    this_dict->members[key].m->value.get(),
504
0
                    val.m->value.get(),
505
0
                    flags,
506
0
                    errors,
507
0
                    prefix + "." + key);
508
0
            } else {
509
0
                if (flags & f_optional) {
510
0
                    QTC::TC("libtests", "JSON optional key");
511
0
                } else {
512
0
                    QTC::TC("libtests", "JSON key missing in object");
513
0
                    errors.emplace_back(
514
0
                        err_prefix + ": key \"" + key +
515
0
                        "\" is present in schema but missing in object");
516
0
                }
517
0
            }
518
0
        }
519
0
        for (auto const& item: this_dict->members) {
520
0
            if (!sch_dict->members.contains(item.first)) {
521
0
                QTC::TC("libtests", "JSON key extra in object");
522
0
                errors.emplace_back(
523
0
                    err_prefix + ": key \"" + item.first +
524
0
                    "\" is not present in schema but appears in object");
525
0
            }
526
0
        }
527
0
    } else if (sch_arr) {
528
0
        auto n_elements = sch_arr->elements.size();
529
0
        if (n_elements == 1) {
530
            // A single-element array in the schema allows a single element in the object or a
531
            // variable-length array, each of whose items must conform to the single element of the
532
            // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior
533
            // of allowing a single item only when the object is not an array.
534
0
            if (this_arr) {
535
0
                int i = 0;
536
0
                for (auto const& element: this_arr->elements) {
537
0
                    checkSchemaInternal(
538
0
                        element.m->value.get(),
539
0
                        sch_arr->elements.at(0).m->value.get(),
540
0
                        flags,
541
0
                        errors,
542
0
                        prefix + "." + std::to_string(i));
543
0
                    ++i;
544
0
                }
545
0
            } else {
546
0
                QTC::TC("libtests", "JSON schema array for single item");
547
0
                checkSchemaInternal(
548
0
                    this_v, sch_arr->elements.at(0).m->value.get(), flags, errors, prefix);
549
0
            }
550
0
        } else if (!this_arr || this_arr->elements.size() != n_elements) {
551
0
            QTC::TC("libtests", "JSON schema array length mismatch");
552
0
            errors.emplace_back(
553
0
                err_prefix + " is supposed to be an array of length " + std::to_string(n_elements));
554
0
            return false;
555
0
        } else {
556
            // A multi-element array in the schema must correspond to an element of the same length
557
            // in the object. Each element in the object is validated against the corresponding
558
            // element in the schema.
559
0
            size_t i = 0;
560
0
            for (auto const& element: this_arr->elements) {
561
0
                checkSchemaInternal(
562
0
                    element.m->value.get(),
563
0
                    sch_arr->elements.at(i).m->value.get(),
564
0
                    flags,
565
0
                    errors,
566
0
                    prefix + "." + std::to_string(i));
567
0
                ++i;
568
0
            }
569
0
        }
570
0
    } else if (!sch_str) {
571
0
        QTC::TC("libtests", "JSON schema other type");
572
0
        errors.emplace_back(err_prefix + " schema value is not dictionary, array, or string");
573
0
        return false;
574
0
    }
575
576
0
    return errors.empty();
577
0
}
578
579
namespace
580
{
581
    class JSONParser
582
    {
583
      public:
584
        JSONParser(InputSource& is, JSON::Reactor* reactor) :
585
0
            is(is),
586
0
            reactor(reactor),
587
0
            p(buf)
588
0
        {
589
0
        }
590
591
        JSON parse();
592
593
      private:
594
        enum parser_state_e {
595
            ps_top,
596
            ps_dict_begin,
597
            ps_dict_after_key,
598
            ps_dict_after_colon,
599
            ps_dict_after_item,
600
            ps_dict_after_comma,
601
            ps_array_begin,
602
            ps_array_after_item,
603
            ps_array_after_comma,
604
            ps_done,
605
        };
606
607
        enum lex_state_e {
608
            ls_top,
609
            ls_number,
610
            ls_number_minus,
611
            ls_number_leading_zero,
612
            ls_number_before_point,
613
            ls_number_point,
614
            ls_number_after_point,
615
            ls_number_e,
616
            ls_number_e_sign,
617
            ls_alpha,
618
            ls_string,
619
            ls_after_string,
620
            ls_backslash,
621
            ls_u4,
622
            ls_begin_array,
623
            ls_end_array,
624
            ls_begin_dict,
625
            ls_end_dict,
626
            ls_colon,
627
            ls_comma,
628
        };
629
630
        struct StackFrame
631
        {
632
            StackFrame(parser_state_e state, JSON& item) :
633
0
                state(state),
634
0
                item(item)
635
0
            {
636
0
            }
637
638
            parser_state_e state;
639
            JSON item;
640
        };
641
642
        void getToken();
643
        void handleToken();
644
        void tokenError();
645
        static void handle_u_code(
646
            unsigned long codepoint,
647
            qpdf_offset_t offset,
648
            unsigned long& high_surrogate,
649
            qpdf_offset_t& high_offset,
650
            std::string& result);
651
        inline void append();
652
        inline void append(lex_state_e);
653
        inline void ignore();
654
        inline void ignore(lex_state_e);
655
656
        InputSource& is;
657
        JSON::Reactor* reactor;
658
        lex_state_e lex_state{ls_top};
659
        char buf[16384];
660
        size_t bytes{0};
661
        char const* p;
662
        qpdf_offset_t u_count{0};
663
        unsigned long u_value{0};
664
        qpdf_offset_t offset{0};
665
        bool done{false};
666
        std::string token;
667
        qpdf_offset_t token_start{0};
668
        parser_state_e parser_state{ps_top};
669
        std::vector<StackFrame> stack;
670
        std::string dict_key;
671
        qpdf_offset_t dict_key_offset{0};
672
    };
673
} // namespace
674
675
void
676
JSONParser::handle_u_code(
677
    unsigned long codepoint,
678
    qpdf_offset_t offset,
679
    unsigned long& high_surrogate,
680
    qpdf_offset_t& high_offset,
681
    std::string& result)
682
0
{
683
0
    if ((codepoint & 0xFC00) == 0xD800) {
684
        // high surrogate
685
0
        qpdf_offset_t new_high_offset = offset;
686
0
        if (high_offset) {
687
0
            QTC::TC("libtests", "JSON 16 high high");
688
0
            throw std::runtime_error(
689
0
                "JSON: offset " + std::to_string(new_high_offset) +
690
0
                ": UTF-16 high surrogate found after previous high surrogate at offset " +
691
0
                std::to_string(high_offset));
692
0
        }
693
0
        high_offset = new_high_offset;
694
0
        high_surrogate = codepoint;
695
0
    } else if ((codepoint & 0xFC00) == 0xDC00) {
696
        // low surrogate
697
0
        if (offset != (high_offset + 6)) {
698
0
            QTC::TC("libtests", "JSON 16 low not after high");
699
0
            throw std::runtime_error(
700
0
                "JSON: offset " + std::to_string(offset) +
701
0
                ": UTF-16 low surrogate found not immediately after high surrogate");
702
0
        }
703
0
        high_offset = 0;
704
0
        codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF);
705
0
        result += QUtil::toUTF8(codepoint);
706
0
    } else {
707
0
        result += QUtil::toUTF8(codepoint);
708
0
    }
709
0
}
710
711
void
712
JSONParser::tokenError()
713
0
{
714
0
    if (done) {
715
0
        QTC::TC("libtests", "JSON parse ls premature end of input");
716
0
        throw std::runtime_error("JSON: premature end of input");
717
0
    }
718
719
0
    if (lex_state == ls_u4) {
720
0
        QTC::TC("libtests", "JSON parse bad hex after u");
721
0
        throw std::runtime_error(
722
0
            "JSON: offset " + std::to_string(offset - u_count - 1) +
723
0
            ": \\u must be followed by four hex digits");
724
0
    } else if (lex_state == ls_alpha) {
725
0
        QTC::TC("libtests", "JSON parse keyword bad character");
726
0
        throw std::runtime_error(
727
0
            "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " +
728
0
            std::string(p, 1));
729
0
    } else if (lex_state == ls_string) {
730
0
        QTC::TC("libtests", "JSON parse control char in string");
731
0
        throw std::runtime_error(
732
0
            "JSON: offset " + std::to_string(offset) +
733
0
            ": control character in string (missing \"?)");
734
0
    } else if (lex_state == ls_backslash) {
735
0
        QTC::TC("libtests", "JSON parse backslash bad character");
736
0
        throw std::runtime_error(
737
0
            "JSON: offset " + std::to_string(offset) +
738
0
            ": invalid character after backslash: " + std::string(p, 1));
739
0
    }
740
741
0
    if (*p == '.') {
742
0
        if (lex_state == ls_number || lex_state == ls_number_e || lex_state == ls_number_e_sign) {
743
0
            QTC::TC("libtests", "JSON parse point after e");
744
0
            throw std::runtime_error(
745
0
                "JSON: offset " + std::to_string(offset) +
746
0
                ": numeric literal: decimal point after e");
747
0
        } else {
748
0
            QTC::TC("libtests", "JSON parse duplicate point");
749
0
            throw std::runtime_error(
750
0
                "JSON: offset " + std::to_string(offset) +
751
0
                ": numeric literal: decimal point already seen");
752
0
        }
753
0
    } else if (*p == 'e' || *p == 'E') {
754
0
        QTC::TC("libtests", "JSON parse duplicate e");
755
0
        throw std::runtime_error(
756
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: e already seen");
757
0
    } else if ((*p == '+') || (*p == '-')) {
758
0
        QTC::TC("libtests", "JSON parse unexpected sign");
759
0
        throw std::runtime_error(
760
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign");
761
0
    } else if (util::is_space(*p) || strchr("{}[]:,", *p)) {
762
0
        QTC::TC("libtests", "JSON parse incomplete number");
763
0
        throw std::runtime_error(
764
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: incomplete number");
765
766
0
    } else {
767
0
        QTC::TC("libtests", "JSON parse numeric bad character");
768
0
        throw std::runtime_error(
769
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " +
770
0
            std::string(p, 1));
771
0
    }
772
0
    throw std::logic_error("JSON::tokenError : unhandled error");
773
0
}
774
775
// Append current character to token and advance to next input character.
776
inline void
777
JSONParser::append()
778
0
{
779
0
    token += *p;
780
0
    ++p;
781
0
    ++offset;
782
0
}
783
784
// Append current character to token, advance to next input character and transition to 'next' lexer
785
// state.
786
inline void
787
JSONParser::append(lex_state_e next)
788
0
{
789
0
    lex_state = next;
790
0
    token += *p;
791
0
    ++p;
792
0
    ++offset;
793
0
}
794
795
// Advance to next input character without appending the current character to token.
796
inline void
797
JSONParser::ignore()
798
0
{
799
0
    ++p;
800
0
    ++offset;
801
0
}
802
803
// Advance to next input character without appending the current character to token and transition
804
// to 'next' lexer state.
805
inline void
806
JSONParser::ignore(lex_state_e next)
807
0
{
808
0
    lex_state = next;
809
0
    ++p;
810
0
    ++offset;
811
0
}
812
813
void
814
JSONParser::getToken()
815
0
{
816
0
    token.clear();
817
818
    // Keep track of UTF-16 surrogate pairs.
819
0
    unsigned long high_surrogate = 0;
820
0
    qpdf_offset_t high_offset = 0;
821
822
0
    while (true) {
823
0
        if (p == (buf + bytes)) {
824
0
            p = buf;
825
0
            bytes = is.read(buf, sizeof(buf));
826
0
            if (bytes == 0) {
827
0
                done = true;
828
0
                break;
829
0
            }
830
0
        }
831
832
0
        if ((*p < 32 && *p >= 0)) {
833
0
            if (*p == '\t' || *p == '\n' || *p == '\r') {
834
                // Legal white space not permitted in strings. This will always end the current
835
                // token (unless we are still before the start of the token).
836
0
                if (lex_state == ls_top) {
837
0
                    ignore();
838
0
                } else {
839
0
                    break;
840
0
                }
841
842
0
            } else {
843
0
                QTC::TC("libtests", "JSON parse null character");
844
0
                throw std::runtime_error(
845
0
                    "JSON: control or null character at offset " + std::to_string(offset));
846
0
            }
847
0
        } else if (*p == ',') {
848
0
            if (lex_state == ls_top) {
849
0
                ignore(ls_comma);
850
0
                return;
851
0
            } else if (lex_state == ls_string) {
852
0
                append();
853
0
            } else {
854
0
                break;
855
0
            }
856
0
        } else if (*p == ':') {
857
0
            if (lex_state == ls_top) {
858
0
                ignore(ls_colon);
859
0
                return;
860
0
            } else if (lex_state == ls_string) {
861
0
                append();
862
0
            } else {
863
0
                break;
864
0
            }
865
0
        } else if (*p == ' ') {
866
0
            if (lex_state == ls_top) {
867
0
                ignore();
868
0
            } else if (lex_state == ls_string) {
869
0
                append();
870
0
            } else {
871
0
                break;
872
0
            }
873
0
        } else if (*p == '{') {
874
0
            if (lex_state == ls_top) {
875
0
                token_start = offset;
876
0
                ignore(ls_begin_dict);
877
0
                return;
878
0
            } else if (lex_state == ls_string) {
879
0
                append();
880
0
            } else {
881
0
                break;
882
0
            }
883
0
        } else if (*p == '}') {
884
0
            if (lex_state == ls_top) {
885
0
                ignore(ls_end_dict);
886
0
                return;
887
0
            } else if (lex_state == ls_string) {
888
0
                append();
889
0
            } else {
890
0
                break;
891
0
            }
892
0
        } else if (*p == '[') {
893
0
            if (lex_state == ls_top) {
894
0
                token_start = offset;
895
0
                ignore(ls_begin_array);
896
0
                return;
897
0
            } else if (lex_state == ls_string) {
898
0
                append();
899
0
            } else {
900
0
                break;
901
0
            }
902
0
        } else if (*p == ']') {
903
0
            if (lex_state == ls_top) {
904
0
                ignore(ls_end_array);
905
0
                return;
906
0
            } else if (lex_state == ls_string) {
907
0
                append();
908
0
            } else {
909
0
                break;
910
0
            }
911
0
        } else {
912
0
            switch (lex_state) {
913
0
            case ls_top:
914
0
                token_start = offset;
915
0
                if (*p == '"') {
916
0
                    ignore(ls_string);
917
0
                } else if ((*p >= 'a') && (*p <= 'z')) {
918
0
                    append(ls_alpha);
919
0
                } else if (*p == '-') {
920
0
                    append(ls_number_minus);
921
0
                } else if ((*p >= '1') && (*p <= '9')) {
922
0
                    append(ls_number_before_point);
923
0
                } else if (*p == '0') {
924
0
                    append(ls_number_leading_zero);
925
0
                } else {
926
0
                    QTC::TC("libtests", "JSON parse bad character");
927
0
                    throw std::runtime_error(
928
0
                        "JSON: offset " + std::to_string(offset) + ": unexpected character " +
929
0
                        std::string(p, 1));
930
0
                }
931
0
                break;
932
933
0
            case ls_number_minus:
934
0
                if ((*p >= '1') && (*p <= '9')) {
935
0
                    append(ls_number_before_point);
936
0
                } else if (*p == '0') {
937
0
                    append(ls_number_leading_zero);
938
0
                } else {
939
0
                    QTC::TC("libtests", "JSON parse number minus no digits");
940
0
                    throw std::runtime_error(
941
0
                        "JSON: offset " + std::to_string(offset) +
942
0
                        ": numeric literal: no digit after minus sign");
943
0
                }
944
0
                break;
945
946
0
            case ls_number_leading_zero:
947
0
                if (*p == '.') {
948
0
                    append(ls_number_point);
949
0
                } else if (*p == 'e' || *p == 'E') {
950
0
                    append(ls_number_e);
951
0
                } else {
952
0
                    QTC::TC("libtests", "JSON parse leading zero");
953
0
                    throw std::runtime_error(
954
0
                        "JSON: offset " + std::to_string(offset) + ": number with leading zero");
955
0
                }
956
0
                break;
957
958
0
            case ls_number_before_point:
959
0
                if ((*p >= '0') && (*p <= '9')) {
960
0
                    append();
961
0
                } else if (*p == '.') {
962
0
                    append(ls_number_point);
963
0
                } else if (*p == 'e' || *p == 'E') {
964
0
                    append(ls_number_e);
965
0
                } else {
966
0
                    tokenError();
967
0
                }
968
0
                break;
969
970
0
            case ls_number_point:
971
0
                if ((*p >= '0') && (*p <= '9')) {
972
0
                    append(ls_number_after_point);
973
0
                } else {
974
0
                    tokenError();
975
0
                }
976
0
                break;
977
978
0
            case ls_number_after_point:
979
0
                if ((*p >= '0') && (*p <= '9')) {
980
0
                    append();
981
0
                } else if (*p == 'e' || *p == 'E') {
982
0
                    append(ls_number_e);
983
0
                } else {
984
0
                    tokenError();
985
0
                }
986
0
                break;
987
988
0
            case ls_number_e:
989
0
                if ((*p >= '0') && (*p <= '9')) {
990
0
                    append(ls_number);
991
0
                } else if ((*p == '+') || (*p == '-')) {
992
0
                    append(ls_number_e_sign);
993
0
                } else {
994
0
                    tokenError();
995
0
                }
996
0
                break;
997
998
0
            case ls_number_e_sign:
999
0
                if ((*p >= '0') && (*p <= '9')) {
1000
0
                    append(ls_number);
1001
0
                } else {
1002
0
                    tokenError();
1003
0
                }
1004
0
                break;
1005
1006
0
            case ls_number:
1007
                // We only get here after we have seen an exponent.
1008
0
                if ((*p >= '0') && (*p <= '9')) {
1009
0
                    append();
1010
0
                } else {
1011
0
                    tokenError();
1012
0
                }
1013
0
                break;
1014
1015
0
            case ls_alpha:
1016
0
                if ((*p >= 'a') && (*p <= 'z')) {
1017
0
                    append();
1018
0
                } else {
1019
0
                    tokenError();
1020
0
                }
1021
0
                break;
1022
1023
0
            case ls_string:
1024
0
                if (*p == '"') {
1025
0
                    if (high_offset) {
1026
0
                        QTC::TC("libtests", "JSON 16 dangling high");
1027
0
                        throw std::runtime_error(
1028
0
                            "JSON: offset " + std::to_string(high_offset) +
1029
0
                            ": UTF-16 high surrogate not followed by low surrogate");
1030
0
                    }
1031
0
                    ignore(ls_after_string);
1032
0
                    return;
1033
0
                } else if (*p == '\\') {
1034
0
                    ignore(ls_backslash);
1035
0
                } else {
1036
0
                    append();
1037
0
                }
1038
0
                break;
1039
1040
0
            case ls_backslash:
1041
0
                lex_state = ls_string;
1042
0
                switch (*p) {
1043
0
                case '\\':
1044
0
                case '\"':
1045
0
                case '/':
1046
                    // \/ is allowed in json input, but so is /, so we don't map / to \/ in output.
1047
0
                    token += *p;
1048
0
                    break;
1049
0
                case 'b':
1050
0
                    token += '\b';
1051
0
                    break;
1052
0
                case 'f':
1053
0
                    token += '\f';
1054
0
                    break;
1055
0
                case 'n':
1056
0
                    token += '\n';
1057
0
                    break;
1058
0
                case 'r':
1059
0
                    token += '\r';
1060
0
                    break;
1061
0
                case 't':
1062
0
                    token += '\t';
1063
0
                    break;
1064
0
                case 'u':
1065
0
                    lex_state = ls_u4;
1066
0
                    u_count = 0;
1067
0
                    u_value = 0;
1068
0
                    break;
1069
0
                default:
1070
0
                    lex_state = ls_backslash;
1071
0
                    tokenError();
1072
0
                }
1073
0
                ignore();
1074
0
                break;
1075
1076
0
            case ls_u4:
1077
0
                using ui = unsigned int;
1078
0
                if (ui val = ui(util::hex_decode_char(*p)); val < 16) {
1079
0
                    u_value = 16 * u_value + val;
1080
0
                } else {
1081
0
                    tokenError();
1082
0
                }
1083
0
                if (++u_count == 4) {
1084
0
                    handle_u_code(u_value, offset - 5, high_surrogate, high_offset, token);
1085
0
                    lex_state = ls_string;
1086
0
                }
1087
0
                ignore();
1088
0
                break;
1089
1090
0
            default:
1091
0
                throw std::logic_error("JSONParser::getToken : trying to handle delimiter state");
1092
0
            }
1093
0
        }
1094
0
    }
1095
1096
    // We only get here if on end of input or if the last character was a control character or other
1097
    // delimiter.
1098
1099
0
    if (!token.empty()) {
1100
0
        switch (lex_state) {
1101
0
        case ls_top:
1102
            // Can't happen
1103
0
            throw std::logic_error("tok_start set in ls_top while parsing");
1104
0
            break;
1105
1106
0
        case ls_number_leading_zero:
1107
0
        case ls_number_before_point:
1108
0
        case ls_number_after_point:
1109
0
            lex_state = ls_number;
1110
0
            break;
1111
1112
0
        case ls_number:
1113
0
        case ls_alpha:
1114
            // terminal state
1115
0
            break;
1116
1117
0
        default:
1118
0
            tokenError();
1119
0
        }
1120
0
    }
1121
0
}
1122
1123
void
1124
JSONParser::handleToken()
1125
0
{
1126
0
    if (lex_state == ls_top) {
1127
0
        return;
1128
0
    }
1129
1130
0
    if (parser_state == ps_done) {
1131
0
        QTC::TC("libtests", "JSON parse junk after object");
1132
0
        throw std::runtime_error(
1133
0
            "JSON: offset " + std::to_string(offset) +
1134
0
            ": material follows end of object: " + token);
1135
0
    }
1136
1137
0
    const static JSON null_item = JSON::makeNull();
1138
0
    JSON item;
1139
0
    auto tos = stack.empty() ? null_item : stack.back().item;
1140
0
    auto ls = lex_state;
1141
0
    lex_state = ls_top;
1142
1143
0
    switch (ls) {
1144
0
    case ls_begin_dict:
1145
0
        item = JSON::makeDictionary();
1146
0
        break;
1147
1148
0
    case ls_begin_array:
1149
0
        item = JSON::makeArray();
1150
0
        break;
1151
1152
0
    case ls_colon:
1153
0
        if (parser_state != ps_dict_after_key) {
1154
0
            QTC::TC("libtests", "JSON parse unexpected :");
1155
0
            throw std::runtime_error(
1156
0
                "JSON: offset " + std::to_string(offset) + ": unexpected colon");
1157
0
        }
1158
0
        parser_state = ps_dict_after_colon;
1159
0
        return;
1160
1161
0
    case ls_comma:
1162
0
        if (!((parser_state == ps_dict_after_item) || (parser_state == ps_array_after_item))) {
1163
0
            QTC::TC("libtests", "JSON parse unexpected ,");
1164
0
            throw std::runtime_error(
1165
0
                "JSON: offset " + std::to_string(offset) + ": unexpected comma");
1166
0
        }
1167
0
        if (parser_state == ps_dict_after_item) {
1168
0
            parser_state = ps_dict_after_comma;
1169
0
        } else if (parser_state == ps_array_after_item) {
1170
0
            parser_state = ps_array_after_comma;
1171
0
        } else {
1172
0
            throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma");
1173
0
        }
1174
0
        return;
1175
1176
0
    case ls_end_array:
1177
0
        if (!(parser_state == ps_array_begin || parser_state == ps_array_after_item)) {
1178
0
            QTC::TC("libtests", "JSON parse unexpected ]");
1179
0
            throw std::runtime_error(
1180
0
                "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter");
1181
0
        }
1182
0
        parser_state = stack.back().state;
1183
0
        tos.setEnd(offset);
1184
0
        if (reactor) {
1185
0
            reactor->containerEnd(tos);
1186
0
        }
1187
0
        if (parser_state != ps_done) {
1188
0
            stack.pop_back();
1189
0
        }
1190
0
        return;
1191
1192
0
    case ls_end_dict:
1193
0
        if (!((parser_state == ps_dict_begin) || (parser_state == ps_dict_after_item))) {
1194
0
            QTC::TC("libtests", "JSON parse unexpected }");
1195
0
            throw std::runtime_error(
1196
0
                "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter");
1197
0
        }
1198
0
        parser_state = stack.back().state;
1199
0
        tos.setEnd(offset);
1200
0
        if (reactor) {
1201
0
            reactor->containerEnd(tos);
1202
0
        }
1203
0
        if (parser_state != ps_done) {
1204
0
            stack.pop_back();
1205
0
        }
1206
0
        return;
1207
1208
0
    case ls_number:
1209
0
        item = JSON::makeNumber(token);
1210
0
        break;
1211
1212
0
    case ls_alpha:
1213
0
        if (token == "true") {
1214
0
            item = JSON::makeBool(true);
1215
0
        } else if (token == "false") {
1216
0
            item = JSON::makeBool(false);
1217
0
        } else if (token == "null") {
1218
0
            item = JSON::makeNull();
1219
0
        } else {
1220
0
            QTC::TC("libtests", "JSON parse invalid keyword");
1221
0
            throw std::runtime_error(
1222
0
                "JSON: offset " + std::to_string(offset) + ": invalid keyword " + token);
1223
0
        }
1224
0
        break;
1225
1226
0
    case ls_after_string:
1227
0
        if (parser_state == ps_dict_begin || parser_state == ps_dict_after_comma) {
1228
0
            dict_key = token;
1229
0
            dict_key_offset = token_start;
1230
0
            parser_state = ps_dict_after_key;
1231
0
            return;
1232
0
        } else {
1233
0
            item = JSON::makeString(token);
1234
0
        }
1235
0
        break;
1236
1237
0
    default:
1238
0
        throw std::runtime_error(
1239
0
            "JSON: offset " + std::to_string(offset) + ": premature end of input");
1240
0
        break;
1241
0
    }
1242
1243
0
    item.setStart(token_start);
1244
0
    item.setEnd(offset);
1245
1246
0
    switch (parser_state) {
1247
0
    case ps_dict_begin:
1248
0
    case ps_dict_after_comma:
1249
0
        QTC::TC("libtests", "JSON parse string as dict key");
1250
0
        throw std::runtime_error(
1251
0
            "JSON: offset " + std::to_string(offset) + ": expect string as dictionary key");
1252
0
        break;
1253
1254
0
    case ps_dict_after_colon:
1255
0
        if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
1256
0
            tos.addDictionaryMember(dict_key, item);
1257
0
        }
1258
0
        parser_state = ps_dict_after_item;
1259
0
        break;
1260
1261
0
    case ps_array_begin:
1262
0
    case ps_array_after_comma:
1263
0
        if (!reactor || !reactor->arrayItem(item)) {
1264
0
            tos.addArrayElement(item);
1265
0
        }
1266
0
        parser_state = ps_array_after_item;
1267
0
        break;
1268
1269
0
    case ps_top:
1270
0
        if (!(item.isDictionary() || item.isArray())) {
1271
0
            stack.emplace_back(ps_done, item);
1272
0
            parser_state = ps_done;
1273
0
            return;
1274
0
        }
1275
0
        parser_state = ps_done;
1276
0
        break;
1277
1278
0
    case ps_dict_after_key:
1279
0
        QTC::TC("libtests", "JSON parse expected colon");
1280
0
        throw std::runtime_error("JSON: offset " + std::to_string(offset) + ": expected ':'");
1281
0
        break;
1282
1283
0
    case ps_dict_after_item:
1284
0
        QTC::TC("libtests", "JSON parse expected , or }");
1285
0
        throw std::runtime_error(
1286
0
            "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
1287
0
        break;
1288
1289
0
    case ps_array_after_item:
1290
0
        QTC::TC("libtests", "JSON parse expected, or ]");
1291
0
        throw std::runtime_error(
1292
0
            "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
1293
0
        break;
1294
1295
0
    case ps_done:
1296
0
        throw std::logic_error("JSONParser::handleToken: unexpected parser state");
1297
0
    }
1298
1299
0
    if (item.isDictionary() || item.isArray()) {
1300
0
        stack.emplace_back(parser_state, item);
1301
        // Calling container start method is postponed until after adding the containers to their
1302
        // parent containers, if any. This makes it much easier to keep track of the current nesting
1303
        // level.
1304
0
        if (item.isDictionary()) {
1305
0
            if (reactor) {
1306
0
                reactor->dictionaryStart();
1307
0
            }
1308
0
            parser_state = ps_dict_begin;
1309
0
        } else if (item.isArray()) {
1310
0
            if (reactor) {
1311
0
                reactor->arrayStart();
1312
0
            }
1313
0
            parser_state = ps_array_begin;
1314
0
        }
1315
1316
0
        if (stack.size() > 500) {
1317
0
            throw std::runtime_error(
1318
0
                "JSON: offset " + std::to_string(offset) + ": maximum object depth exceeded");
1319
0
        }
1320
0
    }
1321
0
}
1322
1323
// Run the tokenizer and token handler until the input is exhausted and return the item at the top
// of the stack. Throws std::runtime_error if the input ends before the parser reaches ps_done.
// When a reactor is present and the result is neither an array nor a dictionary, the reactor's
// topLevelScalar() is called before returning.
JSON
JSONParser::parse()
{
    while (!done) {
        getToken();
        handleToken();
    }

    if (parser_state != ps_done) {
        QTC::TC("libtests", "JSON parse premature EOF");
        throw std::runtime_error("JSON: premature end of input");
    }

    auto const& result = stack.back().item;
    if (reactor) {
        if (!result.isArray() && !result.isDictionary()) {
            reactor->topLevelScalar();
        }
    }
    return result;
}
1340
1341
// Parse JSON read from 'is', passing 'reactor' (which may be null) on to the parser, and return
// the parser's result.
JSON
JSON::parse(InputSource& is, Reactor* reactor)
{
    return JSONParser(is, reactor).parse();
}
1347
1348
// Parse JSON held in the string 's'. The string is wrapped in a BufferInputSource named
// "json input" and parsed without a reactor.
JSON
JSON::parse(std::string const& s)
{
    BufferInputSource source("json input", s);
    return JSONParser(source, nullptr).parse();
}
1355
1356
void
1357
JSON::setStart(qpdf_offset_t start)
1358
0
{
1359
0
    if (m) {
1360
0
        m->start = start;
1361
0
    }
1362
0
}
1363
1364
void
1365
JSON::setEnd(qpdf_offset_t end)
1366
0
{
1367
0
    if (m) {
1368
0
        m->end = end;
1369
0
    }
1370
0
}
1371
1372
// Return the stored start offset, or 0 when the object has no members.
qpdf_offset_t
JSON::getStart() const
{
    if (m) {
        return m->start;
    }
    return 0;
}
1377
1378
// Return the stored end offset, or 0 when the object has no members.
qpdf_offset_t
JSON::getEnd() const
{
    if (m) {
        return m->end;
    }
    return 0;
}