Coverage Report

Created: 2024-09-08 06:06

/src/qpdf/libqpdf/JSON.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/JSON.hh>
2
3
#include <qpdf/JSON_writer.hh>
4
5
#include <qpdf/BufferInputSource.hh>
6
#include <qpdf/Pl_Base64.hh>
7
#include <qpdf/Pl_Concatenate.hh>
8
#include <qpdf/Pl_String.hh>
9
#include <qpdf/QTC.hh>
10
#include <qpdf/QUtil.hh>
11
#include <cstring>
12
#include <stdexcept>
13
14
// Take ownership of the JSON_value that this Members object wraps.
JSON::Members::Members(std::unique_ptr<JSON_value> v) :
    value(std::move(v))
{
}
18
19
// Build a JSON handle around a freshly allocated Members object that owns `v`.
JSON::JSON(std::unique_ptr<JSON_value> v) :
    m(new Members(std::move(v)))
{
}
23
24
void
25
JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter)
26
0
{
27
0
    if (first) {
28
0
        *p << delimiter;
29
0
    } else {
30
0
        std::string s{"\n"};
31
0
        s.append(2 * depth, ' ');
32
0
        *p << s + delimiter;
33
0
    }
34
0
}
35
36
void
37
JSON::writeNext(Pipeline* p, bool& first, size_t depth)
38
0
{
39
0
    if (first) {
40
0
        first = false;
41
0
        std::string s{"\n"};
42
0
        s.append(2 * depth, ' ');
43
0
        *p << s;
44
0
    } else {
45
0
        std::string s{",\n"};
46
0
        s.append(2 * depth, ' ');
47
0
        *p << s;
48
0
    }
49
0
}
50
51
void
52
JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth)
53
0
{
54
0
    *p << "{";
55
0
    first = true;
56
0
}
57
58
void
59
JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth)
60
0
{
61
0
    *p << "[";
62
0
    first = true;
63
0
}
64
65
void
66
JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth)
67
0
{
68
0
    writeClose(p, first, depth, "}");
69
0
}
70
71
void
72
JSON::writeArrayClose(Pipeline* p, bool first, size_t depth)
73
0
{
74
0
    writeClose(p, first, depth, "]");
75
0
}
76
77
void
78
JSON::writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth)
79
0
{
80
0
    writeNext(p, first, depth);
81
0
    *p << std::string("\"") + key + "\": ";
82
0
}
83
84
void
85
JSON::writeDictionaryItem(
86
    Pipeline* p, bool& first, std::string const& key, JSON const& value, size_t depth)
87
0
{
88
0
    writeDictionaryKey(p, first, key, depth);
89
0
    value.write(p, depth);
90
0
}
91
92
void
93
JSON::writeArrayItem(Pipeline* p, bool& first, JSON const& element, size_t depth)
94
0
{
95
0
    writeNext(p, first, depth);
96
0
    element.write(p, depth);
97
0
}
98
99
void
100
JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const
101
0
{
102
0
    bool first = true;
103
0
    writeDictionaryOpen(p, first, depth);
104
0
    for (auto const& iter: members) {
105
0
        writeDictionaryItem(p, first, iter.first, iter.second, 1 + depth);
106
0
    }
107
0
    writeDictionaryClose(p, first, depth);
108
0
}
109
110
void
111
JSON::JSON_array::write(Pipeline* p, size_t depth) const
112
0
{
113
0
    bool first = true;
114
0
    writeArrayOpen(p, first, depth);
115
0
    for (auto const& element: elements) {
116
0
        writeArrayItem(p, first, element, 1 + depth);
117
0
    }
118
0
    writeArrayClose(p, first, depth);
119
0
}
120
121
// Store both the original string and its JSON-escaped form (computed once by
// Writer::encode_string) so that write() does not have to re-encode.
JSON::JSON_string::JSON_string(std::string const& text) :
    JSON_value(vt_string),
    utf8(text),
    encoded(Writer::encode_string(text))
{
}
127
128
void
129
JSON::JSON_string::write(Pipeline* p, size_t) const
130
0
{
131
0
    *p << std::string("\"") + encoded + "\"";
132
0
}
133
134
// Construct a number from an integer; the textual form is produced by std::to_string.
JSON::JSON_number::JSON_number(long long n) :
    JSON_value(vt_number),
    encoded(std::to_string(n))
{
}
139
140
// Construct a number from a double; the textual form is produced by
// QUtil::double_to_string(d, 6) (the 6 is presumably the number of decimal places -- see QUtil).
JSON::JSON_number::JSON_number(double d) :
    JSON_value(vt_number),
    encoded(QUtil::double_to_string(d, 6))
{
}
145
146
// Construct a number from text that is already in its final form; the string is stored verbatim
// and is not validated here.
JSON::JSON_number::JSON_number(std::string const& text) :
    JSON_value(vt_number),
    encoded(text)
{
}
151
152
void
153
JSON::JSON_number::write(Pipeline* p, size_t) const
154
0
{
155
0
    *p << encoded;
156
0
}
157
158
// Construct a boolean node holding `b`.
JSON::JSON_bool::JSON_bool(bool b) :
    JSON_value(vt_bool),
    value(b)
{
}
163
164
void
165
JSON::JSON_bool::write(Pipeline* p, size_t) const
166
0
{
167
0
    *p << (value ? "true" : "false");
168
0
}
169
170
void
171
JSON::JSON_null::write(Pipeline* p, size_t) const
172
0
{
173
0
    *p << "null";
174
0
}
175
176
// Construct a blob node. `producer` is stored and invoked later by write() with the pipeline
// that should receive the blob's raw bytes.
JSON::JSON_blob::JSON_blob(std::function<void(Pipeline*)> producer) :
    JSON_value(vt_blob),
    fn(producer)
{
}
181
182
void
183
JSON::JSON_blob::write(Pipeline* p, size_t) const
184
0
{
185
0
    *p << "\"";
186
0
    Pl_Concatenate cat("blob concatenate", p);
187
0
    Pl_Base64 base64("blob base64", &cat, Pl_Base64::a_encode);
188
0
    fn(&base64);
189
0
    base64.finish();
190
0
    *p << "\"";
191
0
}
192
193
void
194
JSON::write(Pipeline* p, size_t depth) const
195
0
{
196
0
    if (!m) {
197
0
        *p << "null";
198
0
    } else {
199
0
        m->value->write(p, depth);
200
0
    }
201
0
}
202
203
std::string
204
JSON::unparse() const
205
0
{
206
0
    if (!m) {
207
0
        return "null";
208
0
    }
209
0
    std::string s;
210
0
    Pl_String p("unparse", nullptr, s);
211
0
    write(&p, 0);
212
0
    return s;
213
0
}
214
215
std::string
216
JSON::Writer::encode_string(std::string const& str)
217
0
{
218
0
    static auto constexpr hexchars = "0123456789abcdef";
219
220
0
    auto begin = str.cbegin();
221
0
    auto end = str.cend();
222
0
    auto iter = begin;
223
0
    while (iter != end) {
224
0
        auto c = static_cast<unsigned char>(*iter);
225
0
        if ((c > 34 && c != '\\') || c == ' ' || c == 33) {
226
            // Optimistically check that no char in str requires escaping. Hopefully we can just
227
            // return the input str.
228
0
            ++iter;
229
0
        } else {
230
            // We found a char that requires escaping. Initialize result to the chars scanned so
231
            // far, append/replace the rest of str one char at a time, and return the result.
232
0
            std::string result{begin, iter};
233
234
0
            for (; iter != end; ++iter) {
235
0
                auto ch = static_cast<unsigned char>(*iter);
236
0
                if ((ch > 34 && ch != '\\') || ch == ' ' || ch == 33) {
237
                    // Check for most common case first.
238
0
                    result += *iter;
239
0
                } else {
240
0
                    switch (ch) {
241
0
                    case '\\':
242
0
                        result += "\\\\";
243
0
                        break;
244
0
                    case '\"':
245
0
                        result += "\\\"";
246
0
                        break;
247
0
                    case '\b':
248
0
                        result += "\\b";
249
0
                        break;
250
0
                    case '\f':
251
0
                        result += "\\f";
252
0
                        break;
253
0
                    case '\n':
254
0
                        result += "\\n";
255
0
                        break;
256
0
                    case '\r':
257
0
                        result += "\\r";
258
0
                        break;
259
0
                    case '\t':
260
0
                        result += "\\t";
261
0
                        break;
262
0
                    default:
263
0
                        result += ch < 16 ? "\\u000" : "\\u001";
264
0
                        result += hexchars[ch % 16];
265
0
                    }
266
0
                }
267
0
            }
268
0
            return result;
269
0
        }
270
0
    }
271
0
    return str;
272
0
}
273
274
// Create a JSON object holding a new, empty dictionary.
JSON
JSON::makeDictionary()
{
    return JSON(std::make_unique<JSON_dictionary>());
}
279
280
// Store val under `key` in this dictionary and return the stored value. The key is stored in
// its encode_string() form. An uninitialized val is stored as a JSON null.
// Throws std::runtime_error if this object is not a dictionary.
JSON
JSON::addDictionaryMember(std::string const& key, JSON const& val)
{
    auto* dict = m ? dynamic_cast<JSON_dictionary*>(m->value.get()) : nullptr;
    if (dict == nullptr) {
        throw std::runtime_error("JSON::addDictionaryMember called on non-dictionary");
    }
    return dict->members[Writer::encode_string(key)] = val.m ? val : makeNull();
}
289
290
bool
291
JSON::checkDictionaryKeySeen(std::string const& key)
292
0
{
293
0
    if (auto* obj = m ? dynamic_cast<JSON_dictionary*>(m->value.get()) : nullptr) {
294
0
        return !obj->parsed_keys.insert(key).second;
295
0
    }
296
0
    throw std::logic_error("JSON::checkDictionaryKey called on non-dictionary");
297
0
    return false; // unreachable
298
0
}
299
300
// Create a JSON object holding a new, empty array.
JSON
JSON::makeArray()
{
    return JSON(std::make_unique<JSON_array>());
}
305
306
// Append val to this array and return the stored element. An uninitialized val is stored as a
// JSON null. Throws std::runtime_error if this object is not an array.
JSON
JSON::addArrayElement(JSON const& val)
{
    auto* array = m ? dynamic_cast<JSON_array*>(m->value.get()) : nullptr;
    if (array == nullptr) {
        throw std::runtime_error("JSON::addArrayElement called on non-array");
    }
    array->elements.push_back(val.m ? val : makeNull());
    return array->elements.back();
}
320
321
// Create a JSON string value from utf8.
JSON
JSON::makeString(std::string const& utf8)
{
    return JSON(std::make_unique<JSON_string>(utf8));
}
326
327
// Create a JSON number value from an integer.
JSON
JSON::makeInt(long long int value)
{
    return JSON(std::make_unique<JSON_number>(value));
}
332
333
// Create a JSON number value from a double.
JSON
JSON::makeReal(double value)
{
    return JSON(std::make_unique<JSON_number>(value));
}
338
339
// Create a JSON number value from text that is already in its final serialized form.
JSON
JSON::makeNumber(std::string const& encoded)
{
    return JSON(std::make_unique<JSON_number>(encoded));
}
344
345
// Create a JSON boolean value.
JSON
JSON::makeBool(bool value)
{
    return JSON(std::make_unique<JSON_bool>(value));
}
350
351
// Create a JSON null value.
JSON
JSON::makeNull()
{
    return JSON(std::make_unique<JSON_null>());
}
356
357
// Create a JSON blob value. fn is called when the value is written and receives the pipeline
// into which it should write the blob's raw bytes.
JSON
JSON::makeBlob(std::function<void(Pipeline*)> fn)
{
    return JSON(std::make_unique<JSON_blob>(fn));
}
362
363
bool
364
JSON::isArray() const
365
0
{
366
0
    return m ? m->value->type_code == vt_array : false;
367
0
}
368
369
bool
370
JSON::isDictionary() const
371
0
{
372
0
    return m && m->value->type_code == vt_dictionary;
373
0
}
374
375
bool
376
JSON::getString(std::string& utf8) const
377
0
{
378
0
    if (m && m->value->type_code == vt_string) {
379
0
        auto v = dynamic_cast<JSON_string const*>(m->value.get());
380
0
        utf8 = v->utf8;
381
0
        return true;
382
0
    }
383
0
    return false;
384
0
}
385
386
bool
387
JSON::getNumber(std::string& value) const
388
0
{
389
0
    if (m && m->value->type_code == vt_number) {
390
0
        auto v = dynamic_cast<JSON_number const*>(m->value.get());
391
0
        value = v->encoded;
392
0
        return true;
393
0
    }
394
0
    return false;
395
0
}
396
397
bool
398
JSON::getBool(bool& value) const
399
0
{
400
0
    if (m && m->value->type_code == vt_bool) {
401
0
        auto v = dynamic_cast<JSON_bool const*>(m->value.get());
402
0
        value = v->value;
403
0
        return true;
404
0
    }
405
0
    return false;
406
0
}
407
408
bool
409
JSON::isNull() const
410
0
{
411
0
    return m && m->value->type_code == vt_null;
412
0
}
413
414
// Return the dictionary member stored under `key`, or a JSON null if this object is not a
// dictionary or has no such member. The key is looked up exactly as given.
JSON
JSON::getDictItem(std::string const& key) const
{
    auto const* dict = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr;
    if (dict != nullptr) {
        auto const found = dict->members.find(key);
        if (found != dict->members.end()) {
            return found->second;
        }
    }
    return makeNull();
}
424
425
bool
426
JSON::forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const
427
0
{
428
0
    if (auto v = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr) {
429
0
        for (auto const& [key, value]: v->members) {
430
0
            fn(key, value);
431
0
        }
432
0
        return true;
433
0
    }
434
0
    return false;
435
0
}
436
437
bool
438
JSON::forEachArrayItem(std::function<void(JSON value)> fn) const
439
0
{
440
0
    if (auto v = m ? dynamic_cast<JSON_array const*>(m->value.get()) : nullptr) {
441
0
        for (auto const& i: v->elements) {
442
0
            fn(JSON(i));
443
0
        }
444
0
        return true;
445
0
    }
446
0
    return false;
447
0
}
448
449
bool
450
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
451
0
{
452
0
    return m && checkSchemaInternal(m->value.get(), schema.m->value.get(), 0, errors, "");
453
0
}
454
455
bool
456
JSON::checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors)
457
0
{
458
0
    return m && checkSchemaInternal(m->value.get(), schema.m->value.get(), flags, errors, "");
459
0
}
460
461
bool
462
JSON::checkSchemaInternal(
463
    JSON_value* this_v,
464
    JSON_value* sch_v,
465
    unsigned long flags,
466
    std::list<std::string>& errors,
467
    std::string prefix)
468
0
{
469
0
    auto* this_arr = dynamic_cast<JSON_array*>(this_v);
470
0
    auto* this_dict = dynamic_cast<JSON_dictionary*>(this_v);
471
472
0
    auto* sch_arr = dynamic_cast<JSON_array*>(sch_v);
473
0
    auto* sch_dict = dynamic_cast<JSON_dictionary*>(sch_v);
474
475
0
    auto* sch_str = dynamic_cast<JSON_string*>(sch_v);
476
477
0
    std::string err_prefix;
478
0
    if (prefix.empty()) {
479
0
        err_prefix = "top-level object";
480
0
    } else {
481
0
        err_prefix = "json key \"" + prefix + "\"";
482
0
    }
483
484
0
    std::string pattern_key;
485
0
    if (sch_dict) {
486
0
        if (!this_dict) {
487
0
            QTC::TC("libtests", "JSON wanted dictionary");
488
0
            errors.push_back(err_prefix + " is supposed to be a dictionary");
489
0
            return false;
490
0
        }
491
0
        auto members = sch_dict->members;
492
0
        std::string key;
493
0
        if ((members.size() == 1) &&
494
0
            ((key = members.begin()->first, key.length() > 2) && (key.at(0) == '<') &&
495
0
             (key.at(key.length() - 1) == '>'))) {
496
0
            pattern_key = key;
497
0
        }
498
0
    }
499
500
0
    if (sch_dict && (!pattern_key.empty())) {
501
0
        auto pattern_schema = sch_dict->members[pattern_key].m->value.get();
502
0
        for (auto const& iter: this_dict->members) {
503
0
            std::string const& key = iter.first;
504
0
            checkSchemaInternal(
505
0
                this_dict->members[key].m->value.get(),
506
0
                pattern_schema,
507
0
                flags,
508
0
                errors,
509
0
                prefix + "." + key);
510
0
        }
511
0
    } else if (sch_dict) {
512
0
        for (auto& iter: sch_dict->members) {
513
0
            std::string const& key = iter.first;
514
0
            if (this_dict->members.count(key)) {
515
0
                checkSchemaInternal(
516
0
                    this_dict->members[key].m->value.get(),
517
0
                    iter.second.m->value.get(),
518
0
                    flags,
519
0
                    errors,
520
0
                    prefix + "." + key);
521
0
            } else {
522
0
                if (flags & f_optional) {
523
0
                    QTC::TC("libtests", "JSON optional key");
524
0
                } else {
525
0
                    QTC::TC("libtests", "JSON key missing in object");
526
0
                    errors.push_back(
527
0
                        err_prefix + ": key \"" + key +
528
0
                        "\" is present in schema but missing in object");
529
0
                }
530
0
            }
531
0
        }
532
0
        for (auto const& iter: this_dict->members) {
533
0
            std::string const& key = iter.first;
534
0
            if (sch_dict->members.count(key) == 0) {
535
0
                QTC::TC("libtests", "JSON key extra in object");
536
0
                errors.push_back(
537
0
                    err_prefix + ": key \"" + key +
538
0
                    "\" is not present in schema but appears in object");
539
0
            }
540
0
        }
541
0
    } else if (sch_arr) {
542
0
        auto n_elements = sch_arr->elements.size();
543
0
        if (n_elements == 1) {
544
            // A single-element array in the schema allows a single element in the object or a
545
            // variable-length array, each of whose items must conform to the single element of the
546
            // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior
547
            // of allowing a single item only when the object is not an array.
548
0
            if (this_arr) {
549
0
                int i = 0;
550
0
                for (auto const& element: this_arr->elements) {
551
0
                    checkSchemaInternal(
552
0
                        element.m->value.get(),
553
0
                        sch_arr->elements.at(0).m->value.get(),
554
0
                        flags,
555
0
                        errors,
556
0
                        prefix + "." + std::to_string(i));
557
0
                    ++i;
558
0
                }
559
0
            } else {
560
0
                QTC::TC("libtests", "JSON schema array for single item");
561
0
                checkSchemaInternal(
562
0
                    this_v, sch_arr->elements.at(0).m->value.get(), flags, errors, prefix);
563
0
            }
564
0
        } else if (!this_arr || (this_arr->elements.size() != n_elements)) {
565
0
            QTC::TC("libtests", "JSON schema array length mismatch");
566
0
            errors.push_back(
567
0
                err_prefix + " is supposed to be an array of length " + std::to_string(n_elements));
568
0
            return false;
569
0
        } else {
570
            // A multi-element array in the schema must correspond to an element of the same length
571
            // in the object. Each element in the object is validated against the corresponding
572
            // element in the schema.
573
0
            size_t i = 0;
574
0
            for (auto const& element: this_arr->elements) {
575
0
                checkSchemaInternal(
576
0
                    element.m->value.get(),
577
0
                    sch_arr->elements.at(i).m->value.get(),
578
0
                    flags,
579
0
                    errors,
580
0
                    prefix + "." + std::to_string(i));
581
0
                ++i;
582
0
            }
583
0
        }
584
0
    } else if (!sch_str) {
585
0
        QTC::TC("libtests", "JSON schema other type");
586
0
        errors.push_back(err_prefix + " schema value is not dictionary, array, or string");
587
0
        return false;
588
0
    }
589
590
0
    return errors.empty();
591
0
}
592
593
namespace
594
{
595
    class JSONParser
596
    {
597
      public:
598
        JSONParser(InputSource& is, JSON::Reactor* reactor) :
599
            is(is),
600
            reactor(reactor),
601
            p(buf)
602
0
        {
603
0
        }
604
605
        JSON parse();
606
607
      private:
608
        enum parser_state_e {
609
            ps_top,
610
            ps_dict_begin,
611
            ps_dict_after_key,
612
            ps_dict_after_colon,
613
            ps_dict_after_item,
614
            ps_dict_after_comma,
615
            ps_array_begin,
616
            ps_array_after_item,
617
            ps_array_after_comma,
618
            ps_done,
619
        };
620
621
        enum lex_state_e {
622
            ls_top,
623
            ls_number,
624
            ls_number_minus,
625
            ls_number_leading_zero,
626
            ls_number_before_point,
627
            ls_number_point,
628
            ls_number_after_point,
629
            ls_number_e,
630
            ls_number_e_sign,
631
            ls_alpha,
632
            ls_string,
633
            ls_after_string,
634
            ls_backslash,
635
            ls_u4,
636
            ls_begin_array,
637
            ls_end_array,
638
            ls_begin_dict,
639
            ls_end_dict,
640
            ls_colon,
641
            ls_comma,
642
        };
643
644
        struct StackFrame
645
        {
646
            StackFrame(parser_state_e state, JSON& item) :
647
                state(state),
648
                item(item)
649
0
            {
650
0
            }
651
652
            parser_state_e state;
653
            JSON item;
654
        };
655
656
        void getToken();
657
        void handleToken();
658
        void tokenError();
659
        static void handle_u_code(
660
            unsigned long codepoint,
661
            qpdf_offset_t offset,
662
            unsigned long& high_surrogate,
663
            qpdf_offset_t& high_offset,
664
            std::string& result);
665
        inline void append();
666
        inline void append(lex_state_e);
667
        inline void ignore();
668
        inline void ignore(lex_state_e);
669
670
        InputSource& is;
671
        JSON::Reactor* reactor;
672
        lex_state_e lex_state{ls_top};
673
        char buf[16384];
674
        size_t bytes{0};
675
        char const* p;
676
        qpdf_offset_t u_count{0};
677
        unsigned long u_value{0};
678
        qpdf_offset_t offset{0};
679
        bool done{false};
680
        std::string token;
681
        qpdf_offset_t token_start{0};
682
        parser_state_e parser_state{ps_top};
683
        std::vector<StackFrame> stack;
684
        std::string dict_key;
685
        qpdf_offset_t dict_key_offset{0};
686
    };
687
} // namespace
688
689
void
690
JSONParser::handle_u_code(
691
    unsigned long codepoint,
692
    qpdf_offset_t offset,
693
    unsigned long& high_surrogate,
694
    qpdf_offset_t& high_offset,
695
    std::string& result)
696
0
{
697
0
    if ((codepoint & 0xFC00) == 0xD800) {
698
        // high surrogate
699
0
        qpdf_offset_t new_high_offset = offset;
700
0
        if (high_offset) {
701
0
            QTC::TC("libtests", "JSON 16 high high");
702
0
            throw std::runtime_error(
703
0
                "JSON: offset " + std::to_string(new_high_offset) +
704
0
                ": UTF-16 high surrogate found after previous high surrogate at offset " +
705
0
                std::to_string(high_offset));
706
0
        }
707
0
        high_offset = new_high_offset;
708
0
        high_surrogate = codepoint;
709
0
    } else if ((codepoint & 0xFC00) == 0xDC00) {
710
        // low surrogate
711
0
        if (offset != (high_offset + 6)) {
712
0
            QTC::TC("libtests", "JSON 16 low not after high");
713
0
            throw std::runtime_error(
714
0
                "JSON: offset " + std::to_string(offset) +
715
0
                ": UTF-16 low surrogate found not immediately after high surrogate");
716
0
        }
717
0
        high_offset = 0;
718
0
        codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF);
719
0
        result += QUtil::toUTF8(codepoint);
720
0
    } else {
721
0
        result += QUtil::toUTF8(codepoint);
722
0
    }
723
0
}
724
725
void
726
JSONParser::tokenError()
727
0
{
728
0
    if (done) {
729
0
        QTC::TC("libtests", "JSON parse ls premature end of input");
730
0
        throw std::runtime_error("JSON: premature end of input");
731
0
    }
732
733
0
    if (lex_state == ls_u4) {
734
0
        QTC::TC("libtests", "JSON parse bad hex after u");
735
0
        throw std::runtime_error(
736
0
            "JSON: offset " + std::to_string(offset - u_count - 1) +
737
0
            ": \\u must be followed by four hex digits");
738
0
    } else if (lex_state == ls_alpha) {
739
0
        QTC::TC("libtests", "JSON parse keyword bad character");
740
0
        throw std::runtime_error(
741
0
            "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " +
742
0
            std::string(p, 1));
743
0
    } else if (lex_state == ls_string) {
744
0
        QTC::TC("libtests", "JSON parse control char in string");
745
0
        throw std::runtime_error(
746
0
            "JSON: offset " + std::to_string(offset) +
747
0
            ": control character in string (missing \"?)");
748
0
    } else if (lex_state == ls_backslash) {
749
0
        QTC::TC("libtests", "JSON parse backslash bad character");
750
0
        throw std::runtime_error(
751
0
            "JSON: offset " + std::to_string(offset) +
752
0
            ": invalid character after backslash: " + std::string(p, 1));
753
0
    }
754
755
0
    if (*p == '.') {
756
0
        if (lex_state == ls_number || lex_state == ls_number_e || lex_state == ls_number_e_sign) {
757
0
            QTC::TC("libtests", "JSON parse point after e");
758
0
            throw std::runtime_error(
759
0
                "JSON: offset " + std::to_string(offset) +
760
0
                ": numeric literal: decimal point after e");
761
0
        } else {
762
0
            QTC::TC("libtests", "JSON parse duplicate point");
763
0
            throw std::runtime_error(
764
0
                "JSON: offset " + std::to_string(offset) +
765
0
                ": numeric literal: decimal point already seen");
766
0
        }
767
0
    } else if (*p == 'e' || *p == 'E') {
768
0
        QTC::TC("libtests", "JSON parse duplicate e");
769
0
        throw std::runtime_error(
770
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: e already seen");
771
0
    } else if ((*p == '+') || (*p == '-')) {
772
0
        QTC::TC("libtests", "JSON parse unexpected sign");
773
0
        throw std::runtime_error(
774
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign");
775
0
    } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) {
776
0
        QTC::TC("libtests", "JSON parse incomplete number");
777
0
        throw std::runtime_error(
778
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: incomplete number");
779
780
0
    } else {
781
0
        QTC::TC("libtests", "JSON parse numeric bad character");
782
0
        throw std::runtime_error(
783
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " +
784
0
            std::string(p, 1));
785
0
    }
786
0
    throw std::logic_error("JSON::tokenError : unhandled error");
787
0
}
788
789
// Append current character to token and advance to next input character.
790
inline void
791
JSONParser::append()
792
0
{
793
0
    token += *p;
794
0
    ++p;
795
0
    ++offset;
796
0
}
797
798
// Append current character to token, advance to next input character and transition to 'next' lexer
799
// state.
800
inline void
801
JSONParser::append(lex_state_e next)
802
0
{
803
0
    lex_state = next;
804
0
    token += *p;
805
0
    ++p;
806
0
    ++offset;
807
0
}
808
809
// Advance to next input character without appending the current character to token.
810
inline void
811
JSONParser::ignore()
812
0
{
813
0
    ++p;
814
0
    ++offset;
815
0
}
816
817
// Advance to next input character without appending the current character to token and transition
818
// to 'next' lexer state.
819
inline void
820
JSONParser::ignore(lex_state_e next)
821
0
{
822
0
    lex_state = next;
823
0
    ++p;
824
0
    ++offset;
825
0
}
826
827
void
828
JSONParser::getToken()
829
0
{
830
0
    token.clear();
831
832
    // Keep track of UTF-16 surrogate pairs.
833
0
    unsigned long high_surrogate = 0;
834
0
    qpdf_offset_t high_offset = 0;
835
836
0
    while (true) {
837
0
        if (p == (buf + bytes)) {
838
0
            p = buf;
839
0
            bytes = is.read(buf, sizeof(buf));
840
0
            if (bytes == 0) {
841
0
                done = true;
842
0
                break;
843
0
            }
844
0
        }
845
846
0
        if ((*p < 32 && *p >= 0)) {
847
0
            if (*p == '\t' || *p == '\n' || *p == '\r') {
848
                // Legal white space not permitted in strings. This will always end the current
849
                // token (unless we are still before the start of the token).
850
0
                if (lex_state == ls_top) {
851
0
                    ignore();
852
0
                } else {
853
0
                    break;
854
0
                }
855
856
0
            } else {
857
0
                QTC::TC("libtests", "JSON parse null character");
858
0
                throw std::runtime_error(
859
0
                    "JSON: control or null character at offset " + std::to_string(offset));
860
0
            }
861
0
        } else if (*p == ',') {
862
0
            if (lex_state == ls_top) {
863
0
                ignore(ls_comma);
864
0
                return;
865
0
            } else if (lex_state == ls_string) {
866
0
                append();
867
0
            } else {
868
0
                break;
869
0
            }
870
0
        } else if (*p == ':') {
871
0
            if (lex_state == ls_top) {
872
0
                ignore(ls_colon);
873
0
                return;
874
0
            } else if (lex_state == ls_string) {
875
0
                append();
876
0
            } else {
877
0
                break;
878
0
            }
879
0
        } else if (*p == ' ') {
880
0
            if (lex_state == ls_top) {
881
0
                ignore();
882
0
            } else if (lex_state == ls_string) {
883
0
                append();
884
0
            } else {
885
0
                break;
886
0
            }
887
0
        } else if (*p == '{') {
888
0
            if (lex_state == ls_top) {
889
0
                token_start = offset;
890
0
                ignore(ls_begin_dict);
891
0
                return;
892
0
            } else if (lex_state == ls_string) {
893
0
                append();
894
0
            } else {
895
0
                break;
896
0
            }
897
0
        } else if (*p == '}') {
898
0
            if (lex_state == ls_top) {
899
0
                ignore(ls_end_dict);
900
0
                return;
901
0
            } else if (lex_state == ls_string) {
902
0
                append();
903
0
            } else {
904
0
                break;
905
0
            }
906
0
        } else if (*p == '[') {
907
0
            if (lex_state == ls_top) {
908
0
                token_start = offset;
909
0
                ignore(ls_begin_array);
910
0
                return;
911
0
            } else if (lex_state == ls_string) {
912
0
                append();
913
0
            } else {
914
0
                break;
915
0
            }
916
0
        } else if (*p == ']') {
917
0
            if (lex_state == ls_top) {
918
0
                ignore(ls_end_array);
919
0
                return;
920
0
            } else if (lex_state == ls_string) {
921
0
                append();
922
0
            } else {
923
0
                break;
924
0
            }
925
0
        } else {
926
0
            switch (lex_state) {
927
0
            case ls_top:
928
0
                token_start = offset;
929
0
                if (*p == '"') {
930
0
                    ignore(ls_string);
931
0
                } else if ((*p >= 'a') && (*p <= 'z')) {
932
0
                    append(ls_alpha);
933
0
                } else if (*p == '-') {
934
0
                    append(ls_number_minus);
935
0
                } else if ((*p >= '1') && (*p <= '9')) {
936
0
                    append(ls_number_before_point);
937
0
                } else if (*p == '0') {
938
0
                    append(ls_number_leading_zero);
939
0
                } else {
940
0
                    QTC::TC("libtests", "JSON parse bad character");
941
0
                    throw std::runtime_error(
942
0
                        "JSON: offset " + std::to_string(offset) + ": unexpected character " +
943
0
                        std::string(p, 1));
944
0
                }
945
0
                break;
946
947
0
            case ls_number_minus:
948
0
                if ((*p >= '1') && (*p <= '9')) {
949
0
                    append(ls_number_before_point);
950
0
                } else if (*p == '0') {
951
0
                    append(ls_number_leading_zero);
952
0
                } else {
953
0
                    QTC::TC("libtests", "JSON parse number minus no digits");
954
0
                    throw std::runtime_error(
955
0
                        "JSON: offset " + std::to_string(offset) +
956
0
                        ": numeric literal: no digit after minus sign");
957
0
                }
958
0
                break;
959
960
0
            case ls_number_leading_zero:
961
0
                if (*p == '.') {
962
0
                    append(ls_number_point);
963
0
                } else if (*p == 'e' || *p == 'E') {
964
0
                    append(ls_number_e);
965
0
                } else {
966
0
                    QTC::TC("libtests", "JSON parse leading zero");
967
0
                    throw std::runtime_error(
968
0
                        "JSON: offset " + std::to_string(offset) + ": number with leading zero");
969
0
                }
970
0
                break;
971
972
0
            case ls_number_before_point:
973
0
                if ((*p >= '0') && (*p <= '9')) {
974
0
                    append();
975
0
                } else if (*p == '.') {
976
0
                    append(ls_number_point);
977
0
                } else if (*p == 'e' || *p == 'E') {
978
0
                    append(ls_number_e);
979
0
                } else {
980
0
                    tokenError();
981
0
                }
982
0
                break;
983
984
0
            case ls_number_point:
985
0
                if ((*p >= '0') && (*p <= '9')) {
986
0
                    append(ls_number_after_point);
987
0
                } else {
988
0
                    tokenError();
989
0
                }
990
0
                break;
991
992
0
            case ls_number_after_point:
993
0
                if ((*p >= '0') && (*p <= '9')) {
994
0
                    append();
995
0
                } else if (*p == 'e' || *p == 'E') {
996
0
                    append(ls_number_e);
997
0
                } else {
998
0
                    tokenError();
999
0
                }
1000
0
                break;
1001
1002
0
            case ls_number_e:
1003
0
                if ((*p >= '0') && (*p <= '9')) {
1004
0
                    append(ls_number);
1005
0
                } else if ((*p == '+') || (*p == '-')) {
1006
0
                    append(ls_number_e_sign);
1007
0
                } else {
1008
0
                    tokenError();
1009
0
                }
1010
0
                break;
1011
1012
0
            case ls_number_e_sign:
1013
0
                if ((*p >= '0') && (*p <= '9')) {
1014
0
                    append(ls_number);
1015
0
                } else {
1016
0
                    tokenError();
1017
0
                }
1018
0
                break;
1019
1020
0
            case ls_number:
1021
                // We only get here after we have seen an exponent.
1022
0
                if ((*p >= '0') && (*p <= '9')) {
1023
0
                    append();
1024
0
                } else {
1025
0
                    tokenError();
1026
0
                }
1027
0
                break;
1028
1029
0
            case ls_alpha:
1030
0
                if ((*p >= 'a') && (*p <= 'z')) {
1031
0
                    append();
1032
0
                } else {
1033
0
                    tokenError();
1034
0
                }
1035
0
                break;
1036
1037
0
            case ls_string:
1038
0
                if (*p == '"') {
1039
0
                    if (high_offset) {
1040
0
                        QTC::TC("libtests", "JSON 16 dangling high");
1041
0
                        throw std::runtime_error(
1042
0
                            "JSON: offset " + std::to_string(high_offset) +
1043
0
                            ": UTF-16 high surrogate not followed by low surrogate");
1044
0
                    }
1045
0
                    ignore(ls_after_string);
1046
0
                    return;
1047
0
                } else if (*p == '\\') {
1048
0
                    ignore(ls_backslash);
1049
0
                } else {
1050
0
                    append();
1051
0
                }
1052
0
                break;
1053
1054
0
            case ls_backslash:
1055
0
                lex_state = ls_string;
1056
0
                switch (*p) {
1057
0
                case '\\':
1058
0
                case '\"':
1059
0
                case '/':
1060
                    // \/ is allowed in json input, but so is /, so we don't map / to \/ in output.
1061
0
                    token += *p;
1062
0
                    break;
1063
0
                case 'b':
1064
0
                    token += '\b';
1065
0
                    break;
1066
0
                case 'f':
1067
0
                    token += '\f';
1068
0
                    break;
1069
0
                case 'n':
1070
0
                    token += '\n';
1071
0
                    break;
1072
0
                case 'r':
1073
0
                    token += '\r';
1074
0
                    break;
1075
0
                case 't':
1076
0
                    token += '\t';
1077
0
                    break;
1078
0
                case 'u':
1079
0
                    lex_state = ls_u4;
1080
0
                    u_count = 0;
1081
0
                    u_value = 0;
1082
0
                    break;
1083
0
                default:
1084
0
                    lex_state = ls_backslash;
1085
0
                    tokenError();
1086
0
                }
1087
0
                ignore();
1088
0
                break;
1089
1090
0
            case ls_u4:
1091
0
                using ui = unsigned int;
1092
0
                if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) {
1093
0
                    u_value = 16 * u_value + val;
1094
0
                } else {
1095
0
                    tokenError();
1096
0
                }
1097
0
                if (++u_count == 4) {
1098
0
                    handle_u_code(u_value, offset - 5, high_surrogate, high_offset, token);
1099
0
                    lex_state = ls_string;
1100
0
                }
1101
0
                ignore();
1102
0
                break;
1103
1104
0
            default:
1105
0
                throw std::logic_error("JSONParser::getToken : trying to handle delimiter state");
1106
0
            }
1107
0
        }
1108
0
    }
1109
1110
    // We only get here if on end of input or if the last character was a control character or other
1111
    // delimiter.
1112
1113
0
    if (!token.empty()) {
1114
0
        switch (lex_state) {
1115
0
        case ls_top:
1116
            // Can't happen
1117
0
            throw std::logic_error("tok_start set in ls_top while parsing");
1118
0
            break;
1119
1120
0
        case ls_number_leading_zero:
1121
0
        case ls_number_before_point:
1122
0
        case ls_number_after_point:
1123
0
            lex_state = ls_number;
1124
0
            break;
1125
1126
0
        case ls_number:
1127
0
        case ls_alpha:
1128
            // terminal state
1129
0
            break;
1130
1131
0
        default:
1132
0
            tokenError();
1133
0
        }
1134
0
    }
1135
0
}
1136
1137
void
1138
JSONParser::handleToken()
1139
0
{
1140
0
    if (lex_state == ls_top) {
1141
0
        return;
1142
0
    }
1143
1144
0
    if (parser_state == ps_done) {
1145
0
        QTC::TC("libtests", "JSON parse junk after object");
1146
0
        throw std::runtime_error(
1147
0
            "JSON: offset " + std::to_string(offset) +
1148
0
            ": material follows end of object: " + token);
1149
0
    }
1150
1151
0
    const static JSON null_item = JSON::makeNull();
1152
0
    JSON item;
1153
0
    auto tos = stack.empty() ? null_item : stack.back().item;
1154
0
    auto ls = lex_state;
1155
0
    lex_state = ls_top;
1156
1157
0
    switch (ls) {
1158
0
    case ls_begin_dict:
1159
0
        item = JSON::makeDictionary();
1160
0
        break;
1161
1162
0
    case ls_begin_array:
1163
0
        item = JSON::makeArray();
1164
0
        break;
1165
1166
0
    case ls_colon:
1167
0
        if (parser_state != ps_dict_after_key) {
1168
0
            QTC::TC("libtests", "JSON parse unexpected :");
1169
0
            throw std::runtime_error(
1170
0
                "JSON: offset " + std::to_string(offset) + ": unexpected colon");
1171
0
        }
1172
0
        parser_state = ps_dict_after_colon;
1173
0
        return;
1174
1175
0
    case ls_comma:
1176
0
        if (!((parser_state == ps_dict_after_item) || (parser_state == ps_array_after_item))) {
1177
0
            QTC::TC("libtests", "JSON parse unexpected ,");
1178
0
            throw std::runtime_error(
1179
0
                "JSON: offset " + std::to_string(offset) + ": unexpected comma");
1180
0
        }
1181
0
        if (parser_state == ps_dict_after_item) {
1182
0
            parser_state = ps_dict_after_comma;
1183
0
        } else if (parser_state == ps_array_after_item) {
1184
0
            parser_state = ps_array_after_comma;
1185
0
        } else {
1186
0
            throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma");
1187
0
        }
1188
0
        return;
1189
1190
0
    case ls_end_array:
1191
0
        if (!(parser_state == ps_array_begin || parser_state == ps_array_after_item)) {
1192
0
            QTC::TC("libtests", "JSON parse unexpected ]");
1193
0
            throw std::runtime_error(
1194
0
                "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter");
1195
0
        }
1196
0
        parser_state = stack.back().state;
1197
0
        tos.setEnd(offset);
1198
0
        if (reactor) {
1199
0
            reactor->containerEnd(tos);
1200
0
        }
1201
0
        if (parser_state != ps_done) {
1202
0
            stack.pop_back();
1203
0
        }
1204
0
        return;
1205
1206
0
    case ls_end_dict:
1207
0
        if (!((parser_state == ps_dict_begin) || (parser_state == ps_dict_after_item))) {
1208
0
            QTC::TC("libtests", "JSON parse unexpected }");
1209
0
            throw std::runtime_error(
1210
0
                "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter");
1211
0
        }
1212
0
        parser_state = stack.back().state;
1213
0
        tos.setEnd(offset);
1214
0
        if (reactor) {
1215
0
            reactor->containerEnd(tos);
1216
0
        }
1217
0
        if (parser_state != ps_done) {
1218
0
            stack.pop_back();
1219
0
        }
1220
0
        return;
1221
1222
0
    case ls_number:
1223
0
        item = JSON::makeNumber(token);
1224
0
        break;
1225
1226
0
    case ls_alpha:
1227
0
        if (token == "true") {
1228
0
            item = JSON::makeBool(true);
1229
0
        } else if (token == "false") {
1230
0
            item = JSON::makeBool(false);
1231
0
        } else if (token == "null") {
1232
0
            item = JSON::makeNull();
1233
0
        } else {
1234
0
            QTC::TC("libtests", "JSON parse invalid keyword");
1235
0
            throw std::runtime_error(
1236
0
                "JSON: offset " + std::to_string(offset) + ": invalid keyword " + token);
1237
0
        }
1238
0
        break;
1239
1240
0
    case ls_after_string:
1241
0
        if (parser_state == ps_dict_begin || parser_state == ps_dict_after_comma) {
1242
0
            dict_key = token;
1243
0
            dict_key_offset = token_start;
1244
0
            parser_state = ps_dict_after_key;
1245
0
            return;
1246
0
        } else {
1247
0
            item = JSON::makeString(token);
1248
0
        }
1249
0
        break;
1250
1251
0
    default:
1252
0
        throw std::runtime_error(
1253
0
            "JSON: offset " + std::to_string(offset) + ": premature end of input");
1254
0
        break;
1255
0
    }
1256
1257
0
    item.setStart(token_start);
1258
0
    item.setEnd(offset);
1259
1260
0
    switch (parser_state) {
1261
0
    case ps_dict_begin:
1262
0
    case ps_dict_after_comma:
1263
0
        QTC::TC("libtests", "JSON parse string as dict key");
1264
0
        throw std::runtime_error(
1265
0
            "JSON: offset " + std::to_string(offset) + ": expect string as dictionary key");
1266
0
        break;
1267
1268
0
    case ps_dict_after_colon:
1269
0
        if (tos.checkDictionaryKeySeen(dict_key)) {
1270
0
            QTC::TC("libtests", "JSON parse duplicate key");
1271
0
            throw std::runtime_error(
1272
0
                "JSON: offset " + std::to_string(dict_key_offset) + ": duplicated dictionary key");
1273
0
        }
1274
0
        if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
1275
0
            tos.addDictionaryMember(dict_key, item);
1276
0
        }
1277
0
        parser_state = ps_dict_after_item;
1278
0
        break;
1279
1280
0
    case ps_array_begin:
1281
0
    case ps_array_after_comma:
1282
0
        if (!reactor || !reactor->arrayItem(item)) {
1283
0
            tos.addArrayElement(item);
1284
0
        }
1285
0
        parser_state = ps_array_after_item;
1286
0
        break;
1287
1288
0
    case ps_top:
1289
0
        if (!(item.isDictionary() || item.isArray())) {
1290
0
            stack.emplace_back(ps_done, item);
1291
0
            parser_state = ps_done;
1292
0
            return;
1293
0
        }
1294
0
        parser_state = ps_done;
1295
0
        break;
1296
1297
0
    case ps_dict_after_key:
1298
0
        QTC::TC("libtests", "JSON parse expected colon");
1299
0
        throw std::runtime_error("JSON: offset " + std::to_string(offset) + ": expected ':'");
1300
0
        break;
1301
1302
0
    case ps_dict_after_item:
1303
0
        QTC::TC("libtests", "JSON parse expected , or }");
1304
0
        throw std::runtime_error(
1305
0
            "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
1306
0
        break;
1307
1308
0
    case ps_array_after_item:
1309
0
        QTC::TC("libtests", "JSON parse expected, or ]");
1310
0
        throw std::runtime_error(
1311
0
            "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
1312
0
        break;
1313
1314
0
    case ps_done:
1315
0
        throw std::logic_error("JSONParser::handleToken: unexpected parser state");
1316
0
    }
1317
1318
0
    if (item.isDictionary() || item.isArray()) {
1319
0
        stack.emplace_back(parser_state, item);
1320
        // Calling container start method is postponed until after adding the containers to their
1321
        // parent containers, if any. This makes it much easier to keep track of the current nesting
1322
        // level.
1323
0
        if (item.isDictionary()) {
1324
0
            if (reactor) {
1325
0
                reactor->dictionaryStart();
1326
0
            }
1327
0
            parser_state = ps_dict_begin;
1328
0
        } else if (item.isArray()) {
1329
0
            if (reactor) {
1330
0
                reactor->arrayStart();
1331
0
            }
1332
0
            parser_state = ps_array_begin;
1333
0
        }
1334
1335
0
        if (stack.size() > 500) {
1336
0
            throw std::runtime_error(
1337
0
                "JSON: offset " + std::to_string(offset) + ": maximum object depth exceeded");
1338
0
        }
1339
0
    }
1340
0
}
1341
1342
// Run the lexer and parser alternately until the input is exhausted, then return the item at the
// top of the stack. Throws std::runtime_error if the input ended before a complete value was read.
// If a reactor is attached and the result is neither an array nor a dictionary, the reactor's
// topLevelScalar() is called.
JSON
JSONParser::parse()
{
    for (; !done; handleToken()) {
        getToken();
    }

    if (parser_state != ps_done) {
        QTC::TC("libtests", "JSON parse premature EOF");
        throw std::runtime_error("JSON: premature end of input");
    }

    auto const& result = stack.back().item;
    if (reactor && !result.isArray() && !result.isDictionary()) {
        reactor->topLevelScalar();
    }
    return result;
}
1359
1360
// Parse JSON read from the given input source, passing the optional reactor to the parser.
JSON
JSON::parse(InputSource& is, Reactor* reactor)
{
    JSONParser parser(is, reactor);
    JSON result = parser.parse();
    return result;
}
1366
1367
// Parse JSON held in a string, with no reactor attached.
JSON
JSON::parse(std::string const& s)
{
    BufferInputSource source("json input", s);
    JSONParser parser(source, nullptr);
    JSON result = parser.parse();
    return result;
}
1374
1375
void
1376
JSON::setStart(qpdf_offset_t start)
1377
0
{
1378
0
    if (m) {
1379
0
        m->start = start;
1380
0
    }
1381
0
}
1382
1383
void
1384
JSON::setEnd(qpdf_offset_t end)
1385
0
{
1386
0
    if (m) {
1387
0
        m->end = end;
1388
0
    }
1389
0
}
1390
1391
// Return the recorded start offset, or 0 if the object has no members.
qpdf_offset_t
JSON::getStart() const
{
    if (!m) {
        return 0;
    }
    return m->start;
}
1396
1397
// Return the recorded end offset, or 0 if the object has no members.
qpdf_offset_t
JSON::getEnd() const
{
    if (!m) {
        return 0;
    }
    return m->end;
}