Coverage Report

Created: 2025-12-05 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/qpdf/libqpdf/JSON.cc
Line
Count
Source
1
#include <qpdf/JSON.hh>
2
3
#include <qpdf/JSON_writer.hh>
4
5
#include <qpdf/InputSource_private.hh>
6
#include <qpdf/Pl_Base64.hh>
7
#include <qpdf/Pl_Concatenate.hh>
8
#include <qpdf/Pl_String.hh>
9
#include <qpdf/QTC.hh>
10
#include <qpdf/QUtil.hh>
11
#include <qpdf/Util.hh>
12
13
#include <cstring>
14
#include <stdexcept>
15
16
using namespace qpdf;
17
18
JSON::Members::Members(std::unique_ptr<JSON_value> value) :
19
0
    value(std::move(value))
20
0
{
21
0
}
22
23
JSON::JSON(std::unique_ptr<JSON_value> value) :
24
0
    m(new Members(std::move(value)))
25
0
{
26
0
}
27
28
void
29
JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter)
30
0
{
31
0
    if (first) {
32
0
        *p << delimiter;
33
0
    } else {
34
0
        std::string s{"\n"};
35
0
        s.append(2 * depth, ' ');
36
0
        *p << s + delimiter;
37
0
    }
38
0
}
39
40
void
41
JSON::writeNext(Pipeline* p, bool& first, size_t depth)
42
0
{
43
0
    if (first) {
44
0
        first = false;
45
0
        std::string s{"\n"};
46
0
        s.append(2 * depth, ' ');
47
0
        *p << s;
48
0
    } else {
49
0
        std::string s{",\n"};
50
0
        s.append(2 * depth, ' ');
51
0
        *p << s;
52
0
    }
53
0
}
54
55
void
56
JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth)
57
0
{
58
0
    *p << "{";
59
0
    first = true;
60
0
}
61
62
void
63
JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth)
64
0
{
65
0
    *p << "[";
66
0
    first = true;
67
0
}
68
69
void
70
JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth)
71
0
{
72
0
    writeClose(p, first, depth, "}");
73
0
}
74
75
void
76
JSON::writeArrayClose(Pipeline* p, bool first, size_t depth)
77
0
{
78
0
    writeClose(p, first, depth, "]");
79
0
}
80
81
void
82
JSON::writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth)
83
0
{
84
0
    writeNext(p, first, depth);
85
0
    *p << std::string("\"") + key + "\": ";
86
0
}
87
88
void
89
JSON::writeDictionaryItem(
90
    Pipeline* p, bool& first, std::string const& key, JSON const& value, size_t depth)
91
0
{
92
0
    writeDictionaryKey(p, first, key, depth);
93
0
    value.write(p, depth);
94
0
}
95
96
void
97
JSON::writeArrayItem(Pipeline* p, bool& first, JSON const& element, size_t depth)
98
0
{
99
0
    writeNext(p, first, depth);
100
0
    element.write(p, depth);
101
0
}
102
103
void
104
JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const
105
0
{
106
0
    bool first = true;
107
0
    writeDictionaryOpen(p, first, depth);
108
0
    for (auto const& iter: members) {
109
0
        writeDictionaryItem(p, first, iter.first, iter.second, 1 + depth);
110
0
    }
111
0
    writeDictionaryClose(p, first, depth);
112
0
}
113
114
void
115
JSON::JSON_array::write(Pipeline* p, size_t depth) const
116
0
{
117
0
    bool first = true;
118
0
    writeArrayOpen(p, first, depth);
119
0
    for (auto const& element: elements) {
120
0
        writeArrayItem(p, first, element, 1 + depth);
121
0
    }
122
0
    writeArrayClose(p, first, depth);
123
0
}
124
125
JSON::JSON_string::JSON_string(std::string const& utf8) :
126
0
    JSON_value(vt_string),
127
0
    utf8(utf8)
128
0
{
129
0
}
130
131
void
132
JSON::JSON_string::write(Pipeline* p, size_t) const
133
0
{
134
0
    *p << std::string("\"") + Writer::encode_string(utf8) + "\"";
135
0
}
136
137
JSON::JSON_number::JSON_number(long long value) :
138
0
    JSON_value(vt_number),
139
0
    encoded(std::to_string(value))
140
0
{
141
0
}
142
143
JSON::JSON_number::JSON_number(double value) :
144
0
    JSON_value(vt_number),
145
0
    encoded(QUtil::double_to_string(value, 6))
146
0
{
147
0
}
148
149
JSON::JSON_number::JSON_number(std::string const& value) :
150
0
    JSON_value(vt_number),
151
0
    encoded(value)
152
0
{
153
0
}
154
155
void
156
JSON::JSON_number::write(Pipeline* p, size_t) const
157
0
{
158
0
    *p << encoded;
159
0
}
160
161
JSON::JSON_bool::JSON_bool(bool val) :
162
0
    JSON_value(vt_bool),
163
0
    value(val)
164
0
{
165
0
}
166
167
void
168
JSON::JSON_bool::write(Pipeline* p, size_t) const
169
0
{
170
0
    *p << (value ? "true" : "false");
171
0
}
172
173
void
174
JSON::JSON_null::write(Pipeline* p, size_t) const
175
0
{
176
0
    *p << "null";
177
0
}
178
179
// Create a blob node. fn is invoked each time the node is written and must send the blob's raw
// bytes to the pipeline it is given. The callable is taken by value, so it is moved into the
// member rather than copied a second time.
JSON::JSON_blob::JSON_blob(std::function<void(Pipeline*)> fn) :
    JSON_value(vt_blob),
    fn(std::move(fn))
{
}
184
185
void
186
JSON::JSON_blob::write(Pipeline* p, size_t) const
187
0
{
188
0
    *p << "\"";
189
0
    Pl_Concatenate cat("blob concatenate", p);
190
0
    Pl_Base64 base64("blob base64", &cat, Pl_Base64::a_encode);
191
0
    fn(&base64);
192
0
    base64.finish();
193
0
    *p << "\"";
194
0
}
195
196
void
197
JSON::write(Pipeline* p, size_t depth) const
198
0
{
199
0
    if (!m) {
200
0
        *p << "null";
201
0
    } else {
202
0
        m->value->write(p, depth);
203
0
    }
204
0
}
205
206
std::string
207
JSON::unparse() const
208
0
{
209
0
    if (!m) {
210
0
        return "null";
211
0
    }
212
0
    std::string s;
213
0
    Pl_String p("unparse", nullptr, s);
214
0
    write(&p, 0);
215
0
    return s;
216
0
}
217
218
std::string
219
JSON::Writer::encode_string(std::string const& str)
220
0
{
221
0
    static auto constexpr hexchars = "0123456789abcdef";
222
223
0
    auto begin = str.cbegin();
224
0
    auto end = str.cend();
225
0
    auto iter = begin;
226
0
    while (iter != end) {
227
0
        auto c = static_cast<unsigned char>(*iter);
228
0
        if ((c > 34 && c != '\\') || c == ' ' || c == 33) {
229
            // Optimistically check that no char in str requires escaping. Hopefully we can just
230
            // return the input str.
231
0
            ++iter;
232
0
        } else {
233
            // We found a char that requires escaping. Initialize result to the chars scanned so
234
            // far, append/replace the rest of str one char at a time, and return the result.
235
0
            std::string result{begin, iter};
236
237
0
            for (; iter != end; ++iter) {
238
0
                auto ch = static_cast<unsigned char>(*iter);
239
0
                if ((ch > 34 && ch != '\\') || ch == ' ' || ch == 33) {
240
                    // Check for most common case first.
241
0
                    result += *iter;
242
0
                } else {
243
0
                    switch (ch) {
244
0
                    case '\\':
245
0
                        result += "\\\\";
246
0
                        break;
247
0
                    case '\"':
248
0
                        result += "\\\"";
249
0
                        break;
250
0
                    case '\b':
251
0
                        result += "\\b";
252
0
                        break;
253
0
                    case '\f':
254
0
                        result += "\\f";
255
0
                        break;
256
0
                    case '\n':
257
0
                        result += "\\n";
258
0
                        break;
259
0
                    case '\r':
260
0
                        result += "\\r";
261
0
                        break;
262
0
                    case '\t':
263
0
                        result += "\\t";
264
0
                        break;
265
0
                    default:
266
0
                        result += ch < 16 ? "\\u000" : "\\u001";
267
0
                        result += hexchars[ch % 16];
268
0
                    }
269
0
                }
270
0
            }
271
0
            return result;
272
0
        }
273
0
    }
274
0
    return str;
275
0
}
276
277
// Create a new, empty dictionary.
JSON
JSON::makeDictionary()
{
    return JSON(std::make_unique<JSON_dictionary>());
}
282
283
// Store val under key in this dictionary and return the stored value. The key is stored in its
// escaped form (see Writer::encode_string). A val without a value node is stored as null.
// Throws std::runtime_error if this object is not a dictionary.
JSON
JSON::addDictionaryMember(std::string const& key, JSON const& val)
{
    auto* dict = m ? dynamic_cast<JSON_dictionary*>(m->value.get()) : nullptr;
    if (!dict) {
        throw std::runtime_error("JSON::addDictionaryMember called on non-dictionary");
    }
    return dict->members[Writer::encode_string(key)] = val.m ? val : makeNull();
}
292
293
// Create a new, empty array.
JSON
JSON::makeArray()
{
    return JSON(std::make_unique<JSON_array>());
}
298
299
// Append val to this array and return the stored element. A val without a value node is stored
// as null. Throws std::runtime_error if this object is not an array.
JSON
JSON::addArrayElement(JSON const& val)
{
    auto* arr = m ? dynamic_cast<JSON_array*>(m->value.get()) : nullptr;
    if (!arr) {
        throw std::runtime_error("JSON::addArrayElement called on non-array");
    }
    arr->elements.push_back(val.m ? val : makeNull());
    return arr->elements.back();
}
313
314
// Create a string value from the given text.
JSON
JSON::makeString(std::string const& utf8)
{
    return JSON(std::make_unique<JSON_string>(utf8));
}
319
320
// Create a number value from an integer.
JSON
JSON::makeInt(long long int value)
{
    return JSON(std::make_unique<JSON_number>(value));
}
325
326
// Create a number value from a floating point number.
JSON
JSON::makeReal(double value)
{
    return JSON(std::make_unique<JSON_number>(value));
}
331
332
// Create a number value from text that is already encoded; the text is used unchanged.
JSON
JSON::makeNumber(std::string const& encoded)
{
    return JSON(std::make_unique<JSON_number>(encoded));
}
337
338
// Create a boolean value.
JSON
JSON::makeBool(bool value)
{
    return JSON(std::make_unique<JSON_bool>(value));
}
343
344
// Create an explicit null value.
JSON
JSON::makeNull()
{
    return JSON(std::make_unique<JSON_null>());
}
349
350
// Create a blob value. fn is called whenever the value is written and must send the blob's raw
// bytes to the pipeline it receives; they are emitted as a base64-encoded string. The callable
// is taken by value, so it is moved into the node instead of being copied again.
JSON
JSON::makeBlob(std::function<void(Pipeline*)> fn)
{
    return {std::make_unique<JSON_blob>(std::move(fn))};
}
355
356
bool
357
JSON::isArray() const
358
0
{
359
0
    return m ? m->value->type_code == vt_array : false;
360
0
}
361
362
bool
363
JSON::isDictionary() const
364
0
{
365
0
    return m && m->value->type_code == vt_dictionary;
366
0
}
367
368
bool
369
JSON::getString(std::string& utf8) const
370
0
{
371
0
    if (m && m->value->type_code == vt_string) {
372
0
        auto v = dynamic_cast<JSON_string const*>(m->value.get());
373
0
        utf8 = v->utf8;
374
0
        return true;
375
0
    }
376
0
    return false;
377
0
}
378
379
bool
380
JSON::getNumber(std::string& value) const
381
0
{
382
0
    if (m && m->value->type_code == vt_number) {
383
0
        auto v = dynamic_cast<JSON_number const*>(m->value.get());
384
0
        value = v->encoded;
385
0
        return true;
386
0
    }
387
0
    return false;
388
0
}
389
390
bool
391
JSON::getBool(bool& value) const
392
0
{
393
0
    if (m && m->value->type_code == vt_bool) {
394
0
        auto v = dynamic_cast<JSON_bool const*>(m->value.get());
395
0
        value = v->value;
396
0
        return true;
397
0
    }
398
0
    return false;
399
0
}
400
401
bool
402
JSON::isNull() const
403
0
{
404
0
    return m && m->value->type_code == vt_null;
405
0
}
406
407
// Return the dictionary member stored under key. Returns a null value if this object is not a
// dictionary or has no such member. The key is looked up exactly as given.
JSON
JSON::getDictItem(std::string const& key) const
{
    auto const* dict = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr;
    if (dict) {
        auto const found = dict->members.find(key);
        if (found != dict->members.end()) {
            return found->second;
        }
    }
    return makeNull();
}
417
418
bool
419
JSON::forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const
420
0
{
421
0
    if (auto v = m ? dynamic_cast<JSON_dictionary const*>(m->value.get()) : nullptr) {
422
0
        for (auto const& [key, value]: v->members) {
423
0
            fn(key, value);
424
0
        }
425
0
        return true;
426
0
    }
427
0
    return false;
428
0
}
429
430
bool
431
JSON::forEachArrayItem(std::function<void(JSON value)> fn) const
432
0
{
433
0
    if (auto v = m ? dynamic_cast<JSON_array const*>(m->value.get()) : nullptr) {
434
0
        for (auto const& i: v->elements) {
435
0
            fn(JSON(i));
436
0
        }
437
0
        return true;
438
0
    }
439
0
    return false;
440
0
}
441
442
bool
443
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
444
0
{
445
0
    if (!m || !schema.m) {
446
0
        return false;
447
0
    }
448
0
    checkSchemaInternal(m->value.get(), schema.m->value.get(), 0, errors, "");
449
0
    return errors.empty();
450
0
}
451
452
bool
453
JSON::checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors)
454
0
{
455
0
    if (!m || !schema.m) {
456
0
        return false;
457
0
    }
458
0
    checkSchemaInternal(m->value.get(), schema.m->value.get(), flags, errors, "");
459
0
    return errors.empty();
460
0
}
461
462
void
463
JSON::checkSchemaInternal(
464
    JSON_value* this_v,
465
    JSON_value* sch_v,
466
    unsigned long flags,
467
    std::list<std::string>& errors,
468
    std::string prefix)
469
0
{
470
0
    auto error = [&errors, prefix](std::string const& msg) {
471
0
        if (prefix.empty()) {
472
0
            errors.emplace_back("top-level object" + msg);
473
0
        } else {
474
0
            errors.emplace_back("json key \"" + prefix + "\"" + msg);
475
0
        }
476
0
    };
477
478
0
    if (auto* sch_dict = dynamic_cast<JSON_dictionary*>(sch_v)) {
479
0
        auto* this_dict = dynamic_cast<JSON_dictionary*>(this_v);
480
0
        if (!this_dict) {
481
0
            error(" is supposed to be a dictionary");
482
0
            return;
483
0
        }
484
0
        auto const& members = sch_dict->members;
485
0
        if (members.size() == 1) {
486
0
            auto const& pattern_key = members.begin()->first;
487
0
            if (pattern_key.starts_with('<') && pattern_key.ends_with('>')) {
488
0
                auto pattern_schema = sch_dict->members[pattern_key].m->value.get();
489
0
                for (auto const& [key, val]: this_dict->members) {
490
0
                    checkSchemaInternal(
491
0
                        val.m->value.get(), pattern_schema, flags, errors, prefix + "." + key);
492
0
                }
493
0
                return;
494
0
            }
495
0
        }
496
497
0
        for (auto& [key, val]: sch_dict->members) {
498
0
            if (this_dict->members.contains(key)) {
499
0
                checkSchemaInternal(
500
0
                    this_dict->members[key].m->value.get(),
501
0
                    val.m->value.get(),
502
0
                    flags,
503
0
                    errors,
504
0
                    prefix + "." + key);
505
0
            } else {
506
0
                if (flags & f_optional) {
507
0
                    QTC::TC("libtests", "JSON optional key");
508
0
                } else {
509
0
                    error(": key \"" + key + "\" is present in schema but missing in object");
510
0
                }
511
0
            }
512
0
        }
513
0
        for (auto const& item: this_dict->members) {
514
0
            if (!sch_dict->members.contains(item.first)) {
515
0
                error(
516
0
                    ": key \"" + item.first + "\" is not present in schema but appears in object");
517
0
            }
518
0
        }
519
0
        return;
520
0
    }
521
522
0
    if (auto* sch_arr = dynamic_cast<JSON_array*>(sch_v)) {
523
0
        auto* this_arr = dynamic_cast<JSON_array*>(this_v);
524
0
        auto n_elements = sch_arr->elements.size();
525
0
        if (n_elements == 1) {
526
            // A single-element array in the schema allows a single element in the object or a
527
            // variable-length array, each of whose items must conform to the single element of the
528
            // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior
529
            // of allowing a single item only when the object is not an array.
530
0
            if (this_arr) {
531
0
                int i = 0;
532
0
                for (auto const& element: this_arr->elements) {
533
0
                    checkSchemaInternal(
534
0
                        element.m->value.get(),
535
0
                        sch_arr->elements.at(0).m->value.get(),
536
0
                        flags,
537
0
                        errors,
538
0
                        prefix + "." + std::to_string(i));
539
0
                    ++i;
540
0
                }
541
0
            } else {
542
0
                checkSchemaInternal(
543
0
                    this_v, sch_arr->elements.at(0).m->value.get(), flags, errors, prefix);
544
0
            }
545
0
        } else if (!this_arr || this_arr->elements.size() != n_elements) {
546
0
            error(" is supposed to be an array of length " + std::to_string(n_elements));
547
0
            return;
548
0
        } else {
549
            // A multi-element array in the schema must correspond to an element of the same length
550
            // in the object. Each element in the object is validated against the corresponding
551
            // element in the schema.
552
0
            size_t i = 0;
553
0
            for (auto const& element: this_arr->elements) {
554
0
                checkSchemaInternal(
555
0
                    element.m->value.get(),
556
0
                    sch_arr->elements.at(i).m->value.get(),
557
0
                    flags,
558
0
                    errors,
559
0
                    prefix + "." + std::to_string(i));
560
0
                ++i;
561
0
            }
562
0
        }
563
0
        return;
564
0
    }
565
566
0
    if (!dynamic_cast<JSON_string*>(sch_v)) {
567
0
        error(" schema value is not dictionary, array, or string");
568
0
    }
569
0
}
570
571
namespace
572
{
573
    class JSONParser
574
    {
575
      public:
576
        JSONParser(InputSource& is, JSON::Reactor* reactor) :
577
0
            is(is),
578
0
            reactor(reactor),
579
0
            p(buf)
580
0
        {
581
0
        }
582
583
        JSON parse();
584
585
      private:
586
        enum parser_state_e {
587
            ps_top,
588
            ps_dict_begin,
589
            ps_dict_after_key,
590
            ps_dict_after_colon,
591
            ps_dict_after_item,
592
            ps_dict_after_comma,
593
            ps_array_begin,
594
            ps_array_after_item,
595
            ps_array_after_comma,
596
            ps_done,
597
        };
598
599
        enum lex_state_e {
600
            ls_top,
601
            ls_number,
602
            ls_number_minus,
603
            ls_number_leading_zero,
604
            ls_number_before_point,
605
            ls_number_point,
606
            ls_number_after_point,
607
            ls_number_e,
608
            ls_number_e_sign,
609
            ls_alpha,
610
            ls_string,
611
            ls_after_string,
612
            ls_backslash,
613
            ls_u4,
614
            ls_begin_array,
615
            ls_end_array,
616
            ls_begin_dict,
617
            ls_end_dict,
618
            ls_colon,
619
            ls_comma,
620
        };
621
622
        struct StackFrame
623
        {
624
            StackFrame(parser_state_e state, JSON& item) :
625
0
                state(state),
626
0
                item(item)
627
0
            {
628
0
            }
629
630
            parser_state_e state;
631
            JSON item;
632
        };
633
634
        void getToken();
635
        void handleToken();
636
        void tokenError();
637
        static void handle_u_code(
638
            unsigned long codepoint,
639
            qpdf_offset_t offset,
640
            unsigned long& high_surrogate,
641
            qpdf_offset_t& high_offset,
642
            std::string& result);
643
        inline void append();
644
        inline void append(lex_state_e);
645
        inline void ignore();
646
        inline void ignore(lex_state_e);
647
648
        InputSource& is;
649
        JSON::Reactor* reactor;
650
        lex_state_e lex_state{ls_top};
651
        char buf[16384];
652
        size_t bytes{0};
653
        char const* p;
654
        qpdf_offset_t u_count{0};
655
        unsigned long u_value{0};
656
        qpdf_offset_t offset{0};
657
        bool done{false};
658
        std::string token;
659
        qpdf_offset_t token_start{0};
660
        parser_state_e parser_state{ps_top};
661
        std::vector<StackFrame> stack;
662
        std::string dict_key;
663
        qpdf_offset_t dict_key_offset{0};
664
    };
665
} // namespace
666
667
void
668
JSONParser::handle_u_code(
669
    unsigned long codepoint,
670
    qpdf_offset_t offset,
671
    unsigned long& high_surrogate,
672
    qpdf_offset_t& high_offset,
673
    std::string& result)
674
0
{
675
0
    if ((codepoint & 0xFC00) == 0xD800) {
676
        // high surrogate
677
0
        qpdf_offset_t new_high_offset = offset;
678
0
        if (high_offset) {
679
0
            QTC::TC("libtests", "JSON 16 high high");
680
0
            throw std::runtime_error(
681
0
                "JSON: offset " + std::to_string(new_high_offset) +
682
0
                ": UTF-16 high surrogate found after previous high surrogate at offset " +
683
0
                std::to_string(high_offset));
684
0
        }
685
0
        high_offset = new_high_offset;
686
0
        high_surrogate = codepoint;
687
0
    } else if ((codepoint & 0xFC00) == 0xDC00) {
688
        // low surrogate
689
0
        if (offset != (high_offset + 6)) {
690
0
            QTC::TC("libtests", "JSON 16 low not after high");
691
0
            throw std::runtime_error(
692
0
                "JSON: offset " + std::to_string(offset) +
693
0
                ": UTF-16 low surrogate found not immediately after high surrogate");
694
0
        }
695
0
        high_offset = 0;
696
0
        codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF);
697
0
        result += QUtil::toUTF8(codepoint);
698
0
    } else {
699
0
        result += QUtil::toUTF8(codepoint);
700
0
    }
701
0
}
702
703
void
704
JSONParser::tokenError()
705
0
{
706
0
    if (done) {
707
0
        QTC::TC("libtests", "JSON parse ls premature end of input");
708
0
        throw std::runtime_error("JSON: premature end of input");
709
0
    }
710
711
0
    if (lex_state == ls_u4) {
712
0
        QTC::TC("libtests", "JSON parse bad hex after u");
713
0
        throw std::runtime_error(
714
0
            "JSON: offset " + std::to_string(offset - u_count - 1) +
715
0
            ": \\u must be followed by four hex digits");
716
0
    } else if (lex_state == ls_alpha) {
717
0
        QTC::TC("libtests", "JSON parse keyword bad character");
718
0
        throw std::runtime_error(
719
0
            "JSON: offset " + std::to_string(offset) + ": keyword: unexpected character " +
720
0
            std::string(p, 1));
721
0
    } else if (lex_state == ls_string) {
722
0
        QTC::TC("libtests", "JSON parse control char in string");
723
0
        throw std::runtime_error(
724
0
            "JSON: offset " + std::to_string(offset) +
725
0
            ": control character in string (missing \"?)");
726
0
    } else if (lex_state == ls_backslash) {
727
0
        QTC::TC("libtests", "JSON parse backslash bad character");
728
0
        throw std::runtime_error(
729
0
            "JSON: offset " + std::to_string(offset) +
730
0
            ": invalid character after backslash: " + std::string(p, 1));
731
0
    }
732
733
0
    if (*p == '.') {
734
0
        if (lex_state == ls_number || lex_state == ls_number_e || lex_state == ls_number_e_sign) {
735
0
            QTC::TC("libtests", "JSON parse point after e");
736
0
            throw std::runtime_error(
737
0
                "JSON: offset " + std::to_string(offset) +
738
0
                ": numeric literal: decimal point after e");
739
0
        } else {
740
0
            QTC::TC("libtests", "JSON parse duplicate point");
741
0
            throw std::runtime_error(
742
0
                "JSON: offset " + std::to_string(offset) +
743
0
                ": numeric literal: decimal point already seen");
744
0
        }
745
0
    } else if (*p == 'e' || *p == 'E') {
746
0
        QTC::TC("libtests", "JSON parse duplicate e");
747
0
        throw std::runtime_error(
748
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: e already seen");
749
0
    } else if ((*p == '+') || (*p == '-')) {
750
0
        QTC::TC("libtests", "JSON parse unexpected sign");
751
0
        throw std::runtime_error(
752
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign");
753
0
    } else if (util::is_space(*p) || strchr("{}[]:,", *p)) {
754
0
        QTC::TC("libtests", "JSON parse incomplete number");
755
0
        throw std::runtime_error(
756
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: incomplete number");
757
758
0
    } else {
759
0
        QTC::TC("libtests", "JSON parse numeric bad character");
760
0
        throw std::runtime_error(
761
0
            "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected character " +
762
0
            std::string(p, 1));
763
0
    }
764
0
    throw std::logic_error("JSON::tokenError : unhandled error");
765
0
}
766
767
// Append current character to token and advance to next input character.
768
inline void
769
JSONParser::append()
770
0
{
771
0
    token += *p;
772
0
    ++p;
773
0
    ++offset;
774
0
}
775
776
// Append current character to token, advance to next input character and transition to 'next' lexer
777
// state.
778
inline void
779
JSONParser::append(lex_state_e next)
780
0
{
781
0
    lex_state = next;
782
0
    token += *p;
783
0
    ++p;
784
0
    ++offset;
785
0
}
786
787
// Advance to next input character without appending the current character to token.
788
inline void
789
JSONParser::ignore()
790
0
{
791
0
    ++p;
792
0
    ++offset;
793
0
}
794
795
// Advance to next input character without appending the current character to token and transition
796
// to 'next' lexer state.
797
inline void
798
JSONParser::ignore(lex_state_e next)
799
0
{
800
0
    lex_state = next;
801
0
    ++p;
802
0
    ++offset;
803
0
}
804
805
void
806
JSONParser::getToken()
807
0
{
808
0
    token.clear();
809
810
    // Keep track of UTF-16 surrogate pairs.
811
0
    unsigned long high_surrogate = 0;
812
0
    qpdf_offset_t high_offset = 0;
813
814
0
    while (true) {
815
0
        if (p == (buf + bytes)) {
816
0
            p = buf;
817
0
            bytes = is.read(buf, sizeof(buf));
818
0
            if (bytes == 0) {
819
0
                done = true;
820
0
                break;
821
0
            }
822
0
        }
823
824
0
        if ((*p < 32 && *p >= 0)) {
825
0
            if (*p == '\t' || *p == '\n' || *p == '\r') {
826
                // Legal white space not permitted in strings. This will always end the current
827
                // token (unless we are still before the start of the token).
828
0
                if (lex_state == ls_top) {
829
0
                    ignore();
830
0
                } else {
831
0
                    break;
832
0
                }
833
834
0
            } else {
835
0
                QTC::TC("libtests", "JSON parse null character");
836
0
                throw std::runtime_error(
837
0
                    "JSON: control or null character at offset " + std::to_string(offset));
838
0
            }
839
0
        } else if (*p == ',') {
840
0
            if (lex_state == ls_top) {
841
0
                ignore(ls_comma);
842
0
                return;
843
0
            } else if (lex_state == ls_string) {
844
0
                append();
845
0
            } else {
846
0
                break;
847
0
            }
848
0
        } else if (*p == ':') {
849
0
            if (lex_state == ls_top) {
850
0
                ignore(ls_colon);
851
0
                return;
852
0
            } else if (lex_state == ls_string) {
853
0
                append();
854
0
            } else {
855
0
                break;
856
0
            }
857
0
        } else if (*p == ' ') {
858
0
            if (lex_state == ls_top) {
859
0
                ignore();
860
0
            } else if (lex_state == ls_string) {
861
0
                append();
862
0
            } else {
863
0
                break;
864
0
            }
865
0
        } else if (*p == '{') {
866
0
            if (lex_state == ls_top) {
867
0
                token_start = offset;
868
0
                ignore(ls_begin_dict);
869
0
                return;
870
0
            } else if (lex_state == ls_string) {
871
0
                append();
872
0
            } else {
873
0
                break;
874
0
            }
875
0
        } else if (*p == '}') {
876
0
            if (lex_state == ls_top) {
877
0
                ignore(ls_end_dict);
878
0
                return;
879
0
            } else if (lex_state == ls_string) {
880
0
                append();
881
0
            } else {
882
0
                break;
883
0
            }
884
0
        } else if (*p == '[') {
885
0
            if (lex_state == ls_top) {
886
0
                token_start = offset;
887
0
                ignore(ls_begin_array);
888
0
                return;
889
0
            } else if (lex_state == ls_string) {
890
0
                append();
891
0
            } else {
892
0
                break;
893
0
            }
894
0
        } else if (*p == ']') {
895
0
            if (lex_state == ls_top) {
896
0
                ignore(ls_end_array);
897
0
                return;
898
0
            } else if (lex_state == ls_string) {
899
0
                append();
900
0
            } else {
901
0
                break;
902
0
            }
903
0
        } else {
904
0
            switch (lex_state) {
905
0
            case ls_top:
906
0
                token_start = offset;
907
0
                if (*p == '"') {
908
0
                    ignore(ls_string);
909
0
                } else if ((*p >= 'a') && (*p <= 'z')) {
910
0
                    append(ls_alpha);
911
0
                } else if (*p == '-') {
912
0
                    append(ls_number_minus);
913
0
                } else if ((*p >= '1') && (*p <= '9')) {
914
0
                    append(ls_number_before_point);
915
0
                } else if (*p == '0') {
916
0
                    append(ls_number_leading_zero);
917
0
                } else {
918
0
                    QTC::TC("libtests", "JSON parse bad character");
919
0
                    throw std::runtime_error(
920
0
                        "JSON: offset " + std::to_string(offset) + ": unexpected character " +
921
0
                        std::string(p, 1));
922
0
                }
923
0
                break;
924
925
0
            case ls_number_minus:
926
0
                if ((*p >= '1') && (*p <= '9')) {
927
0
                    append(ls_number_before_point);
928
0
                } else if (*p == '0') {
929
0
                    append(ls_number_leading_zero);
930
0
                } else {
931
0
                    QTC::TC("libtests", "JSON parse number minus no digits");
932
0
                    throw std::runtime_error(
933
0
                        "JSON: offset " + std::to_string(offset) +
934
0
                        ": numeric literal: no digit after minus sign");
935
0
                }
936
0
                break;
937
938
0
            case ls_number_leading_zero:
939
0
                if (*p == '.') {
940
0
                    append(ls_number_point);
941
0
                } else if (*p == 'e' || *p == 'E') {
942
0
                    append(ls_number_e);
943
0
                } else {
944
0
                    QTC::TC("libtests", "JSON parse leading zero");
945
0
                    throw std::runtime_error(
946
0
                        "JSON: offset " + std::to_string(offset) + ": number with leading zero");
947
0
                }
948
0
                break;
949
950
0
            case ls_number_before_point:
951
0
                if ((*p >= '0') && (*p <= '9')) {
952
0
                    append();
953
0
                } else if (*p == '.') {
954
0
                    append(ls_number_point);
955
0
                } else if (*p == 'e' || *p == 'E') {
956
0
                    append(ls_number_e);
957
0
                } else {
958
0
                    tokenError();
959
0
                }
960
0
                break;
961
962
0
            case ls_number_point:
963
0
                if ((*p >= '0') && (*p <= '9')) {
964
0
                    append(ls_number_after_point);
965
0
                } else {
966
0
                    tokenError();
967
0
                }
968
0
                break;
969
970
0
            case ls_number_after_point:
971
0
                if ((*p >= '0') && (*p <= '9')) {
972
0
                    append();
973
0
                } else if (*p == 'e' || *p == 'E') {
974
0
                    append(ls_number_e);
975
0
                } else {
976
0
                    tokenError();
977
0
                }
978
0
                break;
979
980
0
            case ls_number_e:
981
0
                if ((*p >= '0') && (*p <= '9')) {
982
0
                    append(ls_number);
983
0
                } else if ((*p == '+') || (*p == '-')) {
984
0
                    append(ls_number_e_sign);
985
0
                } else {
986
0
                    tokenError();
987
0
                }
988
0
                break;
989
990
0
            case ls_number_e_sign:
991
0
                if ((*p >= '0') && (*p <= '9')) {
992
0
                    append(ls_number);
993
0
                } else {
994
0
                    tokenError();
995
0
                }
996
0
                break;
997
998
0
            case ls_number:
999
                // We only get here after we have seen an exponent.
1000
0
                if ((*p >= '0') && (*p <= '9')) {
1001
0
                    append();
1002
0
                } else {
1003
0
                    tokenError();
1004
0
                }
1005
0
                break;
1006
1007
0
            case ls_alpha:
1008
0
                if ((*p >= 'a') && (*p <= 'z')) {
1009
0
                    append();
1010
0
                } else {
1011
0
                    tokenError();
1012
0
                }
1013
0
                break;
1014
1015
0
            case ls_string:
1016
0
                if (*p == '"') {
1017
0
                    if (high_offset) {
1018
0
                        QTC::TC("libtests", "JSON 16 dangling high");
1019
0
                        throw std::runtime_error(
1020
0
                            "JSON: offset " + std::to_string(high_offset) +
1021
0
                            ": UTF-16 high surrogate not followed by low surrogate");
1022
0
                    }
1023
0
                    ignore(ls_after_string);
1024
0
                    return;
1025
0
                } else if (*p == '\\') {
1026
0
                    ignore(ls_backslash);
1027
0
                } else {
1028
0
                    append();
1029
0
                }
1030
0
                break;
1031
1032
0
            case ls_backslash:
1033
0
                lex_state = ls_string;
1034
0
                switch (*p) {
1035
0
                case '\\':
1036
0
                case '\"':
1037
0
                case '/':
1038
                    // \/ is allowed in json input, but so is /, so we don't map / to \/ in output.
1039
0
                    token += *p;
1040
0
                    break;
1041
0
                case 'b':
1042
0
                    token += '\b';
1043
0
                    break;
1044
0
                case 'f':
1045
0
                    token += '\f';
1046
0
                    break;
1047
0
                case 'n':
1048
0
                    token += '\n';
1049
0
                    break;
1050
0
                case 'r':
1051
0
                    token += '\r';
1052
0
                    break;
1053
0
                case 't':
1054
0
                    token += '\t';
1055
0
                    break;
1056
0
                case 'u':
1057
0
                    lex_state = ls_u4;
1058
0
                    u_count = 0;
1059
0
                    u_value = 0;
1060
0
                    break;
1061
0
                default:
1062
0
                    lex_state = ls_backslash;
1063
0
                    tokenError();
1064
0
                }
1065
0
                ignore();
1066
0
                break;
1067
1068
0
            case ls_u4:
1069
0
                using ui = unsigned int;
1070
0
                if (ui val = ui(util::hex_decode_char(*p)); val < 16) {
1071
0
                    u_value = 16 * u_value + val;
1072
0
                } else {
1073
0
                    tokenError();
1074
0
                }
1075
0
                if (++u_count == 4) {
1076
0
                    handle_u_code(u_value, offset - 5, high_surrogate, high_offset, token);
1077
0
                    lex_state = ls_string;
1078
0
                }
1079
0
                ignore();
1080
0
                break;
1081
1082
0
            default:
1083
0
                throw std::logic_error("JSONParser::getToken : trying to handle delimiter state");
1084
0
            }
1085
0
        }
1086
0
    }
1087
1088
    // We only get here if on end of input or if the last character was a control character or other
1089
    // delimiter.
1090
1091
0
    if (!token.empty()) {
1092
0
        switch (lex_state) {
1093
0
        case ls_top:
1094
            // Can't happen
1095
0
            throw std::logic_error("tok_start set in ls_top while parsing");
1096
0
            break;
1097
1098
0
        case ls_number_leading_zero:
1099
0
        case ls_number_before_point:
1100
0
        case ls_number_after_point:
1101
0
            lex_state = ls_number;
1102
0
            break;
1103
1104
0
        case ls_number:
1105
0
        case ls_alpha:
1106
            // terminal state
1107
0
            break;
1108
1109
0
        default:
1110
0
            tokenError();
1111
0
        }
1112
0
    }
1113
0
}
1114
1115
void
1116
JSONParser::handleToken()
1117
0
{
1118
0
    if (lex_state == ls_top) {
1119
0
        return;
1120
0
    }
1121
1122
0
    if (parser_state == ps_done) {
1123
0
        QTC::TC("libtests", "JSON parse junk after object");
1124
0
        throw std::runtime_error(
1125
0
            "JSON: offset " + std::to_string(offset) +
1126
0
            ": material follows end of object: " + token);
1127
0
    }
1128
1129
0
    const static JSON null_item = JSON::makeNull();
1130
0
    JSON item;
1131
0
    auto tos = stack.empty() ? null_item : stack.back().item;
1132
0
    auto ls = lex_state;
1133
0
    lex_state = ls_top;
1134
1135
0
    switch (ls) {
1136
0
    case ls_begin_dict:
1137
0
        item = JSON::makeDictionary();
1138
0
        break;
1139
1140
0
    case ls_begin_array:
1141
0
        item = JSON::makeArray();
1142
0
        break;
1143
1144
0
    case ls_colon:
1145
0
        if (parser_state != ps_dict_after_key) {
1146
0
            QTC::TC("libtests", "JSON parse unexpected :");
1147
0
            throw std::runtime_error(
1148
0
                "JSON: offset " + std::to_string(offset) + ": unexpected colon");
1149
0
        }
1150
0
        parser_state = ps_dict_after_colon;
1151
0
        return;
1152
1153
0
    case ls_comma:
1154
0
        if (!((parser_state == ps_dict_after_item) || (parser_state == ps_array_after_item))) {
1155
0
            QTC::TC("libtests", "JSON parse unexpected ,");
1156
0
            throw std::runtime_error(
1157
0
                "JSON: offset " + std::to_string(offset) + ": unexpected comma");
1158
0
        }
1159
0
        if (parser_state == ps_dict_after_item) {
1160
0
            parser_state = ps_dict_after_comma;
1161
0
        } else if (parser_state == ps_array_after_item) {
1162
0
            parser_state = ps_array_after_comma;
1163
0
        } else {
1164
0
            throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma");
1165
0
        }
1166
0
        return;
1167
1168
0
    case ls_end_array:
1169
0
        if (!(parser_state == ps_array_begin || parser_state == ps_array_after_item)) {
1170
0
            QTC::TC("libtests", "JSON parse unexpected ]");
1171
0
            throw std::runtime_error(
1172
0
                "JSON: offset " + std::to_string(offset) + ": unexpected array end delimiter");
1173
0
        }
1174
0
        parser_state = stack.back().state;
1175
0
        tos.setEnd(offset);
1176
0
        if (reactor) {
1177
0
            reactor->containerEnd(tos);
1178
0
        }
1179
0
        if (parser_state != ps_done) {
1180
0
            stack.pop_back();
1181
0
        }
1182
0
        return;
1183
1184
0
    case ls_end_dict:
1185
0
        if (!((parser_state == ps_dict_begin) || (parser_state == ps_dict_after_item))) {
1186
0
            QTC::TC("libtests", "JSON parse unexpected }");
1187
0
            throw std::runtime_error(
1188
0
                "JSON: offset " + std::to_string(offset) + ": unexpected dictionary end delimiter");
1189
0
        }
1190
0
        parser_state = stack.back().state;
1191
0
        tos.setEnd(offset);
1192
0
        if (reactor) {
1193
0
            reactor->containerEnd(tos);
1194
0
        }
1195
0
        if (parser_state != ps_done) {
1196
0
            stack.pop_back();
1197
0
        }
1198
0
        return;
1199
1200
0
    case ls_number:
1201
0
        item = JSON::makeNumber(token);
1202
0
        break;
1203
1204
0
    case ls_alpha:
1205
0
        if (token == "true") {
1206
0
            item = JSON::makeBool(true);
1207
0
        } else if (token == "false") {
1208
0
            item = JSON::makeBool(false);
1209
0
        } else if (token == "null") {
1210
0
            item = JSON::makeNull();
1211
0
        } else {
1212
0
            QTC::TC("libtests", "JSON parse invalid keyword");
1213
0
            throw std::runtime_error(
1214
0
                "JSON: offset " + std::to_string(offset) + ": invalid keyword " + token);
1215
0
        }
1216
0
        break;
1217
1218
0
    case ls_after_string:
1219
0
        if (parser_state == ps_dict_begin || parser_state == ps_dict_after_comma) {
1220
0
            dict_key = token;
1221
0
            dict_key_offset = token_start;
1222
0
            parser_state = ps_dict_after_key;
1223
0
            return;
1224
0
        } else {
1225
0
            item = JSON::makeString(token);
1226
0
        }
1227
0
        break;
1228
1229
0
    default:
1230
0
        throw std::runtime_error(
1231
0
            "JSON: offset " + std::to_string(offset) + ": premature end of input");
1232
0
        break;
1233
0
    }
1234
1235
0
    item.setStart(token_start);
1236
0
    item.setEnd(offset);
1237
1238
0
    switch (parser_state) {
1239
0
    case ps_dict_begin:
1240
0
    case ps_dict_after_comma:
1241
0
        QTC::TC("libtests", "JSON parse string as dict key");
1242
0
        throw std::runtime_error(
1243
0
            "JSON: offset " + std::to_string(offset) + ": expect string as dictionary key");
1244
0
        break;
1245
1246
0
    case ps_dict_after_colon:
1247
0
        if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
1248
0
            tos.addDictionaryMember(dict_key, item);
1249
0
        }
1250
0
        parser_state = ps_dict_after_item;
1251
0
        break;
1252
1253
0
    case ps_array_begin:
1254
0
    case ps_array_after_comma:
1255
0
        if (!reactor || !reactor->arrayItem(item)) {
1256
0
            tos.addArrayElement(item);
1257
0
        }
1258
0
        parser_state = ps_array_after_item;
1259
0
        break;
1260
1261
0
    case ps_top:
1262
0
        if (!(item.isDictionary() || item.isArray())) {
1263
0
            stack.emplace_back(ps_done, item);
1264
0
            parser_state = ps_done;
1265
0
            return;
1266
0
        }
1267
0
        parser_state = ps_done;
1268
0
        break;
1269
1270
0
    case ps_dict_after_key:
1271
0
        QTC::TC("libtests", "JSON parse expected colon");
1272
0
        throw std::runtime_error("JSON: offset " + std::to_string(offset) + ": expected ':'");
1273
0
        break;
1274
1275
0
    case ps_dict_after_item:
1276
0
        QTC::TC("libtests", "JSON parse expected , or }");
1277
0
        throw std::runtime_error(
1278
0
            "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
1279
0
        break;
1280
1281
0
    case ps_array_after_item:
1282
0
        QTC::TC("libtests", "JSON parse expected, or ]");
1283
0
        throw std::runtime_error(
1284
0
            "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
1285
0
        break;
1286
1287
0
    case ps_done:
1288
0
        throw std::logic_error("JSONParser::handleToken: unexpected parser state");
1289
0
    }
1290
1291
0
    if (item.isDictionary() || item.isArray()) {
1292
0
        stack.emplace_back(parser_state, item);
1293
        // Calling container start method is postponed until after adding the containers to their
1294
        // parent containers, if any. This makes it much easier to keep track of the current nesting
1295
        // level.
1296
0
        if (item.isDictionary()) {
1297
0
            if (reactor) {
1298
0
                reactor->dictionaryStart();
1299
0
            }
1300
0
            parser_state = ps_dict_begin;
1301
0
        } else if (item.isArray()) {
1302
0
            if (reactor) {
1303
0
                reactor->arrayStart();
1304
0
            }
1305
0
            parser_state = ps_array_begin;
1306
0
        }
1307
1308
0
        if (stack.size() > 500) {
1309
0
            throw std::runtime_error(
1310
0
                "JSON: offset " + std::to_string(offset) + ": maximum object depth exceeded");
1311
0
        }
1312
0
    }
1313
0
}
1314
1315
// Drive the lexer and parser until the lexer reports end of input, then return the item at the top
// of the stack. Throws std::runtime_error if the input ended before a complete value was parsed.
// When a reactor is present and the result is neither an array nor a dictionary, its
// topLevelScalar callback is invoked before returning.
JSON
JSONParser::parse()
{
    while (!done) {
        getToken();
        handleToken();
    }

    if (parser_state != ps_done) {
        QTC::TC("libtests", "JSON parse premature EOF");
        throw std::runtime_error("JSON: premature end of input");
    }

    auto const& result = stack.back().item;
    if (reactor && !result.isArray() && !result.isDictionary()) {
        reactor->topLevelScalar();
    }
    return result;
}
1332
1333
// Parse JSON read from 'is', passing 'reactor' to the parser, and return the parsed value.
JSON
JSON::parse(InputSource& is, Reactor* reactor)
{
    return JSONParser(is, reactor).parse();
}
1339
1340
// Parse JSON held in the string 's' (no reactor) and return the parsed value.
JSON
JSON::parse(std::string const& s)
{
    is::OffsetBuffer source("json input", s);
    return JSONParser(source, nullptr).parse();
}
1347
1348
void
1349
JSON::setStart(qpdf_offset_t start)
1350
0
{
1351
0
    if (m) {
1352
0
        m->start = start;
1353
0
    }
1354
0
}
1355
1356
void
1357
JSON::setEnd(qpdf_offset_t end)
1358
0
{
1359
0
    if (m) {
1360
0
        m->end = end;
1361
0
    }
1362
0
}
1363
1364
// Return the stored start offset, or 0 when the object has no members (m is null).
qpdf_offset_t
JSON::getStart() const
{
    if (m) {
        return m->start;
    }
    return 0;
}
1369
1370
// Return the stored end offset, or 0 when the object has no members (m is null).
qpdf_offset_t
JSON::getEnd() const
{
    if (m) {
        return m->end;
    }
    return 0;
}