Coverage Report

Created: 2025-01-19 07:38

/src/boost/boost/property_tree/json_parser/detail/parser.hpp
Line
Count
Source (jump to first uncovered line)
1
#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
2
#define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
3
4
#include <boost/property_tree/json_parser/error.hpp>
5
6
#include <boost/core/ref.hpp>
7
#include <boost/bind/bind.hpp>
8
#include <boost/bind/placeholders.hpp>
9
10
#include <iterator>
11
#include <sstream>
12
#include <string>
13
14
namespace boost { namespace property_tree {
15
    namespace json_parser { namespace detail
16
{
17
18
    template <typename Encoding, typename Iterator, typename Sentinel>
19
    class source
20
    {
21
    public:
22
        typedef typename std::iterator_traits<Iterator>::value_type
23
            code_unit;
24
        typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
25
26
0
        explicit source(Encoding& encoding) : encoding(encoding) {}
27
28
        template <typename Range>
29
        void set_input(const std::string& filename, const Range& r)
30
0
        {
31
0
            this->filename = filename;
32
0
            cur = r.begin();
33
0
            end = r.end();
34
            // Note that there is no backtracking, so if e.g. a UTF-8 file
35
            // starts with something that initially looks like a BOM but isn't,
36
            // there's trouble.
37
            // However, no valid JSON file can start with a UTF-8 EF byte.
38
0
            encoding.skip_introduction(cur, end);
39
0
            line = 1;
40
0
            offset = 0;
41
0
        }
42
43
0
        bool done() const { return cur == end; }
44
45
0
        void parse_error(const char* msg) {
46
0
            BOOST_PROPERTY_TREE_THROW(
47
0
                json_parser_error(msg, filename, line));
48
0
        }
49
50
0
        void next() {
51
0
            if (encoding.is_nl(*cur)) {
52
0
                ++line;
53
0
                offset = 0;
54
0
            } else {
55
0
                ++offset;
56
0
            }
57
0
            ++cur;
58
0
        }
59
60
        template <typename Action>
61
0
        bool have(encoding_predicate p, Action& a) {
62
0
            bool found = cur != end && (encoding.*p)(*cur);
63
0
            if (found) {
64
0
                a(*cur);
65
0
                next();
66
0
            }
67
0
            return found;
68
0
        }
Unexecuted instantiation: bool boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::have<boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::DoNothing>(bool (boost::property_tree::json_parser::detail::encoding<char>::*)(char) const, boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::DoNothing&)
Unexecuted instantiation: bool boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::have<boost::property_tree::json_parser::detail::number_callback_adapter<boost::property_tree::json_parser::detail::standard_callbacks<boost::property_tree::basic_ptree<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >, boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::input_iterator_tag> >(bool (boost::property_tree::json_parser::detail::encoding<char>::*)(char) const, boost::property_tree::json_parser::detail::number_callback_adapter<boost::property_tree::json_parser::detail::standard_callbacks<boost::property_tree::basic_ptree<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >, boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::input_iterator_tag>&)
69
70
0
        bool have(encoding_predicate p) {
71
0
            DoNothing n;
72
0
            return have(p, n);
73
0
        }
74
75
        template <typename Action>
76
0
        void expect(encoding_predicate p, const char* msg, Action& a) {
77
0
            if (!have(p, a)) {
78
0
                parse_error(msg);
79
0
            }
80
0
        }
Unexecuted instantiation: void boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::expect<boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::DoNothing>(bool (boost::property_tree::json_parser::detail::encoding<char>::*)(char) const, char const*, boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::DoNothing&)
Unexecuted instantiation: void boost::property_tree::json_parser::detail::source<boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> > >::expect<boost::property_tree::json_parser::detail::number_callback_adapter<boost::property_tree::json_parser::detail::standard_callbacks<boost::property_tree::basic_ptree<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >, boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::input_iterator_tag> >(bool (boost::property_tree::json_parser::detail::encoding<char>::*)(char) const, char const*, boost::property_tree::json_parser::detail::number_callback_adapter<boost::property_tree::json_parser::detail::standard_callbacks<boost::property_tree::basic_ptree<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >, boost::property_tree::json_parser::detail::encoding<char>, std::__1::istreambuf_iterator<char, std::__1::char_traits<char> >, std::__1::input_iterator_tag>&)
81
82
0
        void expect(encoding_predicate p, const char* msg) {
83
0
            DoNothing n;
84
0
            expect(p, msg, n);
85
0
        }
86
87
0
        code_unit need_cur(const char* msg) {
88
0
            if (cur == end) {
89
0
                parse_error(msg);
90
0
            }
91
0
            return *cur;
92
0
        }
93
94
0
        Iterator& raw_cur() { return cur; }
95
0
        Sentinel raw_end() { return end; }
96
97
    private:
98
        struct DoNothing {
99
0
            void operator ()(code_unit) const {}
100
        };
101
102
        Encoding& encoding;
103
        Iterator cur;
104
        Sentinel end;
105
        std::string filename;
106
        int line;
107
        int offset;
108
    };
109
110
    // Action object handed to source::have()/expect() while a number is
    // being scanned; it relays the number to the callbacks.
    //
    // Primary template, used whenever the iterator category is not
    // std::input_iterator_tag. It remembers where the number started and
    // keeps a reference to the live iterator, so finish() can convert the
    // whole [start, current) range at once and individual code units can be
    // ignored.
    template <typename Callbacks, typename Encoding, typename Iterator,
              typename Category =
                  typename std::iterator_traits<Iterator>::iterator_category>
    class number_callback_adapter
    {
    public:
        // pos must refer to the iterator that is advanced while the number
        // is scanned; its current value marks the start of the number.
        number_callback_adapter(Callbacks& cb, Encoding& enc, Iterator& pos)
            : callbacks(cb), encoding(enc), first(pos), cur(pos)
        {}

        // Nothing to do per code unit; the range is picked up in finish().
        void operator ()(typename Encoding::external_char) {}

        // Convert the range scanned so far and report it as one number.
        void finish() const {
            callbacks.on_number(encoding.to_internal(first, cur));
        }

    private:
        // Declared but not defined: the adapter is not copyable.
        number_callback_adapter(const number_callback_adapter&);

        Callbacks& callbacks;
        Encoding& encoding;
        Iterator first; // start of the number
        Iterator& cur;  // live position, advanced outside this class
    };

    // Specialisation for std::input_iterator_tag. No iterator is stored;
    // each code unit is forwarded to the callbacks as it arrives, bracketed
    // by on_begin_number() / on_end_number().
    template <typename Callbacks, typename Encoding, typename Iterator>
    class number_callback_adapter<Callbacks, Encoding, Iterator,
                                  std::input_iterator_tag>
    {
    public:
        // The iterator argument is accepted only so both versions can be
        // constructed the same way; it is not used.
        number_callback_adapter(Callbacks& cb, Encoding& enc, Iterator&)
            : callbacks(cb), encoding(enc), first(true)
        {}

        // Forward one code unit of the number. The first call also
        // announces the start of the number.
        void operator ()(typename Encoding::external_char c) {
            if (first) {
                first = false;
                callbacks.on_begin_number();
            }
            callbacks.on_digit(encoding.to_internal_trivial(c));
        }

        // Announce the end of the number.
        void finish() const {
            callbacks.on_end_number();
        }

    private:
        // Declared but not defined: the adapter is not copyable.
        number_callback_adapter(const number_callback_adapter&);

        Callbacks& callbacks;
        Encoding& encoding;
        bool first; // true until the first code unit has been forwarded
    };
164
165
    // Helper used while the contents of a string are scanned; it relays
    // them to the callbacks.
    //
    // Primary template, used whenever the iterator category is not
    // std::input_iterator_tag. Consecutive code points are collected into a
    // "run": start_run() marks the beginning, process_codepoint() advances
    // the shared iterator, and finish_run() converts the run
    // [run_begin, current) and reports it with a single on_code_units() call.
    template <typename Callbacks, typename Encoding, typename Iterator,
              typename Category =
                  typename std::iterator_traits<Iterator>::iterator_category>
    class string_callback_adapter
    {
    public:
        // pos must refer to the iterator that moves through the string; the
        // first run starts at its current value.
        string_callback_adapter(Callbacks& cb, Encoding& enc, Iterator& pos)
            : callbacks(cb), encoding(enc), cur(pos), run_begin(pos)
        {}

        // Begin a new run at the current position.
        void start_run() {
            run_begin = cur;
        }

        // Report everything between the start of the run and the current
        // position.
        void finish_run() {
            callbacks.on_code_units(encoding.to_internal(run_begin, cur));
        }

        // Let the encoding step over one code point; error_fn is passed on
        // to it unchanged.
        template <typename Sentinel, typename EncodingErrorFn>
        void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
            encoding.skip_codepoint(cur, end, error_fn);
        }

    private:
        // Declared but not defined: the adapter is not copyable.
        string_callback_adapter(const string_callback_adapter&);

        Callbacks& callbacks;
        Encoding& encoding;
        Iterator& cur;      // live position, shared with the caller
        Iterator run_begin; // start of the current run
    };
198
199
    template <typename Callbacks, typename Encoding, typename Iterator>
200
    class string_callback_adapter<Callbacks, Encoding, Iterator,
201
                                  std::input_iterator_tag>
202
    {
203
    public:
204
        string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
205
                                Iterator& cur)
206
0
            : callbacks(callbacks), encoding(encoding), cur(cur)
207
0
        {}
208
209
0
        void start_run() {}
210
211
0
        void finish_run() {}
212
213
        template <typename Sentinel, typename EncodingErrorFn>
214
0
        void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
215
0
            encoding.transcode_codepoint(cur, end,
216
0
                boost::bind(&Callbacks::on_code_unit,
217
0
                            boost::ref(callbacks), boost::placeholders::_1),
218
0
                error_fn);
219
0
        }
220
221
    private:
222
        string_callback_adapter(const string_callback_adapter&);
223
224
        Callbacks& callbacks;
225
        Encoding& encoding;
226
        Iterator& cur;
227
    };
228
229
    template <typename Callbacks, typename Encoding, typename Iterator,
230
              typename Sentinel>
231
    class parser
232
    {
233
        typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
234
            number_adapter;
235
        typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
236
            string_adapter;
237
        typedef detail::source<Encoding, Iterator, Sentinel> source;
238
        typedef typename source::code_unit code_unit;
239
240
    public:
241
        parser(Callbacks& callbacks, Encoding& encoding)
242
0
            : callbacks(callbacks), encoding(encoding), src(encoding)
243
0
        {}
244
245
        template <typename Range>
246
0
        void set_input(const std::string& filename, const Range& r) {
247
0
            src.set_input(filename, r);
248
0
        }
249
250
0
        void finish() {
251
0
            skip_ws();
252
0
            if (!src.done()) {
253
0
                parse_error("garbage after data");
254
0
            }
255
0
        }
256
257
0
        void parse_value() {
258
0
            if (parse_object()) return;
259
0
            if (parse_array()) return;
260
0
            if (parse_string()) return;
261
0
            if (parse_boolean()) return;
262
0
            if (parse_null()) return;
263
0
            if (parse_number()) return;
264
0
            parse_error("expected value");
265
0
        }
266
267
0
        bool parse_null() {
268
0
            skip_ws();
269
0
            if (!have(&Encoding::is_n)) {
270
0
                return false;
271
0
            }
272
0
            expect(&Encoding::is_u, "expected 'null'");
273
0
            expect(&Encoding::is_l, "expected 'null'");
274
0
            expect(&Encoding::is_l, "expected 'null'");
275
0
            callbacks.on_null();
276
0
            return true;
277
0
        }
278
279
0
        bool parse_boolean() {
280
0
            skip_ws();
281
0
            if (have(&Encoding::is_t)) {
282
0
                expect(&Encoding::is_r, "expected 'true'");
283
0
                expect(&Encoding::is_u, "expected 'true'");
284
0
                expect(&Encoding::is_e, "expected 'true'");
285
0
                callbacks.on_boolean(true);
286
0
                return true;
287
0
            }
288
0
            if (have(&Encoding::is_f)) {
289
0
                expect(&Encoding::is_a, "expected 'false'");
290
0
                expect(&Encoding::is_l, "expected 'false'");
291
0
                expect(&Encoding::is_s, "expected 'false'");
292
0
                expect(&Encoding::is_e, "expected 'false'");
293
0
                callbacks.on_boolean(false);
294
0
                return true;
295
0
            }
296
0
            return false;
297
0
        }
298
299
0
        bool parse_number() {
300
0
            skip_ws();
301
302
0
            number_adapter adapter(callbacks, encoding, src.raw_cur());
303
0
            bool started = false;
304
0
            if (have(&Encoding::is_minus, adapter)) {
305
0
                started = true;
306
0
            }
307
0
            if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
308
0
                if (started) {
309
0
                    parse_error("expected digits after -");
310
0
                }
311
0
                return false;
312
0
            }
313
0
            parse_frac_part(adapter);
314
0
            parse_exp_part(adapter);
315
0
            adapter.finish();
316
0
            return true;
317
0
        }
318
319
0
        bool parse_string() {
320
0
            skip_ws();
321
322
0
            if (!have(&Encoding::is_quote)) {
323
0
                return false;
324
0
            }
325
326
0
            callbacks.on_begin_string();
327
0
            string_adapter adapter(callbacks, encoding, src.raw_cur());
328
0
            while (!encoding.is_quote(need_cur("unterminated string"))) {
329
0
                if (encoding.is_backslash(*src.raw_cur())) {
330
0
                    adapter.finish_run();
331
0
                    next();
332
0
                    parse_escape();
333
0
                    adapter.start_run();
334
0
                } else {
335
0
                    adapter.process_codepoint(src.raw_end(),
336
0
                        boost::bind(&parser::parse_error,
337
0
                                    this, "invalid code sequence"));
338
0
                }
339
0
            }
340
0
            adapter.finish_run();
341
0
            callbacks.on_end_string();
342
0
            next();
343
0
            return true;
344
0
        }
345
346
0
        bool parse_array() {
347
0
            skip_ws();
348
349
0
            if (!have(&Encoding::is_open_bracket)) {
350
0
                return false;
351
0
            }
352
353
0
            callbacks.on_begin_array();
354
0
            skip_ws();
355
0
            if (have(&Encoding::is_close_bracket)) {
356
0
                callbacks.on_end_array();
357
0
                return true;
358
0
            }
359
0
            do {
360
0
                parse_value();
361
0
                skip_ws();
362
0
            } while (have(&Encoding::is_comma));
363
0
            expect(&Encoding::is_close_bracket, "expected ']' or ','");
364
0
            callbacks.on_end_array();
365
0
            return true;
366
0
        }
367
368
0
        bool parse_object() {
369
0
            skip_ws();
370
371
0
            if (!have(&Encoding::is_open_brace)) {
372
0
                return false;
373
0
            }
374
375
0
            callbacks.on_begin_object();
376
0
            skip_ws();
377
0
            if (have(&Encoding::is_close_brace)) {
378
0
                callbacks.on_end_object();
379
0
                return true;
380
0
            }
381
0
            do {
382
0
                if (!parse_string()) {
383
0
                    parse_error("expected key string");
384
0
                }
385
0
                skip_ws();
386
0
                expect(&Encoding::is_colon, "expected ':'");
387
0
                parse_value();
388
0
                skip_ws();
389
0
            } while (have(&Encoding::is_comma));
390
0
            expect(&Encoding::is_close_brace, "expected '}' or ','");
391
0
            callbacks.on_end_object();
392
0
            return true;
393
0
        }
394
395
    private:
396
        typedef typename source::encoding_predicate encoding_predicate;
397
398
0
        void parse_error(const char* msg) { src.parse_error(msg); }
399
0
        void next() { src.next(); }
400
        template <typename Action>
401
0
        bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
402
0
        bool have(encoding_predicate p) { return src.have(p); }
403
        template <typename Action>
404
0
        void expect(encoding_predicate p, const char* msg, Action& a) {
405
0
            src.expect(p, msg, a);
406
0
        }
407
0
        void expect(encoding_predicate p, const char* msg) {
408
0
            src.expect(p, msg);
409
0
        }
410
0
        code_unit need_cur(const char* msg) { return src.need_cur(msg); }
411
412
0
        void skip_ws() {
413
0
            while (have(&Encoding::is_ws)) {
414
0
            }
415
0
        }
416
417
0
        bool parse_int_part(number_adapter& action) {
418
0
            if (!have(&Encoding::is_digit0, action)) {
419
0
                return false;
420
0
            }
421
0
            parse_digits(action);
422
0
            return true;
423
0
        }
424
425
0
        void parse_frac_part(number_adapter& action) {
426
0
            if (!have(&Encoding::is_dot, action)) {
427
0
                return;
428
0
            }
429
0
            expect(&Encoding::is_digit, "need at least one digit after '.'",
430
0
                   action);
431
0
            parse_digits(action);
432
0
        }
433
434
0
        void parse_exp_part(number_adapter& action) {
435
0
            if (!have(&Encoding::is_eE, action)) {
436
0
                return;
437
0
            }
438
0
            have(&Encoding::is_plusminus, action);
439
0
            expect(&Encoding::is_digit, "need at least one digit in exponent",
440
0
                   action);
441
0
            parse_digits(action);
442
0
        }
443
444
0
        void parse_digits(number_adapter& action) {
445
0
            while (have(&Encoding::is_digit, action)) {
446
0
            }
447
0
        }
448
449
0
        void parse_escape() {
450
0
            if (have(&Encoding::is_quote)) {
451
0
                feed(0x22);
452
0
            } else if (have(&Encoding::is_backslash)) {
453
0
                feed(0x5c);
454
0
            } else if (have(&Encoding::is_slash)) {
455
0
                feed(0x2f);
456
0
            } else if (have(&Encoding::is_b)) {
457
0
                feed(0x08); // backspace
458
0
            } else if (have(&Encoding::is_f)) {
459
0
                feed(0x0c); // formfeed
460
0
            } else if (have(&Encoding::is_n)) {
461
0
                feed(0x0a); // line feed
462
0
            } else if (have(&Encoding::is_r)) {
463
0
                feed(0x0d); // carriage return
464
0
            } else if (have(&Encoding::is_t)) {
465
0
                feed(0x09); // horizontal tab
466
0
            } else if (have(&Encoding::is_u)) {
467
0
                parse_codepoint_ref();
468
0
            } else {
469
0
                parse_error("invalid escape sequence");
470
0
            }
471
0
        }
472
473
0
        unsigned parse_hex_quad() {
474
0
            unsigned codepoint = 0;
475
0
            for (int i = 0; i < 4; ++i) {
476
0
                int value = encoding.decode_hexdigit(
477
0
                    need_cur("invalid escape sequence"));
478
0
                if (value < 0) {
479
0
                    parse_error("invalid escape sequence");
480
0
                }
481
0
                codepoint *= 16;
482
0
                codepoint += value;
483
0
                next();
484
0
            }
485
0
            return codepoint;
486
0
        }
487
488
0
        static bool is_surrogate_high(unsigned codepoint) {
489
0
            return (codepoint & 0xfc00) == 0xd800;
490
0
        }
491
0
        static bool is_surrogate_low(unsigned codepoint) {
492
0
            return (codepoint & 0xfc00) == 0xdc00;
493
0
        }
494
0
        static unsigned combine_surrogates(unsigned high, unsigned low) {
495
0
            return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
496
0
        }
497
498
0
        void parse_codepoint_ref() {
499
0
            unsigned codepoint = parse_hex_quad();
500
0
            if (is_surrogate_low(codepoint)) {
501
0
                parse_error("invalid codepoint, stray low surrogate");
502
0
            }
503
0
            if (is_surrogate_high(codepoint)) {
504
0
                expect(&Encoding::is_backslash,
505
0
                    "invalid codepoint, stray high surrogate");
506
0
                expect(&Encoding::is_u,
507
0
                    "expected codepoint reference after high surrogate");
508
0
                int low = parse_hex_quad();
509
0
                if (!is_surrogate_low(low)) {
510
0
                    parse_error("expected low surrogate after high surrogate");
511
0
                }
512
0
                codepoint = combine_surrogates(codepoint, low);
513
0
            }
514
0
            feed(codepoint);
515
0
        }
516
517
0
        void feed(unsigned codepoint) {
518
0
            encoding.feed_codepoint(codepoint,
519
0
                                    boost::bind(&Callbacks::on_code_unit,
520
0
                                                boost::ref(callbacks), boost::placeholders::_1));
521
0
        }
522
523
        Callbacks& callbacks;
524
        Encoding& encoding;
525
        source src;
526
    };
527
528
}}}}
529
530
#endif