Coverage Report

Created: 2025-08-28 06:51

/src/boost/boost/json/basic_parser.hpp
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3
// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
4
//
5
// Distributed under the Boost Software License, Version 1.0. (See accompanying
6
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
//
8
// Official repository: https://github.com/boostorg/json
9
//
10
11
#ifndef BOOST_JSON_BASIC_PARSER_HPP
12
#define BOOST_JSON_BASIC_PARSER_HPP
13
14
#include <boost/json/detail/config.hpp>
15
#include <boost/json/detail/except.hpp>
16
#include <boost/json/error.hpp>
17
#include <boost/json/kind.hpp>
18
#include <boost/json/parse_options.hpp>
19
#include <boost/json/detail/stack.hpp>
20
#include <boost/json/detail/stream.hpp>
21
#include <boost/json/detail/utf8.hpp>
22
#include <boost/json/detail/sbo_buffer.hpp>
23
24
namespace boost {
25
namespace json {
26
27
/** An incremental SAX parser for serialized JSON.
28
29
    This implements a SAX-style parser, invoking a caller-supplied handler with
30
    each parsing event. To use, first declare a variable of type
31
    `basic_parser<T>` where `T` meets the handler requirements specified below.
32
    Then call @ref write_some one or more times with the input, setting
33
    `more = false` on the final buffer. The parsing events are realized through
34
    member function calls on the handler, which exists as a data member of the
35
    parser.
36
37
    The parser may dynamically allocate intermediate storage as needed to
38
    accommodate the nesting level of the input JSON. On subsequent invocations,
39
    the parser can cheaply re-use this memory, improving performance. This
40
    storage is freed when the parser is destroyed.
41
42
    @par Usage
43
    To get the declaration and function definitions for this class it is
44
    necessary to include this file instead:
45
    @code
46
    #include <boost/json/basic_parser_impl.hpp>
47
    @endcode
48
49
    Users who wish to parse JSON into the DOM container @ref value will not use
50
    this class directly; instead they will create an instance of @ref parser or
51
    @ref stream_parser and use that instead. Alternatively, they may call the
52
    function @ref parse. This class is designed for users who wish to perform
53
    custom actions instead of building a @ref value. For example, to produce a
54
    DOM from an external library.
55
56
    @note
57
    By default, only conforming JSON using UTF-8 encoding is accepted. However,
58
    select non-compliant syntax can be allowed by construction using a
59
    @ref parse_options set to desired values.
60
61
    @par Handler
62
    The handler provided must be implemented as an object of class type which
63
    defines each of the required event member functions below. The event
64
    functions return a `bool` where `true` indicates success, and `false`
65
    indicates failure. If the member function returns `false`, it must set the
66
    error code to a suitable value. This error code will be returned by the
67
    write function to the caller.
68
69
    Handlers are required to declare the maximum limits on various elements. If
70
    these limits are exceeded during parsing, then parsing fails with an error.
71
72
    The following declaration meets the parser's handler requirements:
73
74
    @code
75
    struct handler
76
    {
77
        /// The maximum number of elements allowed in an array
78
        static constexpr std::size_t max_array_size = -1;
79
80
        /// The maximum number of elements allowed in an object
81
        static constexpr std::size_t max_object_size = -1;
82
83
        /// The maximum number of characters allowed in a string
84
        static constexpr std::size_t max_string_size = -1;
85
86
        /// The maximum number of characters allowed in a key
87
        static constexpr std::size_t max_key_size = -1;
88
89
        /// Called once when the JSON parsing begins.
90
        ///
91
        /// @return `true` on success.
92
        /// @param ec Set to the error, if any occurred.
93
        ///
94
        bool on_document_begin( error_code& ec );
95
96
        /// Called when the JSON parsing is done.
97
        ///
98
        /// @return `true` on success.
99
        /// @param ec Set to the error, if any occurred.
100
        ///
101
        bool on_document_end( error_code& ec );
102
103
        /// Called when the beginning of an array is encountered.
104
        ///
105
        /// @return `true` on success.
106
        /// @param ec Set to the error, if any occurred.
107
        ///
108
        bool on_array_begin( error_code& ec );
109
110
        /// Called when the end of the current array is encountered.
111
        ///
112
        /// @return `true` on success.
113
        /// @param n The number of elements in the array.
114
        /// @param ec Set to the error, if any occurred.
115
        ///
116
        bool on_array_end( std::size_t n, error_code& ec );
117
118
        /// Called when the beginning of an object is encountered.
119
        ///
120
        /// @return `true` on success.
121
        /// @param ec Set to the error, if any occurred.
122
        ///
123
        bool on_object_begin( error_code& ec );
124
125
        /// Called when the end of the current object is encountered.
126
        ///
127
        /// @return `true` on success.
128
        /// @param n The number of elements in the object.
129
        /// @param ec Set to the error, if any occurred.
130
        ///
131
        bool on_object_end( std::size_t n, error_code& ec );
132
133
        /// Called with characters corresponding to part of the current string.
134
        ///
135
        /// @return `true` on success.
136
        /// @param s The partial characters
137
        /// @param n The total size of the string thus far
138
        /// @param ec Set to the error, if any occurred.
139
        ///
140
        bool on_string_part( string_view s, std::size_t n, error_code& ec );
141
142
        /// Called with the last characters corresponding to the current string.
143
        ///
144
        /// @return `true` on success.
145
        /// @param s The remaining characters
146
        /// @param n The total size of the string
147
        /// @param ec Set to the error, if any occurred.
148
        ///
149
        bool on_string( string_view s, std::size_t n, error_code& ec );
150
151
        /// Called with characters corresponding to part of the current key.
152
        ///
153
        /// @return `true` on success.
154
        /// @param s The partial characters
155
        /// @param n The total size of the key thus far
156
        /// @param ec Set to the error, if any occurred.
157
        ///
158
        bool on_key_part( string_view s, std::size_t n, error_code& ec );
159
160
        /// Called with the last characters corresponding to the current key.
161
        ///
162
        /// @return `true` on success.
163
        /// @param s The remaining characters
164
        /// @param n The total size of the key
165
        /// @param ec Set to the error, if any occurred.
166
        ///
167
        bool on_key( string_view s, std::size_t n, error_code& ec );
168
169
        /// Called with the characters corresponding to part of the current number.
170
        ///
171
        /// @return `true` on success.
172
        /// @param s The partial characters
173
        /// @param ec Set to the error, if any occurred.
174
        ///
175
        bool on_number_part( string_view s, error_code& ec );
176
177
        /// Called when a signed integer is parsed.
178
        ///
179
        /// @return `true` on success.
180
        /// @param i The value
181
        /// @param s The remaining characters
182
        /// @param ec Set to the error, if any occurred.
183
        ///
184
        bool on_int64( int64_t i, string_view s, error_code& ec );
185
186
        /// Called when an unsigned integer is parsed.
187
        ///
188
        /// @return `true` on success.
189
        /// @param u The value
190
        /// @param s The remaining characters
191
        /// @param ec Set to the error, if any occurred.
192
        ///
193
        bool on_uint64( uint64_t u, string_view s, error_code& ec );
194
195
        /// Called when a double is parsed.
196
        ///
197
        /// @return `true` on success.
198
        /// @param d The value
199
        /// @param s The remaining characters
200
        /// @param ec Set to the error, if any occurred.
201
        ///
202
        bool on_double( double d, string_view s, error_code& ec );
203
204
        /// Called when a boolean is parsed.
205
        ///
206
        /// @return `true` on success.
207
        /// @param b The value
208
        ///
209
        /// @param ec Set to the error, if any occurred.
210
        ///
211
        bool on_bool( bool b, error_code& ec );
212
213
        /// Called when a null is parsed.
214
        ///
215
        /// @return `true` on success.
216
        /// @param ec Set to the error, if any occurred.
217
        ///
218
        bool on_null( error_code& ec );
219
220
        /// Called with characters corresponding to part of the current comment.
221
        ///
222
        /// @return `true` on success.
223
        /// @param s The partial characters.
224
        /// @param ec Set to the error, if any occurred.
225
        ///
226
        bool on_comment_part( string_view s, error_code& ec );
227
228
        /// Called with the last characters corresponding to the current comment.
229
        ///
230
        /// @return `true` on success.
231
        /// @param s The remaining characters
232
        /// @param ec Set to the error, if any occurred.
233
        ///
234
        bool on_comment( string_view s, error_code& ec );
235
    };
236
    @endcode
237
238
    @see
239
        @ref parse,
240
        @ref stream_parser,
241
        \<\<examples_validate, validating parser example\>\>.
242
*/
243
template<class Handler>
244
class basic_parser
245
{
246
    // Identifies a position inside the parser. Values of this type are the
    // `state st` argument of the suspend(), maybe_suspend() and
    // suspend_or_fail() helpers declared further down in this class.
    // The underlying type is `char`.
    //
    // NOTE(review): the prefixes presumably group the positions by grammar
    // element (doc = document, com = comment, lit = literal, str = string,
    // sur = surrogate pair, obj = object, arr = array, num = number,
    // exp = exponent, val = value) -- confirm against the implementation file.
    enum class state : char
247
    {
248
        doc1,  doc3,
249
        com1,  com2,  com3, com4,
250
        lit1,
251
        str1,  str2,  str3,  str4,
252
        str5,  str6,  str7,  str8,
253
        sur1,  sur2,  sur3,
254
        sur4,  sur5,  sur6,
255
        obj1,  obj2,  obj3,  obj4,
256
        obj5,  obj6,  obj7,  obj8,
257
        obj9,  obj10, obj11,
258
        arr1,  arr2,  arr3,
259
        arr4,  arr5,  arr6,
260
        num1,  num2,  num3,  num4,
261
        num5,  num6,  num7,  num8,
262
        exp1,  exp2,  exp3,
263
        val1,  val2, val3
264
    };
265
266
    // Plain aggregate describing a number. The parser keeps one instance as
    // the `num_` data member, and the suspend()/maybe_suspend() overloads
    // taking `const number&` receive one by reference.
    //
    // NOTE(review): field meanings are inferred from their names only --
    // presumably mant = mantissa digits, bias = decimal exponent adjustment,
    // exp = explicit exponent, frac = exponent sign or fraction flag,
    // neg = number is negative. Confirm against the implementation file.
    struct number
267
    {
268
        uint64_t mant;
269
        int bias;
270
        int exp;
271
        bool frac;
272
        bool neg;
273
    };
274
275
    template< bool StackEmpty_, char First_ >
276
    struct parse_number_helper;
277
278
    // optimization: must come first
279
    Handler h_;
280
281
    number num_;
282
    system::error_code ec_;
283
    detail::stack st_;
284
    detail::utf8_sequence seq_;
285
    unsigned u1_;
286
    unsigned u2_;
287
    bool more_; // false for final buffer
288
    bool done_ = false; // true on complete parse
289
    bool clean_ = true; // write_some exited cleanly
290
    const char* end_;
291
    detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_;
292
    parse_options opt_;
293
    // how many levels deeper the parser can go
294
    std::size_t depth_ = opt_.max_depth;
295
    unsigned char cur_lit_ = 0;
296
    unsigned char lit_offset_ = 0;
297
298
    inline void reserve();
299
    inline const char* sentinel();
300
    inline bool incomplete(
301
        const detail::const_stream_wrapper& cs);
302
303
#ifdef __INTEL_COMPILER
304
#pragma warning push
305
#pragma warning disable 2196
306
#endif
307
308
    BOOST_NOINLINE
309
    inline
310
    const char*
311
    suspend_or_fail(state st);
312
313
    BOOST_NOINLINE
314
    inline
315
    const char*
316
    suspend_or_fail(
317
        state st,
318
        std::size_t n);
319
320
    BOOST_NOINLINE
321
    inline
322
    const char*
323
    fail(const char* p) noexcept;
324
325
    BOOST_NOINLINE
326
    inline
327
    const char*
328
    fail(
329
        const char* p,
330
        error ev,
331
        source_location const* loc) noexcept;
332
333
    BOOST_NOINLINE
334
    inline
335
    const char*
336
    maybe_suspend(
337
        const char* p,
338
        state st);
339
340
    BOOST_NOINLINE
341
    inline
342
    const char*
343
    maybe_suspend(
344
        const char* p,
345
        state st,
346
        std::size_t n);
347
348
    BOOST_NOINLINE
349
    inline
350
    const char*
351
    maybe_suspend(
352
        const char* p,
353
        state st,
354
        const number& num);
355
356
    BOOST_NOINLINE
357
    inline
358
    const char*
359
    suspend(
360
        const char* p,
361
        state st);
362
363
    BOOST_NOINLINE
364
    inline
365
    const char*
366
    suspend(
367
        const char* p,
368
        state st,
369
        const number& num);
370
371
#ifdef __INTEL_COMPILER
372
#pragma warning pop
373
#endif
374
375
    template<bool StackEmpty_/*, bool Terminal_*/>
376
    const char* parse_comment(const char* p,
377
        std::integral_constant<bool, StackEmpty_> stack_empty,
378
        /*std::integral_constant<bool, Terminal_>*/ bool terminal);
379
380
    template<bool StackEmpty_>
381
    const char* parse_document(const char* p,
382
        std::integral_constant<bool, StackEmpty_> stack_empty);
383
384
    template<bool StackEmpty_, bool AllowComments_/*,
385
        bool AllowTrailing_, bool AllowBadUTF8_*/>
386
    const char* parse_value(const char* p,
387
        std::integral_constant<bool, StackEmpty_> stack_empty,
388
        std::integral_constant<bool, AllowComments_> allow_comments,
389
        /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
390
        /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
391
        bool allow_bad_utf16);
392
393
    template<bool AllowComments_/*,
394
        bool AllowTrailing_, bool AllowBadUTF8_*/>
395
    const char* resume_value(const char* p,
396
        std::integral_constant<bool, AllowComments_> allow_comments,
397
        /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
398
        /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
399
        bool allow_bad_utf16);
400
401
    template<bool StackEmpty_, bool AllowComments_/*,
402
        bool AllowTrailing_, bool AllowBadUTF8_*/>
403
    const char* parse_object(const char* p,
404
        std::integral_constant<bool, StackEmpty_> stack_empty,
405
        std::integral_constant<bool, AllowComments_> allow_comments,
406
        /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
407
        /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
408
        bool allow_bad_utf16);
409
410
    template<bool StackEmpty_, bool AllowComments_/*,
411
        bool AllowTrailing_, bool AllowBadUTF8_*/>
412
    const char* parse_array(const char* p,
413
        std::integral_constant<bool, StackEmpty_> stack_empty,
414
        std::integral_constant<bool, AllowComments_> allow_comments,
415
        /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
416
        /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
417
        bool allow_bad_utf16);
418
419
    template<class Literal>
420
    const char* parse_literal(const char* p, Literal literal);
421
422
    template<bool StackEmpty_, bool IsKey_>
423
    const char* parse_string(const char* p,
424
        std::integral_constant<bool, StackEmpty_> stack_empty,
425
        std::integral_constant<bool, IsKey_> is_key,
426
        bool allow_bad_utf8,
427
        bool allow_bad_utf16);
428
429
    template<bool StackEmpty_>
430
    const char* parse_escaped(
431
        const char* p,
432
        std::size_t& total,
433
        std::integral_constant<bool, StackEmpty_> stack_empty,
434
        bool is_key,
435
        bool allow_bad_utf16);
436
437
    template<bool StackEmpty_, char First_, number_precision Numbers_>
438
    const char* parse_number(const char* p,
439
        std::integral_constant<bool, StackEmpty_> stack_empty,
440
        std::integral_constant<char, First_> first,
441
        std::integral_constant<number_precision, Numbers_> numbers);
442
443
    // intentionally private
    //
    // Returns the number of nesting levels currently in use. `depth_` holds
    // how many levels deeper the parser can still go and starts out equal to
    // `opt_.max_depth`, so the difference is the number of levels entered.
444
    std::size_t
445
    depth() const noexcept
446
10.9k
    {
447
10.9k
        return opt_.max_depth - depth_;
448
10.9k
    }
449
450
public:
451
    /** Destructor.
452
453
        All dynamically allocated internal memory is freed.
454
455
        @par Effects
456
        @code
457
        handler().~Handler()
458
        @endcode
459
460
        @par Complexity
461
        Same as `~Handler()`.
462
463
        @par Exception Safety
464
        Same as `~Handler()`.
465
    */
466
23.4k
    ~basic_parser() = default;
467
468
    /** Constructors.
469
470
        Overload **(1)** constructs the parser with the specified options, with
471
        any additional arguments forwarded to the handler's constructor.
472
473
        `basic_parser` is not copyable or movable, so the copy constructor is
474
        deleted.
475
476
        @par Complexity
477
        Same as `Handler( std::forward< Args >( args )... )`.
478
479
        @par Exception Safety
480
        Same as `Handler( std::forward< Args >( args )... )`.
481
482
        @param opt Configuration settings for the parser. If this structure is
483
               default constructed, the parser will accept only standard JSON.
484
        @param args Optional additional arguments forwarded to the handler's
485
               constructor.
486
487
        @{
488
    */
489
    template<class... Args>
490
    explicit
491
    basic_parser(
492
        parse_options const& opt,
493
        Args&&... args);
494
495
    /// Overload
496
    basic_parser(
497
        basic_parser const&) = delete;
498
    /// @}
499
500
    /** Assignment.
501
502
        This type cannot be copied or moved. The copy assignment is deleted.
503
    */
504
    basic_parser& operator=(
505
        basic_parser const&) = delete;
506
507
    /** Return a reference to the handler.
508
509
        This function provides access to the constructed
510
        instance of the handler owned by the parser.
511
512
        @par Complexity
513
        Constant.
514
515
        @par Exception Safety
516
        No-throw guarantee.
517
518
        @{
519
    */
520
    Handler&
521
    handler() noexcept
522
47.8k
    {
523
47.8k
        // h_ is the handler instance stored by value in the parser; the
        // caller gets a mutable reference to it, not a copy.
        return h_;
524
47.8k
    }
525
526
    Handler const&
527
    handler() const noexcept
528
    {
529
        // Read-only overload: same stored handler (h_), exposed as const.
        return h_;
530
    }
531
    /// @}
532
533
    /** Return the last error.
534
535
        This returns the last error code which
536
        was generated in the most recent call
537
        to @ref write_some.
538
539
        @par Complexity
540
        Constant.
541
542
        @par Exception Safety
543
        No-throw guarantee.
544
    */
545
    system::error_code
546
    last_error() const noexcept
547
0
    {
548
0
        // Returned by value: the caller receives a copy of the stored ec_.
        return ec_;
549
0
    }
550
551
    /** Check if a complete JSON text has been parsed.
552
553
        This function returns `true` when all of these conditions are met:
554
555
        @li A complete serialized JSON text has been presented to the parser,
556
            and
557
        @li No error or exception has occurred since the parser was
558
            constructed, or since the last call to @ref reset.
559
560
        @par Complexity
561
        Constant.
562
563
        @par Exception Safety
564
        No-throw guarantee.
565
    */
566
    bool
567
    done() const noexcept
568
9.86k
    {
569
9.86k
        // done_ is default-initialized to false in its member declaration.
        return done_;
570
9.86k
    }
571
572
    /** Reset the state, to parse a new document.
573
574
        This function discards the current parsing
575
        state, to prepare for parsing a new document.
576
        Dynamically allocated temporary memory used
577
        by the implementation is not deallocated.
578
579
        @par Complexity
580
        Constant.
581
582
        @par Exception Safety
583
        No-throw guarantee.
584
    */
585
    void
586
    reset() noexcept;
587
588
    /** Indicate a parsing failure.
589
590
        This changes the state of the parser to indicate that the parse has
591
        failed. A parser implementation can use this to fail the parser if
592
        needed due to external inputs.
593
594
        @attention
595
        If `! ec.failed()`, an implementation-defined error code that indicates
596
        failure will be stored instead.
597
598
        @par Complexity
599
        Constant.
600
601
        @par Exception Safety
602
        No-throw guarantee.
603
604
        @param ec The error code to set.
605
    */
606
    void
607
    fail(system::error_code ec) noexcept;
608
609
    /** Parse some of the input characters as JSON, incrementally.
610
611
        This function parses the JSON text in the specified buffer, calling the
612
        handler to emit each SAX parsing event. The parse proceeds from the
613
        current state, which is at the beginning of a new JSON or in the middle
614
        of the current JSON if any characters were already parsed.
615
616
        The characters in the buffer are processed starting from the beginning,
617
        until one of the following conditions is met:
618
619
        @li All of the characters in the buffer have been parsed, or
620
        @li Some of the characters in the buffer have been parsed and the JSON
621
            is complete, or
622
        @li A parsing error occurs.
623
624
        The supplied buffer does not need to contain the entire JSON.
625
        Subsequent calls can provide more serialized data, allowing JSON to be
626
        processed incrementally. The end of the serialized JSON can be
627
        indicated by passing `more = false`.
628
629
        @par Complexity
630
        Linear in `size`.
631
632
        @par Exception Safety
633
        Basic guarantee. Calls to the handler may throw.
634
635
        Upon error or exception, subsequent calls will fail until @ref reset
636
        is called to parse a new JSON.
637
638
        @return The number of characters successfully
639
        parsed, which may be smaller than `size`.
640
641
        @param more `true` if there are possibly more buffers in the current
642
               JSON, otherwise `false`.
643
644
        @param data A pointer to a buffer of `size` characters to parse.
645
646
        @param size The number of characters pointed to by `data`.
647
648
        @param ec Set to the error, if any occurred.
649
650
        @{
651
    */
652
    std::size_t
653
    write_some(
654
        bool more,
655
        char const* data,
656
        std::size_t size,
657
        system::error_code& ec);
658
659
    std::size_t
660
    write_some(
661
        bool more,
662
        char const* data,
663
        std::size_t size,
664
        std::error_code& ec);
665
    /// @}
666
};
667
668
} // namespace json
669
} // namespace boost
670
671
#endif