Coverage Report

Created: 2026-02-14 08:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/AK/JsonParser.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/CharacterTypes.h>
8
#include <AK/FloatingPointStringConversions.h>
9
#include <AK/JsonArray.h>
10
#include <AK/JsonObject.h>
11
#include <AK/JsonParser.h>
12
#include <math.h>
13
14
namespace AK {
15
16
// Returns true for the four whitespace characters this parser skips between tokens:
// horizontal tab, line feed, carriage return and space.
constexpr bool is_space(int ch)
{
    switch (ch) {
    case '\t':
    case '\n':
    case '\r':
    case ' ':
        return true;
    default:
        return false;
    }
}
20
21
// ECMA-404 9 String
22
// Boils down to
23
// STRING = "\"" *("[^\"\\]" | "\\" ("[\"\\/bfnrt]" | "u[0-9A-Fa-f]{4}")) "\""
24
//     │├── " ──╮───────────────────────────────────────────────╭── " ──┤│
25
//              │                                               │
26
//              │  ╭───────────────────<─────────────────────╮  │
27
//              │  │                                         │  │
28
//              ╰──╰──╮───────────── [^"\\] ──────────────╭──╯──╯
29
//                    │                                   │
30
//                    ╰── \ ───╮──── ["\\/bfnrt] ──────╭──╯
31
//                             │                       │
32
//                             ╰─── u[0-9A-Fa-f]{4}  ──╯
33
//
34
// Consumes a double-quoted JSON string at the current position and returns its contents
// with all escape sequences resolved.
// Fails if the opening quote is missing, if a 0 byte (which peek() also reports at EOF) or an
// ASCII C0 control character appears inside the string, or if an escape sequence is invalid.
ErrorOr<ByteString> JsonParser::consume_and_unescape_string()
{
    if (!consume_specific('"'))
        return Error::from_string_literal("JsonParser: Expected '\"'");

    StringBuilder unescaped;

    while (true) {
        // OPTIMIZATION: Measure the longest run of literal bytes first and append it in one go.
        //               The scan also rejects the error conditions for unescaped bytes.
        // Note: Every UTF-8 encoded character is either plain ASCII or consists solely of bytes
        //       with their most significant bit set, which places them above plain ASCII in value.
        //       Multi-byte characters are therefore always made of "legal" non-special bytes,
        //       so iterating bytewise is sufficient and no code point iterator is needed.
        size_t run_length = 0;
        for (char next = peek(run_length); next != '"' && next != '\\'; next = peek(++run_length)) {
            // Note: peek() hands back a 0 byte once we run past the end of the input.
            if (next == 0)
                return Error::from_string_literal("JsonParser: EOF while parsing String");
            // Spec: All code points may be placed within the quotation marks except
            //       for the code points that must be escaped: quotation mark (U+0022),
            //       reverse solidus (U+005C), and the control characters U+0000 to U+001F.
            if (is_ascii_c0_control(next))
                return Error::from_string_literal("JsonParser: ASCII control sequence encountered");
        }
        unescaped.append(consume(run_length));

        // The scan above stops only at a closing quote or at a backslash.
        if (consume_specific('"'))
            break;

        // Skip the backslash that introduces the escape sequence.
        ignore();

        if (peek() == 'u') {
            ignore(); // 'u'

            // https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf
            //
            // To escape a code point that is not in the Basic Multilingual Plane, the character may be represented as a
            // twelve-character sequence, encoding the UTF-16 surrogate pair corresponding to the code point. So for
            // example, a string containing only the G clef character (U+1D11E) may be represented as "\uD834\uDD1E".
            // However, whether a processor of JSON texts interprets such a surrogate pair as a single code point or as an
            // explicit surrogate pair is a semantic decision that is determined by the specific processor.
            auto code_point = decode_single_or_paired_surrogate();
            if (code_point.is_error())
                return Error::from_string_literal("JsonParser: Error while parsing Unicode escape");

            unescaped.append_code_point(code_point.value());
            continue;
        }

        // Every remaining valid escape is a single character that maps to a single output byte.
        char replacement = 0;
        switch (peek()) {
        case '\0':
            return Error::from_string_literal("JsonParser: EOF while parsing String");
        case '"':
        case '\\':
        case '/':
            // These characters stand for themselves.
            replacement = peek();
            break;
        case 'b':
            replacement = '\b';
            break;
        case 'f':
            replacement = '\f';
            break;
        case 'n':
            replacement = '\n';
            break;
        case 'r':
            replacement = '\r';
            break;
        case 't':
            replacement = '\t';
            break;
        default:
            dbgln("JsonParser: Invalid escaped character '{}' ({:#x}) ", peek(), peek());
            return Error::from_string_literal("JsonParser: Invalid escaped character");
        }

        ignore(); // the escape character itself
        unescaped.append(replacement);
    }

    return unescaped.to_byte_string();
}
131
132
// Parses a JSON object starting at the current position and returns it wrapped in a JsonValue.
// Expected shape: '{' [ string ':' value { ',' string ':' value } ] '}', with optional
// whitespace (see is_space) around every token.
// Fails if one of the delimiters '{', ':', ',' or '}' is missing, if a member name or member
// value fails to parse, or if a ',' is directly followed by the closing '}' (trailing comma).
ErrorOr<JsonValue> JsonParser::parse_object()
{
    JsonObject object;
    if (!consume_specific('{'))
        return Error::from_string_literal("JsonParser: Expected '{'");
    for (;;) {
        ignore_while(is_space);
        // An empty object: the loop is only entered at this point with '}' right after '{',
        // because a '}' after a ',' is rejected at the bottom of the loop.
        if (peek() == '}')
            break;
        // Whitespace has already been skipped above and nothing was consumed since,
        // so the member name must start right here.
        auto name = TRY(consume_and_unescape_string());
        ignore_while(is_space);
        if (!consume_specific(':'))
            return Error::from_string_literal("JsonParser: Expected ':'");
        ignore_while(is_space);
        auto value = TRY(parse_helper());
        object.set(name, move(value));
        ignore_while(is_space);
        if (peek() == '}')
            break;
        if (!consume_specific(','))
            return Error::from_string_literal("JsonParser: Expected ','");
        ignore_while(is_space);
        // Reject trailing commas: after a ',' another member has to follow.
        if (peek() == '}')
            return Error::from_string_literal("JsonParser: Unexpected '}'");
    }
    // Both ways out of the loop have just peeked a '}', so this check is purely defensive.
    if (!consume_specific('}'))
        return Error::from_string_literal("JsonParser: Expected '}'");
    return JsonValue { move(object) };
}
162
163
// Parses a JSON array starting at the current position and returns it wrapped in a JsonValue.
// Expected shape: '[' [ value { ',' value } ] ']', with optional whitespace (see is_space)
// around every token.
// Fails if one of the delimiters '[', ',' or ']' is missing, if an element fails to parse or
// cannot be appended, or if a ',' is directly followed by the closing ']' (trailing comma).
ErrorOr<JsonValue> JsonParser::parse_array()
{
    JsonArray array;
    if (!consume_specific('['))
        return Error::from_string_literal("JsonParser: Expected '['");
    for (;;) {
        ignore_while(is_space);
        // An empty array: the loop is only entered at this point with ']' right after '[',
        // because a ']' after a ',' is rejected at the bottom of the loop.
        if (peek() == ']')
            break;
        auto element = TRY(parse_helper());
        TRY(array.append(move(element)));
        ignore_while(is_space);
        if (peek() == ']')
            break;
        if (!consume_specific(','))
            return Error::from_string_literal("JsonParser: Expected ','");
        ignore_while(is_space);
        // Reject trailing commas: after a ',' another element has to follow.
        if (peek() == ']')
            return Error::from_string_literal("JsonParser: Unexpected ']'");
    }
    // Both ways out of the loop have just skipped whitespace and peeked a ']', so no further
    // whitespace skip is needed and the check below is purely defensive.
    if (!consume_specific(']'))
        return Error::from_string_literal("JsonParser: Expected ']'");
    return JsonValue { move(array) };
}
188
189
// Parses a JSON string at the current position and wraps the unescaped text in a JsonValue.
// Any error from consume_and_unescape_string() is propagated unchanged.
ErrorOr<JsonValue> JsonParser::parse_string()
{
    return JsonValue(TRY(consume_and_unescape_string()));
}
194
195
// Parses a JSON number at the current position.
// Plain integers are returned as u64 when they fit, otherwise as i64 when they fit; everything
// else (a fraction, an exponent, negative zero, or an integer fitting neither type) is returned
// as a double.
// Fails on a '-' without digits, on leading zeros, on a '.' without a following digit, on an
// exponent marker without digits, or if the floating point parser rejects the text.
ErrorOr<JsonValue> JsonParser::parse_number()
{
    auto start_index = tell();

    // Collects the sign and the digits of the integer part for the integer conversions below.
    Vector<char, 32> number_buffer;

    // Re-reads the number from its first character as a double and, on success,
    // moves the read position to just past the characters the floating point parser accepted.
    auto fallback_to_double_parse = [&]() -> ErrorOr<JsonValue> {
#ifdef KERNEL
#    error JSONParser is currently not available for the Kernel because it disallows floating point. \
       If you want to make this KERNEL compatible you can just make this fallback_to_double \
       function fail with an error in KERNEL mode.
#endif
        // FIXME: Since we know all the characters so far are ascii digits (and one . or e) we could
        //        use that in the floating point parser.
        StringView remaining_input = m_input.substring_view(start_index);
        char const* first = remaining_input.characters_without_null_termination();
        auto parse_result = parse_first_floating_point(first, first + remaining_input.length());

        if (!parse_result.parsed_value())
            return Error::from_string_literal("JsonParser: Invalid floating point");

        m_index = start_index + (parse_result.end_ptr - first);
        return JsonValue(parse_result.value);
    };

    bool const negative = peek() == '-';
    if (negative) {
        number_buffer.append('-');
        ++m_index;

        if (!is_ascii_digit(peek()))
            return Error::from_string_literal("JsonParser: Unexpected '-' without further digits");
    }

    // Leading zeros are not allowed. A lone zero may still be followed by a '.' or an 'e';
    // those cases are picked up below once the digit run has been consumed.
    if (peek() == '0' && is_ascii_digit(peek(1)))
        return Error::from_string_literal("JsonParser: Cannot have leading zeros");

    // Consume the run of digits that forms the integer part.
    bool all_zero = true;
    while (is_ascii_digit(peek())) {
        char const digit = peek();
        if (digit != '0')
            all_zero = false;

        number_buffer.append(digit);
        ++m_index;
    }

    // The first character after the digits decides whether this is a floating point number.
    char const terminator = peek();
    if (terminator == '.') {
        if (!is_ascii_digit(peek(1)))
            return Error::from_string_literal("JsonParser: Must have digits after decimal point");

        return fallback_to_double_parse();
    }
    if (terminator == 'e' || terminator == 'E') {
        char const after_marker = peek(1);
        bool const has_sign = after_marker == '+' || after_marker == '-';
        bool const exponent_has_digits = is_ascii_digit(after_marker) || (has_sign && is_ascii_digit(peek(2)));
        if (!exponent_has_digits)
            return Error::from_string_literal("JsonParser: Must have digits after exponent with an optional sign inbetween");

        return fallback_to_double_parse();
    }

    // Negative zero is always a double
    if (negative && all_zero)
        return JsonValue(-0.0);

    StringView number_string(number_buffer.data(), number_buffer.size());

    if (auto as_unsigned = number_string.to_number<u64>(); as_unsigned.has_value())
        return JsonValue(*as_unsigned);
    if (auto as_signed = number_string.to_number<i64>(); as_signed.has_value())
        return JsonValue(*as_signed);

    // Neither integer type can hold the value (for example it is bigger than u64 max).
    return fallback_to_double_parse();
}
287
288
// Consumes the literal `true` at the current position and returns a boolean JsonValue,
// or fails if the input does not continue with exactly that literal.
ErrorOr<JsonValue> JsonParser::parse_true()
{
    if (consume_specific("true"sv))
        return JsonValue(true);
    return Error::from_string_literal("JsonParser: Expected 'true'");
}
294
295
// Consumes the literal `false` at the current position and returns a boolean JsonValue,
// or fails if the input does not continue with exactly that literal.
ErrorOr<JsonValue> JsonParser::parse_false()
{
    if (consume_specific("false"sv))
        return JsonValue(false);
    return Error::from_string_literal("JsonParser: Expected 'false'");
}
301
302
// Consumes the literal `null` at the current position and returns a default-constructed
// JsonValue, or fails if the input does not continue with exactly that literal.
ErrorOr<JsonValue> JsonParser::parse_null()
{
    if (consume_specific("null"sv))
        return JsonValue {};
    return Error::from_string_literal("JsonParser: Expected 'null'");
}
308
309
// Skips leading whitespace and parses exactly one JSON value, picking the concrete parser
// from the first character of the value.
// Fails with "Unexpected character" if that character cannot start any JSON value;
// errors from the selected parser are passed through.
ErrorOr<JsonValue> JsonParser::parse_helper()
{
    ignore_while(is_space);

    char const first = peek();

    // Numbers start with a minus sign or a digit.
    if (first == '-' || is_ascii_digit(first))
        return parse_number();

    switch (first) {
    case '{':
        return parse_object();
    case '[':
        return parse_array();
    case '"':
        return parse_string();
    case 'f':
        return parse_false();
    case 't':
        return parse_true();
    case 'n':
        return parse_null();
    default:
        break;
    }

    return Error::from_string_literal("JsonParser: Unexpected character");
}
342
343
// Parses the whole input as a single JSON value.
// Only whitespace may follow the value; any other leftover input is an error.
ErrorOr<JsonValue> JsonParser::parse()
{
    auto value = TRY(parse_helper());

    ignore_while(is_space);
    if (is_eof())
        return value;

    return Error::from_string_literal("JsonParser: Didn't consume all input");
}
351
352
}