/src/hermes/lib/VM/JSLib/JSONLexer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #include "JSONLexer.h" |
9 | | |
10 | | #include "hermes/VM/StringPrimitive.h" |
11 | | #include "llvh/ADT/ScopeExit.h" |
12 | | |
13 | | #include "dtoa/dtoa.h" |
14 | | |
15 | | namespace hermes { |
16 | | namespace vm { |
17 | | |
/// Spellings of the three JSON literal keywords. They are handed to scanWord()
/// when the lexer sees the keyword's first character. The pointers are
/// constexpr so they cannot be reseated at runtime.
static constexpr const char *TrueString = "true";
static constexpr const char *FalseString = "false";
static constexpr const char *NullString = "null";
21 | | |
/// \return true if \p ch is one of the four JSONWhiteSpace code units:
/// <TAB>, <LF>, <CR> or <SP>.
static bool isJSONWhiteSpace(char16_t ch) {
  switch (ch) {
    case u'\t':
    case u'\n':
    case u'\r':
    case u' ':
      return true;
    default:
      return false;
  }
}
26 | | |
27 | 0 | ExecutionStatus JSONLexer::advance() { |
28 | 0 | return advanceHelper(false); |
29 | 0 | } |
30 | | |
31 | 0 | ExecutionStatus JSONLexer::advanceStrAsSymbol() { |
32 | 0 | return advanceHelper(true); |
33 | 0 | } |
34 | | |
35 | 0 | ExecutionStatus JSONLexer::advanceHelper(bool forKey) { |
36 | | // Skip whitespaces. |
37 | 0 | while (curCharPtr_.hasChar() && isJSONWhiteSpace(*curCharPtr_)) { |
38 | 0 | ++curCharPtr_; |
39 | 0 | } |
40 | | |
41 | | // End of buffer. |
42 | 0 | if (!curCharPtr_.hasChar()) { |
43 | 0 | token_.setEof(); |
44 | 0 | return ExecutionStatus::RETURNED; |
45 | 0 | } |
46 | | |
47 | 0 | token_.setFirstChar(*curCharPtr_); |
48 | |
|
49 | 0 | #define PUNC(ch, tok) \ |
50 | 0 | case ch: \ |
51 | 0 | token_.setPunctuator(tok); \ |
52 | 0 | ++curCharPtr_; \ |
53 | 0 | return ExecutionStatus::RETURNED |
54 | |
|
55 | 0 | #define WORD(ch, word, tok) \ |
56 | 0 | case ch: \ |
57 | 0 | return scanWord(word, tok) |
58 | |
|
59 | 0 | switch (*curCharPtr_) { |
60 | 0 | PUNC(u'{', JSONTokenKind::LBrace); |
61 | 0 | PUNC(u'}', JSONTokenKind::RBrace); |
62 | 0 | PUNC(u'[', JSONTokenKind::LSquare); |
63 | 0 | PUNC(u']', JSONTokenKind::RSquare); |
64 | 0 | PUNC(u',', JSONTokenKind::Comma); |
65 | 0 | PUNC(u':', JSONTokenKind::Colon); |
66 | 0 | WORD(u't', TrueString, JSONTokenKind::True); |
67 | 0 | WORD(u'f', FalseString, JSONTokenKind::False); |
68 | 0 | WORD(u'n', NullString, JSONTokenKind::Null); |
69 | | |
70 | | // clang-format off |
71 | 0 | case u'-': |
72 | 0 | case u'0': case u'1': case u'2': case u'3': case u'4': |
73 | 0 | case u'5': case u'6': case u'7': case u'8': case u'9': |
74 | | // clang-format on |
75 | 0 | return scanNumber(); |
76 | | |
77 | 0 | case u'"': |
78 | 0 | if (forKey) { |
79 | 0 | return scanString<StrAsSymbol>(); |
80 | 0 | } else { |
81 | 0 | return scanString<StrAsValue>(); |
82 | 0 | } |
83 | | |
84 | 0 | default: |
85 | 0 | return errorWithChar(u"Unexpected character: ", *curCharPtr_); |
86 | 0 | } |
87 | 0 | } |
88 | | |
89 | 0 | CallResult<char16_t> JSONLexer::consumeUnicode() { |
90 | 0 | uint16_t val = 0; |
91 | 0 | for (unsigned i = 0; i < 4; ++i) { |
92 | 0 | if (!curCharPtr_.hasChar()) { |
93 | 0 | return error("Unexpected end of input"); |
94 | 0 | } |
95 | 0 | int ch = *curCharPtr_ | 32; |
96 | 0 | if (ch >= '0' && ch <= '9') { |
97 | 0 | ch -= '0'; |
98 | 0 | } else if (ch >= 'a' && ch <= 'f') { |
99 | 0 | ch -= 'a' - 10; |
100 | 0 | } else { |
101 | 0 | return errorWithChar(u"Invalid unicode point character: ", *curCharPtr_); |
102 | 0 | } |
103 | 0 | val = (val << 4) + ch; |
104 | 0 | ++curCharPtr_; |
105 | 0 | } |
106 | | |
107 | 0 | return static_cast<char16_t>(val); |
108 | 0 | } |
109 | | |
110 | 0 | ExecutionStatus JSONLexer::scanNumber() { |
111 | 0 | llvh::SmallVector<char, 32> str8; |
112 | 0 | while (curCharPtr_.hasChar()) { |
113 | 0 | auto ch = *curCharPtr_; |
114 | 0 | if (!(ch == u'-' || ch == u'+' || ch == u'.' || (ch | 32) == u'e' || |
115 | 0 | (ch >= u'0' && ch <= u'9'))) { |
116 | 0 | break; |
117 | 0 | } |
118 | 0 | str8.push_back(ch); |
119 | 0 | ++curCharPtr_; |
120 | 0 | } |
121 | |
|
122 | 0 | size_t len = str8.size(); |
123 | 0 | assert(len > 0 && "scanNumber must be called on a number-looking char"); |
124 | 0 | if (str8[0] == '0' && len > 1 && str8[1] >= '0' && str8[1] <= '9') { |
125 | | // The integer part cannot start with 0, unless it's 0. |
126 | 0 | return errorWithChar(u"Unexpected character in number: ", str8[1]); |
127 | 0 | } |
128 | | |
129 | 0 | str8.push_back('\0'); |
130 | |
|
131 | 0 | char *endPtr; |
132 | 0 | double value = ::hermes_g_strtod(str8.data(), &endPtr); |
133 | 0 | if (endPtr != str8.data() + len) { |
134 | 0 | return errorWithChar(u"Unexpected character in number: ", *endPtr); |
135 | 0 | } |
136 | 0 | token_.setNumber(value); |
137 | 0 | return ExecutionStatus::RETURNED; |
138 | 0 | } |
139 | | |
140 | | template <typename ForKey> |
141 | 0 | ExecutionStatus JSONLexer::scanString() { |
142 | 0 | assert(*curCharPtr_ == '"'); |
143 | 0 | ++curCharPtr_; |
144 | 0 | bool hasEscape = false; |
145 | | // Ideally we don't have to use tmpStorage. In the case of a plain string with |
146 | | // no escapes, we construct an ArrayRef at the end of scanning that points to |
147 | | // the beginning and end of the string. |
148 | 0 | SmallU16String<32> tmpStorage; |
149 | 0 | curCharPtr_.beginCapture(); |
150 | | // Make sure we don't somehow leave a dangling open capture. |
151 | 0 | auto ensureCaptureClosed = |
152 | 0 | llvh::make_scope_exit([this] { curCharPtr_.cancelCapture(); }); Unexecuted instantiation: hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, true> >()::{lambda()#1}::operator()() const Unexecuted instantiation: hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, false> >()::{lambda()#1}::operator()() const |
153 | 0 | bool allAscii = true; |
154 | 0 | hermes::JenkinsHash hash = hermes::JenkinsHashInit; |
155 | |
|
156 | 0 | while (curCharPtr_.hasChar()) { |
157 | 0 | if (*curCharPtr_ == '"') { |
158 | | // End of string. |
159 | 0 | llvh::ArrayRef<char16_t> strRef = |
160 | 0 | hasEscape ? tmpStorage.arrayRef() : curCharPtr_.endCapture(); |
161 | 0 | ++curCharPtr_; |
162 | 0 | if constexpr (ForKey::value) { |
163 | 0 | auto symRes = runtime_.getIdentifierTable().getSymbolHandle( |
164 | 0 | runtime_, strRef, hash); |
165 | 0 | if (symRes == ExecutionStatus::EXCEPTION) |
166 | 0 | return ExecutionStatus::EXCEPTION; |
167 | 0 | token_.setSymbol(*symRes); |
168 | 0 | return ExecutionStatus::RETURNED; |
169 | 0 | } |
170 | 0 | auto strRes = |
171 | 0 | StringPrimitive::createWithKnownEncoding(runtime_, strRef, allAscii); |
172 | 0 | if (LLVM_UNLIKELY(strRes == ExecutionStatus::EXCEPTION)) { |
173 | 0 | return ExecutionStatus::EXCEPTION; |
174 | 0 | } |
175 | 0 | token_.setString(runtime_.makeHandle<StringPrimitive>(*strRes)); |
176 | 0 | return ExecutionStatus::RETURNED; |
177 | 0 | } else if (*curCharPtr_ <= '\u001F') { |
178 | 0 | return error(u"U+0000 thru U+001F is not allowed in string"); |
179 | 0 | } |
180 | 0 | char16_t scannedChar = -1; |
181 | 0 | if (*curCharPtr_ == u'\\') { |
182 | 0 | if (!hasEscape) { |
183 | | // This is the first escape character encountered, so append everything |
184 | | // we've seen so far to tmpStorage. |
185 | 0 | tmpStorage.append(curCharPtr_.endCapture()); |
186 | 0 | } |
187 | 0 | hasEscape = true; |
188 | 0 | ++curCharPtr_; |
189 | 0 | if (!curCharPtr_.hasChar()) { |
190 | 0 | return error("Unexpected end of input"); |
191 | 0 | } |
192 | 0 | switch (*curCharPtr_) { |
193 | 0 | #define CONSUME_VAL(v) \ |
194 | 0 | tmpStorage.push_back(v); \ |
195 | 0 | ++curCharPtr_; |
196 | | |
197 | 0 | case u'"': |
198 | 0 | case u'/': |
199 | 0 | case u'\\': |
200 | 0 | CONSUME_VAL(*curCharPtr_) |
201 | 0 | break; |
202 | 0 | case 'b': |
203 | 0 | CONSUME_VAL(8) |
204 | 0 | break; |
205 | 0 | case 'f': |
206 | 0 | CONSUME_VAL(12) |
207 | 0 | break; |
208 | 0 | case 'n': |
209 | 0 | CONSUME_VAL(10) |
210 | 0 | break; |
211 | 0 | case 'r': |
212 | 0 | CONSUME_VAL(13) |
213 | 0 | break; |
214 | 0 | case 't': |
215 | 0 | CONSUME_VAL(9) |
216 | 0 | break; |
217 | 0 | case 'u': { |
218 | 0 | ++curCharPtr_; |
219 | 0 | CallResult<char16_t> cr = consumeUnicode(); |
220 | 0 | if (LLVM_UNLIKELY(cr == ExecutionStatus::EXCEPTION)) { |
221 | 0 | return ExecutionStatus::EXCEPTION; |
222 | 0 | } |
223 | 0 | tmpStorage.push_back(*cr); |
224 | 0 | break; |
225 | 0 | } |
226 | | |
227 | 0 | default: |
228 | 0 | return errorWithChar(u"Invalid escape sequence: ", *curCharPtr_); |
229 | 0 | } |
230 | 0 | scannedChar = tmpStorage.back(); |
231 | 0 | } else { |
232 | 0 | scannedChar = *curCharPtr_; |
233 | 0 | if (hasEscape) |
234 | 0 | tmpStorage.push_back(scannedChar); |
235 | 0 | ++curCharPtr_; |
236 | 0 | } |
237 | 0 | if constexpr (ForKey::value) { |
238 | 0 | hash = hermes::updateJenkinsHash(hash, scannedChar); |
239 | 0 | } else { |
240 | 0 | allAscii &= isASCII(scannedChar); |
241 | 0 | } |
242 | 0 | } |
243 | 0 | return error("Unexpected end of input"); |
244 | 0 | } Unexecuted instantiation: hermes::vm::ExecutionStatus hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, true> >() Unexecuted instantiation: hermes::vm::ExecutionStatus hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, false> >() |
245 | | |
246 | 0 | ExecutionStatus JSONLexer::scanWord(const char *word, JSONTokenKind kind) { |
247 | 0 | while (*word && curCharPtr_.hasChar()) { |
248 | 0 | if (*curCharPtr_ != *word) { |
249 | 0 | return errorWithChar(u"Unexpected character: ", *curCharPtr_); |
250 | 0 | } |
251 | 0 | ++curCharPtr_; |
252 | 0 | ++word; |
253 | 0 | } |
254 | 0 | if (*word) { |
255 | 0 | return error(u"Unexpected end of input"); |
256 | 0 | } |
257 | 0 | token_.setPunctuator(kind); |
258 | 0 | return ExecutionStatus::RETURNED; |
259 | 0 | } |
260 | | |
261 | | } // namespace vm |
262 | | } // namespace hermes |