Coverage Report

Created: 2025-06-24 06:43

/src/hermes/lib/VM/JSLib/JSONLexer.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#include "JSONLexer.h"
9
10
#include "hermes/VM/StringPrimitive.h"
11
#include "llvh/ADT/ScopeExit.h"
12
13
#include "dtoa/dtoa.h"
14
15
namespace hermes {
16
namespace vm {
17
18
/// Literal spellings of the JSON keyword tokens; these are the strings handed
/// to scanWord(). They are constexpr so that neither the characters nor the
/// pointers themselves can be modified at runtime.
static constexpr const char *TrueString = "true";
static constexpr const char *FalseString = "false";
static constexpr const char *NullString = "null";
21
22
0
/// \return true if \p ch is one of the four JSONWhiteSpace code units:
/// <TAB>, <LF>, <CR> or <SP>; false for every other code unit.
static bool isJSONWhiteSpace(char16_t ch) {
  switch (ch) {
    case u'\t':
    case u'\n':
    case u'\r':
    case u' ':
      return true;
    default:
      return false;
  }
}
26
27
0
/// Scan the next token. Delegates to advanceHelper() with the key flag
/// cleared, so a string token is scanned through the string-value path.
ExecutionStatus JSONLexer::advance() {
  constexpr bool forKey = false;
  return advanceHelper(forKey);
}
30
31
0
/// Scan the next token. Delegates to advanceHelper() with the key flag set,
/// so a string token is scanned through the string-as-symbol path.
ExecutionStatus JSONLexer::advanceStrAsSymbol() {
  constexpr bool forKey = true;
  return advanceHelper(forKey);
}
34
35
0
/// Skip JSON whitespace, then scan the next token into token_.
///
/// \param forKey when true, a string token is scanned with
///   scanString<StrAsSymbol>(); otherwise with scanString<StrAsValue>().
/// \return ExecutionStatus::RETURNED when a punctuator or end of input was
///   recorded; otherwise whatever the delegated scanner (scanWord, scanNumber,
///   scanString) or errorWithChar() returns.
ExecutionStatus JSONLexer::advanceHelper(bool forKey) {
  // Skip whitespaces.
  while (curCharPtr_.hasChar() && isJSONWhiteSpace(*curCharPtr_)) {
    ++curCharPtr_;
  }

  // End of buffer.
  if (!curCharPtr_.hasChar()) {
    token_.setEof();
    return ExecutionStatus::RETURNED;
  }

  token_.setFirstChar(*curCharPtr_);

  // Record a single-character punctuator token and step past it. A lambda is
  // used instead of a function-like macro so that nothing leaks into the rest
  // of the translation unit.
  const auto punctuator = [this](JSONTokenKind kind) {
    token_.setPunctuator(kind);
    ++curCharPtr_;
    return ExecutionStatus::RETURNED;
  };

  switch (*curCharPtr_) {
    case u'{':
      return punctuator(JSONTokenKind::LBrace);
    case u'}':
      return punctuator(JSONTokenKind::RBrace);
    case u'[':
      return punctuator(JSONTokenKind::LSquare);
    case u']':
      return punctuator(JSONTokenKind::RSquare);
    case u',':
      return punctuator(JSONTokenKind::Comma);
    case u':':
      return punctuator(JSONTokenKind::Colon);

    // Keywords are recognized by their first character; scanWord() verifies
    // the remaining characters.
    case u't':
      return scanWord(TrueString, JSONTokenKind::True);
    case u'f':
      return scanWord(FalseString, JSONTokenKind::False);
    case u'n':
      return scanWord(NullString, JSONTokenKind::Null);

      // clang-format off
    case u'-':
    case u'0': case u'1': case u'2': case u'3': case u'4':
    case u'5': case u'6': case u'7': case u'8': case u'9':
      // clang-format on
      return scanNumber();

    case u'"':
      if (forKey) {
        return scanString<StrAsSymbol>();
      } else {
        return scanString<StrAsValue>();
      }

    default:
      return errorWithChar(u"Unexpected character: ", *curCharPtr_);
  }
}
88
89
0
/// Consume exactly four hexadecimal digits starting at the current position
/// (the payload of a \uXXXX escape) and combine them, most significant digit
/// first, into a single 16-bit code unit.
///
/// Only '0'-'9', 'a'-'f' and 'A'-'F' are accepted as digits.
///
/// \return the decoded code unit, or the result of error()/errorWithChar() if
///   the input ends early or a non-hexadecimal character is encountered.
CallResult<char16_t> JSONLexer::consumeUnicode() {
  uint16_t val = 0;
  for (unsigned i = 0; i < 4; ++i) {
    if (!curCharPtr_.hasChar()) {
      return error("Unexpected end of input");
    }

    const char16_t raw = *curCharPtr_;
    unsigned digit;
    if (raw >= u'0' && raw <= u'9') {
      // Decimal digits are tested on the raw character. Folding with `| 32`
      // first would map the control characters U+0010..U+0019 onto '0'..'9'
      // and wrongly accept them as digits.
      digit = raw - u'0';
    } else {
      // Setting bit 5 maps 'A'-'F' onto 'a'-'f'; no other code unit lands in
      // the 'a'-'f' range after the fold.
      const char16_t lowered = static_cast<char16_t>(raw | 32);
      if (lowered < u'a' || lowered > u'f') {
        return errorWithChar(u"Invalid unicode point character: ", raw);
      }
      digit = lowered - u'a' + 10;
    }

    val = static_cast<uint16_t>((val << 4) + digit);
    ++curCharPtr_;
  }

  return static_cast<char16_t>(val);
}
109
110
0
/// Check the \p len characters at \p str against the JSON number grammar:
///   -? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE] [+-]? [0-9]+)?
///
/// \param[out] errPos on failure, the index of the first position at which
///   the grammar is violated. It equals \p len when the buffer ended at a
///   point where a digit was still required.
/// \return true if the whole buffer is a valid JSON number.
static bool isValidJSONNumber(const char *str, size_t len, size_t &errPos) {
  size_t i = 0;
  const auto isDigitAt = [str, len](size_t idx) {
    return idx < len && str[idx] >= '0' && str[idx] <= '9';
  };
  const auto fail = [&errPos, &i] {
    errPos = i;
    return false;
  };

  // Optional sign. Only '-' is allowed; a leading '+' is not valid JSON.
  if (i < len && str[i] == '-') {
    ++i;
  }

  // Integer part: either a lone 0, or a non-zero digit followed by digits.
  if (!isDigitAt(i)) {
    return fail();
  }
  if (str[i] == '0') {
    ++i;
    // The integer part cannot start with 0, unless it's 0.
    if (isDigitAt(i)) {
      return fail();
    }
  } else {
    while (isDigitAt(i)) {
      ++i;
    }
  }

  // Optional fraction: '.' must be followed by at least one digit.
  if (i < len && str[i] == '.') {
    ++i;
    if (!isDigitAt(i)) {
      return fail();
    }
    while (isDigitAt(i)) {
      ++i;
    }
  }

  // Optional exponent: e/E, optional sign, at least one digit.
  if (i < len && (str[i] == 'e' || str[i] == 'E')) {
    ++i;
    if (i < len && (str[i] == '+' || str[i] == '-')) {
      ++i;
    }
    if (!isDigitAt(i)) {
      return fail();
    }
    while (isDigitAt(i)) {
      ++i;
    }
  }

  // Anything left over (e.g. a second '.' or sign) is an error.
  if (i != len) {
    return fail();
  }
  return true;
}

/// Scan a JSON number starting at the current position and store it in token_.
///
/// The maximal run of number-looking characters is copied into a narrow
/// buffer, validated against the JSON number grammar, and then converted with
/// hermes_g_strtod(). Validation is done explicitly because the conversion
/// routine alone lets through forms that JSON forbids, such as "-01", "1."
/// and "-.5".
///
/// \return ExecutionStatus::RETURNED on success, otherwise the result of
///   error()/errorWithChar().
ExecutionStatus JSONLexer::scanNumber() {
  llvh::SmallVector<char, 32> str8;
  while (curCharPtr_.hasChar()) {
    auto ch = *curCharPtr_;
    if (!(ch == u'-' || ch == u'+' || ch == u'.' || (ch | 32) == u'e' ||
          (ch >= u'0' && ch <= u'9'))) {
      break;
    }
    // Every accepted character is ASCII, so narrowing is lossless.
    str8.push_back(static_cast<char>(ch));
    ++curCharPtr_;
  }

  size_t len = str8.size();
  assert(len > 0 && "scanNumber must be called on a number-looking char");

  size_t errPos = 0;
  if (!isValidJSONNumber(str8.data(), len, errPos)) {
    if (errPos < len) {
      return errorWithChar(u"Unexpected character in number: ", str8[errPos]);
    }
    // The scanned run ended where a digit was still required; report whatever
    // follows it in the input, if anything.
    if (curCharPtr_.hasChar()) {
      return errorWithChar(u"Unexpected character in number: ", *curCharPtr_);
    }
    return error(u"Unexpected end of input");
  }

  str8.push_back('\0');

  char *endPtr;
  double value = ::hermes_g_strtod(str8.data(), &endPtr);
  // Defensive: after validation the conversion is expected to consume the
  // whole buffer.
  if (endPtr != str8.data() + len) {
    return errorWithChar(u"Unexpected character in number: ", *endPtr);
  }
  token_.setNumber(value);
  return ExecutionStatus::RETURNED;
}
139
140
template <typename ForKey>
141
0
ExecutionStatus JSONLexer::scanString() {
142
0
  assert(*curCharPtr_ == '"');
143
0
  ++curCharPtr_;
144
0
  bool hasEscape = false;
145
  // Ideally we don't have to use tmpStorage. In the case of a plain string with
146
  // no escapes, we construct an ArrayRef at the end of scanning that points to
147
  // the beginning and end of the string.
148
0
  SmallU16String<32> tmpStorage;
149
0
  curCharPtr_.beginCapture();
150
  // Make sure we don't somehow leave a dangling open capture.
151
0
  auto ensureCaptureClosed =
152
0
      llvh::make_scope_exit([this] { curCharPtr_.cancelCapture(); });
Unexecuted instantiation: hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, true> >()::{lambda()#1}::operator()() const
Unexecuted instantiation: hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, false> >()::{lambda()#1}::operator()() const
153
0
  bool allAscii = true;
154
0
  hermes::JenkinsHash hash = hermes::JenkinsHashInit;
155
156
0
  while (curCharPtr_.hasChar()) {
157
0
    if (*curCharPtr_ == '"') {
158
      // End of string.
159
0
      llvh::ArrayRef<char16_t> strRef =
160
0
          hasEscape ? tmpStorage.arrayRef() : curCharPtr_.endCapture();
161
0
      ++curCharPtr_;
162
0
      if constexpr (ForKey::value) {
163
0
        auto symRes = runtime_.getIdentifierTable().getSymbolHandle(
164
0
            runtime_, strRef, hash);
165
0
        if (symRes == ExecutionStatus::EXCEPTION)
166
0
          return ExecutionStatus::EXCEPTION;
167
0
        token_.setSymbol(*symRes);
168
0
        return ExecutionStatus::RETURNED;
169
0
      }
170
0
      auto strRes =
171
0
          StringPrimitive::createWithKnownEncoding(runtime_, strRef, allAscii);
172
0
      if (LLVM_UNLIKELY(strRes == ExecutionStatus::EXCEPTION)) {
173
0
        return ExecutionStatus::EXCEPTION;
174
0
      }
175
0
      token_.setString(runtime_.makeHandle<StringPrimitive>(*strRes));
176
0
      return ExecutionStatus::RETURNED;
177
0
    } else if (*curCharPtr_ <= '\u001F') {
178
0
      return error(u"U+0000 thru U+001F is not allowed in string");
179
0
    }
180
0
    char16_t scannedChar = -1;
181
0
    if (*curCharPtr_ == u'\\') {
182
0
      if (!hasEscape) {
183
        // This is the first escape character encountered, so append everything
184
        // we've seen so far to tmpStorage.
185
0
        tmpStorage.append(curCharPtr_.endCapture());
186
0
      }
187
0
      hasEscape = true;
188
0
      ++curCharPtr_;
189
0
      if (!curCharPtr_.hasChar()) {
190
0
        return error("Unexpected end of input");
191
0
      }
192
0
      switch (*curCharPtr_) {
193
0
#define CONSUME_VAL(v)     \
194
0
  tmpStorage.push_back(v); \
195
0
  ++curCharPtr_;
196
197
0
        case u'"':
198
0
        case u'/':
199
0
        case u'\\':
200
0
          CONSUME_VAL(*curCharPtr_)
201
0
          break;
202
0
        case 'b':
203
0
          CONSUME_VAL(8)
204
0
          break;
205
0
        case 'f':
206
0
          CONSUME_VAL(12)
207
0
          break;
208
0
        case 'n':
209
0
          CONSUME_VAL(10)
210
0
          break;
211
0
        case 'r':
212
0
          CONSUME_VAL(13)
213
0
          break;
214
0
        case 't':
215
0
          CONSUME_VAL(9)
216
0
          break;
217
0
        case 'u': {
218
0
          ++curCharPtr_;
219
0
          CallResult<char16_t> cr = consumeUnicode();
220
0
          if (LLVM_UNLIKELY(cr == ExecutionStatus::EXCEPTION)) {
221
0
            return ExecutionStatus::EXCEPTION;
222
0
          }
223
0
          tmpStorage.push_back(*cr);
224
0
          break;
225
0
        }
226
227
0
        default:
228
0
          return errorWithChar(u"Invalid escape sequence: ", *curCharPtr_);
229
0
      }
230
0
      scannedChar = tmpStorage.back();
231
0
    } else {
232
0
      scannedChar = *curCharPtr_;
233
0
      if (hasEscape)
234
0
        tmpStorage.push_back(scannedChar);
235
0
      ++curCharPtr_;
236
0
    }
237
0
    if constexpr (ForKey::value) {
238
0
      hash = hermes::updateJenkinsHash(hash, scannedChar);
239
0
    } else {
240
0
      allAscii &= isASCII(scannedChar);
241
0
    }
242
0
  }
243
0
  return error("Unexpected end of input");
244
0
}
Unexecuted instantiation: hermes::vm::ExecutionStatus hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, true> >()
Unexecuted instantiation: hermes::vm::ExecutionStatus hermes::vm::JSONLexer::scanString<std::__1::integral_constant<bool, false> >()
245
246
0
/// Match the NUL-terminated keyword \p word against the input at the current
/// position, advancing past each matched character.
///
/// \param word the expected spelling of the keyword.
/// \param kind the token kind recorded in token_ (via setPunctuator) once
///   every character of \p word has been matched.
/// \return ExecutionStatus::RETURNED on a full match; otherwise the result of
///   errorWithChar() on the first mismatching character, or of error() if the
///   input ends before the keyword is complete.
ExecutionStatus JSONLexer::scanWord(const char *word, JSONTokenKind kind) {
  for (; *word != '\0'; ++word, ++curCharPtr_) {
    if (!curCharPtr_.hasChar()) {
      return error(u"Unexpected end of input");
    }
    if (*curCharPtr_ != *word) {
      return errorWithChar(u"Unexpected character: ", *curCharPtr_);
    }
  }
  token_.setPunctuator(kind);
  return ExecutionStatus::RETURNED;
}
260
261
} // namespace vm
262
} // namespace hermes