/src/hermes/lib/Parser/JSLexer.cpp

Source (jump to first uncovered line)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "hermes/Parser/JSLexer.h"
#include "hermes/Platform/Unicode/CharacterProperties.h"

#include "dtoa/dtoa.h"
#include "hermes/Support/Conversions.h"

#include "llvh/ADT/ScopeExit.h"
#include "llvh/ADT/StringSwitch.h"

namespace hermes {
namespace parser {

namespace {

const char *g_tokenStr[] = {
#define TOK(name, str) str,
#include "hermes/Parser/TokenKinds.def"
};

const int UTF8_LINE_TERMINATOR_CHAR0 = 0xe2;

inline bool matchUnicodeLineTerminatorOffset1(const char *curCharPtr_) {
  // Line separator \u2028 UTF8 encoded is      : e2 80 a8
  // Paragraph separator \u2029 UTF8 encoded is: e2 80 a9
  return (unsigned char)curCharPtr_[1] == 0x80 &&
      ((unsigned char)curCharPtr_[2] == 0xa8 ||
       (unsigned char)curCharPtr_[2] == 0xa9);
}
} // namespace

const char *tokenKindStr(TokenKind kind) {
  assert(kind <= TokenKind::_last_token);
  return g_tokenStr[static_cast<unsigned>(kind)];
}

#if HERMES_PARSE_JSX
static llvh::DenseMap<llvh::StringRef, uint32_t> initializeHTMLEntities() {
  llvh::DenseMap<llvh::StringRef, uint32_t> entities{};

#define HTML_ENTITY(NAME, VALUE) \
  entities.insert({llvh::StringLiteral(#NAME), VALUE});
#include "hermes/Parser/HTMLEntities.def"

  return entities;
}

static const llvh::DenseMap<llvh::StringRef, uint32_t> &getHTMLEntities() {
  static const auto entities = initializeHTMLEntities();
  return entities;
}
#endif

JSLexer::JSLexer(
    uint32_t bufId,
    SourceErrorManager &sm,
    Allocator &allocator,
    StringTable *strTab,
    bool strictMode,
    bool convertSurrogates)
    : sm_(sm),
      allocator_(allocator),
      ownStrTab_(strTab ? nullptr : new StringTable(allocator_)),
      strTab_(strTab ? *strTab : *ownStrTab_),
#if HERMES_PARSE_JSX
      htmlEntities_(getHTMLEntities()),
#endif
      strictMode_(strictMode),
      convertSurrogates_(convertSurrogates) {
  initializeWithBufferId(bufId);
  initializeReservedIdentifiers();
}

JSLexer::JSLexer(
    std::unique_ptr<llvh::MemoryBuffer> input,
    SourceErrorManager &sm,
    Allocator &allocator,
    StringTable *strTab,
    bool strictMode,
    bool convertSurrogates)
    : sm_(sm),
      allocator_(allocator),
      ownStrTab_(strTab ? nullptr : new StringTable(allocator_)),
      strTab_(strTab ? *strTab : *ownStrTab_),
#if HERMES_PARSE_JSX
      htmlEntities_(getHTMLEntities()),
#endif
      strictMode_(strictMode),
      convertSurrogates_(convertSurrogates) {
  auto bufId = sm_.addNewSourceBuffer(std::move(input));
  initializeWithBufferId(bufId);
  initializeReservedIdentifiers();
}

void JSLexer::initializeWithBufferId(uint32_t bufId) {
  auto *buffer = sm_.getSourceBuffer(bufId);
  bufId_ = bufId;
  bufferStart_ = buffer->getBufferStart();
  bufferEnd_ = buffer->getBufferEnd();
  curCharPtr_ = bufferStart_;
  assert(*bufferEnd_ == 0 && "buffer must be zero terminated");
}

void JSLexer::initializeReservedIdentifiers() {
  // Add all reserved words to the identifier table
#define RESWORD(name) resWordIdent(TokenKind::rw_##name) = getIdentifier(#name);
#include "hermes/Parser/TokenKinds.def"
}

const Token *JSLexer::advance(GrammarContext grammarContext) {
  newLineBeforeCurrentToken_ = false;

  for (;;) {
    assert(curCharPtr_ <= bufferEnd_ && "lexing past end of input");
#define PUNC_L1_1(ch, tok)        \
  case ch:                        \
    token_.setStart(curCharPtr_); \
    token_.setPunctuator(tok);    \
    ++curCharPtr_;                \
    break

#define PUNC_L2_3(ch1, tok1, ch2a, tok2a, ch2b, tok2b) \
  case ch1:                                            \
    token_.setStart(curCharPtr_);                      \
    if (curCharPtr_[1] == ch2a) {                      \
      token_.setPunctuator(tok2a);                     \
      curCharPtr_ += 2;                                \
    } else if (curCharPtr_[1] == ch2b) {               \
      token_.setPunctuator(tok2b);                     \
      curCharPtr_ += 2;                                \
    } else {                                           \
      token_.setPunctuator(tok1);                      \
      curCharPtr_ += 1;                                \
    }                                                  \
    break

#define PUNC_L2_2(ch1, tok1, ch2, tok2) \
  case ch1:                             \
    token_.setStart(curCharPtr_);       \
    if (curCharPtr_[1] == (ch2)) {      \
      token_.setPunctuator(tok2);       \
      curCharPtr_ += 2;                 \
    } else {                            \
      token_.setPunctuator(tok1);       \
      curCharPtr_ += 1;                 \
    }                                   \
    break

#define PUNC_L3_3(ch1, tok1, ch2, tok2, ch3, tok3) \
  case ch1:                                        \
    token_.setStart(curCharPtr_);                  \
    if (curCharPtr_[1] != (ch2)) {                 \
      token_.setPunctuator(tok1);                  \
      curCharPtr_ += 1;                            \
    } else if (curCharPtr_[2] == (ch3)) {          \
      token_.setPunctuator(tok3);                  \
      curCharPtr_ += 3;                            \
    } else {                                       \
      token_.setPunctuator(tok2);                  \
      curCharPtr_ += 2;                            \
    }                                              \
    break

    switch ((unsigned char)*curCharPtr_) {
      case 0:
        token_.setStart(curCharPtr_);
        if (curCharPtr_ == bufferEnd_) {
          token_.setEof();
        } else {
          if (!error(
                  token_.getStartLoc(),
                  "unrecognized Unicode character \\u0000")) {
            token_.setEof();
          } else {
            ++curCharPtr_;
            continue;
          }
        }
        break;

        // clang-format off
      PUNC_L1_1('}', TokenKind::r_brace);
      PUNC_L1_1('(', TokenKind::l_paren);
      PUNC_L1_1(')', TokenKind::r_paren);
      PUNC_L1_1('[', TokenKind::l_square);
      PUNC_L1_1(']', TokenKind::r_square);
      PUNC_L1_1(';', TokenKind::semi);
      PUNC_L1_1(',', TokenKind::comma);
      PUNC_L1_1('~', TokenKind::tilde);
      PUNC_L1_1(':', TokenKind::colon);

      // { {|
      case '{':
        token_.setStart(curCharPtr_);
        if (HERMES_PARSE_FLOW &&
            LLVM_UNLIKELY(grammarContext == GrammarContext::Type) &&
            curCharPtr_[1] == '|') {
          token_.setPunctuator(TokenKind::l_bracepipe);
          curCharPtr_ += 2;
        } else {
          token_.setPunctuator(TokenKind::l_brace);
          curCharPtr_ += 1;
        }
        break;

      // = => == ===
      case '=':
        token_.setStart(curCharPtr_);
        if (curCharPtr_[1] == '>') {
          token_.setPunctuator(TokenKind::equalgreater);
          curCharPtr_ += 2;
        } else if (curCharPtr_[1] != '=') {
          token_.setPunctuator(TokenKind::equal);
          curCharPtr_ += 1;
        } else if (curCharPtr_[2] == '=') {
          token_.setPunctuator(TokenKind::equalequalequal);
          curCharPtr_ += 3;
        } else {
          token_.setPunctuator(TokenKind::equalequal);
          curCharPtr_ += 2;
        }
        break;

      // ! != !==
      PUNC_L3_3('!', TokenKind::exclaim, '=', TokenKind::exclaimequal, '=', TokenKind::exclaimequalequal);

      // + ++ +=
      // - -- -=
      // & && &=
      // | || |=
      PUNC_L2_3('+', TokenKind::plus,  '+', TokenKind::plusplus,   '=', TokenKind::plusequal);
      PUNC_L2_3('-', TokenKind::minus, '-', TokenKind::minusminus, '=', TokenKind::minusequal);

      case '&':
        token_.setStart(curCharPtr_);
        if (curCharPtr_[1] == '&') {
          if (curCharPtr_[2] == '=') {
            token_.setPunctuator(TokenKind::ampampequal);
            curCharPtr_ += 3;
          } else {
            token_.setPunctuator(TokenKind::ampamp);
            curCharPtr_ += 2;
          }
        } else if (curCharPtr_[1] == '=') {
          token_.setPunctuator(TokenKind::ampequal);
          curCharPtr_ += 2;
        } else {
          token_.setPunctuator(TokenKind::amp);
          curCharPtr_ += 1;
        }
        break;

      case '|':
        token_.setStart(curCharPtr_);
        if (HERMES_PARSE_FLOW &&
            LLVM_UNLIKELY(grammarContext == GrammarContext::Type) &&
            curCharPtr_[1] == '}') {
          token_.setPunctuator(TokenKind::piper_brace);
          curCharPtr_ += 2;
        } else {
          if (curCharPtr_[1] == '|') {
            if (curCharPtr_[2] == '=') {
              token_.setPunctuator(TokenKind::pipepipeequal);
              curCharPtr_ += 3;
            } else {
              token_.setPunctuator(TokenKind::pipepipe);
              curCharPtr_ += 2;
            }
          } else if (curCharPtr_[1] == '=') {
            token_.setPunctuator(TokenKind::pipeequal);
            curCharPtr_ += 2;
          } else {
            token_.setPunctuator(TokenKind::pipe);
            curCharPtr_ += 1;
          }
        }
        break;

      // ? ?? ?.
      case '?':
        token_.setStart(curCharPtr_);
        if (curCharPtr_[1] == '.' && !isdigit(curCharPtr_[2])) {
          // OptionalChainingPunctuator ::
          // ?. [lookahead does not contain DecimalDigit]
          // This is done to prevent `x?.3:y` from being recognized
          // as `x ?. 3 : y` instead of `x ? .3 : y`.
          token_.setPunctuator(TokenKind::questiondot);
          curCharPtr_ += 2;
        } else if (
            curCharPtr_[1] == '?' &&
            LLVM_LIKELY(grammarContext != GrammarContext::Type)) {
          if (curCharPtr_[2] == '=') {
            token_.setPunctuator(TokenKind::questionquestionequal);
            curCharPtr_ += 3;
          } else {
            token_.setPunctuator(TokenKind::questionquestion);
            curCharPtr_ += 2;
          }
        } else {
          token_.setPunctuator(TokenKind::question);
          curCharPtr_ += 1;
        }
        break;

      // * *= ** **=
      case '*':
        token_.setStart(curCharPtr_);
        if (curCharPtr_[1] == '=') {
          token_.setPunctuator(TokenKind::starequal);
          curCharPtr_ += 2;
        } else if (curCharPtr_[1] != '*') {
          token_.setPunctuator(TokenKind::star);
          curCharPtr_ += 1;
        } else if (curCharPtr_[2] == '=') {
          token_.setPunctuator(TokenKind::starstarequal);
          curCharPtr_ += 3;
        } else {
          token_.setPunctuator(TokenKind::starstar);
          curCharPtr_ += 2;
        }
        break;

        // * *=
        // ^ ^=
        // / /=
        PUNC_L2_2('^', TokenKind::caret, '=', TokenKind::caretequal);

      // % %=
      case '%':
        token_.setStart(curCharPtr_);
        if (HERMES_PARSE_FLOW &&
            LLVM_UNLIKELY(grammarContext == GrammarContext::Type) &&
            curCharPtr_ + 7 <= bufferEnd_ &&
            llvh::StringRef(curCharPtr_, 7) == "%checks") {
          token_.setIdentifier(getStringLiteral("%checks"));
          curCharPtr_ += 7;
        } else if (curCharPtr_[1] == ('=')) {
          token_.setPunctuator(TokenKind::percentequal);
          curCharPtr_ += 2;
        } else {
          token_.setPunctuator(TokenKind::percent);
          curCharPtr_ += 1;
        }
        break;

        // clang-format on

      case '\r':
      case '\n':
        ++curCharPtr_;
        newLineBeforeCurrentToken_ = true;
        continue;

      // Line separator \u2028 UTF8 encoded is      : e2 80 a8
      // Paragraph separator \u2029 UTF8 encoded is : e2 80 a9
      case UTF8_LINE_TERMINATOR_CHAR0:
        if (matchUnicodeLineTerminatorOffset1(curCharPtr_)) {
          curCharPtr_ += 3;
          newLineBeforeCurrentToken_ = true;
          continue;
        } else {
          goto default_label;
        }

      case '\v':
      case '\f':
        ++curCharPtr_;
        continue;

      case '\t':
      case ' ':
        // Spaces frequently come in groups, so use a tight inner loop to skip.
        do
          ++curCharPtr_;
        while (*curCharPtr_ == '\t' || *curCharPtr_ == ' ');
        continue;

      // No-break space \u00A0 is UTF8 encoded as: c2 a0
      case 0xc2:
        if ((unsigned char)curCharPtr_[1] == 0xa0) {
          curCharPtr_ += 2;
          continue;
        } else {
          goto default_label;
        }

      // Byte-order mark \uFEFF is encoded as: ef bb bf
      case 0xef:
        if ((unsigned char)curCharPtr_[1] == 0xbb &&
            (unsigned char)curCharPtr_[2] == 0xbf) {
          curCharPtr_ += 3;
          continue;
        } else {
          goto default_label;
        }

      case '/':
        if (curCharPtr_[1] == '/') { // Line comment?
          scanLineComment(curCharPtr_);
          continue;
        } else if (curCharPtr_[1] == '*') { // Block comment?
          curCharPtr_ = skipBlockComment(curCharPtr_);
          continue;
        } else {
          token_.setStart(curCharPtr_);
          if (grammarContext == AllowRegExp) {
            scanRegExp();
          } else if (curCharPtr_[1] == '=') {
            token_.setPunctuator(TokenKind::slashequal);
            curCharPtr_ += 2;
          } else {
            token_.setPunctuator(TokenKind::slash);
            curCharPtr_ += 1;
          }
        }
        break;

      case '#':
        if (LLVM_UNLIKELY(
                curCharPtr_ == bufferStart_ && curCharPtr_[1] == '!')) {
          // #! (hashbang) at the very start of the buffer.
          scanLineComment(curCharPtr_);
          continue;
        }
        token_.setStart(curCharPtr_);
        if (!scanPrivateIdentifier()) {
          continue;
        }
        break;

      // <  <= << <<=
      case '<':
        token_.setStart(curCharPtr_);
        if (HERMES_PARSE_FLOW &&
            LLVM_UNLIKELY(grammarContext == JSLexer::GrammarContext::Type)) {
          token_.setPunctuator(TokenKind::less);
          curCharPtr_ += 1;
        } else if (curCharPtr_[1] == '=') {
          token_.setPunctuator(TokenKind::lessequal);
          curCharPtr_ += 2;
        } else if (curCharPtr_[1] == '<') {
          if (curCharPtr_[2] == '=') {
            token_.setPunctuator(TokenKind::lesslessequal);
            curCharPtr_ += 3;
          } else {
            token_.setPunctuator(TokenKind::lessless);
            curCharPtr_ += 2;
          }
        } else {
          token_.setPunctuator(TokenKind::less);
          curCharPtr_ += 1;
        }
        break;

      // > >= >> >>> >>= >>>=
      case '>':
        token_.setStart(curCharPtr_);
        if ((HERMES_PARSE_FLOW &&
             LLVM_UNLIKELY(grammarContext == JSLexer::GrammarContext::Type)) ||
            (HERMES_PARSE_JSX &&
             LLVM_UNLIKELY(
                 grammarContext ==
                 JSLexer::GrammarContext::AllowJSXIdentifier))) {
          token_.setPunctuator(TokenKind::greater);
          curCharPtr_ += 1;
        } else if (curCharPtr_[1] == '=') { // >=
          token_.setPunctuator(TokenKind::greaterequal);
          curCharPtr_ += 2;
        } else if (curCharPtr_[1] == '>') { // >>
          if (curCharPtr_[2] == '=') { // >>=
            token_.setPunctuator(TokenKind::greatergreaterequal);
            curCharPtr_ += 3;
          } else if (curCharPtr_[2] == '>') { // >>>
            if (curCharPtr_[3] == '=') { // >>>=
              token_.setPunctuator(TokenKind::greatergreatergreaterequal);
              curCharPtr_ += 4;
            } else {
              token_.setPunctuator(TokenKind::greatergreatergreater);
              curCharPtr_ += 3;
            }
          } else {
            token_.setPunctuator(TokenKind::greatergreater);
            curCharPtr_ += 2;
          }
        } else {
          token_.setPunctuator(TokenKind::greater);
          curCharPtr_ += 1;
        }
        break;

      case '.':
        token_.setStart(curCharPtr_);
        if (curCharPtr_[1] >= '0' && curCharPtr_[1] <= '9') {
          scanNumber(grammarContext);
        } else if (curCharPtr_[1] == '.' && curCharPtr_[2] == '.') {
          token_.setPunctuator(TokenKind::dotdotdot);
          curCharPtr_ += 3;
        } else {
          token_.setPunctuator(TokenKind::period);
          ++curCharPtr_;
        }
        break;

        // clang-format off
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        // clang-format on
        token_.setStart(curCharPtr_);
        scanNumber(grammarContext);
        break;

        // clang-format off
      case '_': case '$':
      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
      case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
      case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
      case 'v': case 'w': case 'x': case 'y': case 'z':
      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
      case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
      case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
      case 'V': case 'W': case 'X': case 'Y': case 'Z':
        // clang-format on
        token_.setStart(curCharPtr_);
        scanIdentifierFastPathInContext(curCharPtr_, grammarContext);
        break;

      case '@':
        token_.setStart(curCharPtr_);
        if (HERMES_PARSE_FLOW &&
            LLVM_UNLIKELY(grammarContext == GrammarContext::Type)) {
          scanIdentifierFastPathInContext(curCharPtr_, grammarContext);
        } else {
          curCharPtr_ += 1;
          errorRange(token_.getStartLoc(), "unrecognized character '@'");
          continue;
        }
        break;

      case '\\': {
        token_.setStart(curCharPtr_);
        tmpStorage_.clear();
        uint32_t cp = consumeUnicodeEscape();
        if (!isUnicodeIDStart(cp)) {
          errorRange(
              token_.getStartLoc(),
              "Unicode escape \\u" + Twine::utohexstr(cp) +
                  " is not a valid identifier start");
          continue;
        } else {
          appendUnicodeToStorage(cp);
        }
        scanIdentifierPartsInContext(grammarContext);
        break;
      }

      case '\'':
      case '"':
        token_.setStart(curCharPtr_);
        scanStringInContext(grammarContext);
        break;

      case '`':
        token_.setStart(curCharPtr_);
        scanTemplateLiteral();
        break;

      default_label:
      default: {
        token_.setStart(curCharPtr_);
        uint32_t ch = decodeUTF8();

        if (isUnicodeOnlyLetter(ch)) {
          tmpStorage_.clear();
          appendUnicodeToStorage(ch);
          scanIdentifierPartsInContext(grammarContext);
        } else if (isUnicodeOnlySpace(ch)) {
          continue;
        } else {
          if (ch > 31 && ch < 127)
            errorRange(
                token_.getStartLoc(),
                "unrecognized character '" + Twine((char)ch) + "'");
          else
            errorRange(
                token_.getStartLoc(),
                "unrecognized Unicode character \\u" + Twine::utohexstr(ch));
          continue;
        }

        break;
      }
    }

    // Always terminate the loop unless "continue" was used.
    break;
  } // for(;;)

  finishToken(curCharPtr_);

  return &token_;
}

#if HERMES_PARSE_JSX

const Token *JSLexer::advanceInJSXChild() {
  token_.setStart(curCharPtr_);
  for (;;) {
    assert(curCharPtr_ <= bufferEnd_ && "lexing past end of input");
    switch (*curCharPtr_) {
      PUNC_L1_1('{', TokenKind::l_brace);
      PUNC_L1_1('<', TokenKind::less);

      case 0:
        if (curCharPtr_ == bufferEnd_) {
          token_.setEof();
          break;
        }
        // Fall-through to start scanning text.
        [[fallthrough]];

      default: {
        const char *start = curCharPtr_;
        token_.setStart(start);

        // Build up cooked value using XHTML entities
        tmpStorage_.clear();
        rawStorage_.clear();
        for (;;) {
          char c = *curCharPtr_;

          if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_))) {
            uint32_t codepoint = _decodeUTF8SlowPath(curCharPtr_);
            appendUnicodeToStorage(codepoint);
            appendUnicodeToStorage(codepoint, rawStorage_);
            continue;
          } else if (c == '&') {
            const char *htmlStart = curCharPtr_;
            auto codePoint = consumeHTMLEntityOptional();
            if (codePoint.hasValue()) {
              appendUnicodeToStorage(*codePoint);
              rawStorage_.append(
                  {htmlStart, (size_t)(curCharPtr_ - htmlStart)});
              continue;
            }
          } else if (
              (c == 0 && curCharPtr_ == bufferEnd_) || c == '{' || c == '<') {
            token_.setJSXText(
                getStringLiteral(tmpStorage_.str()),
                getStringLiteral(rawStorage_.str()));
            break;
          }
          tmpStorage_.push_back(c);
          rawStorage_.push_back(c);
          ++curCharPtr_;
        }
        break;
      }
    }

    // Always terminate the loop unless "continue" was used.
    break;
  }
  finishToken(curCharPtr_);
  return &token_;
}

llvh::Optional<uint32_t> JSLexer::consumeHTMLEntityOptional() {
  assert(*curCharPtr_ == '&');
  const char *start = curCharPtr_;

  if (curCharPtr_[1] == '#') {
    if (curCharPtr_[2] == 'x') {
      // HTML entity with form &#xHEX>;
      curCharPtr_ += 3;
      const char *numberStart = curCharPtr_;

      uint32_t codePoint = 0;
      char ch = *curCharPtr_;

      // Calculate code point from non-empty sequence of hex digits followed by
      // a semicolon.
      for (;;) {
        if (ch == ';' && curCharPtr_ != numberStart) {
          curCharPtr_++;
          return codePoint;
        } else if (isdigit(ch)) {
          ch -= '0';
        } else {
          ch |= 32;
          if (ch >= 'a' && ch <= 'f') {
            ch -= 'a' - 10;
          } else {
            break;
          }
        }

        // Check that this number is representable as a code point
        codePoint = (codePoint << 4) + ch;
        if (codePoint > UNICODE_MAX_VALUE) {
          break;
        }

        ++curCharPtr_;
        ch = *curCharPtr_;
      }
    } else {
      // HTML entity with form &#NUMBER;
      curCharPtr_ += 2;
      const char *numberStart = curCharPtr_;

      uint32_t codePoint = 0;
      char ch = *curCharPtr_;

      // Calculate code point from non-empty sequence of decimal digits followed
      // by a semicolon.
      for (;;) {
        if (ch == ';' && curCharPtr_ != numberStart) {
          curCharPtr_++;
          return codePoint;
        } else if (isdigit(ch)) {
          // Check that this number is representable as a code point
          codePoint = codePoint * 10 + (ch - '0');
          if (codePoint > UNICODE_MAX_VALUE) {
            break;
          }
        } else {
          break;
        }

        ++curCharPtr_;
        ch = *curCharPtr_;
      }
    }
  } else {
    // HTML entity with form &NAME;
    ++curCharPtr_;

    // Gather HTML entity name and lookup name in table. HTML entity names are
    // composed of a sequence of up to 8 alphanumeric characters followed by a
    // semicolon. To minimize backtracking due to an `&` without a following
    // semicolon we only need to look at most 9 characters ahead (8 for the
    // name, 1 for the semicolon).
    for (int i = 0; i < 9; i++) {
      char ch = *curCharPtr_;
      if (ch == ';') {
        auto it = htmlEntities_.find(llvh::StringRef(curCharPtr_ - i, i));
        if (it == htmlEntities_.end()) {
          break;
        }

        curCharPtr_++;
        return it->second;
      } else if (((ch | 32) >= 'a' && (ch | 32) <= 'z') || isdigit(ch)) {
        ++curCharPtr_;
      } else {
        break;
      }
    }
  }

  curCharPtr_ = start;
  return llvh::None;
}

#endif

bool JSLexer::isCurrentTokenADirective() {
  // The current token must be a string literal without escapes.
  if (token_.getKind() != TokenKind::string_literal ||
      token_.getStringLiteralContainsEscapes()) {
    return false;
  }

  const char *ptr = curCharPtr_;

  // A directive is a string literal (the current token, directly behind
  // curCharPtr_), followed by a semicolon, new line, or eof that we will now
  // try to find. There can also be comments. So, we loop, consuming whitespace
  // until we encounter:
  // - EOF. Don't consume it and succeed.
  // - Semicolon. Don't consume it and succeed.
  // - Right brace. Don't consume it and succeed.
  // - A new line. Don't consume it and succeed.
  // - A line comment. It implies a new line. Don't consume it and succeed.
  // - A block comment. Consume it and continue.
  // - Anything else. We consume nothing and fail.

  for (;;) {
    assert(ptr <= bufferEnd_ && "lexing past end of input");

    switch (*((const unsigned char *)ptr)) {
      case 0:
        // EOF?
        if (ptr == bufferEnd_)
          return true;
        // We encountered a stray 0 character.
        return false;

      case ';':
      case '}':
        return true;

      case '\r':
      case '\n':
        return true;

      // Line separator \u2028 UTF8 encoded is      : e2 80 a8
      // Paragraph separator \u2029 UTF8 encoded is : e2 80 a9
      case UTF8_LINE_TERMINATOR_CHAR0:
        if (matchUnicodeLineTerminatorOffset1(ptr))
          return true;
        return false;

      case '\v':
      case '\f':
        // Skip whitespace.
        ++ptr;
        continue;

      case '\t':
      case ' ':
        // Spaces frequently come in groups, so use a tight inner loop to skip.
        do
          ++ptr;
        while (*ptr == '\t' || *ptr == ' ');
        continue;

      // No-break space \u00A0 is UTF8 encoded as: c2 a0
      case 0xc2:
        if ((unsigned char)ptr[1] == 0xa0) {
          ptr += 2;
          continue;
        } else {
          goto default_label;
        }

      // Byte-order mark \uFEFF is encoded as: ef bb bf
      case 0xef:
        if ((unsigned char)ptr[1] == 0xbb && (unsigned char)ptr[2] == 0xbf) {
          ptr += 3;
          continue;
        } else {
          goto default_label;
        }

      case '/':
        if (ptr[1] == '/') { // Line comment?
          // It implies a new line, so we are good.
          return true;
        } else if (ptr[1] == '*') { // Block comment?
          auto savedCommentStorageSize = commentStorage_.size();
          auto commentScope = llvh::make_scope_exit([&] {
            if (storeComments_)
              commentStorage_.erase(
                  commentStorage_.begin() + savedCommentStorageSize,
                  commentStorage_.end());
          });
          SourceErrorManager::SaveAndSuppressMessages suppress(&sm_);
          ptr = skipBlockComment(ptr);
          continue;
        } else {
          return false;
        }

      // Handle all other characters: if it is a unicode space, skip it.
      // Otherwise we have failed.
      default_label:
      default: {
        if (hermes::isUTF8Start(*ptr)) {
          auto peeked = _peekUTF8(ptr);
          if (isUnicodeOnlySpace(peeked.first)) {
            ptr = peeked.second;
            continue;
          }
        }
        return false;
      }
    }
  }

  // We arrive here if we matched a directive. 'ptr' is the final character.
  return true;
}

const Token *JSLexer::rescanRBraceInTemplateLiteral() {
  assert(token_.getKind() == TokenKind::r_brace && "need } to rescan");
  --curCharPtr_;
  // Undo the storage for the '}'.
  if (LLVM_UNLIKELY(storeTokens_)) {
    tokenStorage_.pop_back();
  }
  assert(*curCharPtr_ == '}' && "non-} was scanned as r_brace");
  token_.setStart(curCharPtr_);
  scanTemplateLiteral();
  finishToken(curCharPtr_);
  return &token_;
}

OptValue<TokenKind> JSLexer::lookahead1(OptValue<TokenKind> expectedToken) {
  // We support TokenKind::question here because of Flow's render types.
  // `renders?` is not a token itself (as making it a token would be bad for
  // identifier parsing performance). When we are parsing something like
  // (renders?: number) => string and the cursor is under the `?`, we need to
  // perform a lookahead to see if the next token is a colon, in which case
  // this is a function parameter, and if not then parse as a render type.
  assert(
      (token_.getKind() == TokenKind::identifier || token_.isResWord() ||
       token_.getKind() == TokenKind::question) &&
      "unsupported current token");
  UniqueString *savedIdent;
  if (token_.getKind() == TokenKind::identifier || token_.isResWord()) {
    savedIdent = token_.getResWordOrIdentifier();
  }
  TokenKind savedKind = token_.getKind();
  SMLoc start = token_.getStartLoc();
  SMLoc end = token_.getEndLoc();
  const char *cur = curCharPtr_;
  SourceErrorManager::SaveAndSuppressMessages suppress(&sm_);

  // Remove any comments that were stored during the lookahead
  auto savedCommentStorageSize = commentStorage_.size();
  auto commentScope = llvh::make_scope_exit([&] {
    if (storeComments_)
      commentStorage_.erase(
          commentStorage_.begin() + savedCommentStorageSize,
          commentStorage_.end());
  });

  advance();
  OptValue<TokenKind> kind = token_.getKind();
  if (isNewLineBeforeCurrentToken()) {
    // Disregard anything after LineTerminator.
    kind = llvh::None;
  } else if (expectedToken == kind) {
    // Do not move the cursor back.
    return kind;
  }

  token_.setStart(start.getPointer());
  token_.setEnd(end.getPointer());
  if (savedKind == TokenKind::identifier) {
    token_.setIdentifier(savedIdent);
  } else if (savedKind == TokenKind::question) {
    token_.setPunctuator(TokenKind::question);
  } else {
    token_.setResWord(savedKind, savedIdent);
  }
  seek(SMLoc::getFromPointer(cur));

  // Undo the storage for the token we just advanced to.
  if (LLVM_UNLIKELY(storeTokens_)) {
    tokenStorage_.pop_back();
  }

  return kind;
}

uint32_t JSLexer::consumeUnicodeEscape() {
  assert(*curCharPtr_ == '\\');
  ++curCharPtr_;

  if (*curCharPtr_ != 'u') {
    error(
        {SMLoc::getFromPointer(curCharPtr_ - 1),
         SMLoc::getFromPointer(curCharPtr_ + 1)},
        "invalid Unicode escape");
    return UNICODE_REPLACEMENT_CHARACTER;
  }
  ++curCharPtr_;

  if (*curCharPtr_ == '{') {
    auto cp = consumeBracedCodePoint();
    if (!cp.hasValue()) {
      // consumeBracedCodePoint has reported an error.
      return UNICODE_REPLACEMENT_CHARACTER;
    }
    return *cp;
  }

  auto cp = consumeHex(4);
  if (!cp)
    return UNICODE_REPLACEMENT_CHARACTER;

  // We don't need t check for valid UTF-16. JavaScript allows invalid surrogate
  // pairs, so we just encode every UTF-16 code into a UTF-8 sequence, even
  // though theoretically it is not a valid UTF-8. (UTF-8 would be "valid" if we
  // collected the surrogate pair, decoded it into UTF-32 and encoded that into
  // UTF-16).
  return cp.getValue();
}

llvh::Optional<uint32_t> JSLexer::consumeUnicodeEscapeOptional() {
  const char *start = curCharPtr_;
  assert(*curCharPtr_ == '\\');
  ++curCharPtr_;

  if (*curCharPtr_ != 'u') {
    curCharPtr_ = start;
    return llvh::None;
  }
  ++curCharPtr_;

  if (*curCharPtr_ == '{') {
    // Avoid reporting an error because we are consuming the escape optionally.
    auto cp = consumeBracedCodePoint(false);
    if (!cp) {
      curCharPtr_ = start;
      return llvh::None;
    }
    return *cp;
  }

  auto cp = consumeHex(4, false);
  if (!cp) {
    curCharPtr_ = start;
    return llvh::None;
  }

  // We don't need t check for valid UTF-16. JavaScript allows invalid surrogate
  // pairs, so we just encode every UTF-16 code into a UTF-8 sequence, even
  // though theoretically it is not a valid UTF-8. (UTF-8 would be "valid" if we
  // collected the surrogate pair, decoded it into UTF-32 and encoded that into
  // UTF-16).
  return cp.getValue();
}

bool JSLexer::consumeIdentifierStart() {
  if (*curCharPtr_ == '_' || *curCharPtr_ == '$' ||
      ((*curCharPtr_ | 32) >= 'a' && (*curCharPtr_ | 32) <= 'z')) {
    tmpStorage_.clear();
    tmpStorage_.push_back(*curCharPtr_++);
    return true;
  }

  if (*curCharPtr_ == '\\') {
    SMLoc startLoc = SMLoc::getFromPointer(curCharPtr_);
    tmpStorage_.clear();
    uint32_t cp = consumeUnicodeEscape();
    if (!isUnicodeIDStart(cp)) {
      errorRange(
          startLoc,
          "Unicode escape \\u" + Twine::utohexstr(cp) +
              "is not a valid identifier start");
    } else {
      appendUnicodeToStorage(cp);
    }
    return true;
  }

  if (LLVM_LIKELY(!isUTF8Start(*curCharPtr_)))
    return false;

  auto decoded = _peekUTF8();
  if (isUnicodeIDStart(decoded.first)) {
    tmpStorage_.clear();
    appendUnicodeToStorage(decoded.first);
    curCharPtr_ = decoded.second;
    return true;
  }

  return false;
}

template <JSLexer::IdentifierMode Mode>
bool JSLexer::consumeOneIdentifierPartNoEscape() {
  char ch = *curCharPtr_;
  if (ch == '_' || ch == '$' || ((ch | 32) >= 'a' && (ch | 32) <= 'z') ||
      (ch >= '0' && ch <= '9') || (Mode == IdentifierMode::JSX && ch == '-') ||
      (Mode == IdentifierMode::Flow && ch == '@')) {
    tmpStorage_.push_back(*curCharPtr_++);
    return true;
  } else if (LLVM_UNLIKELY(isUTF8Start(ch))) {
    // If we have encountered a Unicode character, we try to decode it. If it
    // can be a part of the identifier, we consume it, otherwise we leave it
    // alone.
    auto decoded = _peekUTF8();
    if (isUnicodeIDContinue(decoded.first)) {
      appendUnicodeToStorage(decoded.first);
      curCharPtr_ = decoded.second;
      return true;
    }
  }
  return false;
}

template <JSLexer::IdentifierMode Mode>
void JSLexer::consumeIdentifierParts() {
  for (;;) {
    // Try consuming an non-escaped identifier part. Failing that, check for an
    // escape.
    if (consumeOneIdentifierPartNoEscape<Mode>())
      continue;
    else if (*curCharPtr_ == '\\') {
      // Decode the escape.
      SMLoc startLoc = SMLoc::getFromPointer(curCharPtr_);
      uint32_t cp = consumeUnicodeEscape();
      if (!isUnicodeIDContinue(cp)) {
        errorRange(
            startLoc,
            "Unicode escape \\u" + Twine::utohexstr(cp) +
                " is not a valid identifier codepoint");
      } else {
        appendUnicodeToStorage(cp);
      }
    } else
      break;
  }
}

unsigned char JSLexer::consumeOctal(unsigned maxLen) {
  assert(*curCharPtr_ >= '0' && *curCharPtr_ <= '7');

  if (strictMode_) {
    if (!error(
            SMLoc::getFromPointer(curCharPtr_ - 1),
            "octals not allowed in strict mode")) {
      return 0;
    }
  }

  auto res = (unsigned char)(*curCharPtr_++ - '0');
  while (--maxLen && *curCharPtr_ >= '0' && *curCharPtr_ <= '7')
    res = (res << 3) + *curCharPtr_++ - '0';

  return res;
}

llvh::Optional<uint32_t> JSLexer::consumeHex(
    unsigned requiredLen,
    bool errorOnFail) {
  uint32_t cp = 0;
  for (unsigned i = 0; i != requiredLen; ++i) {
    unsigned ch = *curCharPtr_;
    if (ch >= '0' && ch <= '9') {
      ch -= '0';
    } else {
      // Now that we know it is not a digit, it is safe to lowercase.
      ch |= 32;
      if (ch >= 'a' && ch <= 'f') {
        ch -= 'a' - 10;
      } else {
        if (errorOnFail) {
          error(SMLoc::getFromPointer(curCharPtr_), "invalid hex number");
        }
        return llvh::None;
      }
    }
    cp = (cp << 4) + ch;
    ++curCharPtr_;
  }

  return cp;
}

llvh::Optional<uint32_t> JSLexer::consumeBracedCodePoint(bool errorOnFail) {
  assert(*curCharPtr_ == '{' && "braced codepoint must begin with {");
  ++curCharPtr_;
  const char *start = curCharPtr_;

  // Set to true if we failed to get a code point that is in bounds or saw
  // an invalid character.
  bool failed = false;

  // Loop until we hit the } or eof, max out the value, or see an invalid char.
  uint32_t cp = 0;
  for (; *curCharPtr_ != '}'; ++curCharPtr_) {
    int ch = *curCharPtr_;
    if (ch >= '0' && ch <= '9') {
      ch -= '0';
    } else if (ch >= 'a' && ch <= 'f') {
      ch -= 'a' - 10;
    } else if (ch >= 'A' && ch <= 'F') {
      ch -= 'A' - 10;
    } else {
      // The only way this can be the end of the buffer is if this is a \0.
      // Check if this is the end of the buffer, else continue so that we
      // may report more errors after this braced code point.
      if (curCharPtr_ == bufferEnd_) {
        if (!failed && errorOnFail) {
          error(
              SMLoc::getFromPointer(start),
              "non-terminated unicode codepoint escape");
        }
        return llvh::None;
      }
      // Invalid character, set the failed flag and continue.
      if (!failed && errorOnFail) {
        if (!error(
                SMLoc::getFromPointer(curCharPtr_),
                "invalid character in unicode codepoint escape")) {
          return llvh::None;
        }
      }
      failed = true;
      continue;
    }
    cp = (cp << 4) + ch;
    if (cp > UNICODE_MAX_VALUE) {
      // Number grew too big, set the failed flag and continue.
      if (!failed && errorOnFail) {
        if (!error(
                SMLoc::getFromPointer(start),
                "unicode codepoint escape is too large")) {
          return llvh::None;
        }
      }
      failed = true;
    }
  }

  assert(curCharPtr_ < bufferEnd_ && "bufferEnd_ should cause early return");

  // An empty escape sequence is invalid.
  if (curCharPtr_ == start) {
    if (!failed && errorOnFail) {
      if (!error(
              SMLoc::getFromPointer(start), "empty unicode codepoint escape")) {
        return llvh::None;
      }
    }
    failed = true;
  }

  // Consume the final } and return.
  ++curCharPtr_;
  return failed ? llvh::None : llvh::Optional<uint32_t>{cp};
}

llvh::StringRef JSLexer::lineCommentHelper(const char *start) {
  assert(
      (start[0] == '/' && start[1] == '/') ||
      (start[0] == '#' && start[1] == '!'));
  const char *lineCommentEnd;
  const char *cur = start + 2;

  for (;;) {
    switch ((unsigned char)*cur) {
      case 0:
        if (cur == bufferEnd_) {
          lineCommentEnd = cur;
          goto endLoop;
        } else {
          ++cur;
        }
        break;

      case '\r':
      case '\n':
        lineCommentEnd = cur;
        ++cur;
        newLineBeforeCurrentToken_ = true;
        goto endLoop;

        // Line separator \u2028 UTF8 encoded is      : e2 80 a8
        // Paragraph separator \u2029 UTF8 encoded is: e2 80 a9
      case UTF8_LINE_TERMINATOR_CHAR0:
        if (matchUnicodeLineTerminatorOffset1(cur)) {
          lineCommentEnd = cur;
          cur += 3;
          newLineBeforeCurrentToken_ = true;
          goto endLoop;
        } else {
          _decodeUTF8SlowPath(cur);
        }
        break;

      default:
        if (LLVM_UNLIKELY(isUTF8Start(*cur)))
          _decodeUTF8SlowPath(cur);
        else
          ++cur;
        break;
    }
  }
endLoop:

  curCharPtr_ = cur;
  return llvh::StringRef(start, lineCommentEnd - start);
}

void JSLexer::scanLineComment(const char *start) {
  llvh::StringRef comment = lineCommentHelper(start);

  if (storeComments_) {
    commentStorage_.emplace_back(
        start[0] == '/' ? StoredComment::Kind::Line
                        : StoredComment::Kind::Hashbang,
        SMRange{
            SMLoc::getFromPointer(comment.begin()),
            SMLoc::getFromPointer(comment.end())});
  }

  // Check for magic comments, which excludes #!.
  // Syntax is //# name=value
  if (!comment.consume_front(llvh::StringLiteral("//# ")))
    return;

  if (comment.consume_front(llvh::StringLiteral("sourceURL=")))
    sm_.setSourceUrl(bufId_, comment);
  else if (comment.consume_front(llvh::StringLiteral("sourceMappingURL=")))
    sm_.setSourceMappingUrl(bufId_, comment);
}

const char *JSLexer::skipBlockComment(const char *start) {
  assert(start[0] == '/' && start[1] == '*');
  SMLoc blockCommentStart = SMLoc::getFromPointer(start);
  const char *cur = start + 2;

  for (;;) {
    switch ((unsigned char)*cur) {
      case 0:
        if (cur == bufferEnd_) {
          error(SMLoc::getFromPointer(cur), "non-terminated block comment");
          sm_.note(blockCommentStart, "comment started here");
          goto endLoop;
        } else {
          ++cur;
        }
        break;

      case '\r':
      case '\n':
        ++cur;
        newLineBeforeCurrentToken_ = true;
        break;

      // Line separator \u2028 UTF8 encoded is      : e2 80 a8
      // Paragraph separator \u2029 UTF8 encoded is: e2 80 a9
      case UTF8_LINE_TERMINATOR_CHAR0:
        if (matchUnicodeLineTerminatorOffset1(cur)) {
          cur += 3;
          newLineBeforeCurrentToken_ = true;
        } else {
          _decodeUTF8SlowPath(cur);
        }
        break;

      case '*':
        ++cur;
        if (*cur == '/') {
          ++cur;
          goto endLoop;
        }
        break;

      default:
        if (LLVM_UNLIKELY(isUTF8Start(*cur)))
          _decodeUTF8SlowPath(cur);
        else
          ++cur;
        break;
    }
  }
endLoop:

  if (storeComments_) {
    commentStorage_.emplace_back(
        StoredComment::Kind::Block,
        SMRange{blockCommentStart, SMLoc::getFromPointer(cur)});
  }

  return cur;
}

void JSLexer::scanNumber(GrammarContext grammarContext) {
  // A somewhat ugly state machine for scanning a number

  unsigned radix = 10;
  bool real = false;
  bool ok = true;
  const char *rawStart = curCharPtr_;
  const char *start = curCharPtr_;

  // True when we encounter the numeric literal separator: '_'.
  bool seenSeparator = false;

  // True when we encounter a legacy octal number (starts with '0').
  bool legacyOctal = false;

  // Detect the radix
  if (*curCharPtr_ == '0') {
    if ((curCharPtr_[1] | 32) == 'x') {
      radix = 16;
      curCharPtr_ += 2;
      start += 2;
    } else if ((curCharPtr_[1] | 32) == 'o') {
      radix = 8;
      curCharPtr_ += 2;
      start += 2;
    } else if ((curCharPtr_[1] | 32) == 'b') {
      radix = 2;
      curCharPtr_ += 2;
      start += 2;
    } else if (curCharPtr_[1] == '.') {
      curCharPtr_ += 2;
      goto fraction;
    } else if ((curCharPtr_[1] | 32) == 'e') {
      curCharPtr_ += 2;
      goto exponent;
    } else {
      radix = 8;
      legacyOctal = true;
      ++curCharPtr_;
    }
  }

  while (isdigit(*curCharPtr_) ||
         (radix == 16 && (*curCharPtr_ | 32) >= 'a' &&
          (*curCharPtr_ | 32) <= 'f') ||
         (*curCharPtr_ == '_')) {
    seenSeparator |= *curCharPtr_ == '_';
    ++curCharPtr_;
  }

  if (radix == 10 || legacyOctal) {
    // It is not necessarily an integer.
    // We could have interpreted as legacyOctal initially but will have to
    // change to decimal later.
    if (*curCharPtr_ == '.') {
      ++curCharPtr_;
      goto fraction;
    }

    if ((*curCharPtr_ | 32) == 'e') {
      ++curCharPtr_;
      goto exponent;
    }
  }

  goto end;

fraction:
  // We arrive here after we have consumed the decimal dot ".".
  //
  real = true;
  while (isdigit(*curCharPtr_) || *curCharPtr_ == '_') {
    seenSeparator |= *curCharPtr_ == '_';
    ++curCharPtr_;
  }

  if ((*curCharPtr_ | 32) == 'e') {
    ++curCharPtr_;
    goto exponent;
  } else {
    goto end;
  }

exponent:
  // We arrive here after we have consumed the exponent character 'e' or 'E'.
  //
  real = true;
  if (*curCharPtr_ == '+' || *curCharPtr_ == '-')
    ++curCharPtr_;
  if (isdigit(*curCharPtr_)) {
    do {
      seenSeparator |= *curCharPtr_ == '_';
      ++curCharPtr_;
    } while (isdigit(*curCharPtr_) || *curCharPtr_ == '_');
  } else {
    ok = false;
  }

end:
  // We arrive here after we have consumed all we can from the number. Now,
  // as per the spec, we consume a sequence of identifier characters if they
  // follow directly, which means the number is invalid if it's not BigInt.
  if (consumeIdentifierStart()) {
    consumeIdentifierParts<IdentifierMode::JS>();

    llvh::StringRef raw{rawStart, (size_t)(curCharPtr_ - rawStart)};
    if (ok && !real && (!legacyOctal || raw == "0n") && tmpStorage_ == "n") {
      assert(curCharPtr_ > start && "Must consume at least the trailing n.");
      llvh::ArrayRef<char> digits{start, curCharPtr_ - 1};
      // Use parseIntWithRadixDigits to validate the bigint literal's digits.
      // The digits themselves can be ignored, since we're only interested in
      // whether the string was parsed correctly.
      if (digits.size() &&
          parseIntWithRadixDigits</* AllowNumericSeparator */ true>(
              digits, radix, [](uint8_t) {})) {
        // This is a BigInt.
        rawStorage_.clear();
        rawStorage_.append(raw);
        token_.setBigIntLiteral(getStringLiteral(rawStorage_));
        return;
      }

      // This is a BigInt with invalid digits; fail.
    }

    ok = false;
  }

  double val;

  /// ES6.0 B.1.1
  /// If we encounter a "legacy" octal number (starting with a '0') but if
  /// the integer contains '8' or '9' we interpret it as decimal.
  const auto updateLegacyOctalRadix =
      [this, &radix, start, &legacyOctal]() -> void {
    assert(
        legacyOctal &&
        "updateLegacyOctalRadix can only be called in legacyOctal mode");
    (void)legacyOctal;
    for (auto *scanPtr = start; scanPtr != curCharPtr_; ++scanPtr) {
      if (*scanPtr == '.' || *scanPtr == 'e') {
        break;
      }
      if (LLVM_UNLIKELY(*scanPtr >= '8') && LLVM_LIKELY(*scanPtr != '_')) {
        sm_.warning(
            SMRange(token_.getStartLoc(), SMLoc::getFromPointer(curCharPtr_)),
            "Numeric literal starts with 0 but contains an 8 or 9 digit. "
            "Interpreting as decimal (not octal).");
        radix = 10;
        break;
      }
    }
  };

  if (!ok) {
    errorRange(token_.getStartLoc(), "invalid numeric literal");
    val = std::numeric_limits<double>::quiet_NaN();
  } else if (
      !real && radix == 10 && curCharPtr_ - start <= 9 &&
      LLVM_LIKELY(!seenSeparator)) {
    // If this is a decimal integer of at most 9 digits (log10(2**31-1), it
    // can fit in a 32-bit integer. Use a faster conversion.
    int32_t ival = *start - '0';
    while (++start != curCharPtr_)
      ival = ival * 10 + (*start - '0');
    val = ival;
  } else if (real || radix == 10) {
    if (legacyOctal) {
      if (strictMode_ || grammarContext == GrammarContext::Type) {
        if (!errorRange(
                token_.getStartLoc(),
                "Decimals with leading zeros are not allowed in strict mode")) {
          val = std::numeric_limits<double>::quiet_NaN();
          goto done;
        }
      } else {
        // Check to see if we can actually scan this as radix 10.
        // Non-integer numbers must be in base 10, otherwise we error.
        updateLegacyOctalRadix();
        if (LLVM_LIKELY(radix != 10)) {
          if (!errorRange(
                  token_.getStartLoc(),
                  "Octal numeric literals must be integers")) {
            val = std::numeric_limits<double>::quiet_NaN();
            goto done;
          }
        }
      }
    }

    // We need a zero-terminated buffer for hermes_g_strtod().
    llvh::SmallString<32> buf;
    buf.reserve(curCharPtr_ - start + 1);
    if (LLVM_UNLIKELY(seenSeparator)) {
      for (const char *it = start; it != curCharPtr_; ++it) {
        if (LLVM_LIKELY(*it != '_')) {
          buf.push_back(*it);
        } else {
          // Check to ensure that '_' is surrounded by digits.
          // This is safe because the source buffer is zero-terminated and
          // we know that the numeric literal didn't start with '_'.
          // Note that we could have a 0b_11 literal, but we'd still fail
          // properly because of the radix==16 check.
          char prev = *(it - 1);
          char next = *(it + 1);
          if (!isdigit(prev) &&
              !(radix == 16 && 'a' <= (prev | 32) && (prev | 32) <= 'f')) {
            errorRange(
                token_.getStartLoc(),
                "numeric separator must come after a digit");
          } else if (
              !isdigit(next) &&
              !(radix == 16 && 'a' <= (next | 32) && (next | 32) <= 'f')) {
            errorRange(
                token_.getStartLoc(),
                "numeric separator must come before a digit");
          }
        }
      }
    } else {
      buf.append(start, curCharPtr_);
    }
    buf.push_back(0);
    char *endPtr;
    val = ::hermes_g_strtod(buf.data(), &endPtr);
    if (endPtr != &buf.back()) {
      errorRange(token_.getStartLoc(), "invalid numeric literal");
      val = std::numeric_limits<double>::quiet_NaN();
    }
  } else {
    if (legacyOctal &&
        (strictMode_ || grammarContext == GrammarContext::Type) &&
        curCharPtr_ - start > 1) {
      if (!errorRange(
              token_.getStartLoc(),
              "Octal literals must use '0o' in strict mode")) {
        val = std::numeric_limits<double>::quiet_NaN();
        goto done;
      }
    }

    // Handle the zero-radix case. This could only happen with radix 16
    // because otherwise start wouldn't have been changed.
    if (curCharPtr_ == start) {
      errorRange(
          token_.getStartLoc(),
          llvh::Twine("No digits after ") + llvh::StringRef(start - 2, 2));
      val = std::numeric_limits<double>::quiet_NaN();
    } else {
      // Parse the rest of the number:
      if (legacyOctal) {
        updateLegacyOctalRadix();
        // LegacyOctalLikeDecimalIntegerLiteral cannot contain separators.
        if (LLVM_UNLIKELY(seenSeparator)) {
          errorRange(
              token_.getStartLoc(),
              "Numeric separator cannot be used in literal after leading 0");
        }
      }
      auto parsedInt = parseIntWithRadix</* AllowNumericSeparator */ true>(
          llvh::ArrayRef<char>{start, (size_t)(curCharPtr_ - start)}, radix);
      if (!parsedInt) {
        errorRange(token_.getStartLoc(), "invalid integer literal");
        val = std::numeric_limits<double>::quiet_NaN();
      } else {
        val = parsedInt.getValue();
      }
    }
  }

done:
  token_.setNumericLiteral(val);
}

static TokenKind matchReservedWord(const char *str, unsigned len) {
  return llvh::StringSwitch<TokenKind>(llvh::StringRef(str, len))
#define RESWORD(name) .Case(#name, TokenKind::rw_##name)
#include "hermes/Parser/TokenKinds.def"
      .Default(TokenKind::identifier);
}

TokenKind JSLexer::scanReservedWord(const char *start, unsigned length) {
  TokenKind rw = matchReservedWord(start, length);

  // Check for "Future reserved words" which should not be recognised in non-
  // strict mode.
  if (!strictMode_ && rw != TokenKind::identifier) {
    switch (rw) {
      case TokenKind::rw_implements:
      case TokenKind::rw_interface:
      case TokenKind::rw_package:
      case TokenKind::rw_private:
      case TokenKind::rw_protected:
      case TokenKind::rw_public:
      case TokenKind::rw_static:
      case TokenKind::rw_yield:
        rw = TokenKind::identifier;
      default:
        break;
    }
  }
  return rw;
}

template <JSLexer::IdentifierMode Mode>
void JSLexer::scanIdentifierFastPath(const char *start) {
  const char *end = start;

  // Quickly consume the ASCII identifier part.
  char ch;
  do
    ch = (unsigned char)*++end;
  while (ch == '_' || ch == '$' || ((ch | 32) >= 'a' && (ch | 32) <= 'z') ||
         (ch >= '0' && ch <= '9') ||
         (Mode == IdentifierMode::JSX && ch == '-') ||
         (Mode == IdentifierMode::Flow && ch == '@'));

  // Check whether a slow part of the identifier follows.
  if (LLVM_UNLIKELY(ch == '\\')) {
    // An escape. Pass the baton to the slow path.
    initStorageWith(start, end);
    curCharPtr_ = end;
    scanIdentifierParts<Mode>();
    return;
  } else if (LLVM_UNLIKELY(isUTF8Start(ch))) {
    // If we have encountered a Unicode character, we try to decode it. If it
    // can be a part of the identifier,
    // we consume it, otherwise we leave it alone.
    auto decoded = _peekUTF8(end);
    if (isUnicodeIDContinue(decoded.first)) {
      initStorageWith(start, end);
      appendUnicodeToStorage(decoded.first);
      curCharPtr_ = decoded.second;
      scanIdentifierParts<Mode>();
      return;
    }
  }

  curCharPtr_ = end;

  size_t length = end - start;

  auto rw = scanReservedWord(start, (unsigned)length);
  if (rw != TokenKind::identifier) {
    token_.setResWord(rw, resWordIdent(rw));
  } else {
    token_.setIdentifier(getIdentifier(llvh::StringRef(start, length)));
  }
}

template <JSLexer::IdentifierMode Mode>
void JSLexer::scanIdentifierParts() {
  consumeIdentifierParts<Mode>();
  auto rw =
      scanReservedWord(tmpStorage_.str().begin(), tmpStorage_.str().size());
  if (rw != TokenKind::identifier) {
    token_.setResWord(rw, resWordIdent(rw));
    sm_.warning(
        {token_.getStartLoc(), SMLoc::getFromPointer(curCharPtr_)},
        "scanning identifier with unicode escape as reserved word",
        Subsystem::Lexer);
  } else {
    token_.setIdentifier(getIdentifier(tmpStorage_.str()));
  }
}

bool JSLexer::scanPrivateIdentifier() {
  assert(*curCharPtr_ == '#');

  // Skip the '#'.
  const char *start = curCharPtr_;
  ++curCharPtr_;

  // Scan the actual identifier.
  if (LLVM_LIKELY(isASCIIIdentifierStart(*curCharPtr_))) {
    scanIdentifierFastPath<IdentifierMode::JS>(curCharPtr_);
  } else if (consumeIdentifierStart()) {
    // curCharPtr_ has been updated by consumeIdentifierStart.
    scanIdentifierParts<IdentifierMode::JS>();
  } else {
    error(SMLoc::getFromPointer(start), "empty private identifier");
    return false;
  }

  // Parsed a resword or identifier.
  // Convert the TokenKind to private_identifier after the fact.
  // This avoids adding another Mode to IdentifierMode.
  token_.setPrivateIdentifier(token_.getResWordOrIdentifier());

  return true;
}

template <bool JSX>
void JSLexer::scanString() {
  assert(*curCharPtr_ == '\'' || *curCharPtr_ == '"');
  char quoteCh = *curCharPtr_++;

  // Track whether we encounter any escapes or new line continuations. We need
  // that information in order to detect directives.
  bool escapes = false;

  tmpStorage_.clear();

  for (;;) {
    if (*curCharPtr_ == quoteCh) {
      ++curCharPtr_;
      break;
    } else if (!JSX && *curCharPtr_ == '\\') {
      escapes = true;
      ++curCharPtr_;
      switch ((unsigned char)*curCharPtr_) {
        case '\'':
        case '"':
        case '\\':
          tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          break;

        case 'b':
          ++curCharPtr_;
          tmpStorage_.push_back(8);
          break;
        case 'f':
          ++curCharPtr_;
          tmpStorage_.push_back(12);
          break;
        case 'n':
          ++curCharPtr_;
          tmpStorage_.push_back(10);
          break;
        case 'r':
          ++curCharPtr_;
          tmpStorage_.push_back(13);
          break;
        case 't':
          ++curCharPtr_;
          tmpStorage_.push_back(9);
          break;
        case 'v':
          ++curCharPtr_;
          tmpStorage_.push_back(11);
          break;

        case '\0': // EOF?
          if (curCharPtr_ == bufferEnd_) { // eof?
            error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
            sm_.note(token_.getStartLoc(), "string started here");
            goto breakLoop;
          } else {
            tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          }
          break;

        case '0':
          // '\0' is not an octal so handle it separately.
          if (!(curCharPtr_[1] >= '0' && curCharPtr_[1] <= '7')) {
            ++curCharPtr_;
            appendUnicodeToStorage(0);
            break;
          }
          [[fallthrough]];
        case '1':
        case '2':
        case '3':
          appendUnicodeToStorage(consumeOctal(3));
          break;
        case '4':
        case '5':
        case '6':
        case '7':
          appendUnicodeToStorage(consumeOctal(2));
          break;

        case 'x': {
          ++curCharPtr_;
          auto v = consumeHex(2);
          appendUnicodeToStorage(v ? *v : 0);
          break;
        }

        case 'u':
          --curCharPtr_;
          appendUnicodeToStorage(consumeUnicodeEscape());
          break;

        // Escaped line terminator. We just need to skip it.
        case '\n':
          ++curCharPtr_;
          break;
        case '\r':
          ++curCharPtr_;
          if (*curCharPtr_ == '\n') // skip CR LF
            ++curCharPtr_;
          break;
        case UTF8_LINE_TERMINATOR_CHAR0:
          if (matchUnicodeLineTerminatorOffset1(curCharPtr_)) {
            curCharPtr_ += 3;
            break;
          }
          appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
          break;

        default:
          if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_)))
            appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
          else
            tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          break;
      }
    } else if (LLVM_UNLIKELY(*curCharPtr_ == '\n' || *curCharPtr_ == '\r')) {
      if (JSX) {
        tmpStorage_.push_back(*curCharPtr_++);
      } else {
        error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
        sm_.note(token_.getStartLoc(), "string started here");
        break;
      }
#if HERMES_PARSE_JSX
    } else if (LLVM_UNLIKELY(JSX && *curCharPtr_ == '&')) {
      auto codePoint = consumeHTMLEntityOptional();
      if (codePoint.hasValue()) {
        appendUnicodeToStorage(*codePoint);
      } else {
        tmpStorage_.push_back(*curCharPtr_++);
      }
#endif
    } else if (LLVM_UNLIKELY(*curCharPtr_ == 0 && curCharPtr_ == bufferEnd_)) {
      error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
      sm_.note(token_.getStartLoc(), "string started here");
      break;
    } else {
      if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_))) {
        // Decode and re-encode the character and append it to the string
        // storage
        appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
      } else {
        tmpStorage_.push_back(*curCharPtr_++);
      }
    }
  }
breakLoop:
  token_.setStringLiteral(getStringLiteral(tmpStorage_.str()), escapes);
}

void JSLexer::scanTemplateLiteral() {
  assert(*curCharPtr_ == '`' || *curCharPtr_ == '}');

  // Whether the token will result in TemplateHead upon encountering ${.
  // If we end the literal with `, then the result is NoSubstitutionTemplate,
  // so this will be ignored.
  bool isHead = *curCharPtr_ == '`';

  // If the token ended with a ` then it's a tail (or NoSubstitutionTemplate),
  // and if it ended with a ${ then it's not a tail.
  bool isTail = false;

  // Advance past the initial `.
  ++curCharPtr_;

  // Track whether we encounter any NotEscapeSequence instances,
  // which will be used to error out on non-tagged sequences.
  bool foundNotEscapeSequence = false;

  // Store the Template Value (TV) in the tmpStorage_.
  tmpStorage_.clear();

  // Store the Template Raw Value (TRV) in the rawStorage_.
  rawStorage_.clear();

  /// Return the Template Raw Value (TRV) of character \p c.
  /// The only time the TRV is different from c is when c is a <CR>.
  /// In that case, this function will return 0x0a (LINE FEED).
  const auto trv = [](char c) -> char {
    if (c == '\r') {
      // This case takes \r and \r\n into account.
      // The code below which consumes line separators will skip the following
      // \n if there is a \r\n.
      // For the purposes of finding the TRV it doesn't matter.
      return 0x0a;
    }
    return c;
  };

  for (;;) {
    if (*curCharPtr_ == '`') {
      isTail = true;
      ++curCharPtr_;
      break;
    } else if (*curCharPtr_ == '$' && curCharPtr_[1] == '{') {
      // End of the TemplateCharacters.
      isTail = false;
      curCharPtr_ += 2;
      break;
    } else if (*curCharPtr_ == '\\') {
      rawStorage_.push_back(*curCharPtr_);
      ++curCharPtr_;
      rawStorage_.push_back(trv(*curCharPtr_));
      switch ((unsigned char)*curCharPtr_) {
        case '\'':
        case '"':
        case '\\':
          tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          break;

        case 'b':
          ++curCharPtr_;
          tmpStorage_.push_back(8);
          break;
        case 'f':
          ++curCharPtr_;
          tmpStorage_.push_back(12);
          break;
        case 'n':
          ++curCharPtr_;
          tmpStorage_.push_back(10);
          break;
        case 'r':
          ++curCharPtr_;
          tmpStorage_.push_back(13);
          break;
        case 't':
          ++curCharPtr_;
          tmpStorage_.push_back(9);
          break;
        case 'v':
          ++curCharPtr_;
          tmpStorage_.push_back(11);
          break;

        case '\0': // EOF?
          if (curCharPtr_ == bufferEnd_) { // eof?
            error(
                SMLoc::getFromPointer(curCharPtr_),
                "non-terminated template literal");
            sm_.note(token_.getStartLoc(), "template literal started here");
            goto breakLoop;
          } else {
            tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          }
          break;

        case '0':
          // '\0' is only a valid escape sequence if not followed by a
          // DecimalDigit.
          if (!(curCharPtr_[1] >= '0' && curCharPtr_[1] <= '9')) {
            ++curCharPtr_;
            appendUnicodeToStorage(0);
            break;
          }
          [[fallthrough]];

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          // NotEscapeSequence :: DecimalDigit but not 0
          // NotEscapeSequence :: 0 DecimalDigit
          // Octal numbers are not supported in template strings,
          // so leave the number in the raw storage (done above) and move on.
          ++curCharPtr_;
          foundNotEscapeSequence = true;
          break;

        case 'x': {
          ++curCharPtr_;
          const char *start = curCharPtr_;
          auto v = consumeHex(2, false);
          if (!v) {
            foundNotEscapeSequence = true;
          }
          appendUnicodeToStorage(v ? *v : 0);
          rawStorage_.append({start, (size_t)(curCharPtr_ - start)});
          break;
        }

        case 'u': {
          // Pointer to the first character after the 'u', which is where we
          // can continue scanning from if we fail to decode an escape.
          const char *start = curCharPtr_ + 1;
          // Reset the pointer to the '\' to scan the unicode escape.
          --curCharPtr_;
          assert(*curCharPtr_ == '\\' && "must have started with \\");
          auto codepoint = consumeUnicodeEscapeOptional();
          if (!codepoint) {
            foundNotEscapeSequence = true;
            curCharPtr_ = start;
            break;
          }
          appendUnicodeToStorage(*codepoint);
          rawStorage_.append({start, (size_t)(curCharPtr_ - start)});
          break;
        }

        // Escaped line terminator. We just need to skip it, because it was
        // added to the raw storage at the start of the switch statement.
        case '\n':
          ++curCharPtr_;
          break;
        case '\r':
          ++curCharPtr_;
          if (*curCharPtr_ == '\n') // skip CR LF
            ++curCharPtr_;
          break;
        case UTF8_LINE_TERMINATOR_CHAR0: {
          bool isLineTerminator =
              matchUnicodeLineTerminatorOffset1(curCharPtr_);
          uint32_t codepoint = _decodeUTF8SlowPath(curCharPtr_);
          // Needs to be added to the rawStorage_ regardless,
          // but we first need to pop off the byte that was added prior to the
          // switch statement.
          rawStorage_.pop_back();
          appendUnicodeToStorage(codepoint, rawStorage_);
          if (!isLineTerminator) {
            // Only add the codepoint to the tmpStorage if it wasn't a line
            // terminator.
            appendUnicodeToStorage(codepoint);
          }
          break;
        }

        default:
          if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_))) {
            uint32_t codepoint = _decodeUTF8SlowPath(curCharPtr_);
            appendUnicodeToStorage(codepoint);
            // Remove the last byte from rawStorage_ and then append the
            // unicode codepoint to it. The already inserted byte will change
            // if this codepoint is in Supplementary Planes.
            rawStorage_.pop_back();
            appendUnicodeToStorage(codepoint, rawStorage_);
          } else {
            // The TV of EscapeSequence is the SV of EscapeSequence.
            tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          }
          break;
      }
    } else if (LLVM_UNLIKELY(*curCharPtr_ == 0 && curCharPtr_ == bufferEnd_)) {
      error(
          SMLoc::getFromPointer(curCharPtr_),
          "non-terminated template literal");
      sm_.note(token_.getStartLoc(), "template literal started here");
      break;
    } else if (*curCharPtr_ == '\r') {
      // The TV of LineTerminatorSequence is the TRV of
      // LineTerminatorSequence. The only time this differs from the same
      // characters as the bytes in the file is when the sequence begins with
      // a <CR>.
      tmpStorage_.push_back(trv(*curCharPtr_));
      rawStorage_.push_back(trv(*curCharPtr_));
      curCharPtr_++;
      if (*curCharPtr_ == '\n') {
        // Skip the <CR> <LF>
        curCharPtr_++;
      }
    } else {
      if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_))) {
        // Decode and re-encode the character and append it to the string
        // storage
        uint32_t codepoint = _decodeUTF8SlowPath(curCharPtr_);
        appendUnicodeToStorage(codepoint);
        appendUnicodeToStorage(codepoint, rawStorage_);
      } else {
        rawStorage_.push_back(*curCharPtr_);
        tmpStorage_.push_back(*curCharPtr_++);
      }
    }
  }
breakLoop:
  // If the template literal is tagged and contains invalid escapes, then
  // cooked should be null because there is no way to cook it, per the ESTree
  // 2018 spec. The parser will error when encountering an untagged literal
  // with invalid escapes, so we place nullptr here.
  UniqueString *cookedStr =
      foundNotEscapeSequence ? nullptr : getStringLiteral(tmpStorage_.str());
  UniqueString *rawStr = getStringLiteral(rawStorage_.str());
  if (isHead) {
    if (isTail) {
      // ` characters `
      token_.setTemplateLiteral(
          TokenKind::no_substitution_template, cookedStr, rawStr);
    } else {
      // ` characters ${
      token_.setTemplateLiteral(TokenKind::template_head, cookedStr, rawStr);
    }
  } else {
    if (isTail) {
      // } characters `
      token_.setTemplateLiteral(TokenKind::template_tail, cookedStr, rawStr);
    } else {
      // } characters ${
      token_.setTemplateLiteral(TokenKind::template_middle, cookedStr, rawStr);
    }
  }
}

/// TODO: this has to be implemented properly.
void JSLexer::scanRegExp() {
  SMLoc startLoc = SMLoc::getFromPointer(curCharPtr_);
  assert(*curCharPtr_ == '/');
  ++curCharPtr_;

  tmpStorage_.clear();
  bool inClass = false;

  for (;;) {
    switch ((unsigned char)*curCharPtr_) {
      case '/':
        if (!inClass) {
          ++curCharPtr_;
          goto exitLoop;
        }
        break;

      case '[':
        inClass = true; // It may be true already, but so what.
        break;

      case ']':
        inClass = false; // It may be false already, but so what.
        break;

      case '\\': // an escape
        tmpStorage_.push_back((unsigned char)*curCharPtr_);
        ++curCharPtr_;
        switch ((unsigned char)*curCharPtr_) {
          case '\0':
            if (curCharPtr_ == bufferEnd_)
              goto unterminated;
            break;
          case UTF8_LINE_TERMINATOR_CHAR0:
            if (matchUnicodeLineTerminatorOffset1(curCharPtr_))
              goto unterminated;
            break;
          case '\n':
          case '\r':
            goto unterminated;
        }
        break;

      case '\0':
        if (curCharPtr_ == bufferEnd_)
          goto unterminated;
        break;
      case UTF8_LINE_TERMINATOR_CHAR0:
        if (matchUnicodeLineTerminatorOffset1(curCharPtr_))
          goto unterminated;
        break;

      case '\n':
      case '\r':
      unterminated:
        error(
            SMLoc::getFromPointer(curCharPtr_),
            "non-terminated regular expression literal");
        sm_.note(startLoc, "regular expression started here");
        goto exitLoop;
    }

    if (LLVM_UNLIKELY(isUTF8Start((unsigned char)*curCharPtr_)))
      appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
    else
      tmpStorage_.push_back((unsigned char)*curCharPtr_++);
  }
exitLoop:
  UniqueString *body = getStringLiteral(tmpStorage_.str());

  // Scan the flags. We must not interpret escape sequences.
  // E6 5.1 7.8.5: "The Strings of characters comprising the
  // RegularExpressionBody and the RegularExpressionFlags are passed
  // uninterpreted to the regular expression constructor"
  tmpStorage_.clear();
  bool escapingBackslash = false;
  for (;;) {
    if (consumeOneIdentifierPartNoEscape<IdentifierMode::JS>()) {
      escapingBackslash = false;
      continue;
    } else if (*curCharPtr_ == '\\') {
      tmpStorage_.push_back(*curCharPtr_++);

      // ES6 11.8.5.1: It is a Syntax Error if IdentifierPart contains a
      // Unicode escape sequence.
      escapingBackslash = !escapingBackslash;
      if (escapingBackslash && *curCharPtr_ == 'u') {
        error(
            SMLoc::getFromPointer(curCharPtr_),
            "Unicode escape sequences are not allowed in regular expression flags");
      }
    } else {
      break;
    }
  }

  UniqueString *flags = getStringLiteral(tmpStorage_.str());

  token_.setRegExpLiteral(new (allocator_.Allocate<RegExpLiteral>(1))
                              RegExpLiteral(body, flags));
}

UniqueString *JSLexer::convertSurrogatesInString(llvh::StringRef str) {
  std::string output;
  convertUTF8WithSurrogatesToUTF8WithReplacements(output, str);
  return strTab_.getString(output);
}

bool JSLexer::error(llvh::SMLoc loc, const llvh::Twine &msg) {
  sm_.error(loc, msg, Subsystem::Lexer);
  if (!sm_.isErrorLimitReached())
    return true;
  forceEOF();
  return false;
}

bool JSLexer::error(llvh::SMRange range, const llvh::Twine &msg) {
  sm_.error(range, msg, Subsystem::Lexer);
  if (!sm_.isErrorLimitReached())
    return true;
  forceEOF();
  return false;
}

bool JSLexer::error(
    llvh::SMLoc loc,
    llvh::SMRange range,
    const llvh::Twine &msg) {
  sm_.error(loc, range, msg, Subsystem::Lexer);
  if (!sm_.isErrorLimitReached())
    return true;
  forceEOF();
  return false;
}

} // namespace parser
} // namespace hermes

Coverage Report

Created: 2023-11-19 07:23