Coverage Report

Created: 2025-12-11 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hermes/include/hermes/Support/UTF8.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#ifndef HERMES_SUPPORT_UTF8_H
9
#define HERMES_SUPPORT_UTF8_H
10
11
#include "hermes/Platform/Unicode/CharacterProperties.h"
12
#include "llvh/ADT/ArrayRef.h"
13
#include "llvh/ADT/Twine.h"
14
#include "llvh/Support/Compiler.h"
15
16
#include <cstddef>
17
18
namespace hermes {
19
20
/// Maximum number of bytes in a valid UTF-8 codepoint
21
constexpr size_t UTF8CodepointMaxBytes = 6;
22
23
/// Encode a unicode code point as a UTF-8 sequence of bytes.
24
void encodeUTF8(char *&dst, uint32_t cp);
25
26
/// Check whether a byte is a regular ASCII or a UTF8 starting byte.
27
/// \return true if it is UTF8 starting byte.
28
16.1M
inline bool isUTF8Start(char ch) {
29
16.1M
  return (ch & 0x80) != 0;
30
16.1M
}
31
32
/// \return true if this is a UTF-8 leading byte.
33
0
inline bool isUTF8LeadingByte(char ch) {
34
0
  return (ch & 0xC0) == 0xC0;
35
0
}
36
37
/// \return true if this is a UTF-8 continuation byte, or in other words, this
38
/// is a byte in the "middle" of a UTF-8 codepoint.
39
1.51M
inline static bool isUTF8ContinuationByte(char ch) {
40
1.51M
  return (ch & 0xC0) == 0x80;
41
1.51M
}
Unexecuted instantiation: hermes.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DebuggerAPI.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BigIntPrimitive.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Callable.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: CodeBlock.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Domain.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: GCBase.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HeapSnapshot.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HiddenClass.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: IdentifierTable.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Interpreter.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Interpreter-slowpaths.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSArray.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSArrayBuffer.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSDataView.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSDate.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSError.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSGenerator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSObject.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSProxy.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSRegExp.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSMapImpl.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSNativeFunctions.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSTypedArray.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSWeakMapImpl.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSWeakRef.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DecoratedObject.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HostModel.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: NativeState.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Operations.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: PrimitiveBox.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: PropertyAccessor.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Runtime.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: RuntimeModule.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: CodeCoverageProfiler.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SamplingProfiler.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SamplingProfilerPosix.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SamplingProfilerSampler.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SegmentedArray.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SerializedLiteralParser.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: StackTracesTree.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: StringPrimitive.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: StringView.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SymbolRegistry.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: TimeLimitMonitor.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: TwineChar16.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: StringRefUtils.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: IdentifierHashTable.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Array.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ArrayBuffer.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ArrayIterator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: AsyncFunction.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Base64.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Base64Util.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BigInt.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: CallSite.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DataView.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: TypedArray.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Error.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: GeneratorFunction.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: GeneratorPrototype.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: GlobalObject.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: IteratorPrototype.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HermesInternal.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HermesBuiltin.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSLibInternal.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSLibStorage.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Map.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Math.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSON.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: RuntimeJSONUtils.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSONLexer.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Object.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Proxy.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Reflect.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Set.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: String.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: StringIterator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Function.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Number.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Boolean.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: RegExp.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: RegExpStringIterator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DateUtil.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DateCache.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Symbol.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Date.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: WeakMap.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: WeakRef.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: WeakSet.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: print.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: eval.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: escape.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: require.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: TextEncoder.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: AlignedHeapSegment.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HadesGC.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Debugger.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DebuggerInternal.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ArrayStorage.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: CheckHeapWellFormedAcceptor.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DictPropertyMap.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DummyObject.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: OrderedHashMap.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HermesValue.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSCallSite.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSCallableProxy.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSRegExpStringIterator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ChromeTraceSerializer.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ProfileGenerator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SingleObject.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: FillerCell.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BoxedDouble.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: HBC.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ISel.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Bytecode.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BytecodeStream.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BytecodeGenerator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BytecodeDataProvider.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: BytecodeProviderFromSrc.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ConsecutiveStringStorage.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: DebugInfo.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: Passes.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SerializedLiteralGenerator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: IRGen.cpp:hermes::isUTF8ContinuationByte(char)
ESTreeIRGen-expr.cpp:hermes::isUTF8ContinuationByte(char)
Line
Count
Source
39
1.53k
inline static bool isUTF8ContinuationByte(char ch) {
40
1.53k
  return (ch & 0xC0) == 0x80;
41
1.53k
}
Unexecuted instantiation: SourceMapGenerator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SourceMapParser.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SourceMapTranslator.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: SourceMap.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSONParser.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSParser.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSParserImpl.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSParserImpl-flow.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSParserImpl-jsx.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSParserImpl-ts.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSLexer.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: ES6Class.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: RegexSerialization.cpp:hermes::isUTF8ContinuationByte(char)
Unexecuted instantiation: JSONEmitter.cpp:hermes::isUTF8ContinuationByte(char)
SourceErrorManager.cpp:hermes::isUTF8ContinuationByte(char)
Line
Count
Source
39
1.51M
inline static bool isUTF8ContinuationByte(char ch) {
40
1.51M
  return (ch & 0xC0) == 0x80;
41
1.51M
}
Unexecuted instantiation: UTF8.cpp:hermes::isUTF8ContinuationByte(char)
42
43
/// \return true if this is a valid ASCII character.
44
/// As in the range of 0-127.
45
template <typename Char>
46
39.2k
bool isASCII(Char c) {
47
  // We start with a mask representing all valid set bits of ASCII. Flip the
48
  // mask, so it now represents all invalid bits. Test if any bit is set that
49
  // would make it an invalid ASCII character.
50
39.2k
  constexpr uint32_t asciiMask = 0x7f;
51
39.2k
  return (c & static_cast<Char>(~asciiMask)) == 0;
52
39.2k
}
bool hermes::isASCII<char16_t>(char16_t)
Line
Count
Source
46
39.2k
bool isASCII(Char c) {
47
  // We start with a mask representing all valid set bits of ASCII. Flip the
48
  // mask, so it now represents all invalid bits. Test if any bit is set that
49
  // would make it an invalid ASCII character.
50
39.2k
  constexpr uint32_t asciiMask = 0x7f;
51
39.2k
  return (c & static_cast<Char>(~asciiMask)) == 0;
52
39.2k
}
Unexecuted instantiation: bool hermes::isASCII<char>(char)
53
54
/// \return true if this is a pure ASCII char sequence.
55
template <typename Iter>
56
1.15k
inline bool isAllASCII(Iter begin, Iter end) {
57
40.0k
  while (begin < end) {
58
39.2k
    if (!isASCII(*begin))
59
286
      return false;
60
38.9k
    ++begin;
61
38.9k
  }
62
872
  return true;
63
1.15k
}
bool hermes::isAllASCII<char16_t const*>(char16_t const*, char16_t const*)
Line
Count
Source
56
1.15k
inline bool isAllASCII(Iter begin, Iter end) {
57
40.0k
  while (begin < end) {
58
39.2k
    if (!isASCII(*begin))
59
286
      return false;
60
38.9k
    ++begin;
61
38.9k
  }
62
872
  return true;
63
1.15k
}
Unexecuted instantiation: bool hermes::isAllASCII<std::__1::__wrap_iter<char*> >(std::__1::__wrap_iter<char*>, std::__1::__wrap_iter<char*>)
64
65
/// Overload for char* and uint8_t*.
66
bool isAllASCII(const uint8_t *start, const uint8_t *end);
67
68
94
inline bool isAllASCII(const char *start, const char *end) {
69
94
  return isAllASCII((const uint8_t *)start, (const uint8_t *)end);
70
94
}
71
72
/// Decode a sequence of UTF8 encoded bytes when it is known that the first byte
73
/// is a start of an UTF8 sequence.
74
/// \tparam allowSurrogates when false, values in the surrogate range are
75
///     reported as errors
76
template <bool allowSurrogates, typename F>
77
93.6k
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
93.6k
  uint32_t ch = (uint32_t)from[0];
79
93.6k
  uint32_t result;
80
81
93.6k
  assert(isUTF8Start(ch));
82
83
93.6k
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
81.3k
    uint32_t ch1 = (uint32_t)from[1];
85
81.3k
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
1
      from += 1;
87
1
      error("Invalid UTF-8 continuation byte");
88
1
      return UNICODE_REPLACEMENT_CHARACTER;
89
1
    }
90
91
81.3k
    from += 2;
92
81.3k
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
81.3k
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
81.3k
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
8.81k
    uint32_t ch1 = (uint32_t)from[1];
100
8.81k
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
1
      from += 1;
102
1
      error("Invalid UTF-8 continuation byte");
103
1
      return UNICODE_REPLACEMENT_CHARACTER;
104
1
    }
105
8.81k
    uint32_t ch2 = (uint32_t)from[2];
106
8.81k
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
8.81k
    from += 3;
112
8.81k
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
8.81k
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
8.81k
    if (LLVM_UNLIKELY(
118
8.81k
            result >= UNICODE_SURROGATE_FIRST &&
119
8.81k
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
2
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
2
      return UNICODE_REPLACEMENT_CHARACTER;
122
2
    }
123
124
8.81k
  } else if ((ch & 0xF8) == 0xF0) {
125
3.46k
    uint32_t ch1 = (uint32_t)from[1];
126
3.46k
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
3.46k
    uint32_t ch2 = (uint32_t)from[2];
132
3.46k
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
3.46k
    uint32_t ch3 = (uint32_t)from[3];
138
3.46k
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
3.46k
    from += 4;
144
3.46k
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
3.46k
        (ch3 & 0x3F);
146
3.46k
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
3.46k
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
3.46k
  } else {
156
10
    from += 1;
157
10
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
10
    return UNICODE_REPLACEMENT_CHARACTER;
159
10
  }
160
161
93.6k
  return result;
162
93.6k
}
unsigned int hermes::_decodeUTF8SlowPath<false, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1})
Line
Count
Source
77
4.05k
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
4.05k
  uint32_t ch = (uint32_t)from[0];
79
4.05k
  uint32_t result;
80
81
4.05k
  assert(isUTF8Start(ch));
82
83
4.05k
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
3.97k
    uint32_t ch1 = (uint32_t)from[1];
85
3.97k
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
3.97k
    from += 2;
92
3.97k
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
3.97k
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
3.97k
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
74
    uint32_t ch1 = (uint32_t)from[1];
100
74
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
1
      from += 1;
102
1
      error("Invalid UTF-8 continuation byte");
103
1
      return UNICODE_REPLACEMENT_CHARACTER;
104
1
    }
105
73
    uint32_t ch2 = (uint32_t)from[2];
106
73
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
73
    from += 3;
112
73
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
73
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
73
    if (LLVM_UNLIKELY(
118
73
            result >= UNICODE_SURROGATE_FIRST &&
119
73
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
73
  } else if ((ch & 0xF8) == 0xF0) {
125
5
    uint32_t ch1 = (uint32_t)from[1];
126
5
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
5
    uint32_t ch2 = (uint32_t)from[2];
132
5
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
5
    uint32_t ch3 = (uint32_t)from[3];
138
5
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
5
    from += 4;
144
5
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
5
        (ch3 & 0x3F);
146
5
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
5
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
5
  } else {
156
3
    from += 1;
157
3
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
3
    return UNICODE_REPLACEMENT_CHARACTER;
159
3
  }
160
161
4.05k
  return result;
162
4.05k
}
unsigned int hermes::_decodeUTF8SlowPath<false, hermes::parser::JSLexer::_decodeUTF8SlowPath(char const*&)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::_decodeUTF8SlowPath(char const*&)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
77
12.3k
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
12.3k
  uint32_t ch = (uint32_t)from[0];
79
12.3k
  uint32_t result;
80
81
12.3k
  assert(isUTF8Start(ch));
82
83
12.3k
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
6.03k
    uint32_t ch1 = (uint32_t)from[1];
85
6.03k
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
1
      from += 1;
87
1
      error("Invalid UTF-8 continuation byte");
88
1
      return UNICODE_REPLACEMENT_CHARACTER;
89
1
    }
90
91
6.03k
    from += 2;
92
6.03k
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
6.03k
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
6.33k
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
3.04k
    uint32_t ch1 = (uint32_t)from[1];
100
3.04k
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
3.04k
    uint32_t ch2 = (uint32_t)from[2];
106
3.04k
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
3.04k
    from += 3;
112
3.04k
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
3.04k
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
3.04k
    if (LLVM_UNLIKELY(
118
3.04k
            result >= UNICODE_SURROGATE_FIRST &&
119
3.04k
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
3.28k
  } else if ((ch & 0xF8) == 0xF0) {
125
3.28k
    uint32_t ch1 = (uint32_t)from[1];
126
3.28k
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
3.28k
    uint32_t ch2 = (uint32_t)from[2];
132
3.28k
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
3.28k
    uint32_t ch3 = (uint32_t)from[3];
138
3.28k
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
3.28k
    from += 4;
144
3.28k
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
3.28k
        (ch3 & 0x3F);
146
3.28k
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
3.28k
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
3.28k
  } else {
156
4
    from += 1;
157
4
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
4
    return UNICODE_REPLACEMENT_CHARACTER;
159
4
  }
160
161
12.3k
  return result;
162
12.3k
}
unsigned int hermes::_decodeUTF8SlowPath<false, hermes::parser::JSLexer::_peekUTF8(char const*) const::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::_peekUTF8(char const*) const::{lambda(llvh::Twine const&)#1})
Line
Count
Source
77
69.1k
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
69.1k
  uint32_t ch = (uint32_t)from[0];
79
69.1k
  uint32_t result;
80
81
69.1k
  assert(isUTF8Start(ch));
82
83
69.1k
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
68.9k
    uint32_t ch1 = (uint32_t)from[1];
85
68.9k
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
68.9k
    from += 2;
92
68.9k
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
68.9k
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
68.9k
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
68
    uint32_t ch1 = (uint32_t)from[1];
100
68
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
68
    uint32_t ch2 = (uint32_t)from[2];
106
68
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
68
    from += 3;
112
68
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
68
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
68
    if (LLVM_UNLIKELY(
118
68
            result >= UNICODE_SURROGATE_FIRST &&
119
68
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
178
  } else if ((ch & 0xF8) == 0xF0) {
125
175
    uint32_t ch1 = (uint32_t)from[1];
126
175
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
175
    uint32_t ch2 = (uint32_t)from[2];
132
175
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
175
    uint32_t ch3 = (uint32_t)from[3];
138
175
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
175
    from += 4;
144
175
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
175
        (ch3 & 0x3F);
146
175
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
175
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
175
  } else {
156
3
    from += 1;
157
3
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
3
    return UNICODE_REPLACEMENT_CHARACTER;
159
3
  }
160
161
69.1k
  return result;
162
69.1k
}
unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
77
3.53k
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
3.53k
  uint32_t ch = (uint32_t)from[0];
79
3.53k
  uint32_t result;
80
81
3.53k
  assert(isUTF8Start(ch));
82
83
3.53k
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
549
    uint32_t ch1 = (uint32_t)from[1];
85
549
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
549
    from += 2;
92
549
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
549
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
2.98k
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
2.98k
    uint32_t ch1 = (uint32_t)from[1];
100
2.98k
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
2.98k
    uint32_t ch2 = (uint32_t)from[2];
106
2.98k
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
2.98k
    from += 3;
112
2.98k
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
2.98k
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
2.98k
    if (LLVM_UNLIKELY(
118
2.98k
            result >= UNICODE_SURROGATE_FIRST &&
119
2.98k
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
2.98k
  } else if ((ch & 0xF8) == 0xF0) {
125
0
    uint32_t ch1 = (uint32_t)from[1];
126
0
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
0
    uint32_t ch2 = (uint32_t)from[2];
132
0
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
0
    uint32_t ch3 = (uint32_t)from[3];
138
0
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
0
    from += 4;
144
0
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
0
        (ch3 & 0x3F);
146
0
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
0
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
0
  } else {
156
0
    from += 1;
157
0
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
0
    return UNICODE_REPLACEMENT_CHARACTER;
159
0
  }
160
161
3.53k
  return result;
162
3.53k
}
Unexecuted instantiation: HBC.cpp:unsigned int hermes::_decodeUTF8SlowPath<false, hermes::hbc::generateBytecodeModule(hermes::Module*, hermes::Function*, hermes::Function*, hermes::BytecodeGenerationOptions const&, hermes::OptValue<unsigned int>, hermes::SourceMapGenerator*, std::__1::unique_ptr<hermes::hbc::BCProviderBase, std::__1::default_delete<hermes::hbc::BCProviderBase> >)::$_4::operator()(llvh::StringRef) const::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::hbc::generateBytecodeModule(hermes::Module*, hermes::Function*, hermes::Function*, hermes::BytecodeGenerationOptions const&, hermes::OptValue<unsigned int>, hermes::SourceMapGenerator*, std::__1::unique_ptr<hermes::hbc::BCProviderBase, std::__1::default_delete<hermes::hbc::BCProviderBase> >)::$_4::operator()(llvh::StringRef) const::{lambda(llvh::Twine const&)#1})
BytecodeGenerator.cpp:unsigned int hermes::_decodeUTF8SlowPath<false, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0>(char const*&, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0)
Line
Count
Source
77
2
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
2
  uint32_t ch = (uint32_t)from[0];
79
2
  uint32_t result;
80
81
2
  assert(isUTF8Start(ch));
82
83
2
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
0
    uint32_t ch1 = (uint32_t)from[1];
85
0
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
0
    from += 2;
92
0
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
0
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
2
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
2
    uint32_t ch1 = (uint32_t)from[1];
100
2
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
2
    uint32_t ch2 = (uint32_t)from[2];
106
2
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
2
    from += 3;
112
2
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
2
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
2
    if (LLVM_UNLIKELY(
118
2
            result >= UNICODE_SURROGATE_FIRST &&
119
2
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
2
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
2
      return UNICODE_REPLACEMENT_CHARACTER;
122
2
    }
123
124
2
  } else if ((ch & 0xF8) == 0xF0) {
125
0
    uint32_t ch1 = (uint32_t)from[1];
126
0
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
0
    uint32_t ch2 = (uint32_t)from[2];
132
0
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
0
    uint32_t ch3 = (uint32_t)from[3];
138
0
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
0
    from += 4;
144
0
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
0
        (ch3 & 0x3F);
146
0
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
0
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
0
  } else {
156
0
    from += 1;
157
0
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
0
    return UNICODE_REPLACEMENT_CHARACTER;
159
0
  }
160
161
0
  return result;
162
2
}
unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
77
4.52k
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
4.52k
  uint32_t ch = (uint32_t)from[0];
79
4.52k
  uint32_t result;
80
81
4.52k
  assert(isUTF8Start(ch));
82
83
4.52k
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
1.89k
    uint32_t ch1 = (uint32_t)from[1];
85
1.89k
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
1.89k
    from += 2;
92
1.89k
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
1.89k
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
2.62k
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
2.62k
    uint32_t ch1 = (uint32_t)from[1];
100
2.62k
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
2.62k
    uint32_t ch2 = (uint32_t)from[2];
106
2.62k
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
2.62k
    from += 3;
112
2.62k
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
2.62k
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
2.62k
    if (LLVM_UNLIKELY(
118
2.62k
            result >= UNICODE_SURROGATE_FIRST &&
119
2.62k
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
2.62k
  } else if ((ch & 0xF8) == 0xF0) {
125
0
    uint32_t ch1 = (uint32_t)from[1];
126
0
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
0
    uint32_t ch2 = (uint32_t)from[2];
132
0
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
0
    uint32_t ch3 = (uint32_t)from[3];
138
0
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
0
    from += 4;
144
0
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
0
        (ch3 & 0x3F);
146
0
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
0
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
0
  } else {
156
0
    from += 1;
157
0
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
0
    return UNICODE_REPLACEMENT_CHARACTER;
159
0
  }
160
161
4.52k
  return result;
162
4.52k
}
DebugInfo.cpp:unsigned int hermes::_decodeUTF8SlowPath<false, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0>(char const*&, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0)
Line
Count
Source
77
6
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
6
  uint32_t ch = (uint32_t)from[0];
79
6
  uint32_t result;
80
81
6
  assert(isUTF8Start(ch));
82
83
6
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
0
    uint32_t ch1 = (uint32_t)from[1];
85
0
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
0
    from += 2;
92
0
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
0
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
6
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
4
    uint32_t ch1 = (uint32_t)from[1];
100
4
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
4
    uint32_t ch2 = (uint32_t)from[2];
106
4
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
4
    from += 3;
112
4
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
4
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
4
    if (LLVM_UNLIKELY(
118
4
            result >= UNICODE_SURROGATE_FIRST &&
119
4
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
4
  } else if ((ch & 0xF8) == 0xF0) {
125
2
    uint32_t ch1 = (uint32_t)from[1];
126
2
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
2
    uint32_t ch2 = (uint32_t)from[2];
132
2
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
2
    uint32_t ch3 = (uint32_t)from[3];
138
2
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
2
    from += 4;
144
2
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
2
        (ch3 & 0x3F);
146
2
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
2
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
2
  } else {
156
0
    from += 1;
157
0
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
0
    return UNICODE_REPLACEMENT_CHARACTER;
159
0
  }
160
161
6
  return result;
162
6
}
Unexecuted instantiation: unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Unexecuted instantiation: JSONEmitter.cpp:unsigned int hermes::_decodeUTF8SlowPath<true, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0>(char const*&, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0)
Unexecuted instantiation: SourceErrorManager.cpp:unsigned int hermes::_decodeUTF8SlowPath<true, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0>(char const*&, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0)
unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
77
4
uint32_t _decodeUTF8SlowPath(const char *&from, F error) {
78
4
  uint32_t ch = (uint32_t)from[0];
79
4
  uint32_t result;
80
81
4
  assert(isUTF8Start(ch));
82
83
4
  if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) {
84
0
    uint32_t ch1 = (uint32_t)from[1];
85
0
    if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) {
86
0
      from += 1;
87
0
      error("Invalid UTF-8 continuation byte");
88
0
      return UNICODE_REPLACEMENT_CHARACTER;
89
0
    }
90
91
0
    from += 2;
92
0
    result = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
93
0
    if (LLVM_UNLIKELY(result <= 0x7F)) {
94
0
      error("Non-canonical UTF-8 encoding");
95
0
      return UNICODE_REPLACEMENT_CHARACTER;
96
0
    }
97
98
4
  } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) {
99
4
    uint32_t ch1 = (uint32_t)from[1];
100
4
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
101
0
      from += 1;
102
0
      error("Invalid UTF-8 continuation byte");
103
0
      return UNICODE_REPLACEMENT_CHARACTER;
104
0
    }
105
4
    uint32_t ch2 = (uint32_t)from[2];
106
4
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
107
0
      from += 2;
108
0
      error("Invalid UTF-8 continuation byte");
109
0
      return UNICODE_REPLACEMENT_CHARACTER;
110
0
    }
111
4
    from += 3;
112
4
    result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
113
4
    if (LLVM_UNLIKELY(result <= 0x7FF)) {
114
0
      error("Non-canonical UTF-8 encoding");
115
0
      return UNICODE_REPLACEMENT_CHARACTER;
116
0
    }
117
4
    if (LLVM_UNLIKELY(
118
4
            result >= UNICODE_SURROGATE_FIRST &&
119
4
            result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) {
120
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
121
0
      return UNICODE_REPLACEMENT_CHARACTER;
122
0
    }
123
124
4
  } else if ((ch & 0xF8) == 0xF0) {
125
0
    uint32_t ch1 = (uint32_t)from[1];
126
0
    if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) {
127
0
      from += 1;
128
0
      error("Invalid UTF-8 continuation byte");
129
0
      return UNICODE_REPLACEMENT_CHARACTER;
130
0
    }
131
0
    uint32_t ch2 = (uint32_t)from[2];
132
0
    if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) {
133
0
      from += 2;
134
0
      error("Invalid UTF-8 continuation byte");
135
0
      return UNICODE_REPLACEMENT_CHARACTER;
136
0
    }
137
0
    uint32_t ch3 = (uint32_t)from[3];
138
0
    if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) {
139
0
      from += 3;
140
0
      error("Invalid UTF-8 continuation byte");
141
0
      return UNICODE_REPLACEMENT_CHARACTER;
142
0
    }
143
0
    from += 4;
144
0
    result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) |
145
0
        (ch3 & 0x3F);
146
0
    if (LLVM_UNLIKELY(result <= 0xFFFF)) {
147
0
      error("Non-canonical UTF-8 encoding");
148
0
      return UNICODE_REPLACEMENT_CHARACTER;
149
0
    }
150
0
    if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) {
151
0
      error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result));
152
0
      return UNICODE_REPLACEMENT_CHARACTER;
153
0
    }
154
155
0
  } else {
156
0
    from += 1;
157
0
    error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch));
158
0
    return UNICODE_REPLACEMENT_CHARACTER;
159
0
  }
160
161
4
  return result;
162
4
}
163
164
/// Scans back from \p ptr until the start of the previous UTF-8 codepoint.
165
/// Logically, this is equivalent to `--ptr` in the codepoint space.
166
/// It could be a regular ASCII character, or a multi-byte encoded character.
167
/// This function assumes that the input is valid!
168
0
inline const char *previousUTF8Start(const char *ptr) {
169
0
  --ptr;
170
0
  // If the previous codepoint is ASCII, we are done.
171
0
  if (!(*ptr & 0x80))
172
0
    return ptr;
173
0
  // Scan backwards until we find a leading byte (11xxxxxx)
174
0
  while ((*ptr & 0xC0) != 0xC0)
175
0
    --ptr;
176
0
  return ptr;
177
0
}
178
179
/// Decode a sequence of UTF8 encoded bytes into a Unicode codepoint.
180
/// In case of decoding errors, the provided callback is invoked with an
181
/// apropriate messsage and UNICODE_REPLACEMENT_CHARACTER is returned.
182
///
183
/// \tparam allowSurrogates when false, values in the surrogate range are
184
///     reported as errors
185
/// \param error callback invoked with an error message
186
/// \return the codepoint
187
template <bool allowSurrogates, typename F>
188
5.12M
inline uint32_t decodeUTF8(const char *&from, F error) {
189
5.12M
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
5.11M
    return *from++;
191
192
12.1k
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
5.12M
}
unsigned int hermes::decodeUTF8<false, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1})
Line
Count
Source
188
4.05k
inline uint32_t decodeUTF8(const char *&from, F error) {
189
4.05k
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
4
    return *from++;
191
192
4.05k
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
4.05k
}
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
188
1.99M
inline uint32_t decodeUTF8(const char *&from, F error) {
189
1.99M
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
1.99M
    return *from++;
191
192
3.53k
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
1.99M
}
BytecodeGenerator.cpp:unsigned int hermes::decodeUTF8<false, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0>(char const*&, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0)
Line
Count
Source
188
2.40k
inline uint32_t decodeUTF8(const char *&from, F error) {
189
2.40k
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
2.40k
    return *from++;
191
192
2
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
2.40k
}
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
188
3.11M
inline uint32_t decodeUTF8(const char *&from, F error) {
189
3.11M
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
3.11M
    return *from++;
191
192
4.52k
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
3.11M
}
DebugInfo.cpp:unsigned int hermes::decodeUTF8<false, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0>(char const*&, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0)
Line
Count
Source
188
1.27k
inline uint32_t decodeUTF8(const char *&from, F error) {
189
1.27k
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
1.27k
    return *from++;
191
192
6
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
1.27k
}
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
188
143
inline uint32_t decodeUTF8(const char *&from, F error) {
189
143
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
143
    return *from++;
191
192
0
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
143
}
Unexecuted instantiation: JSONEmitter.cpp:unsigned int hermes::decodeUTF8<true, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0>(char const*&, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0)
Unexecuted instantiation: SourceErrorManager.cpp:unsigned int hermes::decodeUTF8<true, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0>(char const*&, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0)
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1})
Line
Count
Source
188
4
inline uint32_t decodeUTF8(const char *&from, F error) {
189
4
  if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII?
190
0
    return *from++;
191
192
4
  return _decodeUTF8SlowPath<allowSurrogates>(from, error);
193
4
}
194
195
/// Encode a 32-bit value, into UTF16. If the value is a part of a surrogate
196
/// pair, it is encoded without any conversion.
197
template <typename OutIt>
198
5.11M
inline void encodeUTF16(OutIt &dest, uint32_t cp) {
199
5.11M
  if (LLVM_LIKELY(cp < 0x10000)) {
200
5.11M
    *dest = (uint16_t)cp;
201
5.11M
    ++dest; // Use pre-increment in case this is an iterator.
202
5.11M
  } else {
203
0
    assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value");
204
0
    cp -= 0x10000;
205
0
    *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF);
206
0
    ++dest;
207
0
    *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF);
208
0
    ++dest;
209
0
  }
210
5.11M
}
void hermes::encodeUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >&, unsigned int)
Line
Count
Source
198
1.99M
inline void encodeUTF16(OutIt &dest, uint32_t cp) {
199
1.99M
  if (LLVM_LIKELY(cp < 0x10000)) {
200
1.99M
    *dest = (uint16_t)cp;
201
1.99M
    ++dest; // Use pre-increment in case this is an iterator.
202
1.99M
  } else {
203
0
    assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value");
204
0
    cp -= 0x10000;
205
0
    *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF);
206
0
    ++dest;
207
0
    *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF);
208
0
    ++dest;
209
0
  }
210
1.99M
}
void hermes::encodeUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >&, unsigned int)
Line
Count
Source
198
3.11M
inline void encodeUTF16(OutIt &dest, uint32_t cp) {
199
3.11M
  if (LLVM_LIKELY(cp < 0x10000)) {
200
3.11M
    *dest = (uint16_t)cp;
201
3.11M
    ++dest; // Use pre-increment in case this is an iterator.
202
3.11M
  } else {
203
0
    assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value");
204
0
    cp -= 0x10000;
205
0
    *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF);
206
0
    ++dest;
207
0
    *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF);
208
0
    ++dest;
209
0
  }
210
3.11M
}
void hermes::encodeUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >&, unsigned int)
Line
Count
Source
198
143
inline void encodeUTF16(OutIt &dest, uint32_t cp) {
199
143
  if (LLVM_LIKELY(cp < 0x10000)) {
200
143
    *dest = (uint16_t)cp;
201
143
    ++dest; // Use pre-increment in case this is an iterator.
202
143
  } else {
203
0
    assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value");
204
0
    cp -= 0x10000;
205
0
    *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF);
206
0
    ++dest;
207
0
    *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF);
208
0
    ++dest;
209
0
  }
210
143
}
Unexecuted instantiation: void hermes::encodeUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 2u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 2u> >&, unsigned int)
void hermes::encodeUTF16<char16_t*>(char16_t*&, unsigned int)
Line
Count
Source
198
4
inline void encodeUTF16(OutIt &dest, uint32_t cp) {
199
4
  if (LLVM_LIKELY(cp < 0x10000)) {
200
4
    *dest = (uint16_t)cp;
201
4
    ++dest; // Use pre-increment in case this is an iterator.
202
4
  } else {
203
0
    assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value");
204
0
    cp -= 0x10000;
205
0
    *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF);
206
0
    ++dest;
207
0
    *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF);
208
0
    ++dest;
209
0
  }
210
4
}
211
212
/// Decode a UTF-8 sequence, which is assumed to be valid, but may possibly
213
/// contain explicitly encoded surrogate pairs, into a UTF-16 sequence.
214
/// \return the updated destination iterator
215
template <typename OutIt>
216
inline OutIt convertUTF8WithSurrogatesToUTF16(
217
    OutIt dest,
218
    const char *begin8,
219
4.25k
    const char *end8) {
220
5.12M
  while (begin8 < end8)
221
5.11M
    encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) {
222
0
                  llvm_unreachable("invalid UTF-8");
223
0
                }));
Unexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) const
Unexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) const
Unexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) const
Unexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) const
224
4.25k
  return dest;
225
4.25k
}
std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)
Line
Count
Source
219
1.99k
    const char *end8) {
220
2.00M
  while (begin8 < end8)
221
1.99M
    encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) {
222
1.99M
                  llvm_unreachable("invalid UTF-8");
223
1.99M
                }));
224
1.99k
  return dest;
225
1.99k
}
std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)
Line
Count
Source
219
271
    const char *end8) {
220
3.11M
  while (begin8 < end8)
221
3.11M
    encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) {
222
3.11M
                  llvm_unreachable("invalid UTF-8");
223
3.11M
                }));
224
271
  return dest;
225
271
}
std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)
Line
Count
Source
219
1.99k
    const char *end8) {
220
2.13k
  while (begin8 < end8)
221
143
    encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) {
222
143
                  llvm_unreachable("invalid UTF-8");
223
143
                }));
224
1.99k
  return dest;
225
1.99k
}
char16_t* hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)
Line
Count
Source
219
2
    const char *end8) {
220
6
  while (begin8 < end8)
221
4
    encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) {
222
4
                  llvm_unreachable("invalid UTF-8");
223
4
                }));
224
2
  return dest;
225
2
}
226
227
/// Convert a UTF-16 encoded string \p input to UTF-8 stored in \p dest,
228
/// encoding each surrogate half individually into UTF-8.
229
/// This is the inverse function of convertUTF8WithSurrogatesToUTF16.
230
/// Note the result is not valid UTF-8 if it contains surrogate values.
231
/// Only use it to get the internal representation of utf-8 strings in hermes
232
/// compiler.
233
void convertUTF16ToUTF8WithSingleSurrogates(
234
    std::string &dest,
235
    llvh::ArrayRef<char16_t> input);
236
237
/// Convert a UTF-16 encoded string \p input to UTF-8 stored in \p dest,
238
/// replacing unpaired surrogate halves with the Unicode replacement character.
239
/// \param maxCharacters If non-zero, the maximum number of characters to
240
///   convert.
241
/// \return false if the string was truncated, true if the whole string was
242
///   written out successfully.
243
bool convertUTF16ToUTF8WithReplacements(
244
    std::string &dest,
245
    llvh::ArrayRef<char16_t> input,
246
    size_t maxCharacters = 0);
247
248
/// Convert a UTF-16 encoded string \p input to a pre-allocated UTF-8 buffer
249
/// \p outBuffer (which carries its own length), replacing unpaired surrogate
250
/// halves with the Unicode replacement character.
251
/// \return a std::pair with the first element being the number of UTF-16
252
///   characters converted, and the second element being the number of UTF-8
253
///   characters written
254
std::pair<uint32_t, uint32_t> convertUTF16ToUTF8BufferWithReplacements(
255
    llvh::MutableArrayRef<uint8_t> outBuffer,
256
    llvh::ArrayRef<char16_t> input);
257
258
/// Convert a UTF-8 encoded string (with surrogates) \p input to a UTF-8 one
259
/// (without surrogates), storing the conversion in \p output. Output characters
260
/// are appended to \p output.
261
void convertUTF8WithSurrogatesToUTF8WithReplacements(
262
    std::string &output,
263
    llvh::StringRef input);
264
265
} // namespace hermes
266
267
#endif // HERMES_SUPPORT_UTF8_H