/src/hermes/include/hermes/Support/UTF8.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #ifndef HERMES_SUPPORT_UTF8_H |
9 | | #define HERMES_SUPPORT_UTF8_H |
10 | | |
11 | | #include "hermes/Platform/Unicode/CharacterProperties.h" |
12 | | #include "llvh/ADT/ArrayRef.h" |
13 | | #include "llvh/ADT/Twine.h" |
14 | | #include "llvh/Support/Compiler.h" |
15 | | |
16 | | #include <cstddef> |
17 | | |
18 | | namespace hermes { |
19 | | |
20 | | /// Maximum number of bytes in a valid UTF-8 codepoint |
21 | | constexpr size_t UTF8CodepointMaxBytes = 6; |
22 | | |
23 | | /// Encode a unicode code point as a UTF-8 sequence of bytes. |
24 | | void encodeUTF8(char *&dst, uint32_t cp); |
25 | | |
26 | | /// Check whether a byte is a regular ASCII or a UTF8 starting byte. |
27 | | /// \return true if it is UTF8 starting byte. |
28 | 16.1M | inline bool isUTF8Start(char ch) { |
29 | 16.1M | return (ch & 0x80) != 0; |
30 | 16.1M | } |
31 | | |
32 | | /// \return true if this is a UTF-8 leading byte. |
33 | 0 | inline bool isUTF8LeadingByte(char ch) { |
34 | 0 | return (ch & 0xC0) == 0xC0; |
35 | 0 | } |
36 | | |
37 | | /// \return true if this is a UTF-8 continuation byte, or in other words, this |
38 | | /// is a byte in the "middle" of a UTF-8 codepoint. |
39 | 1.51M | inline static bool isUTF8ContinuationByte(char ch) { |
40 | 1.51M | return (ch & 0xC0) == 0x80; |
41 | 1.51M | } Unexecuted instantiation: hermes.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DebuggerAPI.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BigIntPrimitive.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Callable.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: CodeBlock.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Domain.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: GCBase.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HeapSnapshot.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HiddenClass.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: IdentifierTable.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Interpreter.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Interpreter-slowpaths.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSArray.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSArrayBuffer.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSDataView.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSDate.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSError.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSGenerator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSObject.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSProxy.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSRegExp.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSMapImpl.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSNativeFunctions.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSTypedArray.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSWeakMapImpl.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: 
JSWeakRef.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DecoratedObject.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HostModel.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: NativeState.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Operations.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: PrimitiveBox.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: PropertyAccessor.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Runtime.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: RuntimeModule.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: CodeCoverageProfiler.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SamplingProfiler.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SamplingProfilerPosix.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SamplingProfilerSampler.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SegmentedArray.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SerializedLiteralParser.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: StackTracesTree.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: StringPrimitive.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: StringView.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SymbolRegistry.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: TimeLimitMonitor.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: TwineChar16.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: StringRefUtils.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: IdentifierHashTable.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Array.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: 
ArrayBuffer.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ArrayIterator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: AsyncFunction.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Base64.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Base64Util.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BigInt.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: CallSite.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DataView.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: TypedArray.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Error.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: GeneratorFunction.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: GeneratorPrototype.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: GlobalObject.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: IteratorPrototype.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HermesInternal.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HermesBuiltin.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSLibInternal.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSLibStorage.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Map.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Math.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSON.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: RuntimeJSONUtils.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSONLexer.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Object.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Proxy.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: 
Reflect.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Set.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: String.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: StringIterator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Function.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Number.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Boolean.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: RegExp.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: RegExpStringIterator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DateUtil.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DateCache.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Symbol.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Date.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: WeakMap.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: WeakRef.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: WeakSet.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: print.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: eval.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: escape.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: require.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: TextEncoder.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: AlignedHeapSegment.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HadesGC.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Debugger.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DebuggerInternal.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ArrayStorage.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: 
CheckHeapWellFormedAcceptor.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DictPropertyMap.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DummyObject.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: OrderedHashMap.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HermesValue.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSCallSite.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSCallableProxy.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSRegExpStringIterator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ChromeTraceSerializer.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ProfileGenerator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SingleObject.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: FillerCell.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BoxedDouble.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: HBC.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ISel.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Bytecode.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BytecodeStream.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BytecodeGenerator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BytecodeDataProvider.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: BytecodeProviderFromSrc.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ConsecutiveStringStorage.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: DebugInfo.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: Passes.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SerializedLiteralGenerator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: 
IRGen.cpp:hermes::isUTF8ContinuationByte(char) ESTreeIRGen-expr.cpp:hermes::isUTF8ContinuationByte(char) Line | Count | Source | 39 | 1.53k | inline static bool isUTF8ContinuationByte(char ch) { | 40 | 1.53k | return (ch & 0xC0) == 0x80; | 41 | 1.53k | } |
Unexecuted instantiation: SourceMapGenerator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SourceMapParser.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SourceMapTranslator.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: SourceMap.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSONParser.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSParser.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSParserImpl.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSParserImpl-flow.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSParserImpl-jsx.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSParserImpl-ts.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSLexer.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: ES6Class.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: RegexSerialization.cpp:hermes::isUTF8ContinuationByte(char) Unexecuted instantiation: JSONEmitter.cpp:hermes::isUTF8ContinuationByte(char) SourceErrorManager.cpp:hermes::isUTF8ContinuationByte(char) Line | Count | Source | 39 | 1.51M | inline static bool isUTF8ContinuationByte(char ch) { | 40 | 1.51M | return (ch & 0xC0) == 0x80; | 41 | 1.51M | } |
Unexecuted instantiation: UTF8.cpp:hermes::isUTF8ContinuationByte(char) |
42 | | |
43 | | /// \return true if this is a valid ASCII character. |
44 | | /// As in the range of 0-127. |
45 | | template <typename Char> |
46 | 39.2k | bool isASCII(Char c) { |
47 | | // We start with a mask representing all valid set bits of ASCII. Flip the |
48 | | // mask, so it now represents all invalid bits. Test if any bit is set that |
49 | | // would make it an invalid ASCII character. |
50 | 39.2k | constexpr uint32_t asciiMask = 0x7f; |
51 | 39.2k | return (c & static_cast<Char>(~asciiMask)) == 0; |
52 | 39.2k | } bool hermes::isASCII<char16_t>(char16_t) Line | Count | Source | 46 | 39.2k | bool isASCII(Char c) { | 47 | | // We start with a mask representing all valid set bits of ASCII. Flip the | 48 | | // mask, so it now represents all invalid bits. Test if any bit is set that | 49 | | // would make it an invalid ASCII character. | 50 | 39.2k | constexpr uint32_t asciiMask = 0x7f; | 51 | 39.2k | return (c & static_cast<Char>(~asciiMask)) == 0; | 52 | 39.2k | } |
Unexecuted instantiation: bool hermes::isASCII<char>(char) |
53 | | |
54 | | /// \return true if this is a pure ASCII char sequence. |
55 | | template <typename Iter> |
56 | 1.15k | inline bool isAllASCII(Iter begin, Iter end) { |
57 | 40.0k | while (begin < end) { |
58 | 39.2k | if (!isASCII(*begin)) |
59 | 286 | return false; |
60 | 38.9k | ++begin; |
61 | 38.9k | } |
62 | 872 | return true; |
63 | 1.15k | } bool hermes::isAllASCII<char16_t const*>(char16_t const*, char16_t const*) Line | Count | Source | 56 | 1.15k | inline bool isAllASCII(Iter begin, Iter end) { | 57 | 40.0k | while (begin < end) { | 58 | 39.2k | if (!isASCII(*begin)) | 59 | 286 | return false; | 60 | 38.9k | ++begin; | 61 | 38.9k | } | 62 | 872 | return true; | 63 | 1.15k | } |
Unexecuted instantiation: bool hermes::isAllASCII<std::__1::__wrap_iter<char*> >(std::__1::__wrap_iter<char*>, std::__1::__wrap_iter<char*>) |
64 | | |
65 | | /// Overload for char* and uint8_t*. |
66 | | bool isAllASCII(const uint8_t *start, const uint8_t *end); |
67 | | |
68 | 94 | inline bool isAllASCII(const char *start, const char *end) { |
69 | 94 | return isAllASCII((const uint8_t *)start, (const uint8_t *)end); |
70 | 94 | } |
71 | | |
72 | | /// Decode a sequence of UTF8 encoded bytes when it is known that the first byte |
73 | | /// is a start of an UTF8 sequence. |
74 | | /// \tparam allowSurrogates when false, values in the surrogate range are |
75 | | /// reported as errors |
76 | | template <bool allowSurrogates, typename F> |
77 | 93.6k | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { |
78 | 93.6k | uint32_t ch = (uint32_t)from[0]; |
79 | 93.6k | uint32_t result; |
80 | | |
81 | 93.6k | assert(isUTF8Start(ch)); |
82 | | |
83 | 93.6k | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { |
84 | 81.3k | uint32_t ch1 = (uint32_t)from[1]; |
85 | 81.3k | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { |
86 | 1 | from += 1; |
87 | 1 | error("Invalid UTF-8 continuation byte"); |
88 | 1 | return UNICODE_REPLACEMENT_CHARACTER; |
89 | 1 | } |
90 | | |
91 | 81.3k | from += 2; |
92 | 81.3k | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); |
93 | 81.3k | if (LLVM_UNLIKELY(result <= 0x7F)) { |
94 | 0 | error("Non-canonical UTF-8 encoding"); |
95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
96 | 0 | } |
97 | | |
98 | 81.3k | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { |
99 | 8.81k | uint32_t ch1 = (uint32_t)from[1]; |
100 | 8.81k | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { |
101 | 1 | from += 1; |
102 | 1 | error("Invalid UTF-8 continuation byte"); |
103 | 1 | return UNICODE_REPLACEMENT_CHARACTER; |
104 | 1 | } |
105 | 8.81k | uint32_t ch2 = (uint32_t)from[2]; |
106 | 8.81k | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { |
107 | 0 | from += 2; |
108 | 0 | error("Invalid UTF-8 continuation byte"); |
109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
110 | 0 | } |
111 | 8.81k | from += 3; |
112 | 8.81k | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); |
113 | 8.81k | if (LLVM_UNLIKELY(result <= 0x7FF)) { |
114 | 0 | error("Non-canonical UTF-8 encoding"); |
115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
116 | 0 | } |
117 | 8.81k | if (LLVM_UNLIKELY( |
118 | 8.81k | result >= UNICODE_SURROGATE_FIRST && |
119 | 8.81k | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { |
120 | 2 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); |
121 | 2 | return UNICODE_REPLACEMENT_CHARACTER; |
122 | 2 | } |
123 | | |
124 | 8.81k | } else if ((ch & 0xF8) == 0xF0) { |
125 | 3.46k | uint32_t ch1 = (uint32_t)from[1]; |
126 | 3.46k | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { |
127 | 0 | from += 1; |
128 | 0 | error("Invalid UTF-8 continuation byte"); |
129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
130 | 0 | } |
131 | 3.46k | uint32_t ch2 = (uint32_t)from[2]; |
132 | 3.46k | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { |
133 | 0 | from += 2; |
134 | 0 | error("Invalid UTF-8 continuation byte"); |
135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
136 | 0 | } |
137 | 3.46k | uint32_t ch3 = (uint32_t)from[3]; |
138 | 3.46k | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { |
139 | 0 | from += 3; |
140 | 0 | error("Invalid UTF-8 continuation byte"); |
141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
142 | 0 | } |
143 | 3.46k | from += 4; |
144 | 3.46k | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | |
145 | 3.46k | (ch3 & 0x3F); |
146 | 3.46k | if (LLVM_UNLIKELY(result <= 0xFFFF)) { |
147 | 0 | error("Non-canonical UTF-8 encoding"); |
148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
149 | 0 | } |
150 | 3.46k | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { |
151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); |
152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; |
153 | 0 | } |
154 | | |
155 | 3.46k | } else { |
156 | 10 | from += 1; |
157 | 10 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); |
158 | 10 | return UNICODE_REPLACEMENT_CHARACTER; |
159 | 10 | } |
160 | | |
161 | 93.6k | return result; |
162 | 93.6k | } unsigned int hermes::_decodeUTF8SlowPath<false, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1})Line | Count | Source | 77 | 4.05k | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 4.05k | uint32_t ch = (uint32_t)from[0]; | 79 | 4.05k | uint32_t result; | 80 | | | 81 | 4.05k | assert(isUTF8Start(ch)); | 82 | | | 83 | 4.05k | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 3.97k | uint32_t ch1 = (uint32_t)from[1]; | 85 | 3.97k | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 3.97k | from += 2; | 92 | 3.97k | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 3.97k | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | | | 98 | 3.97k | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 74 | uint32_t ch1 = (uint32_t)from[1]; | 100 | 74 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 1 | from += 1; | 102 | 1 | error("Invalid UTF-8 continuation byte"); | 103 | 1 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 1 | } | 105 | 73 | uint32_t ch2 = (uint32_t)from[2]; | 106 | 73 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 73 | from += 3; | 112 | 73 | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 73 | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 73 | if (LLVM_UNLIKELY( | 118 | 73 | result >= UNICODE_SURROGATE_FIRST && | 119 | 73 | result <= 
UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 73 | } else if ((ch & 0xF8) == 0xF0) { | 125 | 5 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 5 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 5 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 5 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 5 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 5 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 5 | from += 4; | 144 | 5 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 5 | (ch3 & 0x3F); | 146 | 5 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 5 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | | | 155 | 5 | } else { | 156 | 3 | from += 1; | 157 | 3 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 3 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 3 | } | 160 | | | 161 | 4.05k | return result; | 162 | 4.05k | } |
unsigned int hermes::_decodeUTF8SlowPath<false, hermes::parser::JSLexer::_decodeUTF8SlowPath(char const*&)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::_decodeUTF8SlowPath(char const*&)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 77 | 12.3k | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 12.3k | uint32_t ch = (uint32_t)from[0]; | 79 | 12.3k | uint32_t result; | 80 | | | 81 | 12.3k | assert(isUTF8Start(ch)); | 82 | | | 83 | 12.3k | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 6.03k | uint32_t ch1 = (uint32_t)from[1]; | 85 | 6.03k | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 1 | from += 1; | 87 | 1 | error("Invalid UTF-8 continuation byte"); | 88 | 1 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 1 | } | 90 | | | 91 | 6.03k | from += 2; | 92 | 6.03k | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 6.03k | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | | | 98 | 6.33k | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 3.04k | uint32_t ch1 = (uint32_t)from[1]; | 100 | 3.04k | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 3.04k | uint32_t ch2 = (uint32_t)from[2]; | 106 | 3.04k | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 3.04k | from += 3; | 112 | 3.04k | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 3.04k | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 3.04k | if (LLVM_UNLIKELY( | 118 | 3.04k | result >= 
UNICODE_SURROGATE_FIRST && | 119 | 3.04k | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 3.28k | } else if ((ch & 0xF8) == 0xF0) { | 125 | 3.28k | uint32_t ch1 = (uint32_t)from[1]; | 126 | 3.28k | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 3.28k | uint32_t ch2 = (uint32_t)from[2]; | 132 | 3.28k | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 3.28k | uint32_t ch3 = (uint32_t)from[3]; | 138 | 3.28k | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 3.28k | from += 4; | 144 | 3.28k | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 3.28k | (ch3 & 0x3F); | 146 | 3.28k | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 3.28k | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | | | 155 | 3.28k | } else { | 156 | 4 | from += 1; | 157 | 4 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 4 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 4 | } | 160 | | | 161 | 12.3k | return result; | 162 | 12.3k | } |
unsigned int hermes::_decodeUTF8SlowPath<false, hermes::parser::JSLexer::_peekUTF8(char const*) const::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::_peekUTF8(char const*) const::{lambda(llvh::Twine const&)#1})Line | Count | Source | 77 | 69.1k | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 69.1k | uint32_t ch = (uint32_t)from[0]; | 79 | 69.1k | uint32_t result; | 80 | | | 81 | 69.1k | assert(isUTF8Start(ch)); | 82 | | | 83 | 69.1k | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 68.9k | uint32_t ch1 = (uint32_t)from[1]; | 85 | 68.9k | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 68.9k | from += 2; | 92 | 68.9k | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 68.9k | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | | | 98 | 68.9k | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 68 | uint32_t ch1 = (uint32_t)from[1]; | 100 | 68 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 68 | uint32_t ch2 = (uint32_t)from[2]; | 106 | 68 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 68 | from += 3; | 112 | 68 | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 68 | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 68 | if (LLVM_UNLIKELY( | 118 | 68 | result >= UNICODE_SURROGATE_FIRST && | 119 | 68 | result <= 
UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 178 | } else if ((ch & 0xF8) == 0xF0) { | 125 | 175 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 175 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 175 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 175 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 175 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 175 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 175 | from += 4; | 144 | 175 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 175 | (ch3 & 0x3F); | 146 | 175 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 175 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | | | 155 | 175 | } else { | 156 | 3 | from += 1; | 157 | 3 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 3 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 3 | } | 160 | | | 161 | 69.1k | return result; | 162 | 69.1k | } |
unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 77 | 3.53k | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 3.53k | uint32_t ch = (uint32_t)from[0]; | 79 | 3.53k | uint32_t result; | 80 | | | 81 | 3.53k | assert(isUTF8Start(ch)); | 82 | | | 83 | 3.53k | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 549 | uint32_t ch1 = (uint32_t)from[1]; | 85 | 549 | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 549 | from += 2; | 92 | 549 | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 549 | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | | | 98 | 2.98k | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 2.98k | uint32_t ch1 = (uint32_t)from[1]; | 100 | 2.98k | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 2.98k | uint32_t ch2 = (uint32_t)from[2]; | 106 | 2.98k | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 2.98k | from += 3; | 112 | 2.98k | result = ((ch & 0x0F) << 12) | ((ch1 & 
0x3F) << 6) | (ch2 & 0x3F); | 113 | 2.98k | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 2.98k | if (LLVM_UNLIKELY( | 118 | 2.98k | result >= UNICODE_SURROGATE_FIRST && | 119 | 2.98k | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 2.98k | } else if ((ch & 0xF8) == 0xF0) { | 125 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 0 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 0 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 0 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 0 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 0 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 0 | from += 4; | 144 | 0 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 0 | (ch3 & 0x3F); | 146 | 0 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 0 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | |
| 155 | 0 | } else { | 156 | 0 | from += 1; | 157 | 0 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 0 | } | 160 | | | 161 | 3.53k | return result; | 162 | 3.53k | } |
Unexecuted instantiation: HBC.cpp:unsigned int hermes::_decodeUTF8SlowPath<false, hermes::hbc::generateBytecodeModule(hermes::Module*, hermes::Function*, hermes::Function*, hermes::BytecodeGenerationOptions const&, hermes::OptValue<unsigned int>, hermes::SourceMapGenerator*, std::__1::unique_ptr<hermes::hbc::BCProviderBase, std::__1::default_delete<hermes::hbc::BCProviderBase> >)::$_4::operator()(llvh::StringRef) const::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::hbc::generateBytecodeModule(hermes::Module*, hermes::Function*, hermes::Function*, hermes::BytecodeGenerationOptions const&, hermes::OptValue<unsigned int>, hermes::SourceMapGenerator*, std::__1::unique_ptr<hermes::hbc::BCProviderBase, std::__1::default_delete<hermes::hbc::BCProviderBase> >)::$_4::operator()(llvh::StringRef) const::{lambda(llvh::Twine const&)#1})BytecodeGenerator.cpp:unsigned int hermes::_decodeUTF8SlowPath<false, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0>(char const*&, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0) Line | Count | Source | 77 | 2 | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 2 | uint32_t ch = (uint32_t)from[0]; | 79 | 2 | uint32_t result; | 80 | | | 81 | 2 | assert(isUTF8Start(ch)); | 82 | | | 83 | 2 | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 85 | 0 | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 0 | from += 2; | 92 | 0 | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 0 | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); 
| 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | |
| 98 | 2 | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 2 | uint32_t ch1 = (uint32_t)from[1]; | 100 | 2 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 2 | uint32_t ch2 = (uint32_t)from[2]; | 106 | 2 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 2 | from += 3; | 112 | 2 | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 2 | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 2 | if (LLVM_UNLIKELY( | 118 | 2 | result >= UNICODE_SURROGATE_FIRST && | 119 | 2 | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 2 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 2 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 2 | } | 123 | | | 124 | 2 | } else if ((ch & 0xF8) == 0xF0) { | 125 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 0 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 0 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 0 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 0 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 0 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 0 | from += 4; 
| 144 | 0 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 0 | (ch3 & 0x3F); | 146 | 0 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 0 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | |
| 155 | 0 | } else { | 156 | 0 | from += 1; | 157 | 0 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 0 | } | 160 | | | 161 | 0 | return result; | 162 | 2 | } |
unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 77 | 4.52k | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 4.52k | uint32_t ch = (uint32_t)from[0]; | 79 | 4.52k | uint32_t result; | 80 | | | 81 | 4.52k | assert(isUTF8Start(ch)); | 82 | | | 83 | 4.52k | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 1.89k | uint32_t ch1 = (uint32_t)from[1]; | 85 | 1.89k | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 1.89k | from += 2; | 92 | 1.89k | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 1.89k | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | | | 98 | 2.62k | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 2.62k | uint32_t ch1 = (uint32_t)from[1]; | 100 | 2.62k | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 2.62k | uint32_t ch2 = (uint32_t)from[2]; | 106 | 2.62k | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return 
UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 2.62k | from += 3; | 112 | 2.62k | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 2.62k | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 2.62k | if (LLVM_UNLIKELY( | 118 | 2.62k | result >= UNICODE_SURROGATE_FIRST && | 119 | 2.62k | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 2.62k | } else if ((ch & 0xF8) == 0xF0) { | 125 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 0 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 0 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 0 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 0 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 0 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 0 | from += 4; | 144 | 0 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 0 | (ch3 & 0x3F); | 146 | 0 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 0 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | |
| 155 | 0 | } else { | 156 | 0 | from += 1; | 157 | 0 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 0 | } | 160 | | | 161 | 4.52k | return result; | 162 | 4.52k | } |
DebugInfo.cpp:unsigned int hermes::_decodeUTF8SlowPath<false, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0>(char const*&, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0) Line | Count | Source | 77 | 6 | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 6 | uint32_t ch = (uint32_t)from[0]; | 79 | 6 | uint32_t result; | 80 | | | 81 | 6 | assert(isUTF8Start(ch)); | 82 | | | 83 | 6 | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 85 | 0 | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 0 | from += 2; | 92 | 0 | result = ((ch & 0x1F) << 6) | (ch1 & 0x3F); | 93 | 0 | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | |
| 98 | 6 | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 4 | uint32_t ch1 = (uint32_t)from[1]; | 100 | 4 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 4 | uint32_t ch2 = (uint32_t)from[2]; | 106 | 4 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 4 | from += 3; | 112 | 4 | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 4 | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 4 | if (LLVM_UNLIKELY( | 118 | 4 | result >= UNICODE_SURROGATE_FIRST && | 119 | 4 | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 4 | } else if ((ch & 0xF8) == 0xF0) { | 125 | 2 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 2 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 2 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 2 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 2 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 2 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 2 | from += 4; 
| 144 | 2 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 2 | (ch3 & 0x3F); | 146 | 2 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 2 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | | | 155 | 2 | } else { | 156 | 0 | from += 1; | 157 | 0 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 0 | } | 160 | | | 161 | 6 | return result; | 162 | 6 | } |
Unexecuted instantiation: unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})Unexecuted instantiation: JSONEmitter.cpp:unsigned int hermes::_decodeUTF8SlowPath<true, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0>(char const*&, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0) Unexecuted instantiation: SourceErrorManager.cpp:unsigned int hermes::_decodeUTF8SlowPath<true, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0>(char const*&, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0) unsigned int hermes::_decodeUTF8SlowPath<true, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 77 | 4 | uint32_t _decodeUTF8SlowPath(const char *&from, F error) { | 78 | 4 | uint32_t ch = (uint32_t)from[0]; | 79 | 4 | uint32_t result; | 80 | | | 81 | 4 | assert(isUTF8Start(ch)); | 82 | | | 83 | 4 | if (LLVM_LIKELY((ch & 0xE0) == 0xC0)) { | 84 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 85 | 0 | if (LLVM_UNLIKELY((ch1 & 0xC0) != 0x80)) { | 86 | 0 | from += 1; | 87 | 0 | error("Invalid UTF-8 continuation byte"); | 88 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 89 | 0 | } | 90 | | | 91 | 0 | from += 2; | 92 | 0 | result = ((ch & 
0x1F) << 6) | (ch1 & 0x3F); | 93 | 0 | if (LLVM_UNLIKELY(result <= 0x7F)) { | 94 | 0 | error("Non-canonical UTF-8 encoding"); | 95 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 96 | 0 | } | 97 | |
| 98 | 4 | } else if (LLVM_LIKELY((ch & 0xF0) == 0xE0)) { | 99 | 4 | uint32_t ch1 = (uint32_t)from[1]; | 100 | 4 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 101 | 0 | from += 1; | 102 | 0 | error("Invalid UTF-8 continuation byte"); | 103 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 104 | 0 | } | 105 | 4 | uint32_t ch2 = (uint32_t)from[2]; | 106 | 4 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 107 | 0 | from += 2; | 108 | 0 | error("Invalid UTF-8 continuation byte"); | 109 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 110 | 0 | } | 111 | 4 | from += 3; | 112 | 4 | result = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F); | 113 | 4 | if (LLVM_UNLIKELY(result <= 0x7FF)) { | 114 | 0 | error("Non-canonical UTF-8 encoding"); | 115 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 116 | 0 | } | 117 | 4 | if (LLVM_UNLIKELY( | 118 | 4 | result >= UNICODE_SURROGATE_FIRST && | 119 | 4 | result <= UNICODE_SURROGATE_LAST && !allowSurrogates)) { | 120 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 121 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 122 | 0 | } | 123 | | | 124 | 4 | } else if ((ch & 0xF8) == 0xF0) { | 125 | 0 | uint32_t ch1 = (uint32_t)from[1]; | 126 | 0 | if (LLVM_UNLIKELY((ch1 & 0x40) != 0 || (ch1 & 0x80) == 0)) { | 127 | 0 | from += 1; | 128 | 0 | error("Invalid UTF-8 continuation byte"); | 129 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 130 | 0 | } | 131 | 0 | uint32_t ch2 = (uint32_t)from[2]; | 132 | 0 | if (LLVM_UNLIKELY((ch2 & 0x40) != 0 || (ch2 & 0x80) == 0)) { | 133 | 0 | from += 2; | 134 | 0 | error("Invalid UTF-8 continuation byte"); | 135 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 136 | 0 | } | 137 | 0 | uint32_t ch3 = (uint32_t)from[3]; | 138 | 0 | if (LLVM_UNLIKELY((ch3 & 0x40) != 0 || (ch3 & 0x80) == 0)) { | 139 | 0 | from += 3; | 140 | 0 | error("Invalid UTF-8 continuation byte"); | 141 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 142 | 0 | } | 143 | 0 | from += 4; 
| 144 | 0 | result = ((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | | 145 | 0 | (ch3 & 0x3F); | 146 | 0 | if (LLVM_UNLIKELY(result <= 0xFFFF)) { | 147 | 0 | error("Non-canonical UTF-8 encoding"); | 148 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 149 | 0 | } | 150 | 0 | if (LLVM_UNLIKELY(result > UNICODE_MAX_VALUE)) { | 151 | 0 | error("Invalid UTF-8 code point 0x" + llvh::Twine::utohexstr(result)); | 152 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 153 | 0 | } | 154 | |
| 155 | 0 | } else { | 156 | 0 | from += 1; | 157 | 0 | error("Invalid UTF-8 lead byte 0x" + llvh::Twine::utohexstr((uint8_t)ch)); | 158 | 0 | return UNICODE_REPLACEMENT_CHARACTER; | 159 | 0 | } | 160 | | | 161 | 4 | return result; | 162 | 4 | } |
|
163 | | |
/// Step backwards from \p ptr to the first byte of the codepoint that
/// immediately precedes it; in codepoint space this is the analogue of
/// `--ptr`. The preceding codepoint may be a single ASCII byte or a
/// multi-byte sequence.
/// No validation or bounds checking is performed: the caller must guarantee
/// that well-formed UTF-8 precedes \p ptr.
/// \return a pointer to the first byte of the preceding codepoint.
inline const char *previousUTF8Start(const char *ptr) {
  const char *cur = ptr - 1;
  // A byte with the top bit set is part of a multi-byte sequence: keep
  // walking back until the lead byte (11xxxxxx) is reached. A plain ASCII
  // byte is already the start of its own codepoint.
  if (*cur & 0x80) {
    while ((*cur & 0xC0) != 0xC0)
      --cur;
  }
  return cur;
}
178 | | |
179 | | /// Decode a sequence of UTF8 encoded bytes into a Unicode codepoint. |
180 | | /// In case of decoding errors, the provided callback is invoked with an |
181 | | /// apropriate messsage and UNICODE_REPLACEMENT_CHARACTER is returned. |
182 | | /// |
183 | | /// \tparam allowSurrogates when false, values in the surrogate range are |
184 | | /// reported as errors |
185 | | /// \param error callback invoked with an error message |
186 | | /// \return the codepoint |
187 | | template <bool allowSurrogates, typename F> |
188 | 5.12M | inline uint32_t decodeUTF8(const char *&from, F error) { |
189 | 5.12M | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? |
190 | 5.11M | return *from++; |
191 | | |
192 | 12.1k | return _decodeUTF8SlowPath<allowSurrogates>(from, error); |
193 | 5.12M | } unsigned int hermes::decodeUTF8<false, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::parser::JSLexer::decodeUTF8()::{lambda(llvh::Twine const&)#1})Line | Count | Source | 188 | 4.05k | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 4.05k | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 4 | return *from++; | 191 | | | 192 | 4.05k | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 4.05k | } |
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 188 | 1.99M | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 1.99M | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 1.99M | return *from++; | 191 | | | 192 | 3.53k | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 1.99M | } |
BytecodeGenerator.cpp:unsigned int hermes::decodeUTF8<false, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0>(char const*&, hermes::hbc::(anonymous namespace)::ensureUTF8Identifer(hermes::StringTable&, hermes::Identifier, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&)::$_0) Line | Count | Source | 188 | 2.40k | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 2.40k | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 2.40k | return *from++; | 191 | | | 192 | 2 | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 2.40k | } |
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 188 | 3.11M | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 3.11M | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 3.11M | return *from++; | 191 | | | 192 | 4.52k | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 3.11M | } |
DebugInfo.cpp:unsigned int hermes::decodeUTF8<false, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0>(char const*&, hermes::hbc::DebugInfoGenerator::appendString(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, hermes::Identifier)::$_0) Line | Count | Source | 188 | 1.27k | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 1.27k | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 1.27k | return *from++; | 191 | | | 192 | 6 | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 1.27k | } |
unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 188 | 143 | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 143 | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 143 | return *from++; | 191 | | | 192 | 0 | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 143 | } |
Unexecuted instantiation: JSONEmitter.cpp:unsigned int hermes::decodeUTF8<true, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0>(char const*&, hermes::JSONEmitter::primitiveEmitString(llvh::StringRef)::$_0) Unexecuted instantiation: SourceErrorManager.cpp:unsigned int hermes::decodeUTF8<true, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0>(char const*&, hermes::SourceErrorManager::buildSourceAndCaretLine(llvh::SMDiagnostic const&, hermes::SourceErrorOutputOptions)::$_0) unsigned int hermes::decodeUTF8<true, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1}>(char const*&, hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1})Line | Count | Source | 188 | 4 | inline uint32_t decodeUTF8(const char *&from, F error) { | 189 | 4 | if (LLVM_LIKELY((*from & 0x80) == 0)) // Ordinary ASCII? | 190 | 0 | return *from++; | 191 | | | 192 | 4 | return _decodeUTF8SlowPath<allowSurrogates>(from, error); | 193 | 4 | } |
|
194 | | |
195 | | /// Encode a 32-bit value, into UTF16. If the value is a part of a surrogate |
196 | | /// pair, it is encoded without any conversion. |
197 | | template <typename OutIt> |
198 | 5.11M | inline void encodeUTF16(OutIt &dest, uint32_t cp) { |
199 | 5.11M | if (LLVM_LIKELY(cp < 0x10000)) { |
200 | 5.11M | *dest = (uint16_t)cp; |
201 | 5.11M | ++dest; // Use pre-increment in case this is an iterator. |
202 | 5.11M | } else { |
203 | 0 | assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value"); |
204 | 0 | cp -= 0x10000; |
205 | 0 | *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF); |
206 | 0 | ++dest; |
207 | 0 | *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF); |
208 | 0 | ++dest; |
209 | 0 | } |
210 | 5.11M | } void hermes::encodeUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >&, unsigned int) Line | Count | Source | 198 | 1.99M | inline void encodeUTF16(OutIt &dest, uint32_t cp) { | 199 | 1.99M | if (LLVM_LIKELY(cp < 0x10000)) { | 200 | 1.99M | *dest = (uint16_t)cp; | 201 | 1.99M | ++dest; // Use pre-increment in case this is an iterator. | 202 | 1.99M | } else { | 203 | 0 | assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value"); | 204 | 0 | cp -= 0x10000; | 205 | 0 | *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF); | 206 | 0 | ++dest; | 207 | 0 | *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF); | 208 | 0 | ++dest; | 209 | 0 | } | 210 | 1.99M | } |
void hermes::encodeUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >&, unsigned int) Line | Count | Source | 198 | 3.11M | inline void encodeUTF16(OutIt &dest, uint32_t cp) { | 199 | 3.11M | if (LLVM_LIKELY(cp < 0x10000)) { | 200 | 3.11M | *dest = (uint16_t)cp; | 201 | 3.11M | ++dest; // Use pre-increment in case this is an iterator. | 202 | 3.11M | } else { | 203 | 0 | assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value"); | 204 | 0 | cp -= 0x10000; | 205 | 0 | *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF); | 206 | 0 | ++dest; | 207 | 0 | *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF); | 208 | 0 | ++dest; | 209 | 0 | } | 210 | 3.11M | } |
void hermes::encodeUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >&, unsigned int) Line | Count | Source | 198 | 143 | inline void encodeUTF16(OutIt &dest, uint32_t cp) { | 199 | 143 | if (LLVM_LIKELY(cp < 0x10000)) { | 200 | 143 | *dest = (uint16_t)cp; | 201 | 143 | ++dest; // Use pre-increment in case this is an iterator. | 202 | 143 | } else { | 203 | 0 | assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value"); | 204 | 0 | cp -= 0x10000; | 205 | 0 | *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF); | 206 | 0 | ++dest; | 207 | 0 | *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF); | 208 | 0 | ++dest; | 209 | 0 | } | 210 | 143 | } |
Unexecuted instantiation: void hermes::encodeUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 2u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 2u> >&, unsigned int) void hermes::encodeUTF16<char16_t*>(char16_t*&, unsigned int) Line | Count | Source | 198 | 4 | inline void encodeUTF16(OutIt &dest, uint32_t cp) { | 199 | 4 | if (LLVM_LIKELY(cp < 0x10000)) { | 200 | 4 | *dest = (uint16_t)cp; | 201 | 4 | ++dest; // Use pre-increment in case this is an iterator. | 202 | 4 | } else { | 203 | 0 | assert(cp <= UNICODE_MAX_VALUE && "invalid Unicode value"); | 204 | 0 | cp -= 0x10000; | 205 | 0 | *dest = UTF16_HIGH_SURROGATE + ((cp >> 10) & 0x3FF); | 206 | 0 | ++dest; | 207 | 0 | *dest = UTF16_LOW_SURROGATE + (cp & 0x3FF); | 208 | 0 | ++dest; | 209 | 0 | } | 210 | 4 | } |
|
211 | | |
212 | | /// Decode a UTF-8 sequence, which is assumed to be valid but may |
213 | | /// contain explicitly encoded surrogate pairs, into a UTF-16 sequence. |
214 | | /// \return the updated destination iterator |
215 | | template <typename OutIt> |
216 | | inline OutIt convertUTF8WithSurrogatesToUTF16( |
217 | | OutIt dest, |
218 | | const char *begin8, |
219 | 4.25k | const char *end8) { |
220 | 5.12M | while (begin8 < end8) |
221 | 5.11M | encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) { |
222 | 0 | llvm_unreachable("invalid UTF-8"); |
223 | 0 | })); Unexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) constUnexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) constUnexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) constUnexecuted instantiation: hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*)::{lambda(llvh::Twine const&)#1}::operator()(llvh::Twine const&) const |
224 | 4.25k | return dest; |
225 | 4.25k | } std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 16u> >, char const*, char const*) Line | Count | Source | 219 | 1.99k | const char *end8) { | 220 | 2.00M | while (begin8 < end8) | 221 | 1.99M | encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) { | 222 | 1.99M | llvm_unreachable("invalid UTF-8"); | 223 | 1.99M | })); | 224 | 1.99k | return dest; | 225 | 1.99k | } |
std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > > >(std::__1::back_insert_iterator<std::__1::vector<char16_t, std::__1::allocator<char16_t> > >, char const*, char const*) Line | Count | Source | 219 | 271 | const char *end8) { | 220 | 3.11M | while (begin8 < end8) | 221 | 3.11M | encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) { | 222 | 3.11M | llvm_unreachable("invalid UTF-8"); | 223 | 3.11M | })); | 224 | 271 | return dest; | 225 | 271 | } |
std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > hermes::convertUTF8WithSurrogatesToUTF16<std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> > >(std::__1::back_insert_iterator<llvh::SmallVector<char16_t, 6u> >, char const*, char const*) Line | Count | Source | 219 | 1.99k | const char *end8) { | 220 | 2.13k | while (begin8 < end8) | 221 | 143 | encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) { | 222 | 143 | llvm_unreachable("invalid UTF-8"); | 223 | 143 | })); | 224 | 1.99k | return dest; | 225 | 1.99k | } |
char16_t* hermes::convertUTF8WithSurrogatesToUTF16<char16_t*>(char16_t*, char const*, char const*) Line | Count | Source | 219 | 2 | const char *end8) { | 220 | 6 | while (begin8 < end8) | 221 | 4 | encodeUTF16(dest, decodeUTF8<true>(begin8, [](const llvh::Twine &) { | 222 | 4 | llvm_unreachable("invalid UTF-8"); | 223 | 4 | })); | 224 | 2 | return dest; | 225 | 2 | } |
|
226 | | |
227 | | /// Convert a UTF-16 encoded string \p input to UTF-8 stored in \p dest, |
228 | | /// encoding each surrogate half individually into UTF-8. |
229 | | /// This is the inverse function of convertUTF8WithSurrogatesToUTF16. |
230 | | /// Note the result is not valid UTF-8 if it contains surrogate values. |
231 | | /// Only use it to get the internal representation of UTF-8 strings in the |
232 | | /// Hermes compiler. |
233 | | void convertUTF16ToUTF8WithSingleSurrogates( |
234 | | std::string &dest, |
235 | | llvh::ArrayRef<char16_t> input); |
236 | | |
237 | | /// Convert a UTF-16 encoded string \p input to UTF-8 stored in \p dest, |
238 | | /// replacing unpaired surrogate halves with the Unicode replacement character. |
239 | | /// \param maxCharacters If non-zero, the maximum number of characters to |
240 | | /// convert. |
241 | | /// \return false if the string was truncated, true if the whole string was |
242 | | /// written out successfully. |
243 | | bool convertUTF16ToUTF8WithReplacements( |
244 | | std::string &dest, |
245 | | llvh::ArrayRef<char16_t> input, |
246 | | size_t maxCharacters = 0); |
247 | | |
248 | | /// Convert a UTF-16 encoded string \p input to a pre-allocated UTF-8 buffer |
249 | | /// \p outBuffer of length \p outBufferLength, replacing unpaired surrogate |
250 | | /// halves with the Unicode replacement character. |
251 | | /// \return a std::pair with the first element being the number of UTF-16 |
252 | | /// characters converted, and the second element being the number of UTF-8 |
253 | | /// characters written |
254 | | std::pair<uint32_t, uint32_t> convertUTF16ToUTF8BufferWithReplacements( |
255 | | llvh::MutableArrayRef<uint8_t> outBuffer, |
256 | | llvh::ArrayRef<char16_t> input); |
257 | | |
258 | | /// Convert a UTF-8 encoded string (with surrogates) \p input to a UTF-8 one |
259 | | /// (without surrogates), storing the conversion in \p output. Output characters |
260 | | /// are appended to \p output. |
261 | | void convertUTF8WithSurrogatesToUTF8WithReplacements( |
262 | | std::string &output, |
263 | | llvh::StringRef input); |
264 | | |
265 | | } // namespace hermes |
266 | | |
267 | | #endif // HERMES_SUPPORT_UTF8_H |