/src/hermes/lib/VM/JSLib/TextEncoder.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #include "JSLibInternal.h" |
9 | | |
10 | | #include "llvh/Support/ConvertUTF.h" |
11 | | |
12 | | #include "hermes/VM/JSTypedArray.h" |
13 | | |
14 | | namespace hermes { |
15 | | namespace vm { |
16 | | |
17 | 53 | Handle<JSObject> createTextEncoderConstructor(Runtime &runtime) { |
18 | 53 | auto textEncoderPrototype = |
19 | 53 | Handle<JSObject>::vmcast(&runtime.textEncoderPrototype); |
20 | | |
21 | | // Per https://webidl.spec.whatwg.org/#javascript-binding, @@toStringTag |
22 | | // should be writable=false, enumerable=false, and configurable=true. |
23 | 53 | DefinePropertyFlags dpf = DefinePropertyFlags::getNewNonEnumerableFlags(); |
24 | 53 | dpf.writable = 0; |
25 | 53 | defineProperty( |
26 | 53 | runtime, |
27 | 53 | textEncoderPrototype, |
28 | 53 | Predefined::getSymbolID(Predefined::SymbolToStringTag), |
29 | 53 | runtime.getPredefinedStringHandle(Predefined::TextEncoder), |
30 | 53 | dpf); |
31 | | |
32 | | // Based on |
33 | | // Object.getOwnPropertyDescriptor(TextEncoder.prototype, 'encoding'), both |
34 | | // Chrome and Safari have the 'encoding' property as enumerable and |
35 | | // configurable. We set things up to be the same. |
36 | 53 | defineAccessor( |
37 | 53 | runtime, |
38 | 53 | textEncoderPrototype, |
39 | 53 | Predefined::getSymbolID(Predefined::encoding), |
40 | 53 | nullptr, |
41 | 53 | textEncoderPrototypeEncoding, |
42 | 53 | nullptr, |
43 | 53 | /* enumerable */ true, |
44 | 53 | /* configurable */ true); |
45 | | |
46 | 53 | defineMethod( |
47 | 53 | runtime, |
48 | 53 | textEncoderPrototype, |
49 | 53 | Predefined::getSymbolID(Predefined::encode), |
50 | 53 | nullptr, |
51 | 53 | textEncoderPrototypeEncode, |
52 | 53 | 1); |
53 | | |
54 | 53 | defineMethod( |
55 | 53 | runtime, |
56 | 53 | textEncoderPrototype, |
57 | 53 | Predefined::getSymbolID(Predefined::encodeInto), |
58 | 53 | nullptr, |
59 | 53 | textEncoderPrototypeEncodeInto, |
60 | 53 | 2); |
61 | | |
62 | 53 | auto cons = defineSystemConstructor<JSObject>( |
63 | 53 | runtime, |
64 | 53 | Predefined::getSymbolID(Predefined::TextEncoder), |
65 | 53 | textEncoderConstructor, |
66 | 53 | textEncoderPrototype, |
67 | 53 | 0, |
68 | 53 | CellKind::JSObjectKind); |
69 | | |
70 | 53 | defineProperty( |
71 | 53 | runtime, |
72 | 53 | textEncoderPrototype, |
73 | 53 | Predefined::getSymbolID(Predefined::constructor), |
74 | 53 | cons); |
75 | | |
76 | 53 | return cons; |
77 | 53 | } |
78 | | |
79 | | CallResult<HermesValue> |
80 | 0 | textEncoderConstructor(void *, Runtime &runtime, NativeArgs args) { |
81 | 0 | GCScope gcScope{runtime}; |
82 | |
|
83 | 0 | if (LLVM_UNLIKELY(!args.isConstructorCall())) { |
84 | 0 | return runtime.raiseTypeError( |
85 | 0 | "TextEncoder must be called as a constructor"); |
86 | 0 | } |
87 | | |
88 | 0 | auto selfHandle = args.vmcastThis<JSObject>(); |
89 | |
|
90 | 0 | auto valueHandle = Runtime::getUndefinedValue(); |
91 | 0 | if (LLVM_UNLIKELY( |
92 | 0 | JSObject::defineNewOwnProperty( |
93 | 0 | selfHandle, |
94 | 0 | runtime, |
95 | 0 | Predefined::getSymbolID( |
96 | 0 | Predefined::InternalPropertyTextEncoderType), |
97 | 0 | PropertyFlags::defaultNewNamedPropertyFlags(), |
98 | 0 | valueHandle) == ExecutionStatus::EXCEPTION)) { |
99 | 0 | return ExecutionStatus::EXCEPTION; |
100 | 0 | } |
101 | | |
102 | 0 | return selfHandle.getHermesValue(); |
103 | 0 | } |
104 | | |
105 | | CallResult<HermesValue> |
106 | 0 | textEncoderPrototypeEncoding(void *, Runtime &runtime, NativeArgs args) { |
107 | 0 | GCScope gcScope{runtime}; |
108 | |
|
109 | 0 | auto selfHandle = args.dyncastThis<JSObject>(); |
110 | 0 | if (!selfHandle) { |
111 | 0 | return runtime.raiseTypeError( |
112 | 0 | "TextEncoder.prototype.encoding called on non-TextEncoder object"); |
113 | 0 | } |
114 | | |
115 | 0 | NamedPropertyDescriptor desc; |
116 | 0 | bool exists = JSObject::getOwnNamedDescriptor( |
117 | 0 | selfHandle, |
118 | 0 | runtime, |
119 | 0 | Predefined::getSymbolID(Predefined::InternalPropertyTextEncoderType), |
120 | 0 | desc); |
121 | 0 | if (LLVM_UNLIKELY(!exists)) { |
122 | 0 | return runtime.raiseTypeError( |
123 | 0 | "TextEncoder.prototype.encoding called on non-TextEncoder object"); |
124 | 0 | } |
125 | | |
126 | 0 | return HermesValue::encodeStringValue( |
127 | 0 | runtime.getPredefinedString(Predefined::utf8)); |
128 | 0 | } |
129 | | |
130 | | CallResult<HermesValue> |
131 | 0 | textEncoderPrototypeEncode(void *, Runtime &runtime, NativeArgs args) { |
132 | 0 | GCScope gcScope{runtime}; |
133 | 0 | auto selfHandle = args.dyncastThis<JSObject>(); |
134 | 0 | if (LLVM_UNLIKELY(!selfHandle)) { |
135 | 0 | return runtime.raiseTypeError( |
136 | 0 | "TextEncoder.prototype.encode() called on non-TextEncoder object"); |
137 | 0 | } |
138 | 0 | NamedPropertyDescriptor desc; |
139 | 0 | bool exists = JSObject::getOwnNamedDescriptor( |
140 | 0 | selfHandle, |
141 | 0 | runtime, |
142 | 0 | Predefined::getSymbolID(Predefined::InternalPropertyTextEncoderType), |
143 | 0 | desc); |
144 | 0 | if (LLVM_UNLIKELY(!exists)) { |
145 | 0 | return runtime.raiseTypeError( |
146 | 0 | "TextEncoder.prototype.encode() called on non-TextEncoder object"); |
147 | 0 | } |
148 | | |
149 | 0 | auto strRes = toString_RJS(runtime, args.getArgHandle(0)); |
150 | 0 | if (LLVM_UNLIKELY(strRes == ExecutionStatus::EXCEPTION)) { |
151 | 0 | return ExecutionStatus::EXCEPTION; |
152 | 0 | } |
153 | 0 | Handle<StringPrimitive> string = runtime.makeHandle(std::move(*strRes)); |
154 | | |
155 | | // If input string is empty, then the function can return early. This also |
156 | | // avoids having to check later before calling std::memcpy to avoid undefined |
157 | | // behavior. |
158 | 0 | if (LLVM_UNLIKELY(string->getStringLength() == 0)) { |
159 | 0 | auto result = Uint8Array::allocate(runtime); |
160 | 0 | if (LLVM_UNLIKELY(result == ExecutionStatus::EXCEPTION)) { |
161 | 0 | return ExecutionStatus::EXCEPTION; |
162 | 0 | } |
163 | 0 | return result->getHermesValue(); |
164 | 0 | } |
165 | | |
166 | 0 | if (string->isASCII()) { |
167 | | // ASCII string can trivially be converted to UTF-8 because ASCII is a |
168 | | // strict subset. |
169 | 0 | auto result = Uint8Array::allocate(runtime, string->getStringLength()); |
170 | 0 | if (LLVM_UNLIKELY(result == ExecutionStatus::EXCEPTION)) { |
171 | 0 | return ExecutionStatus::EXCEPTION; |
172 | 0 | } |
173 | | |
174 | 0 | Handle<JSTypedArrayBase> typedArray = result.getValue(); |
175 | 0 | llvh::ArrayRef<char> strRef = string->getStringRef<char>(); |
176 | |
|
177 | 0 | std::memcpy( |
178 | 0 | typedArray->begin(runtime), strRef.data(), string->getStringLength()); |
179 | 0 | return typedArray.getHermesValue(); |
180 | 0 | } else { |
181 | | // Convert UTF-16 to UTF-8 |
182 | 0 | llvh::ArrayRef<char16_t> strRef = string->getStringRef<char16_t>(); |
183 | 0 | std::string converted; |
184 | 0 | bool success = convertUTF16ToUTF8WithReplacements(converted, strRef); |
185 | 0 | if (LLVM_UNLIKELY(!success)) { |
186 | 0 | return runtime.raiseError("Failed to convert from UTF-16 to UTF-8"); |
187 | 0 | } |
188 | | |
189 | 0 | auto result = Uint8Array::allocate(runtime, converted.length()); |
190 | 0 | if (LLVM_UNLIKELY(result == ExecutionStatus::EXCEPTION)) { |
191 | 0 | return ExecutionStatus::EXCEPTION; |
192 | 0 | } |
193 | | |
194 | 0 | Handle<JSTypedArrayBase> typedArray = result.getValue(); |
195 | 0 | std::memcpy( |
196 | 0 | typedArray->begin(runtime), converted.data(), converted.length()); |
197 | 0 | return typedArray.getHermesValue(); |
198 | 0 | } |
199 | 0 | } |
200 | | |
201 | | CallResult<HermesValue> |
202 | 0 | textEncoderPrototypeEncodeInto(void *, Runtime &runtime, NativeArgs args) { |
203 | 0 | GCScope gcScope{runtime}; |
204 | 0 | auto selfHandle = args.dyncastThis<JSObject>(); |
205 | 0 | if (LLVM_UNLIKELY(!selfHandle)) { |
206 | 0 | return runtime.raiseTypeError( |
207 | 0 | "TextEncoder.prototype.encodeInto() called on non-TextEncoder object"); |
208 | 0 | } |
209 | 0 | NamedPropertyDescriptor desc; |
210 | 0 | bool exists = JSObject::getOwnNamedDescriptor( |
211 | 0 | selfHandle, |
212 | 0 | runtime, |
213 | 0 | Predefined::getSymbolID(Predefined::InternalPropertyTextEncoderType), |
214 | 0 | desc); |
215 | 0 | if (LLVM_UNLIKELY(!exists)) { |
216 | 0 | return runtime.raiseTypeError( |
217 | 0 | "TextEncoder.prototype.encodeInto() called on non-TextEncoder object"); |
218 | 0 | } |
219 | | |
220 | 0 | auto strRes = toString_RJS(runtime, args.getArgHandle(0)); |
221 | 0 | if (LLVM_UNLIKELY(strRes == ExecutionStatus::EXCEPTION)) { |
222 | 0 | return ExecutionStatus::EXCEPTION; |
223 | 0 | } |
224 | 0 | Handle<StringPrimitive> string = runtime.makeHandle(std::move(*strRes)); |
225 | |
|
226 | 0 | Handle<Uint8Array> typedArray = args.dyncastArg<Uint8Array>(1); |
227 | 0 | if (LLVM_UNLIKELY(!typedArray)) { |
228 | 0 | return runtime.raiseTypeError("The second argument should be a Uint8Array"); |
229 | 0 | } |
230 | | |
231 | 0 | if (LLVM_UNLIKELY(!typedArray->attached(runtime))) { |
232 | 0 | return runtime.raiseTypeError( |
233 | 0 | "TextEncoder.prototype.encodeInto() called on a detached Uint8Array"); |
234 | 0 | } |
235 | | |
236 | 0 | PseudoHandle<JSObject> objRes = JSObject::create(runtime, 2); |
237 | 0 | Handle<JSObject> obj = runtime.makeHandle(objRes.get()); |
238 | |
|
239 | 0 | uint32_t numRead = 0; |
240 | 0 | uint32_t numWritten = 0; |
241 | |
|
242 | 0 | if (LLVM_UNLIKELY(string->getStringLength() == 0)) { |
243 | 0 | numRead = 0; |
244 | 0 | numWritten = 0; |
245 | 0 | } else if (string->isASCII()) { |
246 | | // ASCII string can trivially be converted to UTF-8 because ASCII is a |
247 | | // strict subset. However, since the output array size is provided by the |
248 | | // caller, we will only copy as much length as provided. |
249 | 0 | llvh::ArrayRef<char> strRef = string->getStringRef<char>(); |
250 | |
|
251 | 0 | uint32_t copiedLength = |
252 | 0 | std::min(string->getStringLength(), typedArray->getLength()); |
253 | |
|
254 | 0 | std::memcpy(typedArray->begin(runtime), strRef.data(), copiedLength); |
255 | |
|
256 | 0 | numRead = copiedLength; |
257 | 0 | numWritten = copiedLength; |
258 | 0 | } else { |
259 | | // Convert UTF-16 to the given Uint8Array |
260 | 0 | llvh::ArrayRef<char16_t> strRef = string->getStringRef<char16_t>(); |
261 | 0 | std::pair<uint32_t, uint32_t> result = |
262 | 0 | convertUTF16ToUTF8BufferWithReplacements( |
263 | 0 | llvh::makeMutableArrayRef<uint8_t>( |
264 | 0 | typedArray->begin(runtime), typedArray->getLength()), |
265 | 0 | strRef); |
266 | 0 | numRead = result.first; |
267 | 0 | numWritten = result.second; |
268 | 0 | } |
269 | | |
270 | | // Construct the result JSObject containing information about how much data |
271 | | // was converted |
272 | 0 | auto numReadHandle = |
273 | 0 | runtime.makeHandle(HermesValue::encodeTrustedNumberValue(numRead)); |
274 | 0 | auto numWrittenHandle = |
275 | 0 | runtime.makeHandle(HermesValue::encodeTrustedNumberValue(numWritten)); |
276 | |
|
277 | 0 | auto res = JSObject::defineNewOwnProperty( |
278 | 0 | obj, |
279 | 0 | runtime, |
280 | 0 | Predefined::getSymbolID(Predefined::read), |
281 | 0 | PropertyFlags::defaultNewNamedPropertyFlags(), |
282 | 0 | numReadHandle); |
283 | 0 | if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) { |
284 | 0 | return ExecutionStatus::EXCEPTION; |
285 | 0 | } |
286 | | |
287 | 0 | res = JSObject::defineNewOwnProperty( |
288 | 0 | obj, |
289 | 0 | runtime, |
290 | 0 | Predefined::getSymbolID(Predefined::written), |
291 | 0 | PropertyFlags::defaultNewNamedPropertyFlags(), |
292 | 0 | numWrittenHandle); |
293 | 0 | if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) { |
294 | 0 | return ExecutionStatus::EXCEPTION; |
295 | 0 | } |
296 | | |
297 | 0 | return obj.getHermesValue(); |
298 | 0 | } |
299 | | |
300 | | } // namespace vm |
301 | | } // namespace hermes |