Coverage Report

Created: 2025-12-12 07:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hermes/lib/VM/JSLib/escape.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#include "JSLibInternal.h"
9
10
#include "hermes/Support/UTF8.h"
11
#include "hermes/VM/Operations.h"
12
#include "hermes/VM/SmallXString.h"
13
#include "hermes/VM/StringView.h"
14
15
#include "llvh/Support/ConvertUTF.h"
16
#pragma GCC diagnostic push
17
18
#ifdef HERMES_COMPILER_SUPPORTS_WSHORTEN_64_TO_32
19
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
20
#endif
21
namespace hermes {
22
namespace vm {
23
24
using llvh::ConversionResult;
25
using llvh::UTF32;
26
using llvh::UTF8;
27
28
/// Membership test for the characters that escape() copies verbatim.
/// \return true if \p c is an ASCII letter, an ASCII digit, or one of the
///   characters @ * _ + - . / and therefore doesn't need to be escaped.
static inline bool noEscape(char16_t c) {
  // Alphanumerics never need escaping.
  if ((u'a' <= c && c <= u'z') || (u'A' <= c && c <= u'Z') ||
      (u'0' <= c && c <= u'9')) {
    return true;
  }
  // The only punctuation that is passed through untouched.
  switch (c) {
    case u'@':
    case u'*':
    case u'_':
    case u'+':
    case u'-':
    case u'.':
    case u'/':
      return true;
    default:
      return false;
  }
}
34
35
/// Map a 4-bit value onto its uppercase hexadecimal digit.
/// \param x must be between 0 and 15 inclusive.
/// \return the character '0'-'9' or 'A'-'F' that represents \p x.
static inline char16_t toHexChar(int x) {
  assert(0 <= x && x <= 15 && "toHexChar argument out of bounds");
  // Values below ten are decimal digits; everything else is offset so that
  // 10 lands on 'A'.
  const int base = (0 <= x && x <= 9) ? u'0' : u'A' - 10;
  return base + x;
}
44
45
/// \return true if \p c is a hexadecimal digit, i.e. in the range
///   [0-9a-fA-F].
static inline bool isHexChar(char16_t c) {
  if (u'0' <= c && c <= u'9') {
    return true;
  }
  // Setting bit 5 folds 'A'-'F' onto 'a'-'f', so a single range check covers
  // both letter cases.
  const char16_t folded = c | 32;
  return u'a' <= folded && folded <= u'f';
}
51
52
/// \param c must be a hex char.
53
/// \return the result of converting c into a number (result is 8 bits).
54
0
static inline int fromHexChar(char16_t c) {
55
0
  assert(isHexChar(c) && "fromHexChar argument out of bounds");
56
0
  if (u'0' <= c && c <= u'9') {
57
0
    return c - u'0';
58
0
  }
59
  // Convert to lowercase.
60
0
  c |= 32;
61
0
  return c - u'a' + 10;
62
0
}
63
64
/// Convert the argument to string and escape unicode characters.
65
0
CallResult<HermesValue> escape(void *, Runtime &runtime, NativeArgs args) {
66
0
  auto res = toString_RJS(runtime, args.getArgHandle(0));
67
0
  if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) {
68
0
    return ExecutionStatus::EXCEPTION;
69
0
  }
70
0
  auto string = runtime.makeHandle(std::move(*res));
71
0
  auto len = string->getStringLength();
72
0
  SmallU16String<32> R{};
73
0
  R.reserve(len);
74
75
0
  for (char16_t c : StringPrimitive::createStringView(runtime, string)) {
76
0
    if (noEscape(c)) {
77
      // Just append.
78
0
      R.push_back(c);
79
0
    } else if (c < 256) {
80
      // R += "%xy" where xy is the 2 bytes of c.
81
0
      R.push_back(u'%');
82
0
      R.push_back(toHexChar((c >> 4) & 0xf));
83
0
      R.push_back(toHexChar(c & 0xf));
84
0
    } else {
85
      // R += "%uwxyz" where wxyz is the 4 bytes of c.
86
0
      R.append(u"%u");
87
0
      R.push_back(toHexChar((c >> 12) & 0xf));
88
0
      R.push_back(toHexChar((c >> 8) & 0xf));
89
0
      R.push_back(toHexChar((c >> 4) & 0xf));
90
0
      R.push_back(toHexChar(c & 0xf));
91
0
    }
92
0
  }
93
94
0
  return StringPrimitive::create(runtime, R);
95
0
}
96
97
/// Convert the argument to string and unescape unicode characters.
98
0
CallResult<HermesValue> unescape(void *, Runtime &runtime, NativeArgs args) {
99
0
  auto res = toString_RJS(runtime, args.getArgHandle(0));
100
0
  if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) {
101
0
    return ExecutionStatus::EXCEPTION;
102
0
  }
103
0
  auto strPrim = runtime.makeHandle(std::move(*res));
104
0
  auto len = strPrim->getStringLength();
105
0
  SmallU16String<32> R{};
106
0
  R.reserve(len);
107
108
0
  uint32_t k = 0;
109
0
  auto str = StringPrimitive::createStringView(runtime, strPrim);
110
0
  while (k < len) {
111
0
    char16_t c = str[k];
112
    // Resultant char to append to R.
113
0
    char16_t r = c;
114
0
    if (c == u'%') {
115
      // Try to read a hex string instead.
116
0
      if (k + 6 <= len && str[k + 1] == u'u' &&
117
0
          std::all_of(str.begin() + k + 2, str.begin() + k + 6, isHexChar)) {
118
        // Long form %uwxyz
119
0
        r = (fromHexChar(str[k + 2]) << 12) | (fromHexChar(str[k + 3]) << 8) |
120
0
            (fromHexChar(str[k + 4]) << 4) | fromHexChar(str[k + 5]);
121
0
        k += 5;
122
0
      } else if (
123
0
          k + 3 <= len && isHexChar(str[k + 1]) && isHexChar(str[k + 2])) {
124
        // Short form %xy
125
0
        r = (fromHexChar(str[k + 1]) << 4) | fromHexChar(str[k + 2]);
126
0
        k += 2;
127
0
      }
128
0
    }
129
0
    R.push_back(r);
130
0
    ++k;
131
0
  }
132
133
0
  return StringPrimitive::create(runtime, R);
134
0
}
135
136
/// Removes one character from the end of \p str.
137
/// Used to remove the null terminator when UTF16Ref is constructed from
138
/// literals.
139
2.99M
static inline UTF16Ref removeNullTerminator(const UTF16Ref str) {
140
2.99M
  return str.slice(0, str.size() - 1);
141
2.99M
}
142
143
/// Predicate used in place of a character set: returns true if \p ch is a
/// member of the set (e.g. the unescaped set for encoding or the reserved set
/// for decoding).
using CharSetFn = bool (*)(char16_t ch);
145
146
/// Is a member of uriUnescaped.
147
1.49M
static bool uriUnescaped(char16_t c) {
148
1.49M
  const UTF16Ref marks = removeNullTerminator(u"-_.!~*'()");
149
1.49M
  if (std::find(marks.begin(), marks.end(), c) != marks.end()) {
150
249k
    return true;
151
249k
  }
152
1.24M
  if (u'0' <= c && c <= u'9') {
153
498k
    return true;
154
498k
  }
155
  // Convert to lowercase and see if it's alphabetic.
156
747k
  c |= 32;
157
747k
  return u'a' <= c && c <= u'z';
158
1.24M
}
159
160
/// Is a member of uriReserved.
161
1.49M
static bool uriReserved(char16_t c) {
162
1.49M
  const UTF16Ref reserved = removeNullTerminator(u";/?:@&=+$,");
163
1.49M
  return std::find(reserved.begin(), reserved.end(), c) != reserved.end();
164
1.49M
}
165
166
/// Is a member of uriUnescaped plus '#', or is a member of uriReserved.
167
1.49M
static bool unescapedURISet(char16_t c) {
168
1.49M
  return uriReserved(c) || uriUnescaped(c) || c == '#';
169
1.49M
}
170
171
/// Is a member of uriReserved plus '#'.
172
0
static bool reservedURISet(char16_t c) {
173
0
  return uriReserved(c) || c == '#';
174
0
}
175
176
/// ES 5.1 15.1.3
177
/// Encode abstract method, takes a string and URI encodes it.
178
/// \param unescapedSet a function indicating which characters to not escape.
179
static CallResult<Handle<StringPrimitive>> encode(
180
    Runtime &runtime,
181
    Handle<StringPrimitive> strHandle,
182
124k
    CharSetFn unescapedSet) {
183
124k
  auto str = StringPrimitive::createStringView(runtime, strHandle);
184
124k
  auto strLen = str.length();
185
124k
  SmallU16String<32> R{};
186
124k
  R.reserve(strLen);
187
1.62M
  for (auto itr = str.begin(), e = str.end(); itr != e;) {
188
    // Use int32_t to allow for arithmetic past 16 bits.
189
1.49M
    uint32_t C = *itr;
190
1.49M
    if (unescapedSet(C)) {
191
747k
      R.push_back(C);
192
747k
    } else {
193
747k
      if (C >= 0xdc00 && C <= 0xdfff) {
194
0
        return runtime.raiseURIError("Malformed encodeURI input");
195
0
      }
196
      // Code point to convert to UTF8.
197
747k
      uint32_t V;
198
747k
      if (C < 0xd800 || C > 0xdbff) {
199
747k
        V = C;
200
747k
      } else {
201
0
        ++itr;
202
0
        if (itr == e) {
203
0
          return runtime.raiseURIError("Malformed encodeURI input");
204
0
        }
205
0
        uint32_t kChar = *itr;
206
0
        if (kChar < 0xdc00 || kChar > 0xdfff) {
207
0
          return runtime.raiseURIError("Malformed encodeURI input");
208
0
        }
209
0
        V = (C - 0xd800) * 0x400 + (kChar - 0xdc00) + 0x10000;
210
0
      }
211
747k
      char octets[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
212
747k
      char *targetStart = octets;
213
747k
      hermes::encodeUTF8(targetStart, V);
214
      // Length of the octets array.
215
747k
      uint32_t L = targetStart - octets;
216
1.49M
      for (uint32_t j = 0; j < L; ++j) {
217
747k
        auto jOctet = octets[j];
218
747k
        R.push_back(u'%');
219
747k
        R.push_back(toHexChar((jOctet >> 4) & 0xf));
220
747k
        R.push_back(toHexChar(jOctet & 0xf));
221
747k
      }
222
747k
    }
223
1.49M
    ++itr;
224
1.49M
  }
225
226
124k
  auto finalStr = StringPrimitive::create(runtime, R);
227
124k
  if (LLVM_UNLIKELY(finalStr == ExecutionStatus::EXCEPTION)) {
228
0
    return ExecutionStatus::EXCEPTION;
229
0
  }
230
124k
  return runtime.makeHandle<StringPrimitive>(*finalStr);
231
124k
}
232
233
124k
/// Convert the first argument to a string and URI-encode it, leaving the
/// characters accepted by unescapedURISet() unescaped.
/// \return the encoded string, or an exception status if the string
///   conversion or the encoding failed.
CallResult<HermesValue> encodeURI(void *, Runtime &runtime, NativeArgs args) {
  auto inputRes = toString_RJS(runtime, args.getArgHandle(0));
  if (LLVM_UNLIKELY(inputRes == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  auto input = runtime.makeHandle(std::move(*inputRes));
  auto encoded = encode(runtime, input, unescapedURISet);
  if (LLVM_UNLIKELY(encoded == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  return encoded->getHermesValue();
}
244
245
/// Convert the first argument to a string and URI-encode it, leaving only
/// the characters accepted by uriUnescaped() unescaped.
/// \return the encoded string, or an exception status if the string
///   conversion or the encoding failed.
CallResult<HermesValue>
encodeURIComponent(void *, Runtime &runtime, NativeArgs args) {
  auto inputRes = toString_RJS(runtime, args.getArgHandle(0));
  if (LLVM_UNLIKELY(inputRes == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  auto input = runtime.makeHandle(std::move(*inputRes));
  auto encoded = encode(runtime, input, uriUnescaped);
  if (LLVM_UNLIKELY(encoded == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  return encoded->getHermesValue();
}
257
258
/// ES 5.1 15.1.3
259
/// Decode abstract method, takes a string and URI decodes it.
260
/// \param reservedSet a function indicating which characters to not escape.
261
static CallResult<Handle<StringPrimitive>> decode(
262
    Runtime &runtime,
263
    Handle<StringPrimitive> strHandle,
264
0
    CharSetFn reservedSet) {
265
0
  auto str = StringPrimitive::createStringView(runtime, strHandle);
266
0
  auto strLen = str.length();
267
0
  SmallU16String<32> R{};
268
0
  R.reserve(strLen);
269
0
  for (auto itr = str.begin(), e = str.end(); itr != e;) {
270
0
    char16_t C = *itr;
271
0
    if (C != u'%') {
272
      // Regular character, continue.
273
0
      R.push_back(C);
274
0
    } else {
275
0
      auto start = itr;
276
0
      if (itr + 2 >= e || !(isHexChar(*(itr + 1)) && isHexChar(*(itr + 2)))) {
277
0
        return runtime.raiseURIError("Malformed decodeURI input");
278
0
      }
279
0
      uint8_t B = (fromHexChar(*(itr + 1)) << 4) | fromHexChar(*(itr + 2));
280
0
      itr += 2;
281
0
      if ((B & 0x80) == 0) {
282
        // Most significant bit of B is 0.
283
0
        C = B;
284
0
        if (!reservedSet(C)) {
285
0
          R.push_back(C);
286
0
        } else {
287
0
          R.insert(R.end(), start, itr + 1);
288
0
        }
289
0
      } else {
290
        // Most significant bit of B is 1.
291
0
        uint32_t n = 0;
292
        // Set n to be smallest such that (B << n) & 0x80 is 0.
293
        // n is set to the number of leading 1s in B.
294
0
        for (; n <= 8 && (((B << n) & 0x80) != 0); ++n) {
295
0
        }
296
0
        if (n == 1 || n > 4) {
297
0
          return runtime.raiseURIError("Malformed decodeURI input");
298
0
        }
299
        // Safe because we ensure that n <= 4.
300
0
        UTF8 octets[4]{B};
301
        // Not enough bytes to fill all n octets.
302
0
        if ((itr + (3 * (n - 1))) >= e) {
303
0
          return runtime.raiseURIError("Malformed decodeURI input");
304
0
        }
305
        // Populate octets.
306
0
        for (uint32_t j = 1; j < n; ++j) {
307
0
          ++itr;
308
0
          if (*itr != u'%' ||
309
0
              !(isHexChar(*(itr + 1)) && isHexChar(*(itr + 2)))) {
310
0
            return runtime.raiseURIError("Malformed decodeURI input");
311
0
          }
312
0
          B = (fromHexChar(*(itr + 1)) << 4) | fromHexChar(*(itr + 2));
313
0
          if (((B >> 6) & 0x3) != 0x2) {
314
            // The highest two bits aren't 10.
315
0
            return runtime.raiseURIError("Malformed decodeURI input");
316
0
          }
317
0
          itr += 2;
318
0
          octets[j] = B;
319
0
        }
320
        // Code point encoded by the n octets.
321
0
        uint32_t V;
322
0
        const UTF8 *sourceStart = octets;
323
0
        const UTF8 *sourceEnd = octets + n;
324
0
        UTF32 *targetStart = &V;
325
0
        UTF32 *targetEnd = &V + 1;
326
0
        ConversionResult cRes = ConvertUTF8toUTF32(
327
0
            &sourceStart,
328
0
            sourceEnd,
329
0
            &targetStart,
330
0
            targetEnd,
331
0
            llvh::strictConversion);
332
0
        if (cRes != ConversionResult::conversionOK) {
333
0
          return runtime.raiseURIError("Malformed decodeURI input");
334
0
        }
335
0
        if (V < 0x10000) {
336
          // Safe to cast.
337
0
          C = static_cast<char16_t>(V);
338
0
          if (!reservedSet(C)) {
339
0
            R.push_back(C);
340
0
          } else {
341
0
            R.insert(R.end(), start, itr + 1);
342
0
          }
343
0
        } else {
344
          // V >= 0x10000
345
          // Notice that L and H are both only 2 byte values,
346
          // because of they way that they're computed.
347
0
          char16_t L = ((V - 0x10000) & 0x3ff) + 0xdc00;
348
0
          char16_t H = (((V - 0x10000) >> 10) & 0x3ff) + 0xd800;
349
0
          R.push_back(H);
350
0
          R.push_back(L);
351
0
        }
352
0
      }
353
0
    }
354
0
    ++itr;
355
0
  }
356
357
0
  return runtime.makeHandle<StringPrimitive>(
358
0
      *StringPrimitive::create(runtime, R));
359
0
}
360
361
0
/// Convert the first argument to a string and URI-decode it. Escape
/// sequences that decode to a character accepted by reservedURISet() are
/// left in their escaped form.
/// \return the decoded string, or an exception status if the string
///   conversion or the decoding failed.
CallResult<HermesValue> decodeURI(void *, Runtime &runtime, NativeArgs args) {
  auto inputRes = toString_RJS(runtime, args.getArgHandle(0));
  if (LLVM_UNLIKELY(inputRes == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  auto input = runtime.makeHandle(std::move(*inputRes));
  auto decoded = decode(runtime, input, reservedURISet);
  if (LLVM_UNLIKELY(decoded == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  return decoded->getHermesValue();
}
372
373
/// Convert the first argument to a string and URI-decode it. The reserved
/// set is empty, so every valid escape sequence is decoded.
/// \return the decoded string, or an exception status if the string
///   conversion or the decoding failed.
CallResult<HermesValue>
decodeURIComponent(void *, Runtime &runtime, NativeArgs args) {
  auto inputRes = toString_RJS(runtime, args.getArgHandle(0));
  if (LLVM_UNLIKELY(inputRes == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  // A predicate that rejects every character, i.e. the empty set.
  const CharSetFn acceptNothing = [](char16_t) { return false; };
  auto input = runtime.makeHandle(std::move(*inputRes));
  auto decoded = decode(runtime, input, acceptNothing);
  if (LLVM_UNLIKELY(decoded == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  return decoded->getHermesValue();
}
385
386
} // namespace vm
387
} // namespace hermes