Coverage Report

Created: 2025-01-28 06:38

/src/hermes/lib/VM/JSRegExp.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#include "hermes/VM/JSRegExp.h"
9
10
#include "hermes/Regex/Executor.h"
11
#include "hermes/Regex/Regex.h"
12
#include "hermes/Regex/RegexTraits.h"
13
#include "hermes/Support/UTF8.h"
14
#include "hermes/VM/BuildMetadata.h"
15
#include "hermes/VM/Operations.h"
16
#include "hermes/VM/RegExpMatch.h"
17
#include "hermes/VM/Runtime-inline.h"
18
#include "hermes/VM/StringView.h"
19
20
#pragma GCC diagnostic push
21
22
#ifdef HERMES_COMPILER_SUPPORTS_WSHORTEN_64_TO_32
23
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
24
#endif
25
namespace hermes {
26
namespace vm {
27
28
//===----------------------------------------------------------------------===//
29
// class JSRegExp
30
31
/// Virtual table for JSRegExp cells: the generic cell VTable (kind, size and
/// native-memory callbacks) followed by the indexed-property hooks.
const ObjectVTable JSRegExp::vt{
    VTable(
        CellKind::JSRegExpKind,
        cellSize<JSRegExp>(),
        // Runs the destructor, which releases the malloc'ed bytecode.
        JSRegExp::_finalizeImpl,
        // Reports the size of the malloc'ed bytecode buffer.
        JSRegExp::_mallocSizeImpl,
        nullptr
#ifdef HERMES_MEMORY_INSTRUMENTATION
        ,
        // Heap-snapshot callbacks; only present in instrumented builds.
        VTable::HeapSnapshotMetadata{
            HeapSnapshot::NodeType::Regexp,
            JSRegExp::_snapshotNameImpl,
            JSRegExp::_snapshotAddEdgesImpl,
            JSRegExp::_snapshotAddNodesImpl,
            nullptr}
#endif

        ),
    // Indexed-property hooks.
    JSRegExp::_getOwnIndexedRangeImpl,
    JSRegExp::_haveOwnIndexedImpl,
    JSRegExp::_getOwnIndexedPropertyFlagsImpl,
    JSRegExp::_getOwnIndexedImpl,
    JSRegExp::_setOwnIndexedImpl,
    JSRegExp::_deleteOwnIndexedImpl,
    JSRegExp::_checkAllOwnIndexedImpl,
};
57
58
1
void JSRegExpBuildMeta(const GCCell *cell, Metadata::Builder &mb) {
59
1
  mb.addJSObjectOverlapSlots(JSObject::numOverlapSlots<JSRegExp>());
60
1
  JSObjectBuildMeta(cell, mb);
61
1
  const auto *self = static_cast<const JSRegExp *>(cell);
62
1
  mb.setVTable(&JSRegExp::vt);
63
1
  mb.addField(&self->pattern_);
64
1
  mb.addField(&self->groupNameMappings_);
65
1
}
66
67
/// Allocate a new, uninitialized JSRegExp whose prototype is \p parentHandle.
/// The cell is allocated with a finalizer.
/// \return a pseudo-handle to the new object.
PseudoHandle<JSRegExp> JSRegExp::create(
    Runtime &runtime,
    Handle<JSObject> parentHandle) {
  // Hidden class for objects with this prototype and JSRegExp's overlap slots.
  auto hiddenClass = runtime.getHiddenClassForPrototype(
      *parentHandle, numOverlapSlots<JSRegExp>());
  auto *newCell = runtime.makeAFixed<JSRegExp, HasFinalizer::Yes>(
      runtime, parentHandle, hiddenClass);
  return JSObjectInit::initToPseudoHandle(runtime, newCell);
}
77
78
/// Derive the hidden class used for match results by extending \p arrayClass
/// with the named properties \c index, \c input and \c groups, in that order.
/// \return the hidden class reached after adding all three properties.
Handle<HiddenClass> JSRegExp::createMatchClass(
    Runtime &runtime,
    Handle<HiddenClass> arrayClass) {
  // Extends \p base with the predefined property \p propName and returns the
  // resulting hidden class.
  auto extendWith = [&runtime](
                        Handle<HiddenClass> base,
                        Predefined::Str propName) -> Handle<HiddenClass> {
    auto extended = HiddenClass::addProperty(
        base,
        runtime,
        Predefined::getSymbolID(propName),
        PropertyFlags::defaultNewNamedPropertyFlags());
    assert(
        extended != ExecutionStatus::EXCEPTION &&
        "Adding the first properties shouldn't cause overflow");
    return extended->first;
  };

  // Nested calls keep the required order: index, then input, then groups.
  return extendWith(
      extendWith(
          extendWith(arrayClass, Predefined::index), Predefined::input),
      Predefined::groups);
}
99
100
void JSRegExp::initialize(
101
    Handle<JSRegExp> selfHandle,
102
    Runtime &runtime,
103
    Handle<StringPrimitive> pattern,
104
    Handle<StringPrimitive> flags,
105
8
    llvh::ArrayRef<uint8_t> bytecode) {
106
8
  assert(
107
8
      pattern && flags &&
108
8
      "Null pattern and/or flags passed to JSRegExp::initialize");
109
8
  selfHandle->pattern_.set(runtime, *pattern, runtime.getHeap());
110
111
8
  DefinePropertyFlags dpf = DefinePropertyFlags::getDefaultNewPropertyFlags();
112
8
  dpf.enumerable = 0;
113
8
  dpf.configurable = 0;
114
115
8
  auto res = JSObject::defineOwnProperty(
116
8
      selfHandle,
117
8
      runtime,
118
8
      Predefined::getSymbolID(Predefined::lastIndex),
119
8
      dpf,
120
8
      HandleRootOwner::getZeroValue());
121
8
  (void)res;
122
8
  assert(
123
8
      res != ExecutionStatus::EXCEPTION && *res &&
124
8
      "defineOwnProperty() failed");
125
126
8
  selfHandle->initializeBytecode(bytecode);
127
8
}
128
129
/// Initialize \p selfHandle as a copy of \p otherHandle's pattern, using
/// \p flags as the new flags string.
/// If \p flags parses to the same syntax flags as \p otherHandle, the other
/// regexp's compiled bytecode and named-group table are reused instead of
/// recompiling. Otherwise the pattern is recompiled with the new flags.
/// \return EXCEPTION (with a SyntaxError raised) if \p flags is invalid or
/// recompilation fails; RETURNED otherwise.
ExecutionStatus JSRegExp::initialize(
    Handle<JSRegExp> selfHandle,
    Runtime &runtime,
    Handle<JSRegExp> otherHandle,
    Handle<StringPrimitive> flags) {
  llvh::SmallVector<char16_t, 16> flagsText16;
  flags->appendUTF16String(flagsText16);

  auto sflags = regex::SyntaxFlags::fromString(flagsText16);
  if (!sflags) {
    return runtime.raiseSyntaxError("Invalid RegExp: Invalid flags");
  }

  auto pattern = runtime.makeHandle(getPattern(otherHandle.get(), runtime));

  // Fast path to avoid recompiling the RegExp if the flags match
  if (LLVM_LIKELY(
          sflags->toByte() == getSyntaxFlags(otherHandle.get()).toByte())) {
    initialize(
        selfHandle,
        runtime,
        pattern,
        flags,
        {otherHandle->bytecode_, otherHandle->bytecodeSize_});
    // The bytecode overload does not touch groupNameMappings_, so carry over
    // the other regexp's named-group table; without this the copy would lose
    // its named capture groups. The pointer is read only after the call above
    // because that call may allocate.
    // NOTE(review): the mapping object is shared, not cloned; this assumes it
    // is treated as read-only once created — confirm.
    if (auto *mappings = vmcast_or_null<JSObject>(
            otherHandle->groupNameMappings_.get(runtime))) {
      selfHandle->groupNameMappings_.set(runtime, mappings, runtime.getHeap());
    }
    return ExecutionStatus::RETURNED;
  }
  // Flags differ: recompile, which also rebuilds the named-group table.
  return initialize(selfHandle, runtime, pattern, flags);
}
157
158
/// ES11 21.2.3.2.2 RegExpInitialize ( obj, pattern, flags )
159
ExecutionStatus JSRegExp::initialize(
160
    Handle<JSRegExp> selfHandle,
161
    Runtime &runtime,
162
    Handle<StringPrimitive> pattern,
163
1
    Handle<StringPrimitive> flags) {
164
1
  assert(
165
1
      pattern && flags &&
166
1
      "Null pattern and/or flags passed to JSRegExp::initialize");
167
1
  llvh::SmallVector<char16_t, 6> flagsText16;
168
1
  flags->appendUTF16String(flagsText16);
169
170
1
  llvh::SmallVector<char16_t, 16> patternText16;
171
1
  pattern->appendUTF16String(patternText16);
172
173
  // Build the regex.
174
1
  regex::Regex<regex::UTF16RegexTraits> regex(patternText16, flagsText16);
175
176
1
  if (!regex.valid()) {
177
0
    return runtime.raiseSyntaxError(
178
0
        TwineChar16("Invalid RegExp: ") +
179
0
        regex::constants::messageForError(regex.getError()));
180
0
  }
181
  // The regex is valid. Compile and store its bytecode.
182
1
  auto bytecode = regex.compile();
183
  // Also store the name mappings.
184
1
  if (LLVM_UNLIKELY(
185
1
          initializeGroupNameMappingObj(
186
1
              runtime,
187
1
              selfHandle,
188
1
              regex.getOrderedNamedGroups(),
189
1
              regex.getGroupNamesMapping()) == ExecutionStatus::EXCEPTION)) {
190
0
    return ExecutionStatus::EXCEPTION;
191
0
  }
192
1
  initialize(selfHandle, runtime, pattern, flags, bytecode);
193
1
  return ExecutionStatus::RETURNED;
194
1
}
195
196
/// Build the object that maps each named capture group to the number stored
/// for it in \p parsedMappings, and store it in \p selfHandle.
/// Properties are added in the order given by \p orderedNamedGroups.
/// If \p parsedMappings is empty, nothing is created or stored.
/// \return EXCEPTION if a symbol or a property could not be created;
/// RETURNED otherwise.
ExecutionStatus JSRegExp::initializeGroupNameMappingObj(
    Runtime &runtime,
    Handle<JSRegExp> selfHandle,
    std::deque<llvh::SmallVector<char16_t, 5>> &orderedNamedGroups,
    regex::ParsedGroupNamesMapping &parsedMappings) {
  GCScope gcScope(runtime);
  if (parsedMappings.size() == 0) {
    return ExecutionStatus::RETURNED;
  }

  auto mappingObj = runtime.makeHandle(
      JSObject::create(runtime, parsedMappings.size()).get());

  // A single mutable handle is reused for every property value.
  MutableHandle<HermesValue> valueHandle{runtime};
  for (const auto &groupName : orderedNamedGroups) {
    // Handles created by one iteration are released before the next one.
    GCScopeMarkerRAII marker{gcScope};

    auto nameSymbol =
        runtime.getIdentifierTable().getSymbolHandle(runtime, groupName);
    if (LLVM_UNLIKELY(nameSymbol == ExecutionStatus::EXCEPTION)) {
      return ExecutionStatus::EXCEPTION;
    }

    valueHandle.set(
        HermesValue::encodeUntrustedNumberValue(parsedMappings[groupName]));
    auto defineRes = JSObject::defineNewOwnProperty(
        mappingObj,
        runtime,
        nameSymbol->get(),
        PropertyFlags::defaultNewNamedPropertyFlags(),
        valueHandle);
    if (LLVM_UNLIKELY(defineRes == ExecutionStatus::EXCEPTION)) {
      return ExecutionStatus::EXCEPTION;
    }
  }

  selfHandle->groupNameMappings_.set(runtime, *mappingObj, runtime.getHeap());
  return ExecutionStatus::RETURNED;
}
231
232
0
/// \return a handle to the named-group mapping object, or a null handle if
/// this regexp has none.
Handle<JSObject> JSRegExp::getGroupNameMappings(Runtime &runtime) {
  auto *mappings = vmcast_or_null<JSObject>(groupNameMappings_.get(runtime));
  if (!mappings) {
    return Runtime::makeNullHandle<JSObject>();
  }
  return runtime.makeHandle(mappings);
}
237
238
0
/// Replace the named-group mapping object of this regexp with \p groupObj.
void JSRegExp::setGroupNameMappings(Runtime &runtime, JSObject *groupObj) {
  auto &heap = runtime.getHeap();
  groupNameMappings_.set(runtime, groupObj, heap);
}
241
242
8
/// Copy \p bytecode into a freshly malloc'ed buffer owned by this object and
/// cache the syntax flags stored in the bytecode header.
/// \p bytecode must begin with a complete regex::RegexBytecodeHeader and its
/// size must fit in 32 bits.
/// NOTE(review): a buffer installed by an earlier call is not released here;
/// this presumably relies on being called once per object — confirm.
void JSRegExp::initializeBytecode(llvh::ArrayRef<uint8_t> bytecode) {
  const size_t sz = bytecode.size();
  assert(
      sz <= std::numeric_limits<uint32_t>::max() &&
      "Bytecode size cannot exceed 32 bits");
  // The header is read straight out of the buffer below, so the buffer must
  // be large enough to contain one.
  assert(
      sz >= sizeof(regex::RegexBytecodeHeader) &&
      "Bytecode is too small to contain a header");
  const auto *header =
      reinterpret_cast<const regex::RegexBytecodeHeader *>(bytecode.data());
  syntaxFlags_ = regex::SyntaxFlags::fromByte(header->syntaxFlags);
  bytecodeSize_ = sz;
  // Install the new buffer before copying: the source may be another regexp's
  // (or this object's previous) buffer, which stays valid during the copy.
  bytecode_ = static_cast<uint8_t *>(checkedMalloc(sz));
  memcpy(bytecode_, bytecode.data(), sz);
}
254
255
/// \return the pattern string stored in \p self, resolved through \p base.
PseudoHandle<StringPrimitive> JSRegExp::getPattern(
    JSRegExp *self,
    PointerBase &base) {
  auto *patternStr = self->pattern_.get(base);
  return createPseudoHandle(patternStr);
}
260
261
template <typename CharT, typename Traits>
262
CallResult<RegExpMatch> performSearch(
263
    Runtime &runtime,
264
    llvh::ArrayRef<uint8_t> bytecode,
265
    const CharT *start,
266
    uint32_t stringLength,
267
    uint32_t searchStartOffset,
268
0
    regex::constants::MatchFlagType matchFlags) {
269
0
  std::vector<regex::CapturedRange> nativeMatchRanges;
270
0
  auto matchResult = regex::searchWithBytecode(
271
0
      bytecode,
272
0
      start,
273
0
      searchStartOffset,
274
0
      stringLength,
275
0
      &nativeMatchRanges,
276
0
      matchFlags,
277
0
      runtime.getOverflowGuardForRegex());
278
0
  if (matchResult == regex::MatchRuntimeResult::StackOverflow) {
279
0
    return runtime.raiseRangeError("Maximum regex stack depth reached");
280
0
  } else if (matchResult == regex::MatchRuntimeResult::NoMatch) {
281
0
    return RegExpMatch{}; // not found.
282
0
  }
283
0
  size_t matchRangeCount = nativeMatchRanges.size();
284
0
  assert(matchRangeCount > 0);
285
0
  RegExpMatch match;
286
0
  match.reserve(matchRangeCount);
287
0
  for (size_t i = 0; i < matchRangeCount; i++) {
288
0
    const auto &submatch = nativeMatchRanges[i];
289
0
    if (!submatch.matched()) {
290
0
      assert(i > 0 && "match_result[0] should always match");
291
0
      match.push_back(llvh::None);
292
0
    } else {
293
0
      uint32_t pos = submatch.start;
294
0
      uint32_t length = submatch.end - submatch.start;
295
0
      match.push_back(RegExpMatchRange{pos, length});
296
0
    }
297
0
  }
298
0
  assert(!match.empty() && "Unexpected empty match");
299
0
  return match;
300
0
}
Unexecuted instantiation: hermes::vm::CallResult<llvh::SmallVector<hermes::OptValue<hermes::vm::RegExpMatchRange>, 4u>, (hermes::vm::detail::CallResultSpecialize)0> hermes::vm::performSearch<char, hermes::regex::ASCIIRegexTraits>(hermes::vm::Runtime&, llvh::ArrayRef<unsigned char>, char const*, unsigned int, unsigned int, hermes::regex::constants::MatchFlagType)
Unexecuted instantiation: hermes::vm::CallResult<llvh::SmallVector<hermes::OptValue<hermes::vm::RegExpMatchRange>, 4u>, (hermes::vm::detail::CallResultSpecialize)0> hermes::vm::performSearch<char16_t, hermes::regex::UTF16RegexTraits>(hermes::vm::Runtime&, llvh::ArrayRef<unsigned char>, char16_t const*, unsigned int, unsigned int, hermes::regex::constants::MatchFlagType)
301
302
/// Search \p strHandle with the regexp \p selfHandle, starting at
/// \p searchStartOffset.
/// On a successful match the runtime's last-input, last-regexp and last-match
/// records are updated.
/// \return the match (empty when there is none), or EXCEPTION if the matcher
/// raised.
CallResult<RegExpMatch> JSRegExp::search(
    Handle<JSRegExp> selfHandle,
    Runtime &runtime,
    Handle<StringPrimitive> strHandle,
    uint32_t searchStartOffset) {
  assert(selfHandle->bytecode_ && "Missing bytecode");
  auto input = StringPrimitive::createStringView(runtime, strHandle);

  // An offset equal to the length is still searched, because the regexp may
  // match the empty string there. Anything past that cannot match.
  if (searchStartOffset > input.length()) {
    return RegExpMatch{}; // no match possible
  }

  // The sticky flag forces the match to begin exactly at the given offset.
  auto matchFlags = regex::constants::matchDefault;
  if (selfHandle->syntaxFlags_.sticky) {
    matchFlags |= regex::constants::matchOnlyAtStart;
  }

  const auto bytecode =
      llvh::makeArrayRef(selfHandle->bytecode_, selfHandle->bytecodeSize_);

  // Dispatch on the input's character width.
  CallResult<RegExpMatch> matchResult = RegExpMatch{};
  if (!input.isASCII()) {
    matchResult = performSearch<char16_t, regex::UTF16RegexTraits>(
        runtime,
        bytecode,
        input.castToChar16Ptr(),
        input.length(),
        searchStartOffset,
        matchFlags);
  } else {
    matchFlags |= regex::constants::matchInputAllAscii;
    matchResult = performSearch<char, regex::ASCIIRegexTraits>(
        runtime,
        bytecode,
        input.castToCharPtr(),
        input.length(),
        searchStartOffset,
        matchFlags);
  }

  if (LLVM_UNLIKELY(matchResult == ExecutionStatus::EXCEPTION)) {
    return ExecutionStatus::EXCEPTION;
  }
  // Only update the "last match" records on a successful match.
  if (!matchResult->empty()) {
    runtime.regExpLastInput = strHandle.getHermesValue();
    runtime.regExpLastRegExp = selfHandle.getHermesValue();
    runtime.regExpLastMatch = *matchResult;
  }
  return matchResult;
}
354
355
8
/// Release the malloc'ed copy of the compiled bytecode.
JSRegExp::~JSRegExp() {
  // free() accepts a null pointer, so no check is needed.
  free(bytecode_);
}
358
359
8
/// Finalizer for JSRegExp cells: stop tracking the native bytecode buffer (if
/// there is one) and run the destructor, which frees it.
void JSRegExp::_finalizeImpl(GCCell *cell, GC &gc) {
  auto *regExp = vmcast<JSRegExp>(cell);
  // Untrack before the destructor runs, while the pointer is still valid.
  if (auto *nativeBytecode = regExp->bytecode_) {
    gc.getIDTracker().untrackNative(nativeBytecode);
  }
  regExp->~JSRegExp();
}
366
367
0
/// \return the number of malloc'ed bytes owned by \p cell, which is the size
/// of its bytecode buffer.
size_t JSRegExp::_mallocSizeImpl(GCCell *cell) {
  return vmcast<JSRegExp>(cell)->bytecodeSize_;
}
371
372
#ifdef HERMES_MEMORY_INSTRUMENTATION
373
0
/// \return the heap-snapshot name of \p cell: its pattern string passed
/// through \c converter.
std::string JSRegExp::_snapshotNameImpl(GCCell *cell, GC &gc) {
  auto *const regExp = vmcast<JSRegExp>(cell);
  auto pattern = getPattern(regExp, gc.getPointerBase());
  return converter(pattern.get());
}
377
378
0
/// Add the heap-snapshot edges of \p cell: those of the JSObject base class
/// and, when bytecode is present, an internal "bytecode" edge to its native
/// node.
void JSRegExp::_snapshotAddEdgesImpl(GCCell *cell, GC &gc, HeapSnapshot &snap) {
  auto *const regExp = vmcast<JSRegExp>(cell);
  // Let the super type add its own edges first.
  JSObject::_snapshotAddEdgesImpl(regExp, gc, snap);
  if (!regExp->bytecode_) {
    return;
  }
  snap.addNamedEdge(
      HeapSnapshot::EdgeType::Internal,
      "bytecode",
      gc.getNativeID(regExp->bytecode_));
}
389
390
0
/// Add a native "RegExpBytecode" node for the bytecode buffer of \p cell, so
/// that the native size directly owned by the regexp is accounted for.
/// Nothing is added when the regexp has no bytecode.
void JSRegExp::_snapshotAddNodesImpl(GCCell *cell, GC &gc, HeapSnapshot &snap) {
  auto *const regExp = vmcast<JSRegExp>(cell);
  if (!regExp->bytecode_) {
    return;
  }
  snap.beginNode();
  snap.endNode(
      HeapSnapshot::NodeType::Native,
      "RegExpBytecode",
      gc.getNativeID(regExp->bytecode_),
      regExp->bytecodeSize_,
      0);
}
404
#endif
405
406
/// \return an escaped string equivalent to \p pattern.
407
/// This is used to construct the 'source' property of RegExp. This requires
408
/// us to return a string from which the regexp may be reconstructed as if
409
/// from a /foo/ style literal. Note this is different from the RegExp
410
/// constructor that takes a string, e.g. new RegExp("/") returns a regexp
411
/// that matches /, but
412
/// /// does not (it's a comment!). So we may have to perform surgery on the
413
/// pattern.
414
CallResult<HermesValue> JSRegExp::escapePattern(
415
    Handle<StringPrimitive> pattern,
416
2
    Runtime &runtime) {
417
2
  SmallU16String<32> result;
418
2
  result.reserve(pattern->getStringLength());
419
2
  auto patternView = StringPrimitive::createStringView(runtime, pattern);
420
2
  bool isBackslashed = false;
421
91.3k
  for (char16_t c : patternView) {
422
91.3k
    switch (c) {
423
2
      case u'/':
424
        // Avoid premature end of regex.
425
        // TODO nice to have: don't do this if we are in square brackets.
426
        // /[/]/ is valid and the middle / does not need to be escaped.
427
        // However /[\/]/ is also valid and means the same thing
428
        // (CharacterEscape production from regexp grammar). Still it would be
429
        // nice to not unnecessarily mangle the user's supplied pattern.
430
2
        result.append(isBackslashed ? "/" : "\\/");
431
2
        break;
432
433
        // Escape line terminators. See ES5.1 7.3.
434
86.2k
      case u'\n':
435
86.2k
        result.append(isBackslashed ? "n" : "\\n");
436
86.2k
        break;
437
438
1
      case u'\r':
439
1
        result.append(isBackslashed ? "r" : "\\r");
440
1
        break;
441
442
0
      case 0x2028:
443
0
        result.append(isBackslashed ? "u2028" : "\\u2028");
444
0
        break;
445
446
0
      case 0x2029:
447
0
        result.append(isBackslashed ? "u2029" : "\\u2029");
448
0
        break;
449
450
5.17k
      default:
451
5.17k
        result.append(c);
452
5.17k
        break;
453
91.3k
    }
454
91.3k
    isBackslashed = (c == u'\\') && !isBackslashed;
455
91.3k
  }
456
  // "If P is the empty String, this specification can be met by letting S be
457
  // '(?:)'."
458
2
  if (result.empty()) {
459
0
    result = u"(?:)";
460
0
  }
461
462
  // Avoid unnecessary allocation in the likely event the source and pattern
463
  // match.
464
2
  if (patternView.equals(result.arrayRef())) {
465
1
    return pattern.getHermesValue();
466
1
  }
467
1
  return StringPrimitive::create(runtime, result);
468
2
}
469
470
} // namespace vm
471
} // namespace hermes