Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include <functional>
6 :
7 : #include "src/arguments-inl.h"
8 : #include "src/conversions-inl.h"
9 : #include "src/counters.h"
10 : #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
11 : #include "src/isolate-inl.h"
12 : #include "src/message-template.h"
13 : #include "src/objects/js-array-inl.h"
14 : #include "src/regexp/jsregexp-inl.h"
15 : #include "src/regexp/regexp-utils.h"
16 : #include "src/runtime/runtime-utils.h"
17 : #include "src/string-builder-inl.h"
18 : #include "src/string-search.h"
19 : #include "src/zone/zone-chunk-list.h"
20 :
21 : namespace v8 {
22 : namespace internal {
23 :
24 : namespace {
25 :
26 : // Returns -1 for failure.
27 381 : uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
28 : bool has_named_captures) {
29 : const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
30 : const uint32_t kAdditionalArgsWithNamedCaptures = 3;
31 381 : if (num_captures > Code::kMaxArguments) return -1;
32 : uint32_t argc = has_named_captures
33 : ? num_captures + kAdditionalArgsWithNamedCaptures
34 372 : : num_captures + kAdditionalArgsWithoutNamedCaptures;
35 : STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
36 : kAdditionalArgsWithNamedCaptures);
37 372 : return (argc > Code::kMaxArguments) ? -1 : argc;
38 : }
39 :
40 : // Looks up the capture of the given name. Returns the (1-based) numbered
41 : // capture index or -1 on failure.
42 135 : int LookupNamedCapture(const std::function<bool(String)>& name_matches,
43 : FixedArray capture_name_map) {
44 : // TODO(jgruber): Sort capture_name_map and do binary search via
45 : // internalized strings.
46 :
47 : int maybe_capture_index = -1;
48 135 : const int named_capture_count = capture_name_map->length() >> 1;
49 378 : for (int j = 0; j < named_capture_count; j++) {
50 : // The format of {capture_name_map} is documented at
51 : // JSRegExp::kIrregexpCaptureNameMapIndex.
52 315 : const int name_ix = j * 2;
53 315 : const int index_ix = j * 2 + 1;
54 :
55 315 : String capture_name = String::cast(capture_name_map->get(name_ix));
56 558 : if (!name_matches(capture_name)) continue;
57 :
58 72 : maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
59 72 : break;
60 : }
61 :
62 135 : return maybe_capture_index;
63 : }
64 :
65 : } // namespace
66 :
// Pre-parsed form of a replacement pattern. Compile() splits the replacement
// string into a list of parts (literal slices of the replacement, the subject
// prefix/suffix, captures), and Apply() replays that list for one match
// without re-parsing the pattern.
67 : class CompiledReplacement {
68 : public:
69 : explicit CompiledReplacement(Zone* zone)
70 : : parts_(zone), replacement_substrings_(zone) {}
71 :
72 : // Return whether the replacement is simple.
// "Simple" means the parser produced no parts and the replacement string can
// be appended verbatim; see the end of ParseReplacementPattern.
73 : bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
74 : Handle<String> replacement, int capture_count,
75 : int subject_length);
76 :
77 : // Use Apply only if Compile returned false.
// |match| is indexed as match[2 * capture] / match[2 * capture + 1] for the
// start and end of each capture.
78 : void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
79 : int32_t* match);
80 :
81 : // Number of distinct parts of the replacement pattern.
82 3240 : int parts() { return static_cast<int>(parts_.size()); }
83 :
84 : private:
85 : enum PartType {
86 : SUBJECT_PREFIX = 1,
87 : SUBJECT_SUFFIX,
88 : SUBJECT_CAPTURE,
89 : REPLACEMENT_SUBSTRING,
90 : REPLACEMENT_STRING,
91 : EMPTY_REPLACEMENT,
92 : NUMBER_OF_PART_TYPES
93 : };
94 :
95 : struct ReplacementPart {
// Factory for "$&": the whole match is represented as capture 0.
96 : static inline ReplacementPart SubjectMatch() {
97 : return ReplacementPart(SUBJECT_CAPTURE, 0);
98 : }
99 : static inline ReplacementPart SubjectCapture(int capture_index) {
100 : return ReplacementPart(SUBJECT_CAPTURE, capture_index);
101 : }
102 : static inline ReplacementPart SubjectPrefix() {
103 : return ReplacementPart(SUBJECT_PREFIX, 0);
104 : }
// The subject length is stored in |data| so that Apply can emit the slice
// from the end of the match to the end of the subject.
105 : static inline ReplacementPart SubjectSuffix(int subject_length) {
106 : return ReplacementPart(SUBJECT_SUFFIX, subject_length);
107 : }
108 : static inline ReplacementPart ReplacementString() {
109 : return ReplacementPart(REPLACEMENT_STRING, 0);
110 : }
111 : static inline ReplacementPart EmptyReplacement() {
112 : return ReplacementPart(EMPTY_REPLACEMENT, 0);
113 : }
// Temporary encoding of the replacement slice [from, to): the tag is the
// negated start index (hence <= 0) and |data| is the end index. Compile
// later rewrites such parts to REPLACEMENT_SUBSTRING.
114 : static inline ReplacementPart ReplacementSubString(int from, int to) {
115 : DCHECK_LE(0, from);
116 : DCHECK_GT(to, from);
117 639 : return ReplacementPart(-from, to);
118 : }
119 :
120 : // If tag <= 0 then it is the negation of a start index of a substring of
121 : // the replacement pattern, otherwise it's a value from PartType.
122 : ReplacementPart(int tag, int data) : tag(tag), data(data) {
123 : // Must be non-positive or a PartType value.
124 : DCHECK(tag < NUMBER_OF_PART_TYPES);
125 : }
126 : // Either a value of PartType or a non-positive number that is
127 : // the negation of an index into the replacement string.
128 : int tag;
129 : // The data value's interpretation depends on the value of tag:
130 : // tag == SUBJECT_PREFIX: data is unused.
131 : // tag == SUBJECT_SUFFIX: data is the length of the subject string.
132 : // tag == SUBJECT_CAPTURE: data is the number of the capture.
133 : // tag == REPLACEMENT_SUBSTRING ||
134 : // tag == REPLACEMENT_STRING: data is index into array of substrings
135 : // of the replacement string.
136 : // tag == EMPTY_REPLACEMENT: data is unused.
137 : // tag <= 0: Temporary representation of the substring of the replacement
138 : // string ranging over -tag .. data.
139 : // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
140 : // substring objects.
141 : int data;
142 : };
143 :
// Scans |characters| for '$' substitutions and appends the resulting parts
// to |parts|. |last| tracks the start of the literal text that has not yet
// been emitted as a part.
//
// Returns true only if the pattern is non-empty and no part had to be
// emitted (|last| is still 0 at the end); in that case |parts| is left
// untouched. For an empty pattern this returns false without adding a part.
// |capture_name_map| may be null, in which case "$<" is kept as literal text.
144 : template <typename Char>
145 2277 : bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
146 : Vector<Char> characters,
147 : FixedArray capture_name_map, int capture_count,
148 : int subject_length) {
149 : // Equivalent to String::GetSubstitution, except that this method converts
150 : // the replacement string into an internal representation that avoids
151 : // repeated parsing when used repeatedly.
152 2277 : int length = characters.length();
153 : int last = 0;
154 6138 : for (int i = 0; i < length; i++) {
155 7740 : Char c = characters[i];
156 3870 : if (c == '$') {
157 2439 : int next_index = i + 1;
158 2439 : if (next_index == length) { // No next character!
159 : break;
160 : }
161 4860 : Char c2 = characters[next_index];
162 2430 : switch (c2) {
163 : case '$':
164 72 : if (i > last) {
165 : // There is a substring before. Include the first "$".
166 45 : parts->push_back(
167 90 : ReplacementPart::ReplacementSubString(last, next_index));
168 45 : last = next_index + 1; // Continue after the second "$".
169 : } else {
170 : // Let the next substring start with the second "$".
171 : last = next_index;
172 : }
173 : i = next_index;
174 72 : break;
175 : case '`':
176 18 : if (i > last) {
177 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
178 : }
179 18 : parts->push_back(ReplacementPart::SubjectPrefix());
180 : i = next_index;
181 18 : last = i + 1;
182 18 : break;
183 : case '\'':
184 18 : if (i > last) {
185 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
186 : }
187 18 : parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
188 : i = next_index;
189 18 : last = i + 1;
190 18 : break;
191 : case '&':
192 18 : if (i > last) {
193 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
194 : }
195 18 : parts->push_back(ReplacementPart::SubjectMatch());
196 : i = next_index;
197 18 : last = i + 1;
198 18 : break;
199 : case '0':
200 : case '1':
201 : case '2':
202 : case '3':
203 : case '4':
204 : case '5':
205 : case '6':
206 : case '7':
207 : case '8':
208 : case '9': {
209 2223 : int capture_ref = c2 - '0';
// A single digit beyond the capture count is not a reference; skip the
// digit and leave "$d" as part of the pending literal text.
210 2223 : if (capture_ref > capture_count) {
211 : i = next_index;
212 : continue;
213 : }
214 1521 : int second_digit_index = next_index + 1;
215 1521 : if (second_digit_index < length) {
216 : // Peek ahead to see if we have two digits.
217 2772 : Char c3 = characters[second_digit_index];
218 1386 : if ('0' <= c3 && c3 <= '9') { // Double digits.
219 1332 : int double_digit_ref = capture_ref * 10 + c3 - '0';
// Only consume the second digit when the two-digit number names an
// existing capture; otherwise fall back to the one-digit reference.
220 1332 : if (double_digit_ref <= capture_count) {
221 : next_index = second_digit_index;
222 : capture_ref = double_digit_ref;
223 : }
224 : }
225 : }
// A reference that evaluates to 0 (e.g. "$0") emits no part and stays
// in the pending literal text.
226 1521 : if (capture_ref > 0) {
227 1485 : if (i > last) {
228 63 : parts->push_back(
229 126 : ReplacementPart::ReplacementSubString(last, i));
230 : }
231 : DCHECK(capture_ref <= capture_count);
232 1485 : parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
233 1485 : last = next_index + 1;
234 : }
235 : i = next_index;
236 1521 : break;
237 : }
238 : case '<': {
239 72 : if (capture_name_map.is_null()) {
240 : i = next_index;
241 : break;
242 : }
243 :
244 : // Scan until the next '>', and let the enclosed substring be the
245 : // groupName.
246 :
247 72 : const int name_start_index = next_index + 1;
248 : int closing_bracket_index = -1;
249 306 : for (int j = name_start_index; j < length; j++) {
250 576 : if (characters[j] == '>') {
251 : closing_bracket_index = j;
252 : break;
253 : }
254 : }
255 :
256 : // If no closing bracket is found, '$<' is treated as a string
257 : // literal.
258 72 : if (closing_bracket_index == -1) {
259 : i = next_index;
260 : break;
261 : }
262 :
263 : Vector<Char> requested_name =
264 54 : characters.SubVector(name_start_index, closing_bracket_index);
265 :
266 : // Let capture be ? Get(namedCaptures, groupName).
267 :
268 : const int capture_index = LookupNamedCapture(
269 : [=](String capture_name) {
270 : return capture_name->IsEqualTo(requested_name);
271 135 : },
272 108 : capture_name_map);
273 :
274 : // If capture is undefined or does not exist, replace the text
275 : // through the following '>' with the empty string.
276 : // Otherwise, replace the text through the following '>' with
277 : // ? ToString(capture).
278 :
279 : DCHECK(capture_index == -1 ||
280 : (1 <= capture_index && capture_index <= capture_count));
281 :
282 54 : if (i > last) {
283 0 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
284 : }
285 54 : parts->push_back(
286 : (capture_index == -1)
287 : ? ReplacementPart::EmptyReplacement()
288 108 : : ReplacementPart::SubjectCapture(capture_index));
289 54 : last = closing_bracket_index + 1;
290 : i = closing_bracket_index;
291 54 : break;
292 : }
293 : default:
294 : i = next_index;
295 : break;
296 : }
297 : }
298 : }
// Emit the trailing literal text, unless nothing was emitted at all, in
// which case the whole pattern is a plain literal.
299 2277 : if (length > last) {
300 1215 : if (last == 0) {
301 : // Replacement is simple. Do not use Apply to do the replacement.
302 : return true;
303 : } else {
304 477 : parts->push_back(ReplacementPart::ReplacementSubString(last, length));
305 : }
306 : }
307 : return false;
308 : }
309 :
// Parts in pattern order; zone-allocated.
310 : ZoneChunkList<ReplacementPart> parts_;
// String objects referenced by REPLACEMENT_{SUB,}STRING parts via |data|.
311 : ZoneVector<Handle<String>> replacement_substrings_;
312 : };
313 :
314 2277 : bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
315 : Handle<String> replacement, int capture_count,
316 : int subject_length) {
317 : {
318 : DisallowHeapAllocation no_gc;
319 2277 : String::FlatContent content = replacement->GetFlatContent(no_gc);
320 : DCHECK(content.IsFlat());
321 :
322 2277 : FixedArray capture_name_map;
323 2277 : if (capture_count > 0) {
324 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
325 2970 : Object maybe_capture_name_map = regexp->CaptureNameMap();
326 1485 : if (maybe_capture_name_map->IsFixedArray()) {
327 72 : capture_name_map = FixedArray::cast(maybe_capture_name_map);
328 : }
329 : }
330 :
331 : bool simple;
332 2277 : if (content.IsOneByte()) {
333 : simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
334 : capture_name_map, capture_count,
335 2277 : subject_length);
336 : } else {
337 : DCHECK(content.IsTwoByte());
338 : simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
339 : capture_name_map, capture_count,
340 0 : subject_length);
341 : }
342 2277 : if (simple) return true;
343 : }
344 :
345 : // Find substrings of replacement string and create them as String objects.
346 : int substring_index = 0;
347 5310 : for (ReplacementPart& part : parts_) {
348 2232 : int tag = part.tag;
349 2232 : if (tag <= 0) { // A replacement string slice.
350 639 : int from = -tag;
351 639 : int to = part.data;
352 : replacement_substrings_.push_back(
353 1278 : isolate->factory()->NewSubString(replacement, from, to));
354 639 : part.tag = REPLACEMENT_SUBSTRING;
355 639 : part.data = substring_index;
356 639 : substring_index++;
357 1593 : } else if (tag == REPLACEMENT_STRING) {
358 0 : replacement_substrings_.push_back(replacement);
359 0 : part.data = substring_index;
360 0 : substring_index++;
361 : }
362 : }
363 : return false;
364 : }
365 :
366 :
367 1818 : void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
368 : int match_from, int match_to, int32_t* match) {
369 : DCHECK_LT(0, parts_.size());
370 6588 : for (ReplacementPart& part : parts_) {
371 2952 : switch (part.tag) {
372 : case SUBJECT_PREFIX:
373 45 : if (match_from > 0) builder->AddSubjectSlice(0, match_from);
374 : break;
375 : case SUBJECT_SUFFIX: {
376 45 : int subject_length = part.data;
377 45 : if (match_to < subject_length) {
378 36 : builder->AddSubjectSlice(match_to, subject_length);
379 : }
380 : break;
381 : }
382 : case SUBJECT_CAPTURE: {
383 1764 : int capture = part.data;
384 1764 : int from = match[capture * 2];
385 1764 : int to = match[capture * 2 + 1];
386 1764 : if (from >= 0 && to > from) {
387 1683 : builder->AddSubjectSlice(from, to);
388 : }
389 : break;
390 : }
391 : case REPLACEMENT_SUBSTRING:
392 : case REPLACEMENT_STRING:
393 2088 : builder->AddString(replacement_substrings_[part.data]);
394 1044 : break;
395 : case EMPTY_REPLACEMENT:
396 : break;
397 : default:
398 0 : UNREACHABLE();
399 : }
400 : }
401 1818 : }
402 :
403 31867 : void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
404 : std::vector<int>* indices, unsigned int limit) {
405 : DCHECK_LT(0, limit);
406 : // Collect indices of pattern in subject using memchr.
407 : // Stop after finding at most limit values.
408 31867 : const uint8_t* subject_start = subject.start();
409 63734 : const uint8_t* subject_end = subject_start + subject.length();
410 : const uint8_t* pos = subject_start;
411 144554 : while (limit > 0) {
412 : pos = reinterpret_cast<const uint8_t*>(
413 112660 : memchr(pos, pattern, subject_end - pos));
414 144527 : if (pos == nullptr) return;
415 161640 : indices->push_back(static_cast<int>(pos - subject_start));
416 80820 : pos++;
417 80820 : limit--;
418 : }
419 : }
420 :
421 369 : void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
422 : std::vector<int>* indices, unsigned int limit) {
423 : DCHECK_LT(0, limit);
424 369 : const uc16* subject_start = subject.start();
425 738 : const uc16* subject_end = subject_start + subject.length();
426 12564 : for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
427 12195 : if (*pos == pattern) {
428 738 : indices->push_back(static_cast<int>(pos - subject_start));
429 369 : limit--;
430 : }
431 : }
432 369 : }
433 :
434 : template <typename SubjectChar, typename PatternChar>
435 14 : void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
436 : Vector<const PatternChar> pattern,
437 : std::vector<int>* indices, unsigned int limit) {
438 : DCHECK_LT(0, limit);
439 : // Collect indices of pattern in subject.
440 : // Stop after finding at most limit values.
441 0 : int pattern_length = pattern.length();
442 14 : int index = 0;
443 0 : StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
444 28 : while (limit > 0) {
445 56 : index = search.Search(subject, index);
446 42 : if (index < 0) return;
447 14 : indices->push_back(index);
448 14 : index += pattern_length;
449 14 : limit--;
450 : }
451 : }
452 :
453 32250 : void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
454 : std::vector<int>* indices, unsigned int limit) {
455 : {
456 : DisallowHeapAllocation no_gc;
457 32250 : String::FlatContent subject_content = subject->GetFlatContent(no_gc);
458 32250 : String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
459 : DCHECK(subject_content.IsFlat());
460 : DCHECK(pattern_content.IsFlat());
461 32250 : if (subject_content.IsOneByte()) {
462 31881 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463 31881 : if (pattern_content.IsOneByte()) {
464 : Vector<const uint8_t> pattern_vector =
465 : pattern_content.ToOneByteVector();
466 31881 : if (pattern_vector.length() == 1) {
467 : FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468 31867 : limit);
469 : } else {
470 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471 14 : limit);
472 : }
473 : } else {
474 : FindStringIndices(isolate, subject_vector,
475 0 : pattern_content.ToUC16Vector(), indices, limit);
476 : }
477 : } else {
478 369 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479 369 : if (pattern_content.IsOneByte()) {
480 : Vector<const uint8_t> pattern_vector =
481 : pattern_content.ToOneByteVector();
482 369 : if (pattern_vector.length() == 1) {
483 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484 369 : limit);
485 : } else {
486 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 0 : limit);
488 : }
489 : } else {
490 : Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491 0 : if (pattern_vector.length() == 1) {
492 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493 0 : limit);
494 : } else {
495 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496 0 : limit);
497 : }
498 : }
499 : }
500 : }
501 32250 : }
502 :
503 : namespace {
504 32214 : std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505 32214 : std::vector<int>* list = isolate->regexp_indices();
506 : list->clear();
507 32214 : return list;
508 : }
509 :
510 32250 : void TruncateRegexpIndicesList(Isolate* isolate) {
511 : // Same size as smallest zone segment, preserving behavior from the
512 : // runtime zone.
513 : static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514 32250 : std::vector<int>* indicies = isolate->regexp_indices();
515 32250 : if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516 : // Throw away backing storage.
517 : indicies->clear();
518 : indicies->shrink_to_fit();
519 : }
520 32250 : }
521 : } // namespace
522 :
523 : template <typename ResultSeqString>
524 36 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
525 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527 : DCHECK(subject->IsFlat());
528 : DCHECK(replacement->IsFlat());
529 :
530 36 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531 :
532 : DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533 : String pattern =
534 72 : String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535 : int subject_len = subject->length();
536 : int pattern_len = pattern->length();
537 : int replacement_len = replacement->length();
538 :
539 36 : FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540 :
541 36 : if (indices->empty()) return *subject;
542 :
543 : // Detect integer overflow.
544 : int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545 : static_cast<int64_t>(pattern_len)) *
546 : static_cast<int64_t>(indices->size()) +
547 72 : static_cast<int64_t>(subject_len);
548 : int result_len;
549 36 : if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
551 : result_len = kMaxInt; // Provoke exception.
552 : } else {
553 36 : result_len = static_cast<int>(result_len_64);
554 : }
555 36 : if (result_len == 0) {
556 0 : return ReadOnlyRoots(isolate).empty_string();
557 : }
558 :
559 : int subject_pos = 0;
560 : int result_pos = 0;
561 :
562 : MaybeHandle<SeqString> maybe_res;
563 : if (ResultSeqString::kHasOneByteEncoding) {
564 36 : maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565 : } else {
566 0 : maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567 : }
568 : Handle<SeqString> untyped_res;
569 36 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570 36 : Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571 :
572 : DisallowHeapAllocation no_gc;
573 135 : for (int index : *indices) {
574 : // Copy non-matched subject content.
575 63 : if (subject_pos < index) {
576 54 : String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
577 54 : subject_pos, index);
578 54 : result_pos += index - subject_pos;
579 : }
580 :
581 : // Replace match.
582 63 : if (replacement_len > 0) {
583 63 : String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
584 63 : replacement_len);
585 63 : result_pos += replacement_len;
586 : }
587 :
588 63 : subject_pos = index + pattern_len;
589 : }
590 : // Add remaining subject content at the end.
591 36 : if (subject_pos < subject_len) {
592 27 : String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
593 27 : subject_pos, subject_len);
594 : }
595 :
596 36 : int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
597 36 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
598 : match_indices);
599 :
600 36 : TruncateRegexpIndicesList(isolate);
601 :
602 36 : return *result;
603 : }
604 :
// Replaces every match of the global |regexp| in |subject| with |replacement|,
// expanding the replacement pattern through CompiledReplacement.
//
// Returns |subject| itself if there is no match, the exception sentinel if
// regexp preparation, matching or building the result fails, and otherwise
// the newly built string. ATOM regexps with a simple replacement take the
// StringReplaceGlobalAtomRegExpWithString shortcut. |last_match_info| is
// updated on the general path after the last successful match.
605 2277 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606 2277 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608 : DCHECK(subject->IsFlat());
609 : DCHECK(replacement->IsFlat());
610 :
611 2277 : int capture_count = regexp->CaptureCount();
612 : int subject_length = subject->length();
613 :
614 2277 : JSRegExp::Type typeTag = regexp->TypeTag();
615 2277 : if (typeTag == JSRegExp::IRREGEXP) {
616 : // Ensure the RegExp is compiled so we can access the capture-name map.
617 2160 : if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
618 : DCHECK(isolate->has_pending_exception());
619 0 : return ReadOnlyRoots(isolate).exception();
620 : }
621 : }
622 :
623 : // CompiledReplacement uses zone allocation.
624 2277 : Zone zone(isolate->allocator(), ZONE_NAME);
625 : CompiledReplacement compiled_replacement(&zone);
// simple_replace == true means the replacement contains nothing to expand
// and is appended verbatim below.
626 : const bool simple_replace = compiled_replacement.Compile(
627 2277 : isolate, regexp, replacement, capture_count, subject_length);
628 :
629 : // Shortcut for simple non-regexp global replacements
630 2277 : if (typeTag == JSRegExp::ATOM && simple_replace) {
// The result encoding is one-byte only if both inputs are one-byte.
631 72 : if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
632 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
633 36 : isolate, subject, regexp, replacement, last_match_info);
634 : } else {
635 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
636 0 : isolate, subject, regexp, replacement, last_match_info);
637 : }
638 : }
639 :
640 2241 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
641 2241 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
642 :
// A null first match means either "no match" or "exception"; the two are
// told apart via HasException().
643 : int32_t* current_match = global_cache.FetchNext();
644 2241 : if (current_match == nullptr) {
645 621 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
646 621 : return *subject;
647 : }
648 :
649 : // Guessing the number of parts that the final result string is built
650 : // from. Global regexps can match any number of times, so we guess
651 : // conservatively.
652 1620 : int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
653 1620 : ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
654 :
655 : // Number of parts added by compiled replacement plus preceding
656 : // string and possibly suffix after last match. It is possible for
657 : // all components to use two elements when encoded as two smis.
658 1620 : const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
659 :
// End of the previous match; subject text in [prev, start) is copied through
// unchanged before each replacement.
660 : int prev = 0;
661 :
662 1935 : do {
663 1935 : builder.EnsureCapacity(parts_added_per_loop);
664 :
665 1935 : int start = current_match[0];
666 1935 : int end = current_match[1];
667 :
668 1935 : if (prev < start) {
669 342 : builder.AddSubjectSlice(prev, start);
670 : }
671 :
672 1935 : if (simple_replace) {
673 117 : builder.AddString(replacement);
674 : } else {
675 1818 : compiled_replacement.Apply(&builder, start, end, current_match);
676 : }
677 : prev = end;
678 :
679 : current_match = global_cache.FetchNext();
680 : } while (current_match != nullptr);
681 :
// The loop also ends when matching throws, so check before using the result.
682 1620 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
683 :
// Copy the subject text that follows the last match.
684 1620 : if (prev < subject_length) {
685 486 : builder.EnsureCapacity(2);
686 486 : builder.AddSubjectSlice(prev, subject_length);
687 : }
688 :
689 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
690 1620 : global_cache.LastSuccessfulMatch());
691 :
692 5517 : RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
693 : }
694 :
// Removes every match of the global |regexp| from |subject|.
//
// ATOM regexps are delegated to StringReplaceGlobalAtomRegExpWithString with
// the empty string as replacement. Otherwise the unmatched pieces of the
// subject are copied into a raw ResultSeqString sized from the first match
// (an upper bound for the final length), which is shortened in place at the
// end. Returns |subject| itself if nothing matches and the exception sentinel
// if matching throws.
695 : template <typename ResultSeqString>
696 0 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
697 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
698 : Handle<RegExpMatchInfo> last_match_info) {
699 : DCHECK(subject->IsFlat());
700 :
701 : // Shortcut for simple non-regexp global replacements
702 0 : if (regexp->TypeTag() == JSRegExp::ATOM) {
703 0 : Handle<String> empty_string = isolate->factory()->empty_string();
704 0 : if (subject->IsOneByteRepresentation()) {
705 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
706 0 : isolate, subject, regexp, empty_string, last_match_info);
707 : } else {
708 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
709 0 : isolate, subject, regexp, empty_string, last_match_info);
710 : }
711 : }
712 :
713 0 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
714 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
715 :
716 : int32_t* current_match = global_cache.FetchNext();
717 0 : if (current_match == nullptr) {
718 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
719 0 : return *subject;
720 : }
721 :
722 0 : int start = current_match[0];
723 0 : int end = current_match[1];
724 0 : int capture_count = regexp->CaptureCount();
725 : int subject_length = subject->length();
726 :
// The first match alone already removes (end - start) characters, so this is
// an upper bound on the length of the result.
727 0 : int new_length = subject_length - (end - start);
// NOTE(review): this return happens before the SetLastMatchInfo call further
// down, so |last_match_info| is not updated when the first match covers the
// whole subject. Confirm this is intended.
728 0 : if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
729 :
730 : Handle<ResultSeqString> answer;
731 : if (ResultSeqString::kHasOneByteEncoding) {
732 0 : answer = Handle<ResultSeqString>::cast(
733 0 : isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
734 : } else {
735 0 : answer = Handle<ResultSeqString>::cast(
736 0 : isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
737 : }
738 :
// |prev| is the end of the previous match in the subject; |position| is the
// number of characters written to |answer| so far.
739 : int prev = 0;
740 : int position = 0;
741 :
742 : DisallowHeapAllocation no_gc;
743 0 : do {
744 0 : start = current_match[0];
745 0 : end = current_match[1];
746 0 : if (prev < start) {
747 : // Add substring subject[prev;start] to answer string.
748 0 : String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
749 0 : start);
750 0 : position += start - prev;
751 : }
752 : prev = end;
753 :
754 : current_match = global_cache.FetchNext();
755 : } while (current_match != nullptr);
756 :
757 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
758 :
759 0 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
760 : global_cache.LastSuccessfulMatch());
761 :
762 0 : if (prev < subject_length) {
763 : // Add substring subject[prev;length] to answer string.
764 0 : String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
765 0 : subject_length);
766 0 : position += subject_length - prev;
767 : }
768 :
769 0 : if (position == 0) return ReadOnlyRoots(isolate).empty_string();
770 :
771 : // Shorten string and fill
// |delta| is the number of bytes by which the allocated object is larger
// than an object holding exactly |position| characters.
772 : int string_size = ResultSeqString::SizeFor(position);
773 : int allocated_string_size = ResultSeqString::SizeFor(new_length);
774 0 : int delta = allocated_string_size - string_size;
775 :
776 : answer->set_length(position);
777 0 : if (delta == 0) return *answer;
778 :
779 0 : Address end_of_string = answer->address() + string_size;
780 0 : Heap* heap = isolate->heap();
781 :
782 : // The trimming is performed on a newly allocated object, which is on a
783 : // freshly allocated page or on an already swept page. Hence, the sweeper
784 : // thread can not get confused with the filler creation. No synchronization
785 : // needed.
786 : // TODO(hpayer): We should shrink the large object page if the size
787 : // of the object changed significantly.
788 0 : if (!heap->IsLargeObject(*answer)) {
789 0 : heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
790 : }
791 0 : return *answer;
792 : }
793 :
// Runtime entry: splits a string at every occurrence of a non-empty string
// separator.
//   args[0]: subject (String)
//   args[1]: pattern / separator (String, length > 0 is CHECKed)
//   args[2]: limit (uint32, > 0 is CHECKed) - maximum number of parts
// Returns a JSArray of the parts. Results for the unlimited case
// (limit == 0xFFFFFFFF) are looked up in and stored to RegExpResultsCache.
794 36142 : RUNTIME_FUNCTION(Runtime_StringSplit) {
795 36142 : HandleScope handle_scope(isolate);
796 : DCHECK_EQ(3, args.length());
797 72284 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
798 72284 : CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
799 72284 : CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
800 36142 : CHECK_LT(0, limit);
801 :
802 36142 : int subject_length = subject->length();
803 36142 : int pattern_length = pattern->length();
804 36142 : CHECK_LT(0, pattern_length);
805 :
// Only unlimited splits are cached; a cache miss is signalled by Smi zero.
806 36142 : if (limit == 0xFFFFFFFFu) {
807 36106 : FixedArray last_match_cache_unused;
808 : Handle<Object> cached_answer(
809 : RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
810 : &last_match_cache_unused,
811 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
812 108318 : isolate);
813 36106 : if (*cached_answer != Smi::kZero) {
814 : // The cache FixedArray is a COW-array and can therefore be reused.
815 : Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
816 7856 : Handle<FixedArray>::cast(cached_answer));
817 : return *result;
818 : }
819 : }
820 :
821 : // The limit can be very large (0xFFFFFFFFu), but since the pattern
822 : // isn't empty, we can never create more parts than ~half the length
823 : // of the subject.
824 :
825 32214 : subject = String::Flatten(isolate, subject);
826 32214 : pattern = String::Flatten(isolate, pattern);
827 :
828 96642 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
829 :
830 32214 : FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
831 :
// Unless the limit was already reached, the end of the subject terminates
// one more (final) part.
832 32214 : if (static_cast<uint32_t>(indices->size()) < limit) {
833 32187 : indices->push_back(subject_length);
834 : }
835 :
836 : // The list indices now contains the end of each part to create.
837 :
838 : // Create JSArray of substrings separated by separator.
839 32214 : int part_count = static_cast<int>(indices->size());
840 :
841 : Handle<JSArray> result =
842 : isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
843 32214 : INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
844 :
845 : DCHECK(result->HasObjectElements());
846 :
847 64428 : Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
848 :
// No separator found: the single part is the subject itself, so no substring
// needs to be created.
849 32214 : if (part_count == 1 && indices->at(0) == subject_length) {
850 47846 : elements->set(0, *subject);
851 : } else {
// Each part spans [part_start, part_end); the next part starts behind the
// separator that ended the current one.
852 : int part_start = 0;
853 187135 : FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
854 : int part_end = indices->at(i);
855 : Handle<String> substring =
856 : isolate->factory()->NewProperSubString(subject, part_start, part_end);
857 : elements->set(i, *substring);
858 : part_start = part_end + pattern_length;
859 : });
860 : }
861 :
// Remember unlimited results for later lookups.
862 32214 : if (limit == 0xFFFFFFFFu) {
863 32178 : if (result->HasObjectElements()) {
864 : RegExpResultsCache::Enter(isolate, subject, pattern, elements,
865 : isolate->factory()->empty_fixed_array(),
866 64356 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
867 : }
868 : }
869 :
// Give back the scratch list's backing store if it grew too large.
870 32214 : TruncateRegexpIndicesList(isolate);
871 :
872 36142 : return *result;
873 : }
874 :
875 4340256 : RUNTIME_FUNCTION(Runtime_RegExpExec) {
876 4340256 : HandleScope scope(isolate);
877 : DCHECK_EQ(4, args.length());
878 8680512 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
879 8680512 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
880 8680512 : CONVERT_INT32_ARG_CHECKED(index, 2);
881 8680512 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
882 : // Due to the way the JS calls are constructed this must be less than the
883 : // length of a string, i.e. it is always a Smi. We check anyway for security.
884 4340256 : CHECK_LE(0, index);
885 4340256 : CHECK_GE(subject->length(), index);
886 4340256 : isolate->counters()->regexp_entry_runtime()->Increment();
887 8680512 : RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
888 4340256 : index, last_match_info));
889 : }
890 :
891 : namespace {
892 :
893 3300 : class MatchInfoBackedMatch : public String::Match {
894 : public:
895 3300 : MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
896 : Handle<String> subject,
897 : Handle<RegExpMatchInfo> match_info)
898 6600 : : isolate_(isolate), match_info_(match_info) {
899 3300 : subject_ = String::Flatten(isolate, subject);
900 :
901 3300 : if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
902 6384 : Object o = regexp->CaptureNameMap();
903 3192 : has_named_captures_ = o->IsFixedArray();
904 3192 : if (has_named_captures_) {
905 99 : capture_name_map_ = handle(FixedArray::cast(o), isolate);
906 : }
907 : } else {
908 108 : has_named_captures_ = false;
909 : }
910 3300 : }
911 :
912 18 : Handle<String> GetMatch() override {
913 18 : return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
914 : }
915 :
916 18 : Handle<String> GetPrefix() override {
917 18 : const int match_start = match_info_->Capture(0);
918 18 : return isolate_->factory()->NewSubString(subject_, 0, match_start);
919 : }
920 :
921 18 : Handle<String> GetSuffix() override {
922 18 : const int match_end = match_info_->Capture(1);
923 : return isolate_->factory()->NewSubString(subject_, match_end,
924 18 : subject_->length());
925 : }
926 :
927 171 : bool HasNamedCaptures() override { return has_named_captures_; }
928 :
929 3300 : int CaptureCount() override {
930 3300 : return match_info_->NumberOfCaptureRegisters() / 2;
931 : }
932 :
933 7152 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
934 : Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
935 7152 : isolate_, match_info_, i, capture_exists);
936 7107 : return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
937 21411 : : isolate_->factory()->empty_string();
938 : }
939 :
940 81 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
941 : CaptureState* state) override {
942 : DCHECK(has_named_captures_);
943 : const int capture_index = LookupNamedCapture(
944 360 : [=](String capture_name) { return capture_name->Equals(*name); },
945 243 : *capture_name_map_);
946 :
947 81 : if (capture_index == -1) {
948 36 : *state = INVALID;
949 36 : return name; // Arbitrary string handle.
950 : }
951 :
952 : DCHECK(1 <= capture_index && capture_index <= CaptureCount());
953 :
954 : bool capture_exists;
955 : Handle<String> capture_value;
956 90 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
957 : GetCapture(capture_index, &capture_exists),
958 : String);
959 :
960 45 : if (!capture_exists) {
961 18 : *state = UNMATCHED;
962 36 : return isolate_->factory()->empty_string();
963 : } else {
964 27 : *state = MATCHED;
965 27 : return capture_value;
966 : }
967 : }
968 :
969 : private:
970 : Isolate* isolate_;
971 : Handle<String> subject_;
972 : Handle<RegExpMatchInfo> match_info_;
973 :
974 : bool has_named_captures_;
975 : Handle<FixedArray> capture_name_map_;
976 : };
977 :
978 882 : class VectorBackedMatch : public String::Match {
979 : public:
980 441 : VectorBackedMatch(Isolate* isolate, Handle<String> subject,
981 : Handle<String> match, int match_position,
982 : ZoneVector<Handle<Object>>* captures,
983 : Handle<Object> groups_obj)
984 : : isolate_(isolate),
985 : match_(match),
986 : match_position_(match_position),
987 882 : captures_(captures) {
988 441 : subject_ = String::Flatten(isolate, subject);
989 :
990 : DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
991 882 : has_named_captures_ = !groups_obj->IsUndefined(isolate);
992 441 : if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
993 441 : }
994 :
995 0 : Handle<String> GetMatch() override { return match_; }
996 :
997 0 : Handle<String> GetPrefix() override {
998 0 : return isolate_->factory()->NewSubString(subject_, 0, match_position_);
999 : }
1000 :
1001 0 : Handle<String> GetSuffix() override {
1002 0 : const int match_end_position = match_position_ + match_->length();
1003 : return isolate_->factory()->NewSubString(subject_, match_end_position,
1004 0 : subject_->length());
1005 : }
1006 :
1007 288 : bool HasNamedCaptures() override { return has_named_captures_; }
1008 :
1009 882 : int CaptureCount() override { return static_cast<int>(captures_->size()); }
1010 :
1011 81 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1012 162 : Handle<Object> capture_obj = captures_->at(i);
1013 243 : if (capture_obj->IsUndefined(isolate_)) {
1014 0 : *capture_exists = false;
1015 0 : return isolate_->factory()->empty_string();
1016 : }
1017 81 : *capture_exists = true;
1018 81 : return Object::ToString(isolate_, capture_obj);
1019 : }
1020 :
1021 225 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
1022 : CaptureState* state) override {
1023 : DCHECK(has_named_captures_);
1024 :
1025 : Maybe<bool> maybe_capture_exists =
1026 225 : JSReceiver::HasProperty(groups_obj_, name);
1027 225 : if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1028 :
1029 225 : if (!maybe_capture_exists.FromJust()) {
1030 90 : *state = INVALID;
1031 90 : return name; // Arbitrary string handle.
1032 : }
1033 :
1034 : Handle<Object> capture_obj;
1035 270 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1036 : Object::GetProperty(isolate_, groups_obj_, name),
1037 : String);
1038 405 : if (capture_obj->IsUndefined(isolate_)) {
1039 36 : *state = UNMATCHED;
1040 72 : return isolate_->factory()->empty_string();
1041 : } else {
1042 99 : *state = MATCHED;
1043 99 : return Object::ToString(isolate_, capture_obj);
1044 : }
1045 : }
1046 :
1047 : private:
1048 : Isolate* isolate_;
1049 : Handle<String> subject_;
1050 : Handle<String> match_;
1051 : const int match_position_;
1052 : ZoneVector<Handle<Object>>* captures_;
1053 :
1054 : bool has_named_captures_;
1055 : Handle<JSReceiver> groups_obj_;
1056 : };
1057 :
1058 : // Create the groups object (see also the RegExp result creation in
1059 : // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1060 72 : Handle<JSObject> ConstructNamedCaptureGroupsObject(
1061 : Isolate* isolate, Handle<FixedArray> capture_map,
1062 : const std::function<Object(int)>& f_get_capture) {
1063 72 : Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1064 :
1065 72 : const int capture_count = capture_map->length() >> 1;
1066 216 : for (int i = 0; i < capture_count; i++) {
1067 144 : const int name_ix = i * 2;
1068 144 : const int index_ix = i * 2 + 1;
1069 :
1070 : Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1071 : isolate);
1072 144 : const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1073 : DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1074 :
1075 144 : Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1076 : DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1077 :
1078 144 : JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1079 : }
1080 :
1081 72 : return groups;
1082 : }
1083 :
1084 : // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1085 : // separate last match info. See comment on that function.
1086 : template <bool has_capture>
1087 93256 : static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1088 : Handle<JSRegExp> regexp,
1089 : Handle<RegExpMatchInfo> last_match_array,
1090 : Handle<JSArray> result_array) {
1091 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1092 : DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1093 : DCHECK(subject->IsFlat());
1094 :
1095 93256 : int capture_count = regexp->CaptureCount();
1096 : int subject_length = subject->length();
1097 :
1098 : static const int kMinLengthToCache = 0x1000;
1099 :
1100 93256 : if (subject_length > kMinLengthToCache) {
1101 36 : FixedArray last_match_cache;
1102 : Object cached_answer = RegExpResultsCache::Lookup(
1103 : isolate->heap(), *subject, regexp->data(), &last_match_cache,
1104 36 : RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1105 36 : if (cached_answer->IsFixedArray()) {
1106 0 : int capture_registers = (capture_count + 1) * 2;
1107 0 : int32_t* last_match = NewArray<int32_t>(capture_registers);
1108 0 : for (int i = 0; i < capture_registers; i++) {
1109 0 : last_match[i] = Smi::ToInt(last_match_cache->get(i));
1110 : }
1111 : Handle<FixedArray> cached_fixed_array =
1112 : Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1113 : // The cache FixedArray is a COW-array and we need to return a copy.
1114 : Handle<FixedArray> copied_fixed_array =
1115 : isolate->factory()->CopyFixedArrayWithMap(
1116 0 : cached_fixed_array, isolate->factory()->fixed_array_map());
1117 0 : JSArray::SetContent(result_array, copied_fixed_array);
1118 0 : RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1119 : capture_count, last_match);
1120 : DeleteArray(last_match);
1121 0 : return *result_array;
1122 : }
1123 : }
1124 :
1125 93256 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1126 93346 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1127 :
1128 : // Ensured in Runtime_RegExpExecMultiple.
1129 : DCHECK(result_array->HasObjectElements());
1130 186332 : Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1131 186332 : isolate);
1132 93166 : if (result_elements->length() < 16) {
1133 0 : result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1134 : }
1135 :
1136 93166 : FixedArrayBuilder builder(result_elements);
1137 :
1138 : // Position to search from.
1139 : int match_start = -1;
1140 : int match_end = 0;
1141 : bool first = true;
1142 :
1143 : // Two smis before and after the match, for very long strings.
1144 : static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1145 :
1146 : while (true) {
1147 : int32_t* current_match = global_cache.FetchNext();
1148 456683 : if (current_match == nullptr) break;
1149 363517 : match_start = current_match[0];
1150 363517 : builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1151 363517 : if (match_end < match_start) {
1152 58326 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1153 : match_start);
1154 : }
1155 363517 : match_end = current_match[1];
1156 : {
1157 : // Avoid accumulating new handles inside loop.
1158 : HandleScope temp_scope(isolate);
1159 : Handle<String> match;
1160 363517 : if (!first) {
1161 270413 : match = isolate->factory()->NewProperSubString(subject, match_start,
1162 : match_end);
1163 : } else {
1164 93104 : match =
1165 : isolate->factory()->NewSubString(subject, match_start, match_end);
1166 : first = false;
1167 : }
1168 :
1169 : if (has_capture) {
1170 : // Arguments array to replace function is match, captures, index and
1171 : // subject, i.e., 3 + capture count in total. If the RegExp contains
1172 : // named captures, they are also passed as the last argument.
1173 :
1174 344436 : Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1175 344436 : const bool has_named_captures = maybe_capture_map->IsFixedArray();
1176 :
1177 : const int argc =
1178 172218 : has_named_captures ? 4 + capture_count : 3 + capture_count;
1179 :
1180 172218 : Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1181 : int cursor = 0;
1182 :
1183 344436 : elements->set(cursor++, *match);
1184 608891 : for (int i = 1; i <= capture_count; i++) {
1185 436673 : int start = current_match[i * 2];
1186 436673 : if (start >= 0) {
1187 436580 : int end = current_match[i * 2 + 1];
1188 : DCHECK(start <= end);
1189 : Handle<String> substring =
1190 436580 : isolate->factory()->NewSubString(subject, start, end);
1191 873160 : elements->set(cursor++, *substring);
1192 : } else {
1193 : DCHECK_GT(0, current_match[i * 2 + 1]);
1194 186 : elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1195 : }
1196 : }
1197 :
1198 172218 : elements->set(cursor++, Smi::FromInt(match_start));
1199 344436 : elements->set(cursor++, *subject);
1200 :
1201 172218 : if (has_named_captures) {
1202 : Handle<FixedArray> capture_map =
1203 54 : Handle<FixedArray>::cast(maybe_capture_map);
1204 : Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1205 324 : isolate, capture_map, [=](int ix) { return elements->get(ix); });
1206 108 : elements->set(cursor++, *groups);
1207 : }
1208 :
1209 : DCHECK_EQ(cursor, argc);
1210 344436 : builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1211 : } else {
1212 191299 : builder.Add(*match);
1213 : }
1214 : }
1215 : }
1216 :
1217 93215 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1218 :
1219 93117 : if (match_start >= 0) {
1220 : // Finished matching, with at least one match.
1221 93104 : if (match_end < subject_length) {
1222 411 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1223 : subject_length);
1224 : }
1225 :
1226 93104 : RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1227 : capture_count,
1228 : global_cache.LastSuccessfulMatch());
1229 :
1230 93104 : if (subject_length > kMinLengthToCache) {
1231 : // Store the last successful match into the array for caching.
1232 : // TODO(yangguo): do not expose last match to JS and simplify caching.
1233 36 : int capture_registers = (capture_count + 1) * 2;
1234 : Handle<FixedArray> last_match_cache =
1235 36 : isolate->factory()->NewFixedArray(capture_registers);
1236 : int32_t* last_match = global_cache.LastSuccessfulMatch();
1237 162 : for (int i = 0; i < capture_registers; i++) {
1238 126 : last_match_cache->set(i, Smi::FromInt(last_match[i]));
1239 : }
1240 : Handle<FixedArray> result_fixed_array =
1241 36 : FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1242 : // Cache the result and copy the FixedArray into a COW array.
1243 : Handle<FixedArray> copied_fixed_array =
1244 : isolate->factory()->CopyFixedArrayWithMap(
1245 36 : result_fixed_array, isolate->factory()->fixed_array_map());
1246 36 : RegExpResultsCache::Enter(
1247 : isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1248 : last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1249 : }
1250 186208 : return *builder.ToJSArray(result_array);
1251 : } else {
1252 13 : return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1253 : }
1254 : }
1255 :
1256 : // Legacy implementation of RegExp.prototype[Symbol.replace] which
1257 : // doesn't properly call the underlying exec method.
1258 5604 : V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1259 : Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1260 : Handle<Object> replace_obj) {
1261 : // Functional fast-paths are dispatched directly by replace builtin.
1262 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1263 : DCHECK(!replace_obj->IsCallable());
1264 :
1265 : Factory* factory = isolate->factory();
1266 :
1267 11208 : const int flags = regexp->GetFlags();
1268 5604 : const bool global = (flags & JSRegExp::kGlobal) != 0;
1269 5604 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1270 :
1271 : Handle<String> replace;
1272 11208 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1273 : Object::ToString(isolate, replace_obj), String);
1274 5604 : replace = String::Flatten(isolate, replace);
1275 :
1276 5604 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1277 :
1278 5604 : if (!global) {
1279 : // Non-global regexp search, string replace.
1280 :
1281 : uint32_t last_index = 0;
1282 3327 : if (sticky) {
1283 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1284 36 : ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1285 : Object::ToLength(isolate, last_index_obj),
1286 : String);
1287 18 : last_index = PositiveNumberToUint32(*last_index_obj);
1288 : }
1289 :
1290 : Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1291 : isolate);
1292 :
1293 : // A lastIndex exceeding the string length always always returns null
1294 : // (signalling failure) in RegExpBuiltinExec, thus we can skip the call.
1295 3327 : if (last_index <= static_cast<uint32_t>(string->length())) {
1296 6618 : ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1297 : RegExpImpl::Exec(isolate, regexp, string,
1298 : last_index, last_match_info),
1299 : String);
1300 : }
1301 :
1302 6654 : if (match_indices_obj->IsNull(isolate)) {
1303 45 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1304 27 : return string;
1305 : }
1306 :
1307 3300 : auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1308 :
1309 3300 : const int start_index = match_indices->Capture(0);
1310 3300 : const int end_index = match_indices->Capture(1);
1311 :
1312 3300 : if (sticky)
1313 0 : regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1314 :
1315 3300 : IncrementalStringBuilder builder(isolate);
1316 3300 : builder.AppendString(factory->NewSubString(string, 0, start_index));
1317 :
1318 3300 : if (replace->length() > 0) {
1319 3300 : MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1320 : Handle<String> replacement;
1321 6600 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1322 : String::GetSubstitution(isolate, &m, replace),
1323 : String);
1324 3300 : builder.AppendString(replacement);
1325 : }
1326 :
1327 : builder.AppendString(
1328 3300 : factory->NewSubString(string, end_index, string->length()));
1329 3300 : return builder.Finish();
1330 : } else {
1331 : // Global regexp search, string replace.
1332 : DCHECK(global);
1333 4554 : RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1334 : String);
1335 :
1336 2277 : if (replace->length() == 0) {
1337 0 : if (string->HasOnlyOneByteChars()) {
1338 : Object result =
1339 : StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1340 0 : isolate, string, regexp, last_match_info);
1341 0 : return handle(String::cast(result), isolate);
1342 : } else {
1343 : Object result =
1344 : StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1345 0 : isolate, string, regexp, last_match_info);
1346 0 : return handle(String::cast(result), isolate);
1347 : }
1348 : }
1349 :
1350 : Object result = StringReplaceGlobalRegExpWithString(
1351 2277 : isolate, string, regexp, replace, last_match_info);
1352 2277 : if (result->IsString()) {
1353 2277 : return handle(String::cast(result), isolate);
1354 : } else {
1355 0 : return MaybeHandle<String>();
1356 : }
1357 : }
1358 :
1359 : UNREACHABLE();
1360 : }
1361 :
1362 : } // namespace
1363 :
1364 : // This is only called for StringReplaceGlobalRegExpWithFunction.
1365 93256 : RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1366 93256 : HandleScope handles(isolate);
1367 : DCHECK_EQ(4, args.length());
1368 :
1369 186512 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1370 186512 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1371 186512 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1372 186512 : CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1373 93256 : CHECK(result_array->HasObjectElements());
1374 :
1375 93256 : subject = String::Flatten(isolate, subject);
1376 93256 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1377 :
1378 93256 : if (regexp->CaptureCount() == 0) {
1379 : return SearchRegExpMultiple<false>(isolate, subject, regexp,
1380 90272 : last_match_info, result_array);
1381 : } else {
1382 : return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1383 2984 : result_array);
1384 93256 : }
1385 : }
1386 :
1387 9425 : RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1388 9425 : HandleScope scope(isolate);
1389 : DCHECK_EQ(3, args.length());
1390 18850 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1391 18850 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1392 18850 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1393 :
1394 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1395 : DCHECK(replace_obj->map()->is_callable());
1396 :
1397 9425 : Factory* factory = isolate->factory();
1398 9425 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1399 :
1400 9425 : const int flags = regexp->GetFlags();
1401 : DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1402 :
1403 : // TODO(jgruber): This should be an easy port to CSA with massive payback.
1404 :
1405 9425 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1406 : uint32_t last_index = 0;
1407 9425 : if (sticky) {
1408 0 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1409 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1410 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1411 0 : last_index = PositiveNumberToUint32(*last_index_obj);
1412 :
1413 0 : if (last_index > static_cast<uint32_t>(subject->length())) last_index = 0;
1414 : }
1415 :
1416 : Handle<Object> match_indices_obj;
1417 18850 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1418 : isolate, match_indices_obj,
1419 : RegExpImpl::Exec(isolate, regexp, subject, last_index, last_match_info));
1420 :
1421 18834 : if (match_indices_obj->IsNull(isolate)) {
1422 9135 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1423 : return *subject;
1424 : }
1425 :
1426 : Handle<RegExpMatchInfo> match_indices =
1427 282 : Handle<RegExpMatchInfo>::cast(match_indices_obj);
1428 :
1429 282 : const int index = match_indices->Capture(0);
1430 282 : const int end_of_match = match_indices->Capture(1);
1431 :
1432 282 : if (sticky)
1433 0 : regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1434 :
1435 282 : IncrementalStringBuilder builder(isolate);
1436 282 : builder.AppendString(factory->NewSubString(subject, 0, index));
1437 :
1438 : // Compute the parameter list consisting of the match, captures, index,
1439 : // and subject for the replace function invocation. If the RegExp contains
1440 : // named captures, they are also passed as the last argument.
1441 :
1442 : // The number of captures plus one for the match.
1443 282 : const int m = match_indices->NumberOfCaptureRegisters() / 2;
1444 :
1445 : bool has_named_captures = false;
1446 : Handle<FixedArray> capture_map;
1447 282 : if (m > 1) {
1448 : // The existence of capture groups implies IRREGEXP kind.
1449 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1450 :
1451 108 : Object maybe_capture_map = regexp->CaptureNameMap();
1452 216 : if (maybe_capture_map->IsFixedArray()) {
1453 : has_named_captures = true;
1454 18 : capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1455 : }
1456 : }
1457 :
1458 282 : const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1459 282 : if (argc == static_cast<uint32_t>(-1)) {
1460 0 : THROW_NEW_ERROR_RETURN_FAILURE(
1461 : isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1462 : }
1463 564 : ScopedVector<Handle<Object>> argv(argc);
1464 :
1465 : int cursor = 0;
1466 435 : for (int j = 0; j < m; j++) {
1467 : bool ok;
1468 : Handle<String> capture =
1469 435 : RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1470 435 : if (ok) {
1471 363 : argv[cursor++] = capture;
1472 : } else {
1473 72 : argv[cursor++] = factory->undefined_value();
1474 : }
1475 : }
1476 :
1477 564 : argv[cursor++] = handle(Smi::FromInt(index), isolate);
1478 282 : argv[cursor++] = subject;
1479 :
1480 282 : if (has_named_captures) {
1481 18 : argv[cursor++] = ConstructNamedCaptureGroupsObject(
1482 90 : isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1483 : }
1484 :
1485 : DCHECK_EQ(cursor, argc);
1486 :
1487 : Handle<Object> replacement_obj;
1488 846 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1489 : isolate, replacement_obj,
1490 : Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1491 : argv.start()));
1492 :
1493 : Handle<String> replacement;
1494 564 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1495 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1496 :
1497 282 : builder.AppendString(replacement);
1498 : builder.AppendString(
1499 282 : factory->NewSubString(subject, end_of_match, subject->length()));
1500 :
1501 9989 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1502 : }
1503 :
1504 : namespace {
1505 :
1506 278 : V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1507 : Handle<Object> object,
1508 : uint32_t* out) {
1509 556 : if (object->IsUndefined(isolate)) {
1510 89 : *out = kMaxUInt32;
1511 89 : return object;
1512 : }
1513 :
1514 : Handle<Object> number;
1515 378 : ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1516 : Object);
1517 189 : *out = NumberToUint32(*number);
1518 189 : return object;
1519 : }
1520 :
1521 224 : Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1522 : Handle<FixedArray> elems,
1523 : int num_elems) {
1524 : return isolate->factory()->NewJSArrayWithElements(
1525 448 : FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1526 : }
1527 :
1528 : } // namespace
1529 :
1530 : // Slow path for:
1531 : // ES#sec-regexp.prototype-@@split
1532 : // RegExp.prototype [ @@split ] ( string, limit )
1533 278 : RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1534 278 : HandleScope scope(isolate);
1535 : DCHECK_EQ(3, args.length());
1536 :
1537 556 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1538 556 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1539 278 : CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1540 :
1541 278 : Factory* factory = isolate->factory();
1542 :
1543 278 : Handle<JSFunction> regexp_fun = isolate->regexp_function();
1544 : Handle<Object> ctor;
1545 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1546 : isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1547 :
1548 : Handle<Object> flags_obj;
1549 834 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1550 : isolate, flags_obj,
1551 : JSObject::GetProperty(isolate, recv, factory->flags_string()));
1552 :
1553 : Handle<String> flags;
1554 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1555 : Object::ToString(isolate, flags_obj));
1556 :
1557 278 : Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1558 278 : const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1559 :
1560 278 : Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1561 278 : const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1562 :
1563 : Handle<String> new_flags = flags;
1564 278 : if (!sticky) {
1565 484 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1566 : factory->NewConsString(flags, y_str));
1567 : }
1568 :
1569 : Handle<JSReceiver> splitter;
1570 : {
1571 : const int argc = 2;
1572 :
1573 278 : ScopedVector<Handle<Object>> argv(argc);
1574 278 : argv[0] = recv;
1575 278 : argv[1] = new_flags;
1576 :
1577 : Handle<Object> splitter_obj;
1578 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1579 : isolate, splitter_obj,
1580 : Execution::New(isolate, ctor, argc, argv.start()));
1581 :
1582 278 : splitter = Handle<JSReceiver>::cast(splitter_obj);
1583 : }
1584 :
1585 : uint32_t limit;
1586 278 : RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1587 :
1588 278 : const uint32_t length = string->length();
1589 :
1590 323 : if (limit == 0) return *factory->NewJSArray(0);
1591 :
1592 233 : if (length == 0) {
1593 : Handle<Object> result;
1594 27 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1595 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1596 : factory->undefined_value()));
1597 :
1598 18 : if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1599 :
1600 9 : Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1601 18 : elems->set(0, *string);
1602 18 : return *factory->NewJSArrayWithElements(elems);
1603 : }
1604 :
1605 : static const int kInitialArraySize = 8;
1606 224 : Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1607 : uint32_t num_elems = 0;
1608 :
1609 : uint32_t string_index = 0;
1610 : uint32_t prev_string_index = 0;
1611 1103 : while (string_index < length) {
1612 969 : RETURN_FAILURE_ON_EXCEPTION(
1613 : isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1614 :
1615 : Handle<Object> result;
1616 2907 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1617 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1618 : factory->undefined_value()));
1619 :
1620 1938 : if (result->IsNull(isolate)) {
1621 : string_index = static_cast<uint32_t>(
1622 493 : RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1623 : continue;
1624 : }
1625 :
1626 : Handle<Object> last_index_obj;
1627 952 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1628 : isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1629 :
1630 952 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1631 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1632 :
1633 : const uint32_t end =
1634 476 : std::min(PositiveNumberToUint32(*last_index_obj), length);
1635 476 : if (end == prev_string_index) {
1636 : string_index = static_cast<uint32_t>(
1637 63 : RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1638 : continue;
1639 : }
1640 :
1641 : {
1642 : Handle<String> substr =
1643 413 : factory->NewSubString(string, prev_string_index, string_index);
1644 413 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1645 413 : if (num_elems == limit) {
1646 180 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1647 : }
1648 : }
1649 :
1650 : prev_string_index = end;
1651 :
1652 : Handle<Object> num_captures_obj;
1653 969 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1654 : isolate, num_captures_obj,
1655 : Object::GetProperty(isolate, result,
1656 : isolate->factory()->length_string()));
1657 :
1658 646 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1659 : isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1660 323 : const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1661 :
1662 0 : for (uint32_t i = 1; i < num_captures; i++) {
1663 : Handle<Object> capture;
1664 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1665 : isolate, capture, Object::GetElement(isolate, result, i));
1666 0 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1667 0 : if (num_elems == limit) {
1668 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1669 : }
1670 : }
1671 :
1672 : string_index = prev_string_index;
1673 : }
1674 :
1675 : {
1676 : Handle<String> substr =
1677 134 : factory->NewSubString(string, prev_string_index, length);
1678 134 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1679 : }
1680 :
1681 268 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1682 : }
1683 :
1684 : // Slow path for:
1685 : // ES#sec-regexp.prototype-@@replace
1686 : // RegExp.prototype [ @@replace ] ( string, replaceValue )
//
// Runtime arguments (exactly three):
//   0: recv        - the receiver, a JSReceiver.
//   1: string      - the subject String; it is flattened before use.
//   2: replace_obj - the replace value; treated as a replacer function when
//                    it is callable, otherwise converted with ToString.
//
// Unmodified JSRegExp receivers are forwarded to RegExpReplace(). For every
// other receiver the function works in two phases: it first collects all exec
// results (one, or every match when the "global" property is truthy) and then
// builds the output string from those results.
//
// Returns the resulting string, or a failure if any property lookup,
// conversion, exec call or replacer call left an exception pending.
1687 6720 : RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1688 6162 : HandleScope scope(isolate);
1689 : DCHECK_EQ(3, args.length());
1690 :
1691 12324 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1692 12324 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1693 6162 : Handle<Object> replace_obj = args.at(2);
1694 :
1695 6162 : Factory* factory = isolate->factory();
1696 :
1697 6162 : string = String::Flatten(isolate, string);
1698 :
1699 12324 : const bool functional_replace = replace_obj->IsCallable();
1700 :
1701 : // Fast-path for unmodified JSRegExps (and non-functional replace).
1702 6162 : if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1703 : // We should never get here with functional replace because unmodified
1704 : // regexp and functional replace should be fully handled in CSA code.
1705 5604 : CHECK(!functional_replace);
1706 11208 : RETURN_RESULT_OR_FAILURE(
1707 : isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1708 : replace_obj));
1709 : }
1710 :
1711 558 : const uint32_t length = string->length();
1712 :
// `replace` is only assigned for the non-functional case; the functional
// case calls replace_obj directly further down.
1713 : Handle<String> replace;
1714 558 : if (!functional_replace) {
1715 990 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1716 : Object::ToString(isolate, replace_obj));
1717 : }
1718 :
// Read the "global" property from the receiver and coerce it to a boolean.
1719 : Handle<Object> global_obj;
1720 1674 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1721 : isolate, global_obj,
1722 : JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1723 558 : const bool global = global_obj->BooleanValue(isolate);
1724 :
// For a global replace, also read "unicode" and reset lastIndex to 0 before
// the first exec call.
1725 : bool unicode = false;
1726 558 : if (global) {
1727 : Handle<Object> unicode_obj;
1728 918 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1729 : isolate, unicode_obj,
1730 : JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1731 306 : unicode = unicode_obj->BooleanValue(isolate);
1732 :
1733 306 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1734 : RegExpUtils::SetLastIndex(isolate, recv, 0));
1735 : }
1736 :
// Phase 1: gather exec results into a vector backed by a local Zone.
1737 1116 : Zone zone(isolate->allocator(), ZONE_NAME);
1738 1116 : ZoneVector<Handle<Object>> results(&zone);
1739 :
// Keep calling RegExpExec until it yields null. A non-global replace stops
// after the first result.
1740 : while (true) {
1741 : Handle<Object> result;
1742 2700 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1743 : isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1744 : factory->undefined_value()));
1745 :
1746 1782 : if (result->IsNull(isolate)) break;
1747 :
1748 540 : results.push_back(result);
1749 540 : if (!global) break;
1750 :
1751 : Handle<Object> match_obj;
1752 684 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1753 : Object::GetElement(isolate, result, 0));
1754 :
1755 : Handle<String> match;
1756 684 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1757 : Object::ToString(isolate, match_obj));
1758 :
// An empty match triggers an explicit lastIndex advance through
// SetAdvancedStringIndex before the next exec call.
// NOTE(review): presumably this is what prevents an endless loop on
// zero-length matches -- confirm against RegExpUtils.
1759 342 : if (match->length() == 0) {
1760 90 : RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1761 : isolate, recv, string, unicode));
1762 : }
1763 : }
1764 :
// Phase 2: walk the collected results and assemble the output string.
// next_source_position is the offset in `string` up to which source text
// has already been accounted for in the builder.
1765 : // TODO(jgruber): Look into ReplacementStringBuilder instead.
1766 549 : IncrementalStringBuilder builder(isolate);
1767 : uint32_t next_source_position = 0;
1768 :
1769 1080 : for (const auto& result : results) {
1770 540 : HandleScope handle_scope(isolate);
// Number of captures: the result's "length" property passed through
// ToLength.
1771 : Handle<Object> captures_length_obj;
1772 1620 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1773 : isolate, captures_length_obj,
1774 : Object::GetProperty(isolate, result, factory->length_string()));
1775 :
1776 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1777 : isolate, captures_length_obj,
1778 : Object::ToLength(isolate, captures_length_obj));
1779 : const uint32_t captures_length =
1780 540 : PositiveNumberToUint32(*captures_length_obj);
1781 :
// The matched text is element 0 of the result, converted with ToString.
1782 : Handle<Object> match_obj;
1783 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1784 : Object::GetElement(isolate, result, 0));
1785 :
1786 : Handle<String> match;
1787 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1788 : Object::ToString(isolate, match_obj));
1789 :
1790 540 : const int match_length = match->length();
1791 :
// The match position is the result's "index" property passed through
// ToInteger and capped at the subject length.
// NOTE(review): negative values are presumably mapped to 0 by
// PositiveNumberToUint32 -- confirm in conversions.
1792 : Handle<Object> position_obj;
1793 1620 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1794 : isolate, position_obj,
1795 : Object::GetProperty(isolate, result, factory->index_string()));
1796 :
1797 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1798 : isolate, position_obj, Object::ToInteger(isolate, position_obj));
1799 : const uint32_t position =
1800 540 : std::min(PositiveNumberToUint32(*position_obj), length);
1801 :
1802 : // Do not reserve capacity since captures_length is user-controlled.
1803 1071 : ZoneVector<Handle<Object>> captures(&zone);
1804 :
// Copy elements [0, captures_length) of the result. Index 0 (the match
// itself) is included; every element that is not undefined is converted
// with ToString, undefined elements are stored unchanged.
1805 1181043 : for (uint32_t n = 0; n < captures_length; n++) {
1806 : Handle<Object> capture;
1807 2362086 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1808 : isolate, capture, Object::GetElement(isolate, result, n));
1809 :
1810 2362086 : if (!capture->IsUndefined(isolate)) {
1811 2214 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1812 : Object::ToString(isolate, capture));
1813 : }
1814 1181043 : captures.push_back(capture);
1815 : }
1816 :
// Named captures are considered present when the result's "groups"
// property is anything other than undefined.
1817 540 : Handle<Object> groups_obj = isolate->factory()->undefined_value();
1818 1620 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1819 : isolate, groups_obj,
1820 : Object::GetProperty(isolate, result, factory->groups_string()));
1821 :
1822 1080 : const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1823 :
1824 : Handle<String> replacement;
1825 540 : if (functional_replace) {
// Functional replace: call replace_obj with
//   (captures[0..captures_length), position, string [, groups])
// and an undefined receiver. GetArgcForReplaceCallable signals an
// over-long argument list with -1, which is reported as a RangeError.
1826 : const uint32_t argc =
1827 99 : GetArgcForReplaceCallable(captures_length, has_named_captures);
1828 99 : if (argc == static_cast<uint32_t>(-1)) {
1829 18 : THROW_NEW_ERROR_RETURN_FAILURE(
1830 : isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1831 : }
1832 :
1833 90 : ScopedVector<Handle<Object>> argv(argc);
1834 :
1835 : int cursor = 0;
1836 234 : for (uint32_t j = 0; j < captures_length; j++) {
1837 234 : argv[cursor++] = captures[j];
1838 : }
1839 :
1840 180 : argv[cursor++] = handle(Smi::FromInt(position), isolate);
1841 90 : argv[cursor++] = string;
1842 90 : if (has_named_captures) argv[cursor++] = groups_obj;
1843 :
1844 : DCHECK_EQ(cursor, argc);
1845 :
1846 : Handle<Object> replacement_obj;
1847 270 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1848 : isolate, replacement_obj,
1849 : Execution::Call(isolate, replace_obj, factory->undefined_value(),
1850 : argc, argv.start()));
1851 :
// The value returned by the replacer is converted with ToString.
1852 180 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1853 90 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1854 : } else {
// String replace: a groups value that is not undefined is converted with
// ToObject, then String::GetSubstitution produces the replacement from
// the `replace` template and the match data.
1855 : DCHECK(!functional_replace);
1856 882 : if (!groups_obj->IsUndefined(isolate)) {
1857 558 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1858 : isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1859 : }
1860 : VectorBackedMatch m(isolate, string, match, position, &captures,
1861 441 : groups_obj);
1862 882 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1863 441 : isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1864 : }
1865 :
// Only results positioned at or after the already-consumed source are
// emitted: append the untouched source slice, then the replacement, and
// move past the match. A result positioned earlier is dropped, although
// its replacement has already been computed above.
1866 531 : if (position >= next_source_position) {
1867 : builder.AppendString(
1868 531 : factory->NewSubString(string, next_source_position, position));
1869 531 : builder.AppendString(replacement);
1870 :
1871 531 : next_source_position = position + match_length;
1872 : }
1873 531 : }
1874 :
// Append whatever remains of the subject after the last emitted match.
1875 540 : if (next_source_position < length) {
1876 : builder.AppendString(
1877 324 : factory->NewSubString(string, next_source_position, length));
1878 : }
1879 :
1880 7242 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1881 : }
1882 :
// Initializes an existing JSRegExp object from a pattern and a flags string.
//
// Runtime arguments (exactly three):
//   0: regexp - the JSRegExp to initialize.
//   1: source - the pattern source String.
//   2: flags  - the flags String.
//
// Returns `regexp` after JSRegExp::Initialize succeeds; a failing Initialize
// is propagated through RETURN_FAILURE_ON_EXCEPTION.
1883 378291 : RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1884 378291 : HandleScope scope(isolate);
1885 : DCHECK_EQ(3, args.length());
1886 : // TODO(pwong): To follow the spec more closely and simplify calling code,
1887 : // this could handle the canonicalization of pattern and flags. See
1888 : // https://tc39.github.io/ecma262/#sec-regexpinitialize
1889 756582 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1890 756582 : CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1891 756582 : CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1892 :
1893 378291 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1894 : JSRegExp::Initialize(regexp, source, flags));
1895 :
1896 378291 : return *regexp;
1897 : }
1898 :
// Reports whether the single runtime argument is a JSRegExp.
//
// Runtime arguments (exactly one):
//   0: obj - any Object.
//
// Returns the heap's boolean for obj->IsJSRegExp(). This is a type check on
// the object itself; no properties of `obj` are read here.
1899 0 : RUNTIME_FUNCTION(Runtime_IsRegExp) {
1900 : SealHandleScope shs(isolate);
1901 : DCHECK_EQ(1, args.length());
1902 0 : CONVERT_ARG_CHECKED(Object, obj, 0);
1903 0 : return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1904 : }
1905 :
1906 : } // namespace internal
1907 178779 : } // namespace v8
|