Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include <functional>
6 :
7 : #include "src/arguments-inl.h"
8 : #include "src/conversions-inl.h"
9 : #include "src/counters.h"
10 : #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
11 : #include "src/isolate-inl.h"
12 : #include "src/message-template.h"
13 : #include "src/objects/js-array-inl.h"
14 : #include "src/regexp/jsregexp-inl.h"
15 : #include "src/regexp/regexp-utils.h"
16 : #include "src/runtime/runtime-utils.h"
17 : #include "src/string-builder-inl.h"
18 : #include "src/string-search.h"
19 : #include "src/zone/zone-chunk-list.h"
20 :
21 : namespace v8 {
22 : namespace internal {
23 :
24 : namespace {
25 :
26 : // Returns -1 for failure.
27 : uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
28 : bool has_named_captures) {
29 : const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
30 : const uint32_t kAdditionalArgsWithNamedCaptures = 3;
31 381 : if (num_captures > Code::kMaxArguments) return -1;
32 : uint32_t argc = has_named_captures
33 : ? num_captures + kAdditionalArgsWithNamedCaptures
34 372 : : num_captures + kAdditionalArgsWithoutNamedCaptures;
35 : STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
36 : kAdditionalArgsWithNamedCaptures);
37 372 : return (argc > Code::kMaxArguments) ? -1 : argc;
38 : }
39 :
40 : // Looks up the capture of the given name. Returns the (1-based) numbered
41 : // capture index or -1 on failure.
42 135 : int LookupNamedCapture(const std::function<bool(String)>& name_matches,
43 : FixedArray capture_name_map) {
44 : // TODO(jgruber): Sort capture_name_map and do binary search via
45 : // internalized strings.
46 :
47 : int maybe_capture_index = -1;
48 135 : const int named_capture_count = capture_name_map->length() >> 1;
49 621 : for (int j = 0; j < named_capture_count; j++) {
50 : // The format of {capture_name_map} is documented at
51 : // JSRegExp::kIrregexpCaptureNameMapIndex.
52 315 : const int name_ix = j * 2;
53 315 : const int index_ix = j * 2 + 1;
54 :
55 : String capture_name = String::cast(capture_name_map->get(name_ix));
56 315 : if (!name_matches(capture_name)) continue;
57 :
58 : maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
59 : break;
60 : }
61 :
62 135 : return maybe_capture_index;
63 : }
64 :
65 : } // namespace
66 :
67 2286 : class CompiledReplacement {
68 : public:
69 : explicit CompiledReplacement(Zone* zone)
70 : : parts_(zone), replacement_substrings_(zone) {}
71 :
72 : // Return whether the replacement is simple.
73 : bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
74 : Handle<String> replacement, int capture_count,
75 : int subject_length);
76 :
77 : // Use Apply only if Compile returned false.
78 : void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
79 : int32_t* match);
80 :
81 : // Number of distinct parts of the replacement pattern.
82 1629 : int parts() { return static_cast<int>(parts_.size()); }
83 :
84 : private:
85 : enum PartType {
86 : SUBJECT_PREFIX = 1,
87 : SUBJECT_SUFFIX,
88 : SUBJECT_CAPTURE,
89 : REPLACEMENT_SUBSTRING,
90 : REPLACEMENT_STRING,
91 : EMPTY_REPLACEMENT,
92 : NUMBER_OF_PART_TYPES
93 : };
94 :
95 : struct ReplacementPart {
96 : static inline ReplacementPart SubjectMatch() {
97 : return ReplacementPart(SUBJECT_CAPTURE, 0);
98 : }
99 : static inline ReplacementPart SubjectCapture(int capture_index) {
100 : return ReplacementPart(SUBJECT_CAPTURE, capture_index);
101 : }
102 : static inline ReplacementPart SubjectPrefix() {
103 : return ReplacementPart(SUBJECT_PREFIX, 0);
104 : }
105 : static inline ReplacementPart SubjectSuffix(int subject_length) {
106 : return ReplacementPart(SUBJECT_SUFFIX, subject_length);
107 : }
108 : static inline ReplacementPart ReplacementString() {
109 : return ReplacementPart(REPLACEMENT_STRING, 0);
110 : }
111 : static inline ReplacementPart EmptyReplacement() {
112 : return ReplacementPart(EMPTY_REPLACEMENT, 0);
113 : }
114 : static inline ReplacementPart ReplacementSubString(int from, int to) {
115 : DCHECK_LE(0, from);
116 : DCHECK_GT(to, from);
117 639 : return ReplacementPart(-from, to);
118 : }
119 :
120 : // If tag <= 0 then it is the negation of a start index of a substring of
121 : // the replacement pattern, otherwise it's a value from PartType.
122 : ReplacementPart(int tag, int data) : tag(tag), data(data) {
123 : // Must be non-positive or a PartType value.
124 : DCHECK(tag < NUMBER_OF_PART_TYPES);
125 : }
126 : // Either a value of PartType or a non-positive number that is
127 : // the negation of an index into the replacement string.
128 : int tag;
129 : // The data value's interpretation depends on the value of tag:
130 : // tag == SUBJECT_PREFIX ||
131 : // tag == SUBJECT_SUFFIX: data is unused.
132 : // tag == SUBJECT_CAPTURE: data is the number of the capture.
133 : // tag == REPLACEMENT_SUBSTRING ||
134 : // tag == REPLACEMENT_STRING: data is index into array of substrings
135 : // of the replacement string.
136 : // tag == EMPTY_REPLACEMENT: data is unused.
137 : // tag <= 0: Temporary representation of the substring of the replacement
138 : // string ranging over -tag .. data.
139 : // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
140 : // substring objects.
141 : int data;
142 : };
143 :
144 : template <typename Char>
145 2286 : bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
146 : Vector<Char> characters,
147 : FixedArray capture_name_map, int capture_count,
148 : int subject_length) {
149 : // Equivalent to String::GetSubstitution, except that this method converts
150 : // the replacement string into an internal representation that avoids
151 : // repeated parsing when used repeatedly.
152 : int length = characters.length();
153 : int last = 0;
154 10044 : for (int i = 0; i < length; i++) {
155 7776 : Char c = characters[i];
156 3888 : if (c == '$') {
157 2448 : int next_index = i + 1;
158 2448 : if (next_index == length) { // No next character!
159 : break;
160 : }
161 4878 : Char c2 = characters[next_index];
162 2439 : switch (c2) {
163 : case '$':
164 72 : if (i > last) {
165 : // There is a substring before. Include the first "$".
166 45 : parts->push_back(
167 : ReplacementPart::ReplacementSubString(last, next_index));
168 45 : last = next_index + 1; // Continue after the second "$".
169 : } else {
170 : // Let the next substring start with the second "$".
171 : last = next_index;
172 : }
173 : i = next_index;
174 : break;
175 : case '`':
176 18 : if (i > last) {
177 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
178 : }
179 18 : parts->push_back(ReplacementPart::SubjectPrefix());
180 : i = next_index;
181 18 : last = i + 1;
182 18 : break;
183 : case '\'':
184 18 : if (i > last) {
185 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
186 : }
187 18 : parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
188 : i = next_index;
189 18 : last = i + 1;
190 18 : break;
191 : case '&':
192 18 : if (i > last) {
193 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
194 : }
195 18 : parts->push_back(ReplacementPart::SubjectMatch());
196 : i = next_index;
197 18 : last = i + 1;
198 18 : break;
199 : case '0':
200 : case '1':
201 : case '2':
202 : case '3':
203 : case '4':
204 : case '5':
205 : case '6':
206 : case '7':
207 : case '8':
208 : case '9': {
209 2232 : int capture_ref = c2 - '0';
210 2232 : if (capture_ref > capture_count) {
211 : i = next_index;
212 : continue;
213 : }
214 1521 : int second_digit_index = next_index + 1;
215 1521 : if (second_digit_index < length) {
216 : // Peek ahead to see if we have two digits.
217 2772 : Char c3 = characters[second_digit_index];
218 1386 : if ('0' <= c3 && c3 <= '9') { // Double digits.
219 1332 : int double_digit_ref = capture_ref * 10 + c3 - '0';
220 1332 : if (double_digit_ref <= capture_count) {
221 : next_index = second_digit_index;
222 : capture_ref = double_digit_ref;
223 : }
224 : }
225 : }
226 1521 : if (capture_ref > 0) {
227 1485 : if (i > last) {
228 63 : parts->push_back(
229 : ReplacementPart::ReplacementSubString(last, i));
230 : }
231 : DCHECK(capture_ref <= capture_count);
232 1485 : parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
233 1485 : last = next_index + 1;
234 : }
235 : i = next_index;
236 : break;
237 : }
238 : case '<': {
239 72 : if (capture_name_map.is_null()) {
240 : i = next_index;
241 : break;
242 : }
243 :
244 : // Scan until the next '>', and let the enclosed substring be the
245 : // groupName.
246 :
247 72 : const int name_start_index = next_index + 1;
248 : int closing_bracket_index = -1;
249 540 : for (int j = name_start_index; j < length; j++) {
250 576 : if (characters[j] == '>') {
251 : closing_bracket_index = j;
252 : break;
253 : }
254 : }
255 :
256 : // If no closing bracket is found, '$<' is treated as a string
257 : // literal.
258 72 : if (closing_bracket_index == -1) {
259 : i = next_index;
260 : break;
261 : }
262 :
263 : Vector<Char> requested_name =
264 54 : characters.SubVector(name_start_index, closing_bracket_index);
265 :
266 : // Let capture be ? Get(namedCaptures, groupName).
267 :
268 54 : const int capture_index = LookupNamedCapture(
269 : [=](String capture_name) {
270 : return capture_name->IsEqualTo(requested_name);
271 135 : },
272 54 : capture_name_map);
273 :
274 : // If capture is undefined or does not exist, replace the text
275 : // through the following '>' with the empty string.
276 : // Otherwise, replace the text through the following '>' with
277 : // ? ToString(capture).
278 :
279 : DCHECK(capture_index == -1 ||
280 : (1 <= capture_index && capture_index <= capture_count));
281 :
282 54 : if (i > last) {
283 0 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
284 : }
285 54 : parts->push_back(
286 : (capture_index == -1)
287 : ? ReplacementPart::EmptyReplacement()
288 : : ReplacementPart::SubjectCapture(capture_index));
289 54 : last = closing_bracket_index + 1;
290 : i = closing_bracket_index;
291 54 : break;
292 : }
293 : default:
294 : i = next_index;
295 : break;
296 : }
297 : }
298 : }
299 2286 : if (length > last) {
300 1224 : if (last == 0) {
301 : // Replacement is simple. Do not use Apply to do the replacement.
302 : return true;
303 : } else {
304 477 : parts->push_back(ReplacementPart::ReplacementSubString(last, length));
305 : }
306 : }
307 : return false;
308 : }
309 :
310 : ZoneChunkList<ReplacementPart> parts_;
311 : ZoneVector<Handle<String>> replacement_substrings_;
312 : };
313 :
314 2286 : bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
315 : Handle<String> replacement, int capture_count,
316 : int subject_length) {
317 : {
318 : DisallowHeapAllocation no_gc;
319 2286 : String::FlatContent content = replacement->GetFlatContent(no_gc);
320 : DCHECK(content.IsFlat());
321 :
322 2286 : FixedArray capture_name_map;
323 2286 : if (capture_count > 0) {
324 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
325 : Object maybe_capture_name_map = regexp->CaptureNameMap();
326 1485 : if (maybe_capture_name_map->IsFixedArray()) {
327 72 : capture_name_map = FixedArray::cast(maybe_capture_name_map);
328 : }
329 : }
330 :
331 : bool simple;
332 2286 : if (content.IsOneByte()) {
333 2286 : simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
334 : capture_name_map, capture_count,
335 2286 : subject_length);
336 : } else {
337 : DCHECK(content.IsTwoByte());
338 0 : simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
339 : capture_name_map, capture_count,
340 0 : subject_length);
341 : }
342 2286 : if (simple) return true;
343 : }
344 :
345 : // Find substrings of replacement string and create them as String objects.
346 : int substring_index = 0;
347 3771 : for (ReplacementPart& part : parts_) {
348 2232 : int tag = part.tag;
349 2232 : if (tag <= 0) { // A replacement string slice.
350 639 : int from = -tag;
351 639 : int to = part.data;
352 639 : replacement_substrings_.push_back(
353 1278 : isolate->factory()->NewSubString(replacement, from, to));
354 639 : part.tag = REPLACEMENT_SUBSTRING;
355 639 : part.data = substring_index;
356 639 : substring_index++;
357 1593 : } else if (tag == REPLACEMENT_STRING) {
358 0 : replacement_substrings_.push_back(replacement);
359 0 : part.data = substring_index;
360 0 : substring_index++;
361 : }
362 : }
363 : return false;
364 : }
365 :
366 :
367 1818 : void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
368 : int match_from, int match_to, int32_t* match) {
369 : DCHECK_LT(0, parts_.size());
370 4770 : for (ReplacementPart& part : parts_) {
371 2952 : switch (part.tag) {
372 : case SUBJECT_PREFIX:
373 45 : if (match_from > 0) builder->AddSubjectSlice(0, match_from);
374 : break;
375 : case SUBJECT_SUFFIX: {
376 45 : int subject_length = part.data;
377 45 : if (match_to < subject_length) {
378 36 : builder->AddSubjectSlice(match_to, subject_length);
379 : }
380 : break;
381 : }
382 : case SUBJECT_CAPTURE: {
383 1764 : int capture = part.data;
384 1764 : int from = match[capture * 2];
385 1764 : int to = match[capture * 2 + 1];
386 1764 : if (from >= 0 && to > from) {
387 1683 : builder->AddSubjectSlice(from, to);
388 : }
389 : break;
390 : }
391 : case REPLACEMENT_SUBSTRING:
392 : case REPLACEMENT_STRING:
393 2088 : builder->AddString(replacement_substrings_[part.data]);
394 1044 : break;
395 : case EMPTY_REPLACEMENT:
396 : break;
397 : default:
398 0 : UNREACHABLE();
399 : }
400 : }
401 1818 : }
402 :
403 31892 : void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
404 : std::vector<int>* indices, unsigned int limit) {
405 : DCHECK_LT(0, limit);
406 : // Collect indices of pattern in subject using memchr.
407 : // Stop after finding at most limit values.
408 : const uint8_t* subject_start = subject.start();
409 31892 : const uint8_t* subject_end = subject_start + subject.length();
410 : const uint8_t* pos = subject_start;
411 195530 : while (limit > 0) {
412 : pos = reinterpret_cast<const uint8_t*>(
413 113684 : memchr(pos, pattern, subject_end - pos));
414 113684 : if (pos == nullptr) return;
415 163638 : indices->push_back(static_cast<int>(pos - subject_start));
416 81819 : pos++;
417 81819 : limit--;
418 : }
419 : }
420 :
421 369 : void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
422 : std::vector<int>* indices, unsigned int limit) {
423 : DCHECK_LT(0, limit);
424 : const uc16* subject_start = subject.start();
425 369 : const uc16* subject_end = subject_start + subject.length();
426 24759 : for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
427 12195 : if (*pos == pattern) {
428 738 : indices->push_back(static_cast<int>(pos - subject_start));
429 369 : limit--;
430 : }
431 : }
432 369 : }
433 :
434 : template <typename SubjectChar, typename PatternChar>
435 14 : void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
436 : Vector<const PatternChar> pattern,
437 : std::vector<int>* indices, unsigned int limit) {
438 : DCHECK_LT(0, limit);
439 : // Collect indices of pattern in subject.
440 : // Stop after finding at most limit values.
441 : int pattern_length = pattern.length();
442 14 : int index = 0;
443 0 : StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
444 42 : while (limit > 0) {
445 56 : index = search.Search(subject, index);
446 42 : if (index < 0) return;
447 14 : indices->push_back(index);
448 14 : index += pattern_length;
449 14 : limit--;
450 : }
451 : }
452 :
453 32275 : void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
454 : std::vector<int>* indices, unsigned int limit) {
455 : {
456 : DisallowHeapAllocation no_gc;
457 32275 : String::FlatContent subject_content = subject->GetFlatContent(no_gc);
458 32275 : String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
459 : DCHECK(subject_content.IsFlat());
460 : DCHECK(pattern_content.IsFlat());
461 32275 : if (subject_content.IsOneByte()) {
462 31906 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463 31906 : if (pattern_content.IsOneByte()) {
464 : Vector<const uint8_t> pattern_vector =
465 : pattern_content.ToOneByteVector();
466 31906 : if (pattern_vector.length() == 1) {
467 31892 : FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468 31892 : limit);
469 : } else {
470 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471 14 : limit);
472 : }
473 : } else {
474 : FindStringIndices(isolate, subject_vector,
475 0 : pattern_content.ToUC16Vector(), indices, limit);
476 : }
477 : } else {
478 369 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479 369 : if (pattern_content.IsOneByte()) {
480 : Vector<const uint8_t> pattern_vector =
481 : pattern_content.ToOneByteVector();
482 369 : if (pattern_vector.length() == 1) {
483 369 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484 369 : limit);
485 : } else {
486 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 0 : limit);
488 : }
489 : } else {
490 : Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491 0 : if (pattern_vector.length() == 1) {
492 0 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493 0 : limit);
494 : } else {
495 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496 0 : limit);
497 : }
498 : }
499 : }
500 : }
501 32275 : }
502 :
503 : namespace {
504 32239 : std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505 : std::vector<int>* list = isolate->regexp_indices();
506 : list->clear();
507 32239 : return list;
508 : }
509 :
510 32239 : void TruncateRegexpIndicesList(Isolate* isolate) {
511 : // Same size as smallest zone segment, preserving behavior from the
512 : // runtime zone.
513 : static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514 : std::vector<int>* indicies = isolate->regexp_indices();
515 32275 : if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516 : // Throw away backing storage.
517 : indicies->clear();
518 : indicies->shrink_to_fit();
519 : }
520 32239 : }
521 : } // namespace
522 :
523 : template <typename ResultSeqString>
524 36 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
525 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527 : DCHECK(subject->IsFlat());
528 : DCHECK(replacement->IsFlat());
529 :
530 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531 :
532 : DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533 : String pattern =
534 : String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535 : int subject_len = subject->length();
536 : int pattern_len = pattern->length();
537 : int replacement_len = replacement->length();
538 :
539 36 : FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540 :
541 36 : if (indices->empty()) return *subject;
542 :
543 : // Detect integer overflow.
544 : int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545 : static_cast<int64_t>(pattern_len)) *
546 : static_cast<int64_t>(indices->size()) +
547 72 : static_cast<int64_t>(subject_len);
548 : int result_len;
549 36 : if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
551 : result_len = kMaxInt; // Provoke exception.
552 : } else {
553 36 : result_len = static_cast<int>(result_len_64);
554 : }
555 36 : if (result_len == 0) {
556 0 : return ReadOnlyRoots(isolate).empty_string();
557 : }
558 :
559 : int subject_pos = 0;
560 : int result_pos = 0;
561 :
562 : MaybeHandle<SeqString> maybe_res;
563 : if (ResultSeqString::kHasOneByteEncoding) {
564 36 : maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565 : } else {
566 0 : maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567 : }
568 : Handle<SeqString> untyped_res;
569 36 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570 : Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571 :
572 : DisallowHeapAllocation no_gc;
573 99 : for (int index : *indices) {
574 : // Copy non-matched subject content.
575 63 : if (subject_pos < index) {
576 108 : String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
577 : subject_pos, index);
578 54 : result_pos += index - subject_pos;
579 : }
580 :
581 : // Replace match.
582 63 : if (replacement_len > 0) {
583 126 : String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
584 : replacement_len);
585 63 : result_pos += replacement_len;
586 : }
587 :
588 63 : subject_pos = index + pattern_len;
589 : }
590 : // Add remaining subject content at the end.
591 36 : if (subject_pos < subject_len) {
592 54 : String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
593 : subject_pos, subject_len);
594 : }
595 :
596 36 : int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
597 36 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
598 : match_indices);
599 :
600 : TruncateRegexpIndicesList(isolate);
601 :
602 36 : return *result;
603 : }
604 :
605 2286 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608 : DCHECK(subject->IsFlat());
609 : DCHECK(replacement->IsFlat());
610 :
611 2286 : int capture_count = regexp->CaptureCount();
612 : int subject_length = subject->length();
613 :
614 2286 : JSRegExp::Type typeTag = regexp->TypeTag();
615 2286 : if (typeTag == JSRegExp::IRREGEXP) {
616 : // Ensure the RegExp is compiled so we can access the capture-name map.
617 2169 : if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
618 : DCHECK(isolate->has_pending_exception());
619 0 : return ReadOnlyRoots(isolate).exception();
620 : }
621 : }
622 :
623 : // CompiledReplacement uses zone allocation.
624 4572 : Zone zone(isolate->allocator(), ZONE_NAME);
625 : CompiledReplacement compiled_replacement(&zone);
626 : const bool simple_replace = compiled_replacement.Compile(
627 2286 : isolate, regexp, replacement, capture_count, subject_length);
628 :
629 : // Shortcut for simple non-regexp global replacements
630 2286 : if (typeTag == JSRegExp::ATOM && simple_replace) {
631 72 : if (subject->IsOneByteRepresentation() &&
632 : replacement->IsOneByteRepresentation()) {
633 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
634 36 : isolate, subject, regexp, replacement, last_match_info);
635 : } else {
636 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
637 0 : isolate, subject, regexp, replacement, last_match_info);
638 : }
639 : }
640 :
641 2250 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
642 2250 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
643 :
644 : int32_t* current_match = global_cache.FetchNext();
645 2250 : if (current_match == nullptr) {
646 621 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
647 621 : return *subject;
648 : }
649 :
650 : // Guessing the number of parts that the final result string is built
651 : // from. Global regexps can match any number of times, so we guess
652 : // conservatively.
653 1629 : int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
654 1629 : ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
655 :
656 : int prev = 0;
657 :
658 : do {
659 1998 : int start = current_match[0];
660 1998 : int end = current_match[1];
661 :
662 1998 : if (prev < start) {
663 378 : builder.AddSubjectSlice(prev, start);
664 : }
665 :
666 1998 : if (simple_replace) {
667 180 : builder.AddString(replacement);
668 : } else {
669 1818 : compiled_replacement.Apply(&builder, start, end, current_match);
670 : }
671 : prev = end;
672 :
673 : current_match = global_cache.FetchNext();
674 1998 : } while (current_match != nullptr);
675 :
676 1629 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
677 :
678 1629 : if (prev < subject_length) {
679 486 : builder.AddSubjectSlice(prev, subject_length);
680 : }
681 :
682 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
683 1629 : global_cache.LastSuccessfulMatch());
684 :
685 3258 : RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
686 : }
687 :
688 : template <typename ResultSeqString>
689 0 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
690 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
691 : Handle<RegExpMatchInfo> last_match_info) {
692 : DCHECK(subject->IsFlat());
693 :
694 : // Shortcut for simple non-regexp global replacements
695 0 : if (regexp->TypeTag() == JSRegExp::ATOM) {
696 0 : Handle<String> empty_string = isolate->factory()->empty_string();
697 0 : if (subject->IsOneByteRepresentation()) {
698 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
699 0 : isolate, subject, regexp, empty_string, last_match_info);
700 : } else {
701 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
702 0 : isolate, subject, regexp, empty_string, last_match_info);
703 : }
704 : }
705 :
706 0 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
707 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
708 :
709 : int32_t* current_match = global_cache.FetchNext();
710 0 : if (current_match == nullptr) {
711 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
712 0 : return *subject;
713 : }
714 :
715 0 : int start = current_match[0];
716 0 : int end = current_match[1];
717 0 : int capture_count = regexp->CaptureCount();
718 : int subject_length = subject->length();
719 :
720 0 : int new_length = subject_length - (end - start);
721 0 : if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
722 :
723 : Handle<ResultSeqString> answer;
724 : if (ResultSeqString::kHasOneByteEncoding) {
725 0 : answer = Handle<ResultSeqString>::cast(
726 : isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
727 : } else {
728 0 : answer = Handle<ResultSeqString>::cast(
729 : isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
730 : }
731 :
732 : int prev = 0;
733 : int position = 0;
734 :
735 : DisallowHeapAllocation no_gc;
736 0 : do {
737 0 : start = current_match[0];
738 0 : end = current_match[1];
739 0 : if (prev < start) {
740 : // Add substring subject[prev;start] to answer string.
741 0 : String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
742 : start);
743 0 : position += start - prev;
744 : }
745 : prev = end;
746 :
747 : current_match = global_cache.FetchNext();
748 : } while (current_match != nullptr);
749 :
750 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
751 :
752 0 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
753 : global_cache.LastSuccessfulMatch());
754 :
755 0 : if (prev < subject_length) {
756 : // Add substring subject[prev;length] to answer string.
757 0 : String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
758 : subject_length);
759 0 : position += subject_length - prev;
760 : }
761 :
762 0 : if (position == 0) return ReadOnlyRoots(isolate).empty_string();
763 :
764 : // Shorten string and fill
765 : int string_size = ResultSeqString::SizeFor(position);
766 : int allocated_string_size = ResultSeqString::SizeFor(new_length);
767 0 : int delta = allocated_string_size - string_size;
768 :
769 : answer->set_length(position);
770 0 : if (delta == 0) return *answer;
771 :
772 0 : Address end_of_string = answer->address() + string_size;
773 : Heap* heap = isolate->heap();
774 :
775 : // The trimming is performed on a newly allocated object, which is on a
776 : // freshly allocated page or on an already swept page. Hence, the sweeper
777 : // thread can not get confused with the filler creation. No synchronization
778 : // needed.
779 : // TODO(hpayer): We should shrink the large object page if the size
780 : // of the object changed significantly.
781 0 : if (!heap->IsLargeObject(*answer)) {
782 0 : heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
783 : }
784 0 : return *answer;
785 : }
786 :
787 72310 : RUNTIME_FUNCTION(Runtime_StringSplit) {
788 : HandleScope handle_scope(isolate);
789 : DCHECK_EQ(3, args.length());
790 36155 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
791 36155 : CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
792 72310 : CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
793 36155 : CHECK_LT(0, limit);
794 :
795 36155 : int subject_length = subject->length();
796 : int pattern_length = pattern->length();
797 36155 : CHECK_LT(0, pattern_length);
798 :
799 36155 : if (limit == 0xFFFFFFFFu) {
800 36119 : FixedArray last_match_cache_unused;
801 : Handle<Object> cached_answer(
802 : RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
803 : &last_match_cache_unused,
804 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
805 108357 : isolate);
806 36119 : if (*cached_answer != Smi::kZero) {
807 : // The cache FixedArray is a COW-array and can therefore be reused.
808 : Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
809 3916 : Handle<FixedArray>::cast(cached_answer));
810 : return *result;
811 : }
812 : }
813 :
814 : // The limit can be very large (0xFFFFFFFFu), but since the pattern
815 : // isn't empty, we can never create more parts than ~half the length
816 : // of the subject.
817 :
818 32239 : subject = String::Flatten(isolate, subject);
819 32239 : pattern = String::Flatten(isolate, pattern);
820 :
821 32239 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
822 :
823 32239 : FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
824 :
825 32239 : if (static_cast<uint32_t>(indices->size()) < limit) {
826 32212 : indices->push_back(subject_length);
827 : }
828 :
829 : // The list indices now contains the end of each part to create.
830 :
831 : // Create JSArray of substrings separated by separator.
832 32239 : int part_count = static_cast<int>(indices->size());
833 :
834 : Handle<JSArray> result =
835 : isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
836 32239 : INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
837 :
838 : DCHECK(result->HasObjectElements());
839 :
840 : Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
841 :
842 32239 : if (part_count == 1 && indices->at(0) == subject_length) {
843 47846 : elements->set(0, *subject);
844 : } else {
845 : int part_start = 0;
846 296304 : FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
847 : int part_end = indices->at(i);
848 : Handle<String> substring =
849 : isolate->factory()->NewProperSubString(subject, part_start, part_end);
850 : elements->set(i, *substring);
851 : part_start = part_end + pattern_length;
852 : });
853 : }
854 :
855 32239 : if (limit == 0xFFFFFFFFu) {
856 32203 : if (result->HasObjectElements()) {
857 32203 : RegExpResultsCache::Enter(isolate, subject, pattern, elements,
858 : isolate->factory()->empty_fixed_array(),
859 32203 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
860 : }
861 : }
862 :
863 32239 : TruncateRegexpIndicesList(isolate);
864 :
865 : return *result;
866 : }
867 :
868 8675570 : RUNTIME_FUNCTION(Runtime_RegExpExec) {
869 : HandleScope scope(isolate);
870 : DCHECK_EQ(4, args.length());
871 4337785 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
872 4337785 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
873 8675570 : CONVERT_INT32_ARG_CHECKED(index, 2);
874 4337785 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
875 : // Due to the way the JS calls are constructed this must be less than the
876 : // length of a string, i.e. it is always a Smi. We check anyway for security.
877 4337785 : CHECK_LE(0, index);
878 4337785 : CHECK_GE(subject->length(), index);
879 4337785 : isolate->counters()->regexp_entry_runtime()->Increment();
880 8675570 : RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
881 : index, last_match_info));
882 : }
883 :
884 : namespace {
885 :
886 6636 : class MatchInfoBackedMatch : public String::Match {
887 : public:
888 3318 : MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
889 : Handle<String> subject,
890 : Handle<RegExpMatchInfo> match_info)
891 6636 : : isolate_(isolate), match_info_(match_info) {
892 3318 : subject_ = String::Flatten(isolate, subject);
893 :
894 3318 : if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
895 : Object o = regexp->CaptureNameMap();
896 3210 : has_named_captures_ = o->IsFixedArray();
897 3210 : if (has_named_captures_) {
898 99 : capture_name_map_ = handle(FixedArray::cast(o), isolate);
899 : }
900 : } else {
901 108 : has_named_captures_ = false;
902 : }
903 3318 : }
904 :
905 18 : Handle<String> GetMatch() override {
906 18 : return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
907 : }
908 :
909 18 : Handle<String> GetPrefix() override {
910 : const int match_start = match_info_->Capture(0);
911 18 : return isolate_->factory()->NewSubString(subject_, 0, match_start);
912 : }
913 :
914 18 : Handle<String> GetSuffix() override {
915 : const int match_end = match_info_->Capture(1);
916 18 : return isolate_->factory()->NewSubString(subject_, match_end,
917 18 : subject_->length());
918 : }
919 :
920 171 : bool HasNamedCaptures() override { return has_named_captures_; }
921 :
922 3318 : int CaptureCount() override {
923 3318 : return match_info_->NumberOfCaptureRegisters() / 2;
924 : }
925 :
926 7152 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
927 : Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
928 7152 : isolate_, match_info_, i, capture_exists);
929 7107 : return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
930 21411 : : isolate_->factory()->empty_string();
931 : }
932 :
933 81 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
934 : CaptureState* state) override {
935 : DCHECK(has_named_captures_);
936 162 : const int capture_index = LookupNamedCapture(
937 180 : [=](String capture_name) { return capture_name->Equals(*name); },
938 81 : *capture_name_map_);
939 :
940 81 : if (capture_index == -1) {
941 36 : *state = INVALID;
942 36 : return name; // Arbitrary string handle.
943 : }
944 :
945 : DCHECK(1 <= capture_index && capture_index <= CaptureCount());
946 :
947 : bool capture_exists;
948 : Handle<String> capture_value;
949 90 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
950 : GetCapture(capture_index, &capture_exists),
951 : String);
952 :
953 45 : if (!capture_exists) {
954 18 : *state = UNMATCHED;
955 36 : return isolate_->factory()->empty_string();
956 : } else {
957 27 : *state = MATCHED;
958 27 : return capture_value;
959 : }
960 : }
961 :
962 : private:
963 : Isolate* isolate_;
964 : Handle<String> subject_;
965 : Handle<RegExpMatchInfo> match_info_;
966 :
967 : bool has_named_captures_;
968 : Handle<FixedArray> capture_name_map_;
969 : };
970 :
971 441 : class VectorBackedMatch : public String::Match {
972 : public:
973 441 : VectorBackedMatch(Isolate* isolate, Handle<String> subject,
974 : Handle<String> match, int match_position,
975 : ZoneVector<Handle<Object>>* captures,
976 : Handle<Object> groups_obj)
977 : : isolate_(isolate),
978 : match_(match),
979 : match_position_(match_position),
980 882 : captures_(captures) {
981 441 : subject_ = String::Flatten(isolate, subject);
982 :
983 : DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
984 441 : has_named_captures_ = !groups_obj->IsUndefined(isolate);
985 441 : if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
986 441 : }
987 :
988 0 : Handle<String> GetMatch() override { return match_; }
989 :
990 0 : Handle<String> GetPrefix() override {
991 0 : return isolate_->factory()->NewSubString(subject_, 0, match_position_);
992 : }
993 :
994 0 : Handle<String> GetSuffix() override {
995 0 : const int match_end_position = match_position_ + match_->length();
996 0 : return isolate_->factory()->NewSubString(subject_, match_end_position,
997 0 : subject_->length());
998 : }
999 :
1000 288 : bool HasNamedCaptures() override { return has_named_captures_; }
1001 :
1002 882 : int CaptureCount() override { return static_cast<int>(captures_->size()); }
1003 :
1004 81 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1005 162 : Handle<Object> capture_obj = captures_->at(i);
1006 162 : if (capture_obj->IsUndefined(isolate_)) {
1007 0 : *capture_exists = false;
1008 0 : return isolate_->factory()->empty_string();
1009 : }
1010 81 : *capture_exists = true;
1011 81 : return Object::ToString(isolate_, capture_obj);
1012 : }
1013 :
1014 225 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
1015 : CaptureState* state) override {
1016 : DCHECK(has_named_captures_);
1017 :
1018 : Maybe<bool> maybe_capture_exists =
1019 225 : JSReceiver::HasProperty(groups_obj_, name);
1020 225 : if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1021 :
1022 225 : if (!maybe_capture_exists.FromJust()) {
1023 90 : *state = INVALID;
1024 90 : return name; // Arbitrary string handle.
1025 : }
1026 :
1027 : Handle<Object> capture_obj;
1028 270 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1029 : Object::GetProperty(isolate_, groups_obj_, name),
1030 : String);
1031 270 : if (capture_obj->IsUndefined(isolate_)) {
1032 36 : *state = UNMATCHED;
1033 72 : return isolate_->factory()->empty_string();
1034 : } else {
1035 99 : *state = MATCHED;
1036 99 : return Object::ToString(isolate_, capture_obj);
1037 : }
1038 : }
1039 :
1040 : private:
1041 : Isolate* isolate_;
1042 : Handle<String> subject_;
1043 : Handle<String> match_;
1044 : const int match_position_;
1045 : ZoneVector<Handle<Object>>* captures_;
1046 :
1047 : bool has_named_captures_;
1048 : Handle<JSReceiver> groups_obj_;
1049 : };
1050 :
1051 : // Create the groups object (see also the RegExp result creation in
1052 : // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1053 72 : Handle<JSObject> ConstructNamedCaptureGroupsObject(
1054 : Isolate* isolate, Handle<FixedArray> capture_map,
1055 : const std::function<Object(int)>& f_get_capture) {
1056 72 : Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1057 :
1058 72 : const int capture_count = capture_map->length() >> 1;
1059 360 : for (int i = 0; i < capture_count; i++) {
1060 144 : const int name_ix = i * 2;
1061 144 : const int index_ix = i * 2 + 1;
1062 :
1063 : Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1064 : isolate);
1065 : const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1066 : DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1067 :
1068 : Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1069 : DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1070 :
1071 144 : JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1072 : }
1073 :
1074 72 : return groups;
1075 : }
1076 :
1077 : // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1078 : // separate last match info. See comment on that function.
//
// Runs |regexp| over the flat |subject| through a RegExpImpl::GlobalCache and
// collects one entry per match in a FixedArrayBuilder seeded with
// |result_array|'s elements. With has_capture == false the entry is the
// matched substring; with has_capture == true it is a JSArray holding the
// match, every capture (undefined for captures with a negative start), the
// match start, the subject and, if the regexp has a capture name map, a groups
// object. Stretches of the subject between matches are recorded through
// ReplacementStringBuilder::AddSubjectSlice.
//
// Returns the array produced by builder.ToJSArray(result_array) when at least
// one match was found (or |result_array| itself on a cache hit), null when
// there was no match, and the exception value when the global cache reports
// an exception. |last_match_array| is updated whenever a match was found.
1079 : template <bool has_capture>
1080 93248 : static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1081 : Handle<JSRegExp> regexp,
1082 : Handle<RegExpMatchInfo> last_match_array,
1083 : Handle<JSArray> result_array) {
1084 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1085 : DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1086 : DCHECK(subject->IsFlat());
1087 :
1088 93248 : int capture_count = regexp->CaptureCount();
1089 : int subject_length = subject->length();
1090 :
// Results are looked up in and entered into RegExpResultsCache only for
// subjects longer than this.
1091 : static const int kMinLengthToCache = 0x1000;
1092 :
1093 93248 : if (subject_length > kMinLengthToCache) {
1094 36 : FixedArray last_match_cache;
1095 : Object cached_answer = RegExpResultsCache::Lookup(
1096 : isolate->heap(), *subject, regexp->data(), &last_match_cache,
1097 36 : RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1098 36 : if (cached_answer->IsFixedArray()) {
// Cache hit: rebuild the last-match registers from the cached Smis, install
// a copy of the cached elements in |result_array| and update
// |last_match_array|.
1099 0 : int capture_registers = (capture_count + 1) * 2;
1100 0 : int32_t* last_match = NewArray<int32_t>(capture_registers);
1101 0 : for (int i = 0; i < capture_registers; i++) {
1102 0 : last_match[i] = Smi::ToInt(last_match_cache->get(i));
1103 : }
1104 : Handle<FixedArray> cached_fixed_array =
1105 : Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1106 : // The cache FixedArray is a COW-array and we need to return a copy.
1107 : Handle<FixedArray> copied_fixed_array =
1108 : isolate->factory()->CopyFixedArrayWithMap(
1109 0 : cached_fixed_array, isolate->factory()->fixed_array_map());
1110 0 : JSArray::SetContent(result_array, copied_fixed_array);
1111 0 : RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1112 : capture_count, last_match);
1113 : DeleteArray(last_match);
1114 0 : return *result_array;
1115 : }
1116 : }
1117 :
1118 93248 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1119 93343 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1120 :
1121 : // Ensured in Runtime_RegExpExecMultiple.
1122 : DCHECK(result_array->HasObjectElements());
1123 : Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1124 : isolate);
// Start from a fresh 16-element backing store if the provided one is smaller.
1125 93153 : if (result_elements->length() < 16) {
1126 0 : result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1127 : }
1128 :
1129 93153 : FixedArrayBuilder builder(result_elements);
1130 :
1131 : // Position to search from.
1132 : int match_start = -1;
1133 : int match_end = 0;
1134 : bool first = true;
1135 :
1136 : // Two smis before and after the match, for very long strings.
1137 : static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1138 :
1139 363508 : while (true) {
1140 : int32_t* current_match = global_cache.FetchNext();
1141 456661 : if (current_match == nullptr) break;
1142 363508 : match_start = current_match[0];
1143 363508 : builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
// Record the unmatched text between the previous match and this one.
1144 363508 : if (match_end < match_start) {
1145 58326 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1146 : match_start);
1147 : }
1148 363508 : match_end = current_match[1];
1149 : {
1150 : // Avoid accumulating new handles inside loop.
1151 : HandleScope temp_scope(isolate);
1152 : Handle<String> match;
// Only the first match is created with NewSubString; all later ones use
// NewProperSubString.
1153 363508 : if (!first) {
1154 270413 : match = isolate->factory()->NewProperSubString(subject, match_start,
1155 : match_end);
1156 : } else {
1157 93095 : match =
1158 : isolate->factory()->NewSubString(subject, match_start, match_end);
1159 : first = false;
1160 : }
1161 :
1162 : if (has_capture) {
1163 : // Arguments array to replace function is match, captures, index and
1164 : // subject, i.e., 3 + capture count in total. If the RegExp contains
1165 : // named captures, they are also passed as the last argument.
1166 :
1167 : Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1168 : const bool has_named_captures = maybe_capture_map->IsFixedArray();
1169 :
1170 : const int argc =
1171 172209 : has_named_captures ? 4 + capture_count : 3 + capture_count;
1172 :
1173 172209 : Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1174 : int cursor = 0;
1175 :
1176 344418 : elements->set(cursor++, *match);
// Capture i occupies registers 2*i (start) and 2*i + 1 (end); a negative
// start marks a capture without a value.
1177 1045537 : for (int i = 1; i <= capture_count; i++) {
1178 436664 : int start = current_match[i * 2];
1179 436664 : if (start >= 0) {
1180 436571 : int end = current_match[i * 2 + 1];
1181 : DCHECK(start <= end);
1182 : Handle<String> substring =
1183 436571 : isolate->factory()->NewSubString(subject, start, end);
1184 873142 : elements->set(cursor++, *substring);
1185 : } else {
1186 : DCHECK_GT(0, current_match[i * 2 + 1]);
1187 186 : elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1188 : }
1189 : }
1190 :
1191 172209 : elements->set(cursor++, Smi::FromInt(match_start));
1192 344418 : elements->set(cursor++, *subject);
1193 :
1194 172209 : if (has_named_captures) {
// The groups object reads capture values back out of |elements|, where
// slot ix holds capture ix (slot 0 is the match).
1195 : Handle<FixedArray> capture_map =
1196 54 : Handle<FixedArray>::cast(maybe_capture_map);
1197 54 : Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1198 54 : isolate, capture_map, [=](int ix) { return elements->get(ix); });
1199 108 : elements->set(cursor++, *groups);
1200 : }
1201 :
1202 : DCHECK_EQ(cursor, argc);
1203 172209 : builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1204 : } else {
1205 191299 : builder.Add(*match);
1206 : }
1207 : }
1208 : }
1209 :
1210 93198 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1211 :
// match_start is still -1 if the loop above never saw a match.
1212 93108 : if (match_start >= 0) {
1213 : // Finished matching, with at least one match.
1214 93095 : if (match_end < subject_length) {
1215 411 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1216 : subject_length);
1217 : }
1218 :
1219 93095 : RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1220 : capture_count,
1221 : global_cache.LastSuccessfulMatch());
1222 :
1223 93095 : if (subject_length > kMinLengthToCache) {
1224 : // Store the last successful match into the array for caching.
1225 : // TODO(yangguo): do not expose last match to JS and simplify caching.
1226 36 : int capture_registers = (capture_count + 1) * 2;
1227 : Handle<FixedArray> last_match_cache =
1228 36 : isolate->factory()->NewFixedArray(capture_registers);
1229 : int32_t* last_match = global_cache.LastSuccessfulMatch();
1230 288 : for (int i = 0; i < capture_registers; i++) {
1231 126 : last_match_cache->set(i, Smi::FromInt(last_match[i]));
1232 : }
1233 : Handle<FixedArray> result_fixed_array =
1234 36 : FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1235 : // Cache the result and copy the FixedArray into a COW array.
1236 : Handle<FixedArray> copied_fixed_array =
1237 : isolate->factory()->CopyFixedArrayWithMap(
1238 36 : result_fixed_array, isolate->factory()->fixed_array_map());
1239 36 : RegExpResultsCache::Enter(
1240 : isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1241 : last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1242 : }
1243 186190 : return *builder.ToJSArray(result_array);
1244 : } else {
1245 13 : return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1246 : }
1247 : }
1248 :
1249 : // Legacy implementation of RegExp.prototype[Symbol.replace] which
1250 : // doesn't properly call the underlying exec method.
1251 5631 : V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1252 : Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1253 : Handle<String> replace) {
1254 : // Functional fast-paths are dispatched directly by replace builtin.
1255 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1256 :
1257 : Factory* factory = isolate->factory();
1258 :
1259 : const int flags = regexp->GetFlags();
1260 5631 : const bool global = (flags & JSRegExp::kGlobal) != 0;
1261 5631 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1262 :
1263 5631 : replace = String::Flatten(isolate, replace);
1264 :
1265 5631 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1266 :
1267 5631 : if (!global) {
1268 : // Non-global regexp search, string replace.
1269 :
1270 : uint32_t last_index = 0;
1271 3345 : if (sticky) {
1272 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1273 72 : ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1274 : Object::ToLength(isolate, last_index_obj),
1275 : String);
1276 : last_index = PositiveNumberToUint32(*last_index_obj);
1277 : }
1278 :
1279 : Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1280 : isolate);
1281 :
1282 : // A lastIndex exceeding the string length always returns null (signalling
1283 : // failure) in RegExpBuiltinExec, thus we can skip the call.
1284 3345 : if (last_index <= static_cast<uint32_t>(string->length())) {
1285 6654 : ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1286 : RegExpImpl::Exec(isolate, regexp, string,
1287 : last_index, last_match_info),
1288 : String);
1289 : }
1290 :
1291 3345 : if (match_indices_obj->IsNull(isolate)) {
1292 27 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1293 27 : return string;
1294 : }
1295 :
1296 : auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1297 :
1298 : const int start_index = match_indices->Capture(0);
1299 : const int end_index = match_indices->Capture(1);
1300 :
1301 3318 : if (sticky) {
1302 : regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1303 : }
1304 :
1305 3318 : IncrementalStringBuilder builder(isolate);
1306 3318 : builder.AppendString(factory->NewSubString(string, 0, start_index));
1307 :
1308 3318 : if (replace->length() > 0) {
1309 3318 : MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1310 : Handle<String> replacement;
1311 6636 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1312 : String::GetSubstitution(isolate, &m, replace),
1313 : String);
1314 3318 : builder.AppendString(replacement);
1315 : }
1316 :
1317 3318 : builder.AppendString(
1318 3318 : factory->NewSubString(string, end_index, string->length()));
1319 3318 : return builder.Finish();
1320 : } else {
1321 : // Global regexp search, string replace.
1322 : DCHECK(global);
1323 4572 : RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1324 : String);
1325 :
1326 2286 : if (replace->length() == 0) {
1327 0 : if (string->IsOneByteRepresentation()) {
1328 : Object result =
1329 : StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1330 0 : isolate, string, regexp, last_match_info);
1331 0 : return handle(String::cast(result), isolate);
1332 : } else {
1333 : Object result =
1334 : StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1335 0 : isolate, string, regexp, last_match_info);
1336 0 : return handle(String::cast(result), isolate);
1337 : }
1338 : }
1339 :
1340 : Object result = StringReplaceGlobalRegExpWithString(
1341 2286 : isolate, string, regexp, replace, last_match_info);
1342 2286 : if (result->IsString()) {
1343 2286 : return handle(String::cast(result), isolate);
1344 : } else {
1345 0 : return MaybeHandle<String>();
1346 : }
1347 : }
1348 :
1349 : UNREACHABLE();
1350 : }
1351 :
1352 : } // namespace
1353 :
1354 : // This is only called for StringReplaceGlobalRegExpWithFunction.
1355 186496 : RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1356 : HandleScope handles(isolate);
1357 : DCHECK_EQ(4, args.length());
1358 :
1359 93248 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1360 93248 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1361 93248 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1362 93248 : CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1363 :
1364 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1365 93248 : CHECK(result_array->HasObjectElements());
1366 :
1367 93248 : subject = String::Flatten(isolate, subject);
1368 186496 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1369 :
1370 : Object result;
1371 93248 : if (regexp->CaptureCount() == 0) {
1372 : result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1373 90272 : last_match_info, result_array);
1374 : } else {
1375 : result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1376 2976 : last_match_info, result_array);
1377 : }
1378 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1379 : return result;
1380 : }
1381 :
// Replaces the first match of the non-global |regexp| in |subject| with the
// string value returned by calling |replace_obj|.
//
// Takes three arguments: the subject String, the JSRegExp and the replacement
// JSReceiver (checked to be callable in debug builds). The callable receives
// the match, every capture (undefined for captures GenericCaptureGetter
// reports as missing), the match index, the subject and, if the regexp has a
// capture name map, a groups object.
//
// Returns |subject| unchanged when there is no match. Throws a RangeError
// (kTooManyArguments) when GetArgcForReplaceCallable rejects the argument
// count, and returns the failure value when any of the fallible steps fails.
1382 19016 : RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1383 : HandleScope scope(isolate);
1384 : DCHECK_EQ(3, args.length());
1385 9508 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1386 9508 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1387 9508 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1388 :
1389 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1390 : DCHECK(replace_obj->map()->is_callable());
1391 :
1392 : Factory* factory = isolate->factory();
1393 9508 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1394 :
1395 : const int flags = regexp->GetFlags();
1396 : DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1397 :
1398 : // TODO(jgruber): This should be an easy port to CSA with massive payback.
1399 :
// Sticky regexps start matching at lastIndex (converted with ToLength); all
// others start at 0.
1400 9508 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1401 : uint32_t last_index = 0;
1402 9508 : if (sticky) {
1403 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1404 36 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1405 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1406 : last_index = PositiveNumberToUint32(*last_index_obj);
1407 : }
1408 :
1409 : Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1410 : isolate);
1411 :
1412 : // A lastIndex exceeding the string length always returns null (signalling
1413 : // failure) in RegExpBuiltinExec, thus we can skip the call.
1414 9508 : if (last_index <= static_cast<uint32_t>(subject->length())) {
1415 18980 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1416 : isolate, match_indices_obj,
1417 : RegExpImpl::Exec(isolate, regexp, subject, last_index,
1418 : last_match_info));
1419 : }
1420 :
// No match: a sticky regexp gets lastIndex reset to 0 and the subject is
// returned as is.
1421 9499 : if (match_indices_obj->IsNull(isolate)) {
1422 9217 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1423 : return *subject;
1424 : }
1425 :
1426 : Handle<RegExpMatchInfo> match_indices =
1427 282 : Handle<RegExpMatchInfo>::cast(match_indices_obj);
1428 :
1429 : const int index = match_indices->Capture(0);
1430 : const int end_of_match = match_indices->Capture(1);
1431 :
// A successful sticky match moves lastIndex to the end of the match. This
// happens before the replacement callable runs.
1432 282 : if (sticky) {
1433 : regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1434 : }
1435 :
1436 282 : IncrementalStringBuilder builder(isolate);
1437 282 : builder.AppendString(factory->NewSubString(subject, 0, index));
1438 :
1439 : // Compute the parameter list consisting of the match, captures, index,
1440 : // and subject for the replace function invocation. If the RegExp contains
1441 : // named captures, they are also passed as the last argument.
1442 :
1443 : // The number of captures plus one for the match.
1444 282 : const int m = match_indices->NumberOfCaptureRegisters() / 2;
1445 :
1446 : bool has_named_captures = false;
1447 : Handle<FixedArray> capture_map;
1448 282 : if (m > 1) {
1449 : // The existence of capture groups implies IRREGEXP kind.
1450 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1451 :
1452 : Object maybe_capture_map = regexp->CaptureNameMap();
1453 108 : if (maybe_capture_map->IsFixedArray()) {
1454 : has_named_captures = true;
1455 18 : capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1456 : }
1457 : }
1458 :
// GetArgcForReplaceCallable signals "too many arguments" with -1 converted to
// uint32_t.
1459 282 : const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1460 282 : if (argc == static_cast<uint32_t>(-1)) {
1461 0 : THROW_NEW_ERROR_RETURN_FAILURE(
1462 : isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1463 : }
1464 282 : ScopedVector<Handle<Object>> argv(argc);
1465 :
// argv[0..m) holds the match followed by its captures.
1466 : int cursor = 0;
1467 1152 : for (int j = 0; j < m; j++) {
1468 : bool ok;
1469 : Handle<String> capture =
1470 435 : RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1471 435 : if (ok) {
1472 726 : argv[cursor++] = capture;
1473 : } else {
1474 144 : argv[cursor++] = factory->undefined_value();
1475 : }
1476 : }
1477 :
1478 564 : argv[cursor++] = handle(Smi::FromInt(index), isolate);
1479 564 : argv[cursor++] = subject;
1480 :
// The groups object reads the capture values back out of argv by index.
1481 282 : if (has_named_captures) {
1482 18 : argv[cursor++] = ConstructNamedCaptureGroupsObject(
1483 72 : isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1484 : }
1485 :
1486 : DCHECK_EQ(cursor, argc);
1487 :
1488 : Handle<Object> replacement_obj;
1489 564 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1490 : isolate, replacement_obj,
1491 : Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1492 : argv.start()));
1493 :
1494 : Handle<String> replacement;
1495 564 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1496 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1497 :
// Result = text before the match + replacement + text after the match.
1498 282 : builder.AppendString(replacement);
1499 282 : builder.AppendString(
1500 282 : factory->NewSubString(subject, end_of_match, subject->length()));
1501 :
1502 564 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1503 : }
1504 :
1505 : namespace {
1506 :
1507 278 : V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1508 : Handle<Object> object,
1509 : uint32_t* out) {
1510 278 : if (object->IsUndefined(isolate)) {
1511 89 : *out = kMaxUInt32;
1512 89 : return object;
1513 : }
1514 :
1515 : Handle<Object> number;
1516 378 : ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1517 : Object);
1518 189 : *out = NumberToUint32(*number);
1519 189 : return object;
1520 : }
1521 :
1522 224 : Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1523 : Handle<FixedArray> elems,
1524 : int num_elems) {
1525 : return isolate->factory()->NewJSArrayWithElements(
1526 448 : FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1527 : }
1528 :
1529 : } // namespace
1530 :
1531 : // Slow path for:
1532 : // ES#sec-regexp.prototype-@@split
1533 : // RegExp.prototype [ @@split ] ( string, limit )
//
// Takes three arguments: the receiver |recv|, the String to split and the
// limit object. A splitter is constructed from the receiver's species
// constructor with the receiver's flags plus 'y', and is then executed at
// increasing positions of |string|. The substrings between matches and the
// elements 1..length-1 of every exec result are collected into a JSArray that
// is returned as soon as it holds |limit| elements.
1534 556 : RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1535 : HandleScope scope(isolate);
1536 : DCHECK_EQ(3, args.length());
1537 :
1538 278 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1539 278 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1540 : CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1541 :
1542 : Factory* factory = isolate->factory();
1543 :
1544 278 : Handle<JSFunction> regexp_fun = isolate->regexp_function();
1545 : Handle<Object> ctor;
1546 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1547 : isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1548 :
1549 : Handle<Object> flags_obj;
1550 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1551 : isolate, flags_obj,
1552 : JSObject::GetProperty(isolate, recv, factory->flags_string()));
1553 :
1554 : Handle<String> flags;
1555 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1556 : Object::ToString(isolate, flags_obj));
1557 :
// The unicode and sticky settings are derived from the flags string, by
// searching it for 'u' and 'y'.
1558 278 : Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1559 278 : const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1560 :
1561 278 : Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1562 278 : const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1563 :
// The splitter is always created with 'y': append it unless already present.
1564 : Handle<String> new_flags = flags;
1565 278 : if (!sticky) {
1566 484 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1567 : factory->NewConsString(flags, y_str));
1568 : }
1569 :
1570 : Handle<JSReceiver> splitter;
1571 : {
1572 : const int argc = 2;
1573 :
1574 278 : ScopedVector<Handle<Object>> argv(argc);
1575 278 : argv[0] = recv;
1576 278 : argv[1] = new_flags;
1577 :
1578 : Handle<Object> splitter_obj;
1579 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1580 : isolate, splitter_obj,
1581 : Execution::New(isolate, ctor, argc, argv.start()));
1582 :
1583 278 : splitter = Handle<JSReceiver>::cast(splitter_obj);
1584 : }
1585 :
// The limit is converted only after the splitter has been constructed.
1586 : uint32_t limit;
1587 556 : RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1588 :
1589 278 : const uint32_t length = string->length();
1590 :
// A limit of 0 yields an empty array.
1591 278 : if (limit == 0) return *factory->NewJSArray(0);
1592 :
// Empty input: the result is an empty array if the splitter matches, and a
// one-element array holding |string| otherwise.
1593 233 : if (length == 0) {
1594 : Handle<Object> result;
1595 18 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1596 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1597 : factory->undefined_value()));
1598 :
1599 9 : if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1600 :
1601 9 : Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1602 18 : elems->set(0, *string);
1603 : return *factory->NewJSArrayWithElements(elems);
1604 : }
1605 :
1606 : static const int kInitialArraySize = 8;
1607 224 : Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1608 : uint32_t num_elems = 0;
1609 :
// string_index is the position currently being tried; prev_string_index is
// where the next emitted substring starts.
1610 : uint32_t string_index = 0;
1611 : uint32_t prev_string_index = 0;
1612 1103 : while (string_index < length) {
1613 1938 : RETURN_FAILURE_ON_EXCEPTION(
1614 : isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1615 :
1616 : Handle<Object> result;
1617 1938 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1618 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1619 : factory->undefined_value()));
1620 :
// No match at this position: advance and try again.
1621 969 : if (result->IsNull(isolate)) {
1622 : string_index = static_cast<uint32_t>(
1623 493 : RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1624 : continue;
1625 : }
1626 :
1627 : Handle<Object> last_index_obj;
1628 952 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1629 : isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1630 :
1631 952 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1632 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1633 :
// The splitter's lastIndex, clamped to the string length, is the end of the
// match. A match ending where the current piece starts is skipped.
1634 : const uint32_t end =
1635 476 : std::min(PositiveNumberToUint32(*last_index_obj), length);
1636 476 : if (end == prev_string_index) {
1637 : string_index = static_cast<uint32_t>(
1638 63 : RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1639 : continue;
1640 : }
1641 :
1642 : {
1643 : Handle<String> substr =
1644 413 : factory->NewSubString(string, prev_string_index, string_index);
1645 413 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1646 413 : if (num_elems == limit) {
1647 180 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1648 : }
1649 : }
1650 :
1651 : prev_string_index = end;
1652 :
1653 : Handle<Object> num_captures_obj;
1654 646 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1655 : isolate, num_captures_obj,
1656 : Object::GetProperty(isolate, result,
1657 : isolate->factory()->length_string()));
1658 :
1659 646 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1660 : isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1661 : const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1662 :
// Element 0 of the exec result is skipped; elements 1..length-1 are appended
// after the substring.
1663 323 : for (uint32_t i = 1; i < num_captures; i++) {
1664 : Handle<Object> capture;
1665 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1666 : isolate, capture, Object::GetElement(isolate, result, i));
1667 0 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1668 0 : if (num_elems == limit) {
1669 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1670 : }
1671 : }
1672 :
1673 : string_index = prev_string_index;
1674 : }
1675 :
// The remainder after the last split point becomes the final element.
1676 : {
1677 : Handle<String> substr =
1678 134 : factory->NewSubString(string, prev_string_index, length);
1679 134 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1680 : }
1681 :
1682 268 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1683 : }
1684 :
1685 : // Slow path for:
1686 : // ES#sec-regexp.prototype-@@replace
1687 : // RegExp.prototype [ @@replace ] ( string, replaceValue )
       //
       // Arguments (exactly three are expected):
       //   0: recv        - the receiver, a JSReceiver (not necessarily a JSRegExp).
       //   1: string      - the subject string; flattened before use.
       //   2: replace_obj - either a callable (functional replace) or a value that
       //                    is converted to a replacement pattern string.
       //
       // Returns the string built by the replacement, or a failure sentinel when
       // any of the invoked operations leaves a pending exception. The function
       // works in three phases: (1) collect exec results, (2) compute one
       // replacement per result and append it to a string builder, (3) append the
       // remaining tail of the subject.
1688 12378 : RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1689 : HandleScope scope(isolate);
1690 : DCHECK_EQ(3, args.length());
1691 :
1692 6189 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1693 6189 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1694 : Handle<Object> replace_obj = args.at(2);
1695 :
1696 : Factory* factory = isolate->factory();
1697 :
1698 6189 : string = String::Flatten(isolate, string);
1699 :
1700 : const bool functional_replace = replace_obj->IsCallable();
1701 :
       // `replace` is only populated for the non-functional case; it stays an
       // empty handle when the replace value is callable.
1702 : Handle<String> replace;
1703 6189 : if (!functional_replace) {
1704 12252 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1705 : Object::ToString(isolate, replace_obj));
1706 : }
1707 :
1708 : // Fast-path for unmodified JSRegExps (and non-functional replace).
1709 6189 : if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1710 : // We should never get here with functional replace because unmodified
1711 : // regexp and functional replace should be fully handled in CSA code.
1712 5631 : CHECK(!functional_replace);
1713 : Handle<Object> result;
1714 11262 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1715 : isolate, result,
1716 : RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1717 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1718 : return *result;
1719 : }
1720 :
       // Generic path from here on: every interaction with `recv` and with the
       // exec results goes through ordinary property lookups.
1721 558 : const uint32_t length = string->length();
1722 :
1723 : Handle<Object> global_obj;
1724 1116 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1725 : isolate, global_obj,
1726 : JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1727 558 : const bool global = global_obj->BooleanValue(isolate);
1728 :
       // `unicode` is only read for global regexps; in that case lastIndex is
       // also reset to 0 before matching starts.
1729 : bool unicode = false;
1730 558 : if (global) {
1731 : Handle<Object> unicode_obj;
1732 612 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1733 : isolate, unicode_obj,
1734 : JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1735 306 : unicode = unicode_obj->BooleanValue(isolate);
1736 :
1737 612 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1738 : RegExpUtils::SetLastIndex(isolate, recv, 0));
1739 : }
1740 :
1741 1116 : Zone zone(isolate->allocator(), ZONE_NAME);
1742 1116 : ZoneVector<Handle<Object>> results(&zone);
1743 :
       // Phase 1: call RegExpExec until it yields null, recording every non-null
       // result. A non-global regexp stops after its first result.
1744 : while (true) {
1745 : Handle<Object> result;
1746 1800 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1747 : isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1748 : factory->undefined_value()));
1749 :
1750 891 : if (result->IsNull(isolate)) break;
1751 :
1752 540 : results.push_back(result);
1753 540 : if (!global) break;
1754 :
1755 : Handle<Object> match_obj;
1756 684 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1757 : Object::GetElement(isolate, result, 0));
1758 :
1759 : Handle<String> match;
1760 684 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1761 : Object::ToString(isolate, match_obj));
1762 :
       // After an empty match, advance the string index via
       // SetAdvancedStringIndex before the next exec call.
1763 342 : if (match->length() == 0) {
1764 180 : RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1765 : isolate, recv, string, unicode));
1766 : }
1767 : }
1768 :
1769 : // TODO(jgruber): Look into ReplacementStringBuilder instead.
1770 549 : IncrementalStringBuilder builder(isolate);
       // Index into `string` of the first character not yet copied to `builder`.
1771 : uint32_t next_source_position = 0;
1772 :
       // Phase 2: compute and append the replacement for each recorded result.
1773 549 : for (const auto& result : results) {
1774 : HandleScope handle_scope(isolate);
1775 : Handle<Object> captures_length_obj;
1776 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1777 : isolate, captures_length_obj,
1778 : Object::GetProperty(isolate, result, factory->length_string()));
1779 :
1780 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1781 : isolate, captures_length_obj,
1782 : Object::ToLength(isolate, captures_length_obj));
1783 : const uint32_t captures_length =
1784 : PositiveNumberToUint32(*captures_length_obj);
1785 :
1786 : Handle<Object> match_obj;
1787 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1788 : Object::GetElement(isolate, result, 0));
1789 :
1790 : Handle<String> match;
1791 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1792 : Object::ToString(isolate, match_obj));
1793 :
1794 : const int match_length = match->length();
1795 :
1796 : Handle<Object> position_obj;
1797 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1798 : isolate, position_obj,
1799 : Object::GetProperty(isolate, result, factory->index_string()));
1800 :
1801 540 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1802 : isolate, position_obj, Object::ToInteger(isolate, position_obj));
       // The reported match index is capped at the subject length.
1803 : const uint32_t position =
1804 1080 : std::min(PositiveNumberToUint32(*position_obj), length);
1805 :
1806 : // Do not reserve capacity since captures_length is user-controlled.
1807 1071 : ZoneVector<Handle<Object>> captures(&zone);
1808 :
       // Collect elements [0, captures_length) of the result, so element 0 (the
       // match itself) is included. Undefined entries are kept as undefined;
       // everything else is converted to a string.
1809 2362626 : for (uint32_t n = 0; n < captures_length; n++) {
1810 : Handle<Object> capture;
1811 2362086 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1812 : isolate, capture, Object::GetElement(isolate, result, n));
1813 :
1814 1181043 : if (!capture->IsUndefined(isolate)) {
1815 2214 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1816 : Object::ToString(isolate, capture));
1817 : }
1818 1181043 : captures.push_back(capture);
1819 : }
1820 :
1821 : Handle<Object> groups_obj = isolate->factory()->undefined_value();
1822 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1823 : isolate, groups_obj,
1824 : Object::GetProperty(isolate, result, factory->groups_string()));
1825 :
1826 : const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1827 :
1828 : Handle<String> replacement;
1829 540 : if (functional_replace) {
       // Functional replace: call replace_obj with
       // (captures..., position, string[, groups]) and stringify its result.
       // GetArgcForReplaceCallable signals "too many arguments" with -1, which
       // is turned into a RangeError here.
1830 : const uint32_t argc =
1831 : GetArgcForReplaceCallable(captures_length, has_named_captures);
1832 99 : if (argc == static_cast<uint32_t>(-1)) {
1833 18 : THROW_NEW_ERROR_RETURN_FAILURE(
1834 : isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1835 : }
1836 :
1837 90 : ScopedVector<Handle<Object>> argv(argc);
1838 :
1839 : int cursor = 0;
1840 558 : for (uint32_t j = 0; j < captures_length; j++) {
1841 702 : argv[cursor++] = captures[j];
1842 : }
1843 :
1844 270 : argv[cursor++] = handle(Smi::FromInt(position), isolate);
1845 180 : argv[cursor++] = string;
1846 162 : if (has_named_captures) argv[cursor++] = groups_obj;
1847 :
       // NOTE(review): `cursor` is int while `argc` is uint32_t, so this is a
       // mixed-signedness comparison in debug builds - confirm it is intended.
1848 : DCHECK_EQ(cursor, argc);
1849 :
1850 : Handle<Object> replacement_obj;
1851 180 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1852 : isolate, replacement_obj,
1853 : Execution::Call(isolate, replace_obj, factory->undefined_value(),
1854 : argc, argv.start()));
1855 :
1856 180 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1857 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1858 : } else {
       // Pattern replace: expand `replace` against this match through
       // String::GetSubstitution. A non-undefined groups value is first
       // converted to an object.
1859 : DCHECK(!functional_replace);
1860 441 : if (!groups_obj->IsUndefined(isolate)) {
1861 558 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1862 : isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1863 : }
1864 : VectorBackedMatch m(isolate, string, match, position, &captures,
1865 441 : groups_obj);
1866 882 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1867 : isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1868 : }
1869 :
       // Only results at or beyond the already-consumed position contribute:
       // copy the untouched gap, then the replacement, and move the consumed
       // position past the match. Results positioned before it are dropped.
1870 531 : if (position >= next_source_position) {
1871 531 : builder.AppendString(
1872 531 : factory->NewSubString(string, next_source_position, position));
1873 531 : builder.AppendString(replacement);
1874 :
1875 531 : next_source_position = position + match_length;
1876 : }
1877 : }
1878 :
       // Phase 3: append whatever remains of the subject after the last
       // replacement.
1879 540 : if (next_source_position < length) {
1880 324 : builder.AppendString(
1881 324 : factory->NewSubString(string, next_source_position, length));
1882 : }
1883 :
1884 1080 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1885 : }
1886 :
       // Runtime entry that initializes a JSRegExp from a source string and a
       // flags string.
       //
       // Arguments (exactly three are expected):
       //   0: regexp - the JSRegExp to initialize.
       //   1: source - the pattern source, a String.
       //   2: flags  - the flags, a String.
       //
       // Returns the same regexp object on success, or a failure sentinel when
       // JSRegExp::Initialize leaves a pending exception.
1887 757022 : RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1888 : HandleScope scope(isolate);
1889 : DCHECK_EQ(3, args.length());
1890 : // TODO(pwong): To follow the spec more closely and simplify calling code,
1891 : // this could handle the canonicalization of pattern and flags. See
1892 : // https://tc39.github.io/ecma262/#sec-regexpinitialize
1893 378511 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1894 378511 : CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1895 378511 : CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1896 :
1897 757022 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1898 : JSRegExp::Initialize(regexp, source, flags));
1899 :
1900 : return *regexp;
1901 : }
1902 :
       // Runtime entry that reports whether its single argument is a JSRegExp.
       //
       // Arguments (exactly one is expected):
       //   0: obj - any Object.
       //
       // Returns the heap's boolean value for obj->IsJSRegExp().
1903 0 : RUNTIME_FUNCTION(Runtime_IsRegExp) {
1904 : SealHandleScope shs(isolate);
1905 : DCHECK_EQ(1, args.length());
1906 : CONVERT_ARG_CHECKED(Object, obj, 0);
1907 : return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1908 : }
1909 :
1910 : } // namespace internal
1911 122004 : } // namespace v8
|