Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include <functional>
6 :
7 : #include "src/arguments-inl.h"
8 : #include "src/conversions-inl.h"
9 : #include "src/counters.h"
10 : #include "src/isolate-inl.h"
11 : #include "src/message-template.h"
12 : #include "src/objects/js-array-inl.h"
13 : #include "src/regexp/jsregexp-inl.h"
14 : #include "src/regexp/jsregexp.h"
15 : #include "src/regexp/regexp-utils.h"
16 : #include "src/runtime/runtime-utils.h"
17 : #include "src/string-builder-inl.h"
18 : #include "src/string-search.h"
19 : #include "src/zone/zone-chunk-list.h"
20 :
21 : namespace v8 {
22 : namespace internal {
23 :
24 : namespace {
25 :
26 : // Returns -1 for failure.
27 381 : uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
28 : bool has_named_captures) {
29 : const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
30 : const uint32_t kAdditionalArgsWithNamedCaptures = 3;
31 381 : if (num_captures > Code::kMaxArguments) return -1;
32 : uint32_t argc = has_named_captures
33 : ? num_captures + kAdditionalArgsWithNamedCaptures
34 372 : : num_captures + kAdditionalArgsWithoutNamedCaptures;
35 : STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
36 : kAdditionalArgsWithNamedCaptures);
37 372 : return (argc > Code::kMaxArguments) ? -1 : argc;
38 : }
39 :
40 : // Looks up the capture of the given name. Returns the (1-based) numbered
41 : // capture index or -1 on failure.
42 135 : int LookupNamedCapture(const std::function<bool(String)>& name_matches,
43 : FixedArray capture_name_map) {
44 : // TODO(jgruber): Sort capture_name_map and do binary search via
45 : // internalized strings.
46 :
47 : int maybe_capture_index = -1;
48 135 : const int named_capture_count = capture_name_map->length() >> 1;
49 378 : for (int j = 0; j < named_capture_count; j++) {
50 : // The format of {capture_name_map} is documented at
51 : // JSRegExp::kIrregexpCaptureNameMapIndex.
52 315 : const int name_ix = j * 2;
53 315 : const int index_ix = j * 2 + 1;
54 :
55 315 : String capture_name = String::cast(capture_name_map->get(name_ix));
56 558 : if (!name_matches(capture_name)) continue;
57 :
58 72 : maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
59 72 : break;
60 : }
61 :
62 135 : return maybe_capture_index;
63 : }
64 :
65 : } // namespace
66 :
// Pre-parsed form of a replacement string. Compile() splits the replacement
// into a list of parts (literal substrings, subject prefix/suffix, captures)
// and Apply() appends those parts to a ReplacementStringBuilder for one match.
67 : class CompiledReplacement {
68 : public:
69 : explicit CompiledReplacement(Zone* zone)
70 : : parts_(zone), replacement_substrings_(zone) {}
71 :
72 : // Return whether the replacement is simple.
73 : bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
74 : Handle<String> replacement, int capture_count,
75 : int subject_length);
76 :
77 : // Use Apply only if Compile returned false.
78 : void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
79 : int32_t* match);
80 :
81 : // Number of distinct parts of the replacement pattern.
82 3240 : int parts() { return static_cast<int>(parts_.size()); }
83 :
84 : private:
// Tags for fully resolved parts. Values start at 1 so that non-positive tags
// can encode a not-yet-materialized replacement substring (see below).
85 : enum PartType {
86 : SUBJECT_PREFIX = 1,
87 : SUBJECT_SUFFIX,
88 : SUBJECT_CAPTURE,
89 : REPLACEMENT_SUBSTRING,
90 : REPLACEMENT_STRING,
91 : EMPTY_REPLACEMENT,
92 : NUMBER_OF_PART_TYPES
93 : };
94 :
95 : struct ReplacementPart {
// The whole match is represented as capture 0.
96 : static inline ReplacementPart SubjectMatch() {
97 : return ReplacementPart(SUBJECT_CAPTURE, 0);
98 : }
99 : static inline ReplacementPart SubjectCapture(int capture_index) {
100 : return ReplacementPart(SUBJECT_CAPTURE, capture_index);
101 : }
102 : static inline ReplacementPart SubjectPrefix() {
103 : return ReplacementPart(SUBJECT_PREFIX, 0);
104 : }
// The subject length is stored in data; Apply() uses it as the end of the
// suffix slice.
105 : static inline ReplacementPart SubjectSuffix(int subject_length) {
106 : return ReplacementPart(SUBJECT_SUFFIX, subject_length);
107 : }
108 : static inline ReplacementPart ReplacementString() {
109 : return ReplacementPart(REPLACEMENT_STRING, 0);
110 : }
111 : static inline ReplacementPart EmptyReplacement() {
112 : return ReplacementPart(EMPTY_REPLACEMENT, 0);
113 : }
// Temporary encoding of the replacement substring [from, to): the tag holds
// -from and data holds to. Compile() later rewrites it to
// REPLACEMENT_SUBSTRING.
114 : static inline ReplacementPart ReplacementSubString(int from, int to) {
115 : DCHECK_LE(0, from);
116 : DCHECK_GT(to, from);
117 639 : return ReplacementPart(-from, to);
118 : }
119 :
120 : // If tag <= 0 then it is the negation of a start index of a substring of
121 : // the replacement pattern, otherwise it's a value from PartType.
122 : ReplacementPart(int tag, int data) : tag(tag), data(data) {
123 : // Must be non-positive or a PartType value.
124 : DCHECK(tag < NUMBER_OF_PART_TYPES);
125 : }
126 : // Either a value of PartType or a non-positive number that is
127 : // the negation of an index into the replacement string.
128 : int tag;
129 : // The data value's interpretation depends on the value of tag:
130 : // tag == SUBJECT_PREFIX: data is unused.
131 : // tag == SUBJECT_SUFFIX: data is the length of the subject string.
132 : // tag == SUBJECT_CAPTURE: data is the number of the capture.
133 : // tag == REPLACEMENT_SUBSTRING ||
134 : // tag == REPLACEMENT_STRING: data is index into array of substrings
135 : // of the replacement string.
136 : // tag == EMPTY_REPLACEMENT: data is unused.
137 : // tag <= 0: Temporary representation of the substring of the replacement
138 : // string ranging over -tag .. data.
139 : // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
140 : // substring objects.
141 : int data;
142 : };
143 :
// Scans {characters} for '$' substitutions and appends the resulting parts to
// {parts}. {last} tracks the start of the literal text not yet emitted.
// Returns true only if the pattern is non-empty and {last} never advanced,
// i.e. no substitution was consumed; in that case the caller is expected to
// use the replacement string as-is. Returns false otherwise.
// {capture_name_map} may be null, in which case "$<" is left as literal text.
144 : template <typename Char>
145 2880 : bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
146 : Vector<Char> characters,
147 : FixedArray capture_name_map, int capture_count,
148 : int subject_length) {
149 : // Equivalent to String::GetSubstitution, except that this method converts
150 : // the replacement string into an internal representation that avoids
151 : // repeated parsing when used repeatedly.
152 2880 : int length = characters.length();
153 : int last = 0;
154 10359 : for (int i = 0; i < length; i++) {
155 14976 : Char c = characters[i];
156 7488 : if (c == '$') {
157 2439 : int next_index = i + 1;
158 2439 : if (next_index == length) { // No next character!
159 : break;
160 : }
161 4860 : Char c2 = characters[next_index];
162 2430 : switch (c2) {
// "$$": emits a single literal '$'.
163 : case '$':
164 72 : if (i > last) {
165 : // There is a substring before. Include the first "$".
166 45 : parts->push_back(
167 90 : ReplacementPart::ReplacementSubString(last, next_index));
168 45 : last = next_index + 1; // Continue after the second "$".
169 : } else {
170 : // Let the next substring start with the second "$".
171 : last = next_index;
172 : }
173 : i = next_index;
174 72 : break;
// "$`": the part of the subject before the match.
175 : case '`':
176 18 : if (i > last) {
177 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
178 : }
179 18 : parts->push_back(ReplacementPart::SubjectPrefix());
180 : i = next_index;
181 18 : last = i + 1;
182 18 : break;
// "$'": the part of the subject after the match.
183 : case '\'':
184 18 : if (i > last) {
185 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
186 : }
187 18 : parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
188 : i = next_index;
189 18 : last = i + 1;
190 18 : break;
// "$&": the matched text itself (capture 0).
191 : case '&':
192 18 : if (i > last) {
193 18 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
194 : }
195 18 : parts->push_back(ReplacementPart::SubjectMatch());
196 : i = next_index;
197 18 : last = i + 1;
198 18 : break;
// "$n" / "$nn": numbered capture reference.
199 : case '0':
200 : case '1':
201 : case '2':
202 : case '3':
203 : case '4':
204 : case '5':
205 : case '6':
206 : case '7':
207 : case '8':
208 : case '9': {
209 2223 : int capture_ref = c2 - '0';
// A single digit beyond the capture count leaves "$n" as literal text.
210 2223 : if (capture_ref > capture_count) {
211 : i = next_index;
212 : continue;
213 : }
214 1521 : int second_digit_index = next_index + 1;
215 1521 : if (second_digit_index < length) {
216 : // Peek ahead to see if we have two digits.
217 2772 : Char c3 = characters[second_digit_index];
218 1386 : if ('0' <= c3 && c3 <= '9') { // Double digits.
219 1332 : int double_digit_ref = capture_ref * 10 + c3 - '0';
// Only take the second digit if the two-digit number is a valid capture.
220 1332 : if (double_digit_ref <= capture_count) {
221 : next_index = second_digit_index;
222 : capture_ref = double_digit_ref;
223 : }
224 : }
225 : }
// A reference that resolves to 0 ("$0", "$00") emits no part and stays
// literal text.
226 1521 : if (capture_ref > 0) {
227 1485 : if (i > last) {
228 63 : parts->push_back(
229 126 : ReplacementPart::ReplacementSubString(last, i));
230 : }
231 : DCHECK(capture_ref <= capture_count);
232 1485 : parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
233 1485 : last = next_index + 1;
234 : }
235 : i = next_index;
236 1521 : break;
237 : }
// "$<name>": named capture reference.
238 : case '<': {
// Without a capture name map, "$<" is literal text.
239 72 : if (capture_name_map.is_null()) {
240 : i = next_index;
241 : break;
242 : }
243 :
244 : // Scan until the next '>', and let the enclosed substring be the
245 : // groupName.
246 :
247 72 : const int name_start_index = next_index + 1;
248 : int closing_bracket_index = -1;
249 306 : for (int j = name_start_index; j < length; j++) {
250 576 : if (characters[j] == '>') {
251 : closing_bracket_index = j;
252 : break;
253 : }
254 : }
255 :
256 : // If no closing bracket is found, '$<' is treated as a string
257 : // literal.
258 72 : if (closing_bracket_index == -1) {
259 : i = next_index;
260 : break;
261 : }
262 :
263 : Vector<Char> requested_name =
264 54 : characters.SubVector(name_start_index, closing_bracket_index);
265 :
266 : // Let capture be ? Get(namedCaptures, groupName).
267 :
268 : const int capture_index = LookupNamedCapture(
269 : [=](String capture_name) {
270 : return capture_name->IsEqualTo(requested_name);
271 135 : },
272 108 : capture_name_map);
273 :
274 : // If capture is undefined or does not exist, replace the text
275 : // through the following '>' with the empty string.
276 : // Otherwise, replace the text through the following '>' with
277 : // ? ToString(capture).
278 :
279 : DCHECK(capture_index == -1 ||
280 : (1 <= capture_index && capture_index <= capture_count));
281 :
282 54 : if (i > last) {
283 0 : parts->push_back(ReplacementPart::ReplacementSubString(last, i));
284 : }
285 54 : parts->push_back(
286 : (capture_index == -1)
287 : ? ReplacementPart::EmptyReplacement()
288 108 : : ReplacementPart::SubjectCapture(capture_index));
289 54 : last = closing_bracket_index + 1;
290 : i = closing_bracket_index;
291 54 : break;
292 : }
// Any other character after '$': both characters stay literal text.
293 : default:
294 : i = next_index;
295 : break;
296 : }
297 : }
298 : }
// Handle the literal tail that follows the last consumed substitution.
299 2880 : if (length > last) {
300 1818 : if (last == 0) {
301 : // Replacement is simple. Do not use Apply to do the replacement.
302 : return true;
303 : } else {
304 477 : parts->push_back(ReplacementPart::ReplacementSubString(last, length));
305 : }
306 : }
307 : return false;
308 : }
309 :
// Parsed parts, in the order they appear in the replacement string.
310 : ZoneChunkList<ReplacementPart> parts_;
// String objects referenced by REPLACEMENT_{SUB,}STRING parts via data.
311 : ZoneVector<Handle<String>> replacement_substrings_;
312 : };
313 :
314 2880 : bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
315 : Handle<String> replacement, int capture_count,
316 : int subject_length) {
317 : {
318 : DisallowHeapAllocation no_gc;
319 2880 : String::FlatContent content = replacement->GetFlatContent(no_gc);
320 : DCHECK(content.IsFlat());
321 :
322 2880 : FixedArray capture_name_map;
323 2880 : if (capture_count > 0) {
324 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
325 2970 : Object maybe_capture_name_map = regexp->CaptureNameMap();
326 1485 : if (maybe_capture_name_map->IsFixedArray()) {
327 72 : capture_name_map = FixedArray::cast(maybe_capture_name_map);
328 : }
329 : }
330 :
331 : bool simple;
332 2880 : if (content.IsOneByte()) {
333 : simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
334 : capture_name_map, capture_count,
335 2880 : subject_length);
336 : } else {
337 : DCHECK(content.IsTwoByte());
338 : simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
339 : capture_name_map, capture_count,
340 0 : subject_length);
341 : }
342 2880 : if (simple) return true;
343 : }
344 :
345 : // Find substrings of replacement string and create them as String objects.
346 : int substring_index = 0;
347 5310 : for (ReplacementPart& part : parts_) {
348 2232 : int tag = part.tag;
349 2232 : if (tag <= 0) { // A replacement string slice.
350 639 : int from = -tag;
351 639 : int to = part.data;
352 : replacement_substrings_.push_back(
353 1278 : isolate->factory()->NewSubString(replacement, from, to));
354 639 : part.tag = REPLACEMENT_SUBSTRING;
355 639 : part.data = substring_index;
356 639 : substring_index++;
357 1593 : } else if (tag == REPLACEMENT_STRING) {
358 0 : replacement_substrings_.push_back(replacement);
359 0 : part.data = substring_index;
360 0 : substring_index++;
361 : }
362 : }
363 : return false;
364 : }
365 :
366 :
367 1818 : void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
368 : int match_from, int match_to, int32_t* match) {
369 : DCHECK_LT(0, parts_.size());
370 6588 : for (ReplacementPart& part : parts_) {
371 2952 : switch (part.tag) {
372 : case SUBJECT_PREFIX:
373 45 : if (match_from > 0) builder->AddSubjectSlice(0, match_from);
374 : break;
375 : case SUBJECT_SUFFIX: {
376 45 : int subject_length = part.data;
377 45 : if (match_to < subject_length) {
378 36 : builder->AddSubjectSlice(match_to, subject_length);
379 : }
380 : break;
381 : }
382 : case SUBJECT_CAPTURE: {
383 1764 : int capture = part.data;
384 1764 : int from = match[capture * 2];
385 1764 : int to = match[capture * 2 + 1];
386 1764 : if (from >= 0 && to > from) {
387 1683 : builder->AddSubjectSlice(from, to);
388 : }
389 : break;
390 : }
391 : case REPLACEMENT_SUBSTRING:
392 : case REPLACEMENT_STRING:
393 2088 : builder->AddString(replacement_substrings_[part.data]);
394 1044 : break;
395 : case EMPTY_REPLACEMENT:
396 : break;
397 : default:
398 0 : UNREACHABLE();
399 : }
400 : }
401 1818 : }
402 :
403 33165 : void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
404 : std::vector<int>* indices, unsigned int limit) {
405 : DCHECK_LT(0, limit);
406 : // Collect indices of pattern in subject using memchr.
407 : // Stop after finding at most limit values.
408 33165 : const uint8_t* subject_start = subject.start();
409 66330 : const uint8_t* subject_end = subject_start + subject.length();
410 : const uint8_t* pos = subject_start;
411 149831 : while (limit > 0) {
412 : pos = reinterpret_cast<const uint8_t*>(
413 116639 : memchr(pos, pattern, subject_end - pos));
414 149804 : if (pos == nullptr) return;
415 167002 : indices->push_back(static_cast<int>(pos - subject_start));
416 83501 : pos++;
417 83501 : limit--;
418 : }
419 : }
420 :
421 369 : void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
422 : std::vector<int>* indices, unsigned int limit) {
423 : DCHECK_LT(0, limit);
424 369 : const uc16* subject_start = subject.start();
425 738 : const uc16* subject_end = subject_start + subject.length();
426 12564 : for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
427 12195 : if (*pos == pattern) {
428 738 : indices->push_back(static_cast<int>(pos - subject_start));
429 369 : limit--;
430 : }
431 : }
432 369 : }
433 :
434 : template <typename SubjectChar, typename PatternChar>
435 14 : void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
436 : Vector<const PatternChar> pattern,
437 : std::vector<int>* indices, unsigned int limit) {
438 : DCHECK_LT(0, limit);
439 : // Collect indices of pattern in subject.
440 : // Stop after finding at most limit values.
441 0 : int pattern_length = pattern.length();
442 14 : int index = 0;
443 0 : StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
444 28 : while (limit > 0) {
445 56 : index = search.Search(subject, index);
446 42 : if (index < 0) return;
447 14 : indices->push_back(index);
448 14 : index += pattern_length;
449 14 : limit--;
450 : }
451 : }
452 :
453 33548 : void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
454 : std::vector<int>* indices, unsigned int limit) {
455 : {
456 : DisallowHeapAllocation no_gc;
457 33548 : String::FlatContent subject_content = subject->GetFlatContent(no_gc);
458 33548 : String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
459 : DCHECK(subject_content.IsFlat());
460 : DCHECK(pattern_content.IsFlat());
461 33548 : if (subject_content.IsOneByte()) {
462 33179 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463 33179 : if (pattern_content.IsOneByte()) {
464 : Vector<const uint8_t> pattern_vector =
465 : pattern_content.ToOneByteVector();
466 33179 : if (pattern_vector.length() == 1) {
467 : FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468 33165 : limit);
469 : } else {
470 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471 14 : limit);
472 : }
473 : } else {
474 : FindStringIndices(isolate, subject_vector,
475 0 : pattern_content.ToUC16Vector(), indices, limit);
476 : }
477 : } else {
478 369 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479 369 : if (pattern_content.IsOneByte()) {
480 : Vector<const uint8_t> pattern_vector =
481 : pattern_content.ToOneByteVector();
482 369 : if (pattern_vector.length() == 1) {
483 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484 369 : limit);
485 : } else {
486 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 0 : limit);
488 : }
489 : } else {
490 : Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491 0 : if (pattern_vector.length() == 1) {
492 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493 0 : limit);
494 : } else {
495 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496 0 : limit);
497 : }
498 : }
499 : }
500 : }
501 33548 : }
502 :
503 : namespace {
504 32909 : std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505 32909 : std::vector<int>* list = isolate->regexp_indices();
506 : list->clear();
507 32909 : return list;
508 : }
509 :
510 33080 : void TruncateRegexpIndicesList(Isolate* isolate) {
511 : // Same size as smallest zone segment, preserving behavior from the
512 : // runtime zone.
513 : static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514 33080 : std::vector<int>* indicies = isolate->regexp_indices();
515 33080 : if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516 : // Throw away backing storage.
517 : indicies->clear();
518 : indicies->shrink_to_fit();
519 : }
520 33080 : }
521 : } // namespace
522 :
523 : template <typename ResultSeqString>
524 639 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
525 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527 : DCHECK(subject->IsFlat());
528 : DCHECK(replacement->IsFlat());
529 :
530 639 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531 :
532 : DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533 : String pattern =
534 1278 : String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535 : int subject_len = subject->length();
536 : int pattern_len = pattern->length();
537 : int replacement_len = replacement->length();
538 :
539 639 : FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540 :
541 1107 : if (indices->empty()) return *subject;
542 :
543 : // Detect integer overflow.
544 : int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545 : static_cast<int64_t>(pattern_len)) *
546 : static_cast<int64_t>(indices->size()) +
547 342 : static_cast<int64_t>(subject_len);
548 : int result_len;
549 171 : if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
551 : result_len = kMaxInt; // Provoke exception.
552 : } else {
553 171 : result_len = static_cast<int>(result_len_64);
554 : }
555 171 : if (result_len == 0) {
556 0 : return ReadOnlyRoots(isolate).empty_string();
557 : }
558 :
559 : int subject_pos = 0;
560 : int result_pos = 0;
561 :
562 : MaybeHandle<SeqString> maybe_res;
563 : if (ResultSeqString::kHasOneByteEncoding) {
564 171 : maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565 : } else {
566 0 : maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567 : }
568 : Handle<SeqString> untyped_res;
569 171 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570 171 : Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571 :
572 : DisallowHeapAllocation no_gc;
573 594 : for (int index : *indices) {
574 : // Copy non-matched subject content.
575 252 : if (subject_pos < index) {
576 99 : String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
577 99 : subject_pos, index);
578 99 : result_pos += index - subject_pos;
579 : }
580 :
581 : // Replace match.
582 252 : if (replacement_len > 0) {
583 252 : String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
584 252 : replacement_len);
585 252 : result_pos += replacement_len;
586 : }
587 :
588 252 : subject_pos = index + pattern_len;
589 : }
590 : // Add remaining subject content at the end.
591 171 : if (subject_pos < subject_len) {
592 63 : String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
593 63 : subject_pos, subject_len);
594 : }
595 :
596 171 : int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
597 171 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
598 : match_indices);
599 :
600 171 : TruncateRegexpIndicesList(isolate);
601 :
602 171 : return *result;
603 : }
604 :
// Replaces all matches of the global {regexp} in {subject} with the
// (possibly '$'-substituted) {replacement}. Both strings must be flat.
// Returns {subject} unchanged if there is no match, the exception sentinel on
// failure, and otherwise the string built by ReplacementStringBuilder.
605 2880 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606 2880 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608 : DCHECK(subject->IsFlat());
609 : DCHECK(replacement->IsFlat());
610 :
611 2880 : int capture_count = regexp->CaptureCount();
612 : int subject_length = subject->length();
613 :
614 2880 : JSRegExp::Type typeTag = regexp->TypeTag();
615 2880 : if (typeTag == JSRegExp::IRREGEXP) {
616 : // Ensure the RegExp is compiled so we can access the capture-name map.
617 2160 : if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
618 : DCHECK(isolate->has_pending_exception());
619 0 : return ReadOnlyRoots(isolate).exception();
620 : }
621 : }
622 :
623 : // CompiledReplacement uses zone allocation.
624 2880 : Zone zone(isolate->allocator(), ZONE_NAME);
625 : CompiledReplacement compiled_replacement(&zone);
// simple_replace is true when the replacement can be appended verbatim for
// every match; Apply() must then not be used.
626 : const bool simple_replace = compiled_replacement.Compile(
627 2880 : isolate, regexp, replacement, capture_count, subject_length);
628 :
629 : // Shortcut for simple non-regexp global replacements
630 2880 : if (typeTag == JSRegExp::ATOM && simple_replace) {
// The result can only be one-byte if both inputs contain only one-byte
// characters.
631 1278 : if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
632 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
633 639 : isolate, subject, regexp, replacement, last_match_info);
634 : } else {
635 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
636 0 : isolate, subject, regexp, replacement, last_match_info);
637 : }
638 : }
639 :
640 2241 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
641 2241 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
642 :
// A null first match means either an exception or no match at all.
643 : int32_t* current_match = global_cache.FetchNext();
644 2241 : if (current_match == nullptr) {
645 621 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
646 621 : return *subject;
647 : }
648 :
649 : // Guessing the number of parts that the final result string is built
650 : // from. Global regexps can match any number of times, so we guess
651 : // conservatively.
652 1620 : int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
653 1620 : ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
654 :
655 : // Number of parts added by compiled replacement plus preceding
656 : // string and possibly suffix after last match. It is possible for
657 : // all components to use two elements when encoded as two smis.
658 1620 : const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
659 :
// End offset of the previous match; the subject text between prev and the
// next match start is copied through unchanged.
660 : int prev = 0;
661 :
662 1935 : do {
663 1935 : builder.EnsureCapacity(parts_added_per_loop);
664 :
665 1935 : int start = current_match[0];
666 1935 : int end = current_match[1];
667 :
668 1935 : if (prev < start) {
669 342 : builder.AddSubjectSlice(prev, start);
670 : }
671 :
672 1935 : if (simple_replace) {
673 117 : builder.AddString(replacement);
674 : } else {
675 1818 : compiled_replacement.Apply(&builder, start, end, current_match);
676 : }
677 : prev = end;
678 :
679 : current_match = global_cache.FetchNext();
680 : } while (current_match != nullptr);
681 :
682 1620 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
683 :
// Copy the subject text that follows the last match.
684 1620 : if (prev < subject_length) {
685 486 : builder.EnsureCapacity(2);
686 486 : builder.AddSubjectSlice(prev, subject_length);
687 : }
688 :
689 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
690 1620 : global_cache.LastSuccessfulMatch());
691 :
692 6120 : RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
693 : }
694 :
// Removes all matches of the global {regexp} from the flat {subject}.
// ResultSeqString selects the encoding of the result string. ATOM regexps are
// forwarded to StringReplaceGlobalAtomRegExpWithString with an empty
// replacement. Returns {subject} unchanged if there is no match and the
// exception sentinel if matching fails.
695 : template <typename ResultSeqString>
696 0 : V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
697 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
698 : Handle<RegExpMatchInfo> last_match_info) {
699 : DCHECK(subject->IsFlat());
700 :
701 : // Shortcut for simple non-regexp global replacements
702 0 : if (regexp->TypeTag() == JSRegExp::ATOM) {
703 0 : Handle<String> empty_string = isolate->factory()->empty_string();
704 0 : if (subject->IsOneByteRepresentation()) {
705 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
706 0 : isolate, subject, regexp, empty_string, last_match_info);
707 : } else {
708 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
709 0 : isolate, subject, regexp, empty_string, last_match_info);
710 : }
711 : }
712 :
713 0 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
714 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
715 :
716 : int32_t* current_match = global_cache.FetchNext();
717 0 : if (current_match == nullptr) {
718 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
719 0 : return *subject;
720 : }
721 :
722 0 : int start = current_match[0];
723 0 : int end = current_match[1];
724 0 : int capture_count = regexp->CaptureCount();
725 : int subject_length = subject->length();
726 :
// Length after removing only the first match. Later matches can only shrink
// the result further, so this is an upper bound; the string is trimmed to its
// final length at the end.
// NOTE(review): the early return below happens before SetLastMatchInfo is
// called, so last match info is not updated on this path - confirm intended.
727 0 : int new_length = subject_length - (end - start);
728 0 : if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
729 :
730 : Handle<ResultSeqString> answer;
731 : if (ResultSeqString::kHasOneByteEncoding) {
732 0 : answer = Handle<ResultSeqString>::cast(
733 0 : isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
734 : } else {
735 0 : answer = Handle<ResultSeqString>::cast(
736 0 : isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
737 : }
738 :
// prev: end offset of the previous match in the subject.
// position: number of characters written to answer so far.
739 : int prev = 0;
740 : int position = 0;
741 :
742 : DisallowHeapAllocation no_gc;
743 0 : do {
744 0 : start = current_match[0];
745 0 : end = current_match[1];
746 0 : if (prev < start) {
747 : // Add substring subject[prev;start] to answer string.
748 0 : String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
749 0 : start);
750 0 : position += start - prev;
751 : }
752 : prev = end;
753 :
754 : current_match = global_cache.FetchNext();
755 : } while (current_match != nullptr);
756 :
757 0 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
758 :
759 0 : RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
760 : global_cache.LastSuccessfulMatch());
761 :
762 0 : if (prev < subject_length) {
763 : // Add substring subject[prev;length] to answer string.
764 0 : String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
765 0 : subject_length);
766 0 : position += subject_length - prev;
767 : }
768 :
769 0 : if (position == 0) return ReadOnlyRoots(isolate).empty_string();
770 :
771 : // Shorten string and fill
// delta is the number of bytes by which the allocated object exceeds the size
// needed for the final length.
772 : int string_size = ResultSeqString::SizeFor(position);
773 : int allocated_string_size = ResultSeqString::SizeFor(new_length);
774 0 : int delta = allocated_string_size - string_size;
775 :
776 : answer->set_length(position);
777 0 : if (delta == 0) return *answer;
778 :
779 0 : Address end_of_string = answer->address() + string_size;
780 0 : Heap* heap = isolate->heap();
781 :
782 : // The trimming is performed on a newly allocated object, which is on a
783 : // freshly allocated page or on an already swept page. Hence, the sweeper
784 : // thread can not get confused with the filler creation. No synchronization
785 : // needed.
786 : // TODO(hpayer): We should shrink the large object page if the size
787 : // of the object changed significantly.
788 0 : if (!heap->IsLargeObject(*answer)) {
789 0 : heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
790 : }
791 0 : return *answer;
792 : }
793 :
794 : namespace {
795 :
796 603 : Object StringReplaceGlobalRegExpWithStringHelper(
797 : Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
798 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
799 1206 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
800 :
801 603 : subject = String::Flatten(isolate, subject);
802 :
803 603 : if (replacement->length() == 0) {
804 0 : if (subject->HasOnlyOneByteChars()) {
805 : return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
806 0 : isolate, subject, regexp, last_match_info);
807 : } else {
808 : return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
809 0 : isolate, subject, regexp, last_match_info);
810 : }
811 : }
812 :
813 603 : replacement = String::Flatten(isolate, replacement);
814 :
815 : return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
816 603 : replacement, last_match_info);
817 : }
818 :
819 : } // namespace
820 :
// Runtime function: splits a string on a non-empty string separator.
// Arguments: (0) subject string, (1) separator pattern, (2) limit as a
// uint32 number. Both the limit and the pattern length must be greater than
// zero. Returns a JSArray holding at most {limit} substrings. Results for the
// unlimited case (limit == 0xFFFFFFFF) are looked up in and entered into
// RegExpResultsCache.
821 36879 : RUNTIME_FUNCTION(Runtime_StringSplit) {
822 36879 : HandleScope handle_scope(isolate);
823 : DCHECK_EQ(3, args.length());
824 73758 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
825 73758 : CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
826 73758 : CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
827 36879 : CHECK_LT(0, limit);
828 :
829 36879 : int subject_length = subject->length();
830 36879 : int pattern_length = pattern->length();
831 36879 : CHECK_LT(0, pattern_length);
832 :
// Only unlimited splits are cached.
833 36879 : if (limit == 0xFFFFFFFFu) {
834 36843 : FixedArray last_match_cache_unused;
835 : Handle<Object> cached_answer(
836 : RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
837 : &last_match_cache_unused,
838 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
839 110529 : isolate);
// Any result other than Smi zero is treated as a cache hit.
840 36843 : if (*cached_answer != Smi::kZero) {
841 : // The cache FixedArray is a COW-array and can therefore be reused.
842 : Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
843 7940 : Handle<FixedArray>::cast(cached_answer));
844 : return *result;
845 : }
846 : }
847 :
848 : // The limit can be very large (0xFFFFFFFFu), but since the pattern
849 : // isn't empty, we can never create more parts than ~half the length
850 : // of the subject.
851 :
852 32909 : subject = String::Flatten(isolate, subject);
853 32909 : pattern = String::Flatten(isolate, pattern);
854 :
855 98727 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
856 :
857 32909 : FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
858 :
// If the limit was not reached, the end of the subject terminates one final
// part.
859 32909 : if (static_cast<uint32_t>(indices->size()) < limit) {
860 32882 : indices->push_back(subject_length);
861 : }
862 :
863 : // The list indices now contains the end of each part to create.
864 :
865 : // Create JSArray of substrings separated by separator.
866 32909 : int part_count = static_cast<int>(indices->size());
867 :
868 : Handle<JSArray> result =
869 : isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
870 32909 : INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
871 :
872 : DCHECK(result->HasObjectElements());
873 :
874 65818 : Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
875 :
// No separator found: the single part is the subject itself, so no substring
// needs to be created.
876 32909 : if (part_count == 1 && indices->at(0) == subject_length) {
877 47858 : elements->set(0, *subject);
878 : } else {
879 : int part_start = 0;
// Each part ends at a recorded index; the next part starts after the
// separator that follows it.
880 194186 : FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
881 : int part_end = indices->at(i);
882 : Handle<String> substring =
883 : isolate->factory()->NewProperSubString(subject, part_start, part_end);
884 : elements->set(i, *substring);
885 : part_start = part_end + pattern_length;
886 : });
887 : }
888 :
889 32909 : if (limit == 0xFFFFFFFFu) {
890 32873 : if (result->HasObjectElements()) {
891 : RegExpResultsCache::Enter(isolate, subject, pattern, elements,
892 : isolate->factory()->empty_fixed_array(),
893 65746 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
894 : }
895 : }
896 :
// Drop the indices list's backing store if it grew too large.
897 32909 : TruncateRegexpIndicesList(isolate);
898 :
899 36879 : return *result;
900 : }
901 :
// Runtime function, args: (JSRegExp regexp, String subject, int32 index,
// RegExpMatchInfo last_match_info). CHECKs 0 <= index <= subject->length(),
// increments the regexp_entry_runtime counter and returns whatever
// RegExpImpl::Exec produces through RETURN_RESULT_OR_FAILURE.
902 91937 : RUNTIME_FUNCTION(Runtime_RegExpExec) {
903 91937 : HandleScope scope(isolate);
904 : DCHECK_EQ(4, args.length());
905 183874 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
906 183874 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
907 183874 : CONVERT_INT32_ARG_CHECKED(index, 2);
908 183874 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
909 : // Due to the way the JS calls are constructed this must be less than the
910 : // length of a string, i.e. it is always a Smi. We check anyway for security.
911 91937 : CHECK_LE(0, index);
912 91937 : CHECK_GE(subject->length(), index);
913 91937 : isolate->counters()->regexp_entry_runtime()->Increment();
914 183874 : RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
915 91937 : index, last_match_info));
916 : }
917 :
// Runtime function, args: (JSRegExp regexp, String subject,
// String replacement). Forwards to StringReplaceGlobalRegExpWithStringHelper,
// supplying the match info from isolate->regexp_internal_match_info() instead
// of one passed in by the caller.
918 603 : RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
919 603 : HandleScope scope(isolate);
920 : DCHECK_EQ(3, args.length());
921 1206 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
922 1206 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
923 1206 : CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
924 :
925 : Handle<RegExpMatchInfo> internal_match_info =
926 603 : isolate->regexp_internal_match_info();
927 :
928 : return StringReplaceGlobalRegExpWithStringHelper(
929 603 : isolate, regexp, subject, replacement, internal_match_info);
930 : }
931 :
932 : namespace {
933 :
// String::Match implementation whose match data comes from a RegExpMatchInfo.
// The subject is flattened at construction. Named-capture support is enabled
// only when the regexp is of type IRREGEXP and its CaptureNameMap() is a
// FixedArray.
934 3300 : class MatchInfoBackedMatch : public String::Match {
935 : public:
936 3300 : MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
937 : Handle<String> subject,
938 : Handle<RegExpMatchInfo> match_info)
939 6600 : : isolate_(isolate), match_info_(match_info) {
940 3300 : subject_ = String::Flatten(isolate, subject);
941 :
// The capture name map is only queried for IRREGEXP regexps; every other type
// is treated as having no named captures.
942 3300 : if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
943 6384 : Object o = regexp->CaptureNameMap();
944 3192 : has_named_captures_ = o->IsFixedArray();
945 3192 : if (has_named_captures_) {
946 99 : capture_name_map_ = handle(FixedArray::cast(o), isolate);
947 : }
948 : } else {
949 108 : has_named_captures_ = false;
950 : }
951 3300 : }
952 :
// Returns capture 0 as obtained from GenericCaptureGetter.
953 18 : Handle<String> GetMatch() override {
954 18 : return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
955 : }
956 :
// Returns the part of the subject before the match: [0, Capture(0)).
957 18 : Handle<String> GetPrefix() override {
958 18 : const int match_start = match_info_->Capture(0);
959 18 : return isolate_->factory()->NewSubString(subject_, 0, match_start);
960 : }
961 :
// Returns the part of the subject after the match: [Capture(1), length).
962 18 : Handle<String> GetSuffix() override {
963 18 : const int match_end = match_info_->Capture(1);
964 : return isolate_->factory()->NewSubString(subject_, match_end,
965 18 : subject_->length());
966 : }
967 :
968 171 : bool HasNamedCaptures() override { return has_named_captures_; }
969 :
// Half the number of capture registers stored in the match info.
970 3300 : int CaptureCount() override {
971 3300 : return match_info_->NumberOfCaptureRegisters() / 2;
972 : }
973 :
// Fetches capture i through GenericCaptureGetter, which is handed
// capture_exists as an out-parameter. Yields the capture converted with
// Object::ToString when *capture_exists is true, the empty string otherwise.
974 7152 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
975 : Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
976 7152 : isolate_, match_info_, i, capture_exists);
977 7107 : return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
978 21411 : : isolate_->factory()->empty_string();
979 : }
980 :
// Resolves a capture by name via LookupNamedCapture on capture_name_map_.
// Sets *state to INVALID when the name is not found (the returned handle is
// then just a placeholder), UNMATCHED when the capture does not exist in this
// match (empty string returned), or MATCHED together with the capture value.
981 81 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
982 : CaptureState* state) override {
983 : DCHECK(has_named_captures_);
984 : const int capture_index = LookupNamedCapture(
985 360 : [=](String capture_name) { return capture_name->Equals(*name); },
986 243 : *capture_name_map_);
987 :
988 81 : if (capture_index == -1) {
989 36 : *state = INVALID;
990 36 : return name; // Arbitrary string handle.
991 : }
992 :
993 : DCHECK(1 <= capture_index && capture_index <= CaptureCount());
994 :
995 : bool capture_exists;
996 : Handle<String> capture_value;
997 90 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
998 : GetCapture(capture_index, &capture_exists),
999 : String);
1000 :
1001 45 : if (!capture_exists) {
1002 18 : *state = UNMATCHED;
1003 36 : return isolate_->factory()->empty_string();
1004 : } else {
1005 27 : *state = MATCHED;
1006 27 : return capture_value;
1007 : }
1008 : }
1009 :
1010 : private:
1011 : Isolate* isolate_;
// Flattened copy of the subject handle passed to the constructor.
1012 : Handle<String> subject_;
1013 : Handle<RegExpMatchInfo> match_info_;
1014 :
1015 : bool has_named_captures_;
// Only assigned when has_named_captures_ is true.
1016 : Handle<FixedArray> capture_name_map_;
1017 : };
1018 :
// String::Match implementation backed by explicitly supplied pieces: the
// matched string, its position in the subject, a vector of capture values and
// a groups object. Per the DCHECK in the constructor, groups_obj is either
// undefined (no named captures) or a JSReceiver whose properties are looked up
// by GetNamedCapture.
1019 882 : class VectorBackedMatch : public String::Match {
1020 : public:
1021 441 : VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1022 : Handle<String> match, int match_position,
1023 : ZoneVector<Handle<Object>>* captures,
1024 : Handle<Object> groups_obj)
1025 : : isolate_(isolate),
1026 : match_(match),
1027 : match_position_(match_position),
// Only the pointer to the captures vector is stored here.
1028 882 : captures_(captures) {
1029 441 : subject_ = String::Flatten(isolate, subject);
1030 :
1031 : DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1032 882 : has_named_captures_ = !groups_obj->IsUndefined(isolate);
1033 441 : if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1034 441 : }
1035 :
1036 0 : Handle<String> GetMatch() override { return match_; }
1037 :
// Returns the part of the subject before the match: [0, match_position_).
1038 0 : Handle<String> GetPrefix() override {
1039 0 : return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1040 : }
1041 :
// Returns the part of the subject after the match; the match end is computed
// as match_position_ plus the length of the matched string.
1042 0 : Handle<String> GetSuffix() override {
1043 0 : const int match_end_position = match_position_ + match_->length();
1044 : return isolate_->factory()->NewSubString(subject_, match_end_position,
1045 0 : subject_->length());
1046 : }
1047 :
1048 288 : bool HasNamedCaptures() override { return has_named_captures_; }
1049 :
1050 882 : int CaptureCount() override { return static_cast<int>(captures_->size()); }
1051 :
// Reads entry i of the captures vector. An undefined entry is reported as a
// non-existent capture (empty string returned); any other value is converted
// with Object::ToString.
1052 81 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1053 162 : Handle<Object> capture_obj = captures_->at(i);
1054 243 : if (capture_obj->IsUndefined(isolate_)) {
1055 0 : *capture_exists = false;
1056 0 : return isolate_->factory()->empty_string();
1057 : }
1058 81 : *capture_exists = true;
1059 81 : return Object::ToString(isolate_, capture_obj);
1060 : }
1061 :
// Resolves a capture by name as a property of groups_obj_. Returns an empty
// MaybeHandle if HasProperty yields Nothing. Otherwise sets *state to INVALID
// when the property is absent (the returned handle is then a placeholder),
// UNMATCHED when the property value is undefined (empty string returned), or
// MATCHED with the value converted by Object::ToString.
1062 225 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
1063 : CaptureState* state) override {
1064 : DCHECK(has_named_captures_);
1065 :
1066 : Maybe<bool> maybe_capture_exists =
1067 225 : JSReceiver::HasProperty(groups_obj_, name);
1068 225 : if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1069 :
1070 225 : if (!maybe_capture_exists.FromJust()) {
1071 90 : *state = INVALID;
1072 90 : return name; // Arbitrary string handle.
1073 : }
1074 :
1075 : Handle<Object> capture_obj;
1076 270 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1077 : Object::GetProperty(isolate_, groups_obj_, name),
1078 : String);
1079 405 : if (capture_obj->IsUndefined(isolate_)) {
1080 36 : *state = UNMATCHED;
1081 72 : return isolate_->factory()->empty_string();
1082 : } else {
1083 99 : *state = MATCHED;
1084 99 : return Object::ToString(isolate_, capture_obj);
1085 : }
1086 : }
1087 :
1088 : private:
1089 : Isolate* isolate_;
// Flattened copy of the subject handle passed to the constructor.
1090 : Handle<String> subject_;
1091 : Handle<String> match_;
1092 : const int match_position_;
1093 : ZoneVector<Handle<Object>>* captures_;
1094 :
1095 : bool has_named_captures_;
// Only assigned when has_named_captures_ is true.
1096 : Handle<JSReceiver> groups_obj_;
1097 : };
1098 :
1099 : // Create the groups object (see also the RegExp result creation in
1100 : // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
//
// capture_map is read as consecutive (name, index) pairs: slot 2*i holds the
// capture name as a String and slot 2*i+1 the capture index as a Smi.
// f_get_capture is called with each capture index and supplies the value to
// store under the corresponding name. Returns a new JSObject with a null
// prototype carrying one property per pair.
1101 72 : Handle<JSObject> ConstructNamedCaptureGroupsObject(
1102 : Isolate* isolate, Handle<FixedArray> capture_map,
1103 : const std::function<Object(int)>& f_get_capture) {
1104 72 : Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1105 :
// Two slots per named capture, hence length / 2 entries.
1106 72 : const int capture_count = capture_map->length() >> 1;
1107 216 : for (int i = 0; i < capture_count; i++) {
1108 144 : const int name_ix = i * 2;
1109 144 : const int index_ix = i * 2 + 1;
1110 :
1111 : Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1112 : isolate);
1113 144 : const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1114 : DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1115 :
1116 144 : Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1117 : DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1118 :
1119 144 : JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1120 : }
1121 :
1122 72 : return groups;
1123 : }
1124 :
1125 : // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1126 : // separate last match info. See comment on that function.
//
// Collects all matches of a regexp in a flat subject into result_array.
// The has_capture template argument must agree with whether the regexp has
// capture groups (DCHECKed below). With captures, each match is added as a
// JSArray of (match, captures..., match start, subject[, groups object]);
// without captures the matched string itself is added.
// Returns result_array when at least one match was found, the null value when
// there was none, and the exception sentinel when the global cache reports an
// exception.
1127 : template <bool has_capture>
1128 93251 : static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1129 : Handle<JSRegExp> regexp,
1130 : Handle<RegExpMatchInfo> last_match_array,
1131 : Handle<JSArray> result_array) {
1132 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1133 : DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1134 : DCHECK(subject->IsFlat());
1135 :
1136 93251 : int capture_count = regexp->CaptureCount();
1137 : int subject_length = subject->length();
1138 :
// The results cache is only used for subjects longer than this many characters.
1139 : static const int kMinLengthToCache = 0x1000;
1140 :
1141 93251 : if (subject_length > kMinLengthToCache) {
1142 36 : FixedArray last_match_cache;
1143 : Object cached_answer = RegExpResultsCache::Lookup(
1144 : isolate->heap(), *subject, regexp->data(), &last_match_cache,
1145 72 : RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
// Cache hit: rebuild the last-match registers from the cached Smis, install a
// copy of the cached array as the content of result_array and update the last
// match info before returning.
1146 36 : if (cached_answer->IsFixedArray()) {
1147 0 : int capture_registers = (capture_count + 1) * 2;
1148 0 : int32_t* last_match = NewArray<int32_t>(capture_registers);
1149 0 : for (int i = 0; i < capture_registers; i++) {
1150 0 : last_match[i] = Smi::ToInt(last_match_cache->get(i));
1151 : }
1152 : Handle<FixedArray> cached_fixed_array =
1153 : Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1154 : // The cache FixedArray is a COW-array and we need to return a copy.
1155 : Handle<FixedArray> copied_fixed_array =
1156 : isolate->factory()->CopyFixedArrayWithMap(
1157 0 : cached_fixed_array, isolate->factory()->fixed_array_map());
1158 0 : JSArray::SetContent(result_array, copied_fixed_array);
1159 0 : RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1160 : capture_count, last_match);
1161 : DeleteArray(last_match);
1162 0 : return *result_array;
1163 : }
1164 : }
1165 :
1166 93251 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1167 93381 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1168 :
1169 : // Ensured in Runtime_RegExpExecMultiple.
1170 : DCHECK(result_array->HasObjectElements());
1171 186242 : Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1172 186242 : isolate);
// Reuse the backing store of result_array unless it has fewer than 16 slots,
// in which case a fresh 16-slot array of holes is used instead.
1173 93121 : if (result_elements->length() < 16) {
1174 0 : result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1175 : }
1176 :
1177 93121 : FixedArrayBuilder builder(result_elements);
1178 :
1179 : // Position to search from.
1180 : int match_start = -1;
1181 : int match_end = 0;
1182 : bool first = true;
1183 :
1184 : // Two smis before and after the match, for very long strings.
1185 : static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1186 :
// One iteration per match; FetchNext returning nullptr ends the loop.
1187 : while (true) {
1188 : int32_t* current_match = global_cache.FetchNext();
1189 456593 : if (current_match == nullptr) break;
1190 363472 : match_start = current_match[0];
1191 363472 : builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
// Record the subject range [match_end, match_start) lying between the previous
// match and this one, if it is non-empty.
1192 363472 : if (match_end < match_start) {
1193 58326 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1194 : match_start);
1195 : }
1196 363472 : match_end = current_match[1];
1197 : {
1198 : // Avoid accumulating new handles inside loop.
1199 : HandleScope temp_scope(isolate);
1200 : Handle<String> match;
// NOTE(review): the first match is created with NewSubString, later matches
// with NewProperSubString; the reason for the distinction is not visible here.
1201 363472 : if (!first) {
1202 270413 : match = isolate->factory()->NewProperSubString(subject, match_start,
1203 : match_end);
1204 : } else {
1205 93059 : match =
1206 : isolate->factory()->NewSubString(subject, match_start, match_end);
1207 : first = false;
1208 : }
1209 :
1210 : if (has_capture) {
1211 : // Arguments array to replace function is match, captures, index and
1212 : // subject, i.e., 3 + capture count in total. If the RegExp contains
1213 : // named captures, they are also passed as the last argument.
1214 :
1215 344346 : Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1216 344346 : const bool has_named_captures = maybe_capture_map->IsFixedArray();
1217 :
1218 : const int argc =
1219 172173 : has_named_captures ? 4 + capture_count : 3 + capture_count;
1220 :
1221 172173 : Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1222 : int cursor = 0;
1223 :
// Slot 0 holds the match; slots 1..capture_count hold the captures, with
// undefined stored for a capture whose start register is negative.
1224 344346 : elements->set(cursor++, *match);
1225 608801 : for (int i = 1; i <= capture_count; i++) {
1226 436628 : int start = current_match[i * 2];
1227 436628 : if (start >= 0) {
1228 436535 : int end = current_match[i * 2 + 1];
1229 : DCHECK(start <= end);
1230 : Handle<String> substring =
1231 436535 : isolate->factory()->NewSubString(subject, start, end);
1232 873070 : elements->set(cursor++, *substring);
1233 : } else {
1234 : DCHECK_GT(0, current_match[i * 2 + 1]);
1235 186 : elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1236 : }
1237 : }
1238 :
1239 172173 : elements->set(cursor++, Smi::FromInt(match_start));
1240 344346 : elements->set(cursor++, *subject);
1241 :
1242 172173 : if (has_named_captures) {
1243 : Handle<FixedArray> capture_map =
1244 54 : Handle<FixedArray>::cast(maybe_capture_map);
// The callback reads capture ix straight from slot ix of elements, which works
// because slot 0 is the match and the captures follow in order.
1245 : Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1246 324 : isolate, capture_map, [=](int ix) { return elements->get(ix); });
1247 108 : elements->set(cursor++, *groups);
1248 : }
1249 :
1250 : DCHECK_EQ(cursor, argc);
1251 344346 : builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1252 : } else {
1253 191299 : builder.Add(*match);
1254 : }
1255 : }
1256 : }
1257 :
1258 93170 : if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1259 :
1260 93072 : if (match_start >= 0) {
1261 : // Finished matching, with at least one match.
// Record the subject range after the last match, if any.
1262 93059 : if (match_end < subject_length) {
1263 411 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1264 : subject_length);
1265 : }
1266 :
1267 93059 : RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1268 : capture_count,
1269 : global_cache.LastSuccessfulMatch());
1270 :
1271 93059 : if (subject_length > kMinLengthToCache) {
1272 : // Store the last successful match into the array for caching.
1273 : // TODO(yangguo): do not expose last match to JS and simplify caching.
1274 36 : int capture_registers = (capture_count + 1) * 2;
1275 : Handle<FixedArray> last_match_cache =
1276 36 : isolate->factory()->NewFixedArray(capture_registers);
1277 : int32_t* last_match = global_cache.LastSuccessfulMatch();
1278 162 : for (int i = 0; i < capture_registers; i++) {
1279 126 : last_match_cache->set(i, Smi::FromInt(last_match[i]));
1280 : }
1281 : Handle<FixedArray> result_fixed_array =
1282 36 : FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1283 : // Cache the result and copy the FixedArray into a COW array.
1284 : Handle<FixedArray> copied_fixed_array =
1285 : isolate->factory()->CopyFixedArrayWithMap(
1286 36 : result_fixed_array, isolate->factory()->fixed_array_map());
1287 36 : RegExpResultsCache::Enter(
1288 : isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1289 72 : last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1290 : }
1291 186118 : return *builder.ToJSArray(result_array);
1292 : } else {
1293 13 : return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1294 : }
1295 : }
1296 :
1297 : // Legacy implementation of RegExp.prototype[Symbol.replace] which
1298 : // doesn't properly call the underlying exec method.
//
// replace_obj must not be callable (DCHECKed); it is converted to a flat
// string. For a non-global regexp a single match is replaced using
// String::GetSubstitution; for a global regexp the work is delegated to the
// StringReplaceGlobalRegExp* helpers. Returns the resulting string, or an
// empty MaybeHandle when a step reports an exception or the global helper
// does not return a string.
1299 5604 : V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1300 : Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1301 : Handle<Object> replace_obj) {
1302 : // Functional fast-paths are dispatched directly by replace builtin.
1303 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1304 : DCHECK(!replace_obj->IsCallable());
1305 :
1306 : Factory* factory = isolate->factory();
1307 :
1308 11208 : const int flags = regexp->GetFlags();
1309 5604 : const bool global = (flags & JSRegExp::kGlobal) != 0;
1310 5604 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1311 :
1312 : Handle<String> replace;
1313 11208 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1314 : Object::ToString(isolate, replace_obj), String);
1315 5604 : replace = String::Flatten(isolate, replace);
1316 :
1317 5604 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1318 :
1319 5604 : if (!global) {
1320 : // Non-global regexp search, string replace.
1321 :
// The search starts at 0 unless the regexp is sticky, in which case it starts
// at the regexp's last_index converted with ToLength.
1322 : uint32_t last_index = 0;
1323 3327 : if (sticky) {
1324 36 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1325 36 : ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1326 : Object::ToLength(isolate, last_index_obj),
1327 : String);
1328 18 : last_index = PositiveNumberToUint32(*last_index_obj);
1329 : }
1330 :
1331 : Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1332 : isolate);
1333 :
1334 : // A lastIndex exceeding the string length always returns null
1335 : // (signalling failure) in RegExpBuiltinExec, thus we can skip the call.
1336 3327 : if (last_index <= static_cast<uint32_t>(string->length())) {
1337 6618 : ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1338 : RegExpImpl::Exec(isolate, regexp, string,
1339 : last_index, last_match_info),
1340 : String);
1341 : }
1342 :
// No match: a sticky regexp gets its last_index reset to zero and the input
// string is returned unchanged.
1343 6654 : if (match_indices_obj->IsNull(isolate)) {
1344 45 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1345 27 : return string;
1346 : }
1347 :
1348 3300 : auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1349 :
1350 3300 : const int start_index = match_indices->Capture(0);
1351 3300 : const int end_index = match_indices->Capture(1);
1352 :
1353 3300 : if (sticky)
1354 0 : regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1355 :
// Result = text before the match + substituted replacement + text after it.
1356 3300 : IncrementalStringBuilder builder(isolate);
1357 3300 : builder.AppendString(factory->NewSubString(string, 0, start_index));
1358 :
// An empty replacement string contributes nothing, so the substitution step is
// skipped for it.
1359 3300 : if (replace->length() > 0) {
1360 3300 : MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1361 : Handle<String> replacement;
1362 6600 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1363 : String::GetSubstitution(isolate, &m, replace),
1364 : String);
1365 3300 : builder.AppendString(replacement);
1366 : }
1367 :
1368 : builder.AppendString(
1369 3300 : factory->NewSubString(string, end_index, string->length()));
1370 3300 : return builder.Finish();
1371 : } else {
1372 : // Global regexp search, string replace.
1373 : DCHECK(global);
1374 4554 : RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1375 : String);
1376 :
// An empty replacement uses the dedicated empty-string helper, instantiated
// for one-byte or two-byte strings depending on HasOnlyOneByteChars().
1377 2277 : if (replace->length() == 0) {
1378 0 : if (string->HasOnlyOneByteChars()) {
1379 : Object result =
1380 : StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1381 0 : isolate, string, regexp, last_match_info);
1382 0 : return handle(String::cast(result), isolate);
1383 : } else {
1384 : Object result =
1385 : StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1386 0 : isolate, string, regexp, last_match_info);
1387 0 : return handle(String::cast(result), isolate);
1388 : }
1389 : }
1390 :
1391 : Object result = StringReplaceGlobalRegExpWithString(
1392 2277 : isolate, string, regexp, replace, last_match_info);
// NOTE(review): a non-string result is presumably the exception sentinel of
// the helper; it is mapped to an empty MaybeHandle here. Confirm in
// StringReplaceGlobalRegExpWithString.
1393 2277 : if (result->IsString()) {
1394 2277 : return handle(String::cast(result), isolate);
1395 : } else {
1396 0 : return MaybeHandle<String>();
1397 : }
1398 : }
1399 :
1400 : UNREACHABLE();
1401 : }
1402 :
1403 : } // namespace
1404 :
1405 : // This is only called for StringReplaceGlobalRegExpWithFunction.
//
// Runtime function, args: (JSRegExp regexp, String subject,
// RegExpMatchInfo last_match_info, JSArray result_array). CHECKs that
// result_array has object elements and that the regexp has the global flag,
// flattens the subject, and dispatches to SearchRegExpMultiple<false> when the
// regexp has no captures and to SearchRegExpMultiple<true> otherwise.
1406 93251 : RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1407 93251 : HandleScope handles(isolate);
1408 : DCHECK_EQ(4, args.length());
1409 :
1410 186502 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1411 186502 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1412 186502 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1413 186502 : CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1414 93251 : CHECK(result_array->HasObjectElements());
1415 :
1416 93251 : subject = String::Flatten(isolate, subject);
1417 93251 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1418 :
1419 93251 : if (regexp->CaptureCount() == 0) {
1420 : return SearchRegExpMultiple<false>(isolate, subject, regexp,
1421 90272 : last_match_info, result_array);
1422 : } else {
1423 : return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1424 2979 : result_array);
1425 93251 : }
1426 : }
1427 :
// Runtime function, args: (String subject, JSRegExp regexp,
// JSReceiver replace_obj). The regexp must not be global and replace_obj must
// be callable (both DCHECKed). Executes the regexp once; without a match the
// subject is returned as is. With a match, replace_obj is called with
// (match, captures..., match index, subject[, groups object]) and the result
// is the text before the match + ToString(call result) + the text after it.
1428 9611 : RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1429 9611 : HandleScope scope(isolate);
1430 : DCHECK_EQ(3, args.length());
1431 19222 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1432 19222 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1433 19222 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1434 :
1435 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1436 : DCHECK(replace_obj->map()->is_callable());
1437 :
1438 9611 : Factory* factory = isolate->factory();
1439 9611 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1440 :
1441 9611 : const int flags = regexp->GetFlags();
1442 : DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1443 :
1444 : // TODO(jgruber): This should be an easy port to CSA with massive payback.
1445 :
// For sticky regexps the search starts at last_index (after ToLength); a value
// beyond the subject length is replaced by 0.
1446 9611 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1447 : uint32_t last_index = 0;
1448 9611 : if (sticky) {
1449 0 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1450 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1451 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1452 0 : last_index = PositiveNumberToUint32(*last_index_obj);
1453 :
1454 0 : if (last_index > static_cast<uint32_t>(subject->length())) last_index = 0;
1455 : }
1456 :
1457 : Handle<Object> match_indices_obj;
1458 19222 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1459 : isolate, match_indices_obj,
1460 : RegExpImpl::Exec(isolate, regexp, subject, last_index, last_match_info));
1461 :
// No match: a sticky regexp gets its last_index reset to zero and the subject
// is returned unchanged.
1462 19204 : if (match_indices_obj->IsNull(isolate)) {
1463 9320 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1464 : return *subject;
1465 : }
1466 :
1467 : Handle<RegExpMatchInfo> match_indices =
1468 282 : Handle<RegExpMatchInfo>::cast(match_indices_obj);
1469 :
1470 282 : const int index = match_indices->Capture(0);
1471 282 : const int end_of_match = match_indices->Capture(1);
1472 :
1473 282 : if (sticky)
1474 0 : regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1475 :
1476 282 : IncrementalStringBuilder builder(isolate);
1477 282 : builder.AppendString(factory->NewSubString(subject, 0, index));
1478 :
1479 : // Compute the parameter list consisting of the match, captures, index,
1480 : // and subject for the replace function invocation. If the RegExp contains
1481 : // named captures, they are also passed as the last argument.
1482 :
1483 : // The number of captures plus one for the match.
1484 282 : const int m = match_indices->NumberOfCaptureRegisters() / 2;
1485 :
1486 : bool has_named_captures = false;
1487 : Handle<FixedArray> capture_map;
1488 282 : if (m > 1) {
1489 : // The existence of capture groups implies IRREGEXP kind.
1490 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1491 :
1492 108 : Object maybe_capture_map = regexp->CaptureNameMap();
1493 216 : if (maybe_capture_map->IsFixedArray()) {
1494 : has_named_captures = true;
1495 18 : capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1496 : }
1497 : }
1498 :
// GetArgcForReplaceCallable signals failure with -1 (as uint32_t), which is
// reported as a kTooManyArguments RangeError.
1499 282 : const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1500 282 : if (argc == static_cast<uint32_t>(-1)) {
1501 0 : THROW_NEW_ERROR_RETURN_FAILURE(
1502 : isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1503 : }
1504 564 : ScopedVector<Handle<Object>> argv(argc);
1505 :
// argv[0] is the match and argv[1..m-1] the captures; a capture for which
// GenericCaptureGetter reports ok == false is passed as undefined.
1506 : int cursor = 0;
1507 435 : for (int j = 0; j < m; j++) {
1508 : bool ok;
1509 : Handle<String> capture =
1510 435 : RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1511 435 : if (ok) {
1512 363 : argv[cursor++] = capture;
1513 : } else {
1514 72 : argv[cursor++] = factory->undefined_value();
1515 : }
1516 : }
1517 :
1518 564 : argv[cursor++] = handle(Smi::FromInt(index), isolate);
1519 282 : argv[cursor++] = subject;
1520 :
// The callback reads capture ix straight from argv[ix], relying on the layout
// established by the loop above.
1521 282 : if (has_named_captures) {
1522 18 : argv[cursor++] = ConstructNamedCaptureGroupsObject(
1523 90 : isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1524 : }
1525 :
1526 : DCHECK_EQ(cursor, argc);
1527 :
// Invoke the replace function with undefined as the receiver.
1528 : Handle<Object> replacement_obj;
1529 846 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1530 : isolate, replacement_obj,
1531 : Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1532 : argv.start()));
1533 :
1534 : Handle<String> replacement;
1535 564 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1536 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1537 :
1538 282 : builder.AppendString(replacement);
1539 : builder.AppendString(
1540 282 : factory->NewSubString(subject, end_of_match, subject->length()));
1541 :
1542 10175 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1543 : }
1544 :
1545 : namespace {
1546 :
// Converts object to a uint32 stored in *out. An undefined object yields
// kMaxUInt32; anything else goes through Object::ToNumber followed by
// NumberToUint32. On success the input object itself is returned, so the
// return value only distinguishes success from an exception raised by
// ToNumber.
1547 278 : V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1548 : Handle<Object> object,
1549 : uint32_t* out) {
1550 556 : if (object->IsUndefined(isolate)) {
1551 89 : *out = kMaxUInt32;
1552 89 : return object;
1553 : }
1554 :
1555 : Handle<Object> number;
1556 378 : ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1557 : Object);
1558 189 : *out = NumberToUint32(*number);
1559 189 : return object;
1560 : }
1561 :
// Builds a JSArray from elems after passing it through
// FixedArray::ShrinkOrEmpty with num_elems as the requested length.
1562 224 : Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1563 : Handle<FixedArray> elems,
1564 : int num_elems) {
1565 : return isolate->factory()->NewJSArrayWithElements(
1566 448 : FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1567 : }
1568 :
1569 : } // namespace
1570 :
1571 : // Slow path for:
1572 : // ES#sec-regexp.prototype-@@split
1573 : // RegExp.prototype [ @@split ] ( string, limit )
// Runtime function, args: (JSReceiver recv, String string, Object limit_obj).
// Constructs a splitter from the species constructor of recv with the sticky
// flag added to its flags, then repeatedly executes it over string and
// collects the pieces between matches, plus the captures of each match, into
// a JSArray holding at most limit elements.
1574 278 : RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1575 278 : HandleScope scope(isolate);
1576 : DCHECK_EQ(3, args.length());
1577 :
1578 556 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1579 556 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1580 278 : CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1581 :
1582 278 : Factory* factory = isolate->factory();
1583 :
// The constructor for the splitter comes from SpeciesConstructor, which is
// given the isolate's regexp function as its third argument.
1584 278 : Handle<JSFunction> regexp_fun = isolate->regexp_function();
1585 : Handle<Object> ctor;
1586 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1587 : isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1588 :
// Read the flags property of recv and convert it to a string.
1589 : Handle<Object> flags_obj;
1590 834 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1591 : isolate, flags_obj,
1592 : JSObject::GetProperty(isolate, recv, factory->flags_string()));
1593 :
1594 : Handle<String> flags;
1595 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1596 : Object::ToString(isolate, flags_obj));
1597 :
// The unicode and sticky settings are derived by searching the flags string
// for the characters u and y respectively.
1598 278 : Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1599 278 : const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1600 :
1601 278 : Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1602 278 : const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1603 :
// The splitter always gets the y flag: it is appended when not already there.
1604 : Handle<String> new_flags = flags;
1605 278 : if (!sticky) {
1606 484 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1607 : factory->NewConsString(flags, y_str));
1608 : }
1609 :
// Construct the splitter by invoking ctor with the arguments (recv, new_flags).
1610 : Handle<JSReceiver> splitter;
1611 : {
1612 : const int argc = 2;
1613 :
1614 278 : ScopedVector<Handle<Object>> argv(argc);
1615 278 : argv[0] = recv;
1616 278 : argv[1] = new_flags;
1617 :
1618 : Handle<Object> splitter_obj;
1619 556 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1620 : isolate, splitter_obj,
1621 : Execution::New(isolate, ctor, argc, argv.start()));
1622 :
1623 278 : splitter = Handle<JSReceiver>::cast(splitter_obj);
1624 : }
1625 :
1626 : uint32_t limit;
1627 278 : RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1628 :
1629 278 : const uint32_t length = string->length();
1630 :
// A limit of zero yields an empty array.
1631 323 : if (limit == 0) return *factory->NewJSArray(0);
1632 :
// Empty input string: the result is an empty array when the splitter matches
// and a one-element array holding the string itself otherwise.
1633 233 : if (length == 0) {
1634 : Handle<Object> result;
1635 27 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1636 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1637 : factory->undefined_value()));
1638 :
1639 18 : if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1640 :
1641 9 : Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1642 18 : elems->set(0, *string);
1643 18 : return *factory->NewJSArrayWithElements(elems);
1644 : }
1645 :
1646 : static const int kInitialArraySize = 8;
1647 224 : Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1648 : uint32_t num_elems = 0;
1649 :
// string_index is the position at which the next match attempt starts;
// prev_string_index is where the piece currently being accumulated begins.
1650 : uint32_t string_index = 0;
1651 : uint32_t prev_string_index = 0;
1652 1103 : while (string_index < length) {
// Attempt a match with the splitter's lastIndex set to string_index.
1653 969 : RETURN_FAILURE_ON_EXCEPTION(
1654 : isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1655 :
1656 : Handle<Object> result;
1657 2907 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1658 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1659 : factory->undefined_value()));
1660 :
// No match at this position: move on to the index computed by
// AdvanceStringIndex, which receives the unicode setting.
1661 1938 : if (result->IsNull(isolate)) {
1662 : string_index = static_cast<uint32_t>(
1663 493 : RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1664 : continue;
1665 : }
1666 :
1667 : Handle<Object> last_index_obj;
1668 952 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1669 : isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1670 :
1671 952 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1672 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1673 :
// end is the splitter's lastIndex after the match, clamped to the string
// length. A match ending exactly at prev_string_index is skipped.
1674 : const uint32_t end =
1675 476 : std::min(PositiveNumberToUint32(*last_index_obj), length);
1676 476 : if (end == prev_string_index) {
1677 : string_index = static_cast<uint32_t>(
1678 63 : RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1679 : continue;
1680 : }
1681 :
// Emit the piece [prev_string_index, string_index) that precedes the match and
// return early once limit elements have been collected.
1682 : {
1683 : Handle<String> substr =
1684 413 : factory->NewSubString(string, prev_string_index, string_index);
1685 413 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1686 413 : if (num_elems == limit) {
1687 180 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1688 : }
1689 : }
1690 :
1691 : prev_string_index = end;
1692 :
// The number of entries in the match result is read from its length property
// and converted with ToLength.
1693 : Handle<Object> num_captures_obj;
1694 969 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1695 : isolate, num_captures_obj,
1696 : Object::GetProperty(isolate, result,
1697 : isolate->factory()->length_string()));
1698 :
1699 646 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1700 : isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1701 323 : const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1702 :
// Append elements 1..num_captures-1 of the match result (element 0 is not
// appended), again returning early when limit is reached.
1703 0 : for (uint32_t i = 1; i < num_captures; i++) {
1704 : Handle<Object> capture;
1705 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1706 : isolate, capture, Object::GetElement(isolate, result, i));
1707 0 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1708 0 : if (num_elems == limit) {
1709 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1710 : }
1711 : }
1712 :
// Continue searching from the end of this match.
1713 : string_index = prev_string_index;
1714 : }
1715 :
// Emit the remaining tail [prev_string_index, length) as the final element.
1716 : {
1717 : Handle<String> substr =
1718 134 : factory->NewSubString(string, prev_string_index, length);
1719 134 : elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1720 : }
1721 :
1722 268 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1723 : }
1724 :
1725 : // Slow path for:
1726 : // ES#sec-regexp.prototype-@@replace
1727 : // RegExp.prototype [ @@replace ] ( string, replaceValue )
1728 6720 : RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1729 6162 : HandleScope scope(isolate);
1730 : DCHECK_EQ(3, args.length());
1731 :
1732 12324 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1733 12324 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1734 6162 : Handle<Object> replace_obj = args.at(2);
1735 :
1736 6162 : Factory* factory = isolate->factory();
1737 :
1738 6162 : string = String::Flatten(isolate, string);
1739 :
1740 12324 : const bool functional_replace = replace_obj->IsCallable();
1741 :
1742 : // Fast-path for unmodified JSRegExps (and non-functional replace).
1743 6162 : if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1744 : // We should never get here with functional replace because unmodified
1745 : // regexp and functional replace should be fully handled in CSA code.
1746 5604 : CHECK(!functional_replace);
1747 11208 : RETURN_RESULT_OR_FAILURE(
1748 : isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1749 : replace_obj));
1750 : }
1751 :
1752 558 : const uint32_t length = string->length();
1753 :
1754 : Handle<String> replace;
1755 558 : if (!functional_replace) {
1756 990 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1757 : Object::ToString(isolate, replace_obj));
1758 : }
1759 :
1760 : Handle<Object> global_obj;
1761 1674 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1762 : isolate, global_obj,
1763 : JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1764 558 : const bool global = global_obj->BooleanValue(isolate);
1765 :
1766 : bool unicode = false;
1767 558 : if (global) {
1768 : Handle<Object> unicode_obj;
1769 918 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1770 : isolate, unicode_obj,
1771 : JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1772 306 : unicode = unicode_obj->BooleanValue(isolate);
1773 :
1774 306 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1775 : RegExpUtils::SetLastIndex(isolate, recv, 0));
1776 : }
1777 :
1778 1116 : Zone zone(isolate->allocator(), ZONE_NAME);
1779 1116 : ZoneVector<Handle<Object>> results(&zone);
1780 :
1781 : while (true) {
1782 : Handle<Object> result;
1783 2700 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1784 : isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1785 : factory->undefined_value()));
1786 :
1787 1782 : if (result->IsNull(isolate)) break;
1788 :
1789 540 : results.push_back(result);
1790 540 : if (!global) break;
1791 :
1792 : Handle<Object> match_obj;
1793 684 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1794 : Object::GetElement(isolate, result, 0));
1795 :
1796 : Handle<String> match;
1797 684 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1798 : Object::ToString(isolate, match_obj));
1799 :
1800 342 : if (match->length() == 0) {
1801 90 : RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1802 : isolate, recv, string, unicode));
1803 : }
1804 : }
1805 :
1806 : // TODO(jgruber): Look into ReplacementStringBuilder instead.
1807 549 : IncrementalStringBuilder builder(isolate);
1808 : uint32_t next_source_position = 0;
1809 :
1810 1080 : for (const auto& result : results) {
1811 540 : HandleScope handle_scope(isolate);
1812 : Handle<Object> captures_length_obj;
1813 1620 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1814 : isolate, captures_length_obj,
1815 : Object::GetProperty(isolate, result, factory->length_string()));
1816 :
1817 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1818 : isolate, captures_length_obj,
1819 : Object::ToLength(isolate, captures_length_obj));
1820 : const uint32_t captures_length =
1821 540 : PositiveNumberToUint32(*captures_length_obj);
1822 :
1823 : Handle<Object> match_obj;
1824 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1825 : Object::GetElement(isolate, result, 0));
1826 :
1827 : Handle<String> match;
1828 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1829 : Object::ToString(isolate, match_obj));
1830 :
1831 540 : const int match_length = match->length();
1832 :
1833 : Handle<Object> position_obj;
1834 1620 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1835 : isolate, position_obj,
1836 : Object::GetProperty(isolate, result, factory->index_string()));
1837 :
1838 1080 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1839 : isolate, position_obj, Object::ToInteger(isolate, position_obj));
1840 : const uint32_t position =
1841 540 : std::min(PositiveNumberToUint32(*position_obj), length);
1842 :
1843 : // Do not reserve capacity since captures_length is user-controlled.
1844 1071 : ZoneVector<Handle<Object>> captures(&zone);
1845 :
1846 1181043 : for (uint32_t n = 0; n < captures_length; n++) {
1847 : Handle<Object> capture;
1848 2362086 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1849 : isolate, capture, Object::GetElement(isolate, result, n));
1850 :
1851 2362086 : if (!capture->IsUndefined(isolate)) {
1852 2214 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1853 : Object::ToString(isolate, capture));
1854 : }
1855 1181043 : captures.push_back(capture);
1856 : }
1857 :
1858 540 : Handle<Object> groups_obj = isolate->factory()->undefined_value();
1859 1620 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1860 : isolate, groups_obj,
1861 : Object::GetProperty(isolate, result, factory->groups_string()));
1862 :
1863 1080 : const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1864 :
1865 : Handle<String> replacement;
1866 540 : if (functional_replace) {
1867 : const uint32_t argc =
1868 99 : GetArgcForReplaceCallable(captures_length, has_named_captures);
1869 99 : if (argc == static_cast<uint32_t>(-1)) {
1870 18 : THROW_NEW_ERROR_RETURN_FAILURE(
1871 : isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1872 : }
1873 :
1874 90 : ScopedVector<Handle<Object>> argv(argc);
1875 :
1876 : int cursor = 0;
1877 234 : for (uint32_t j = 0; j < captures_length; j++) {
1878 234 : argv[cursor++] = captures[j];
1879 : }
1880 :
1881 180 : argv[cursor++] = handle(Smi::FromInt(position), isolate);
1882 90 : argv[cursor++] = string;
1883 90 : if (has_named_captures) argv[cursor++] = groups_obj;
1884 :
1885 : DCHECK_EQ(cursor, argc);
1886 :
1887 : Handle<Object> replacement_obj;
1888 270 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1889 : isolate, replacement_obj,
1890 : Execution::Call(isolate, replace_obj, factory->undefined_value(),
1891 : argc, argv.start()));
1892 :
1893 180 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1894 90 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1895 : } else {
1896 : DCHECK(!functional_replace);
1897 882 : if (!groups_obj->IsUndefined(isolate)) {
1898 558 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1899 : isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1900 : }
1901 : VectorBackedMatch m(isolate, string, match, position, &captures,
1902 441 : groups_obj);
1903 882 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1904 441 : isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1905 : }
1906 :
1907 531 : if (position >= next_source_position) {
1908 : builder.AppendString(
1909 531 : factory->NewSubString(string, next_source_position, position));
1910 531 : builder.AppendString(replacement);
1911 :
1912 531 : next_source_position = position + match_length;
1913 : }
1914 531 : }
1915 :
1916 540 : if (next_source_position < length) {
1917 : builder.AppendString(
1918 324 : factory->NewSubString(string, next_source_position, length));
1919 : }
1920 :
1921 7242 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1922 : }
1923 :
1924 379521 : RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1925 379521 : HandleScope scope(isolate);
1926 : DCHECK_EQ(3, args.length());
1927 : // TODO(pwong): To follow the spec more closely and simplify calling code,
1928 : // this could handle the canonicalization of pattern and flags. See
1929 : // https://tc39.github.io/ecma262/#sec-regexpinitialize
1930 759042 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1931 759042 : CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1932 759042 : CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1933 :
1934 379521 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1935 : JSRegExp::Initialize(regexp, source, flags));
1936 :
1937 379521 : return *regexp;
1938 : }
1939 :
1940 0 : RUNTIME_FUNCTION(Runtime_IsRegExp) {
1941 : SealHandleScope shs(isolate);
1942 : DCHECK_EQ(1, args.length());
1943 0 : CONVERT_ARG_CHECKED(Object, obj, 0);
1944 0 : return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1945 : }
1946 :
1947 : } // namespace internal
1948 183867 : } // namespace v8
|