Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/runtime/runtime-utils.h"
6 :
7 : #include <functional>
8 :
9 : #include "src/arguments.h"
10 : #include "src/conversions-inl.h"
11 : #include "src/isolate-inl.h"
12 : #include "src/messages.h"
13 : #include "src/regexp/jsregexp-inl.h"
14 : #include "src/regexp/jsregexp.h"
15 : #include "src/regexp/regexp-utils.h"
16 : #include "src/string-builder.h"
17 : #include "src/string-search.h"
18 :
19 : namespace v8 {
20 : namespace internal {
21 :
22 : namespace {
23 :
24 : // Looks up the capture of the given name. Returns the (1-based) numbered
25 : // capture index or -1 on failure.
26 108 : int LookupNamedCapture(std::function<bool(String*)> name_matches,
27 : FixedArray* capture_name_map) {
28 : // TODO(jgruber): Sort capture_name_map and do binary search via
29 : // internalized strings.
30 :
31 : int maybe_capture_index = -1;
32 108 : const int named_capture_count = capture_name_map->length() >> 1;
33 324 : for (int j = 0; j < named_capture_count; j++) {
34 : // The format of {capture_name_map} is documented at
35 : // JSRegExp::kIrregexpCaptureNameMapIndex.
36 270 : const int name_ix = j * 2;
37 270 : const int index_ix = j * 2 + 1;
38 :
39 : String* capture_name = String::cast(capture_name_map->get(name_ix));
40 270 : if (!name_matches(capture_name)) continue;
41 :
42 : maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
43 54 : break;
44 : }
45 :
46 108 : return maybe_capture_index;
47 : }
48 :
49 : } // namespace
50 :
51 : class CompiledReplacement {
52 : public:
53 68627 : explicit CompiledReplacement(Zone* zone)
54 68627 : : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
55 :
56 : // Return whether the replacement is simple.
57 : bool Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
58 : int capture_count, int subject_length);
59 :
60 : // Use Apply only if Compile returned false.
61 : void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
62 : int32_t* match);
63 :
64 : // Number of distinct parts of the replacement pattern.
65 17286 : int parts() { return parts_.length(); }
66 :
67 : Zone* zone() const { return zone_; }
68 :
69 : private:
70 : enum PartType {
71 : SUBJECT_PREFIX = 1,
72 : SUBJECT_SUFFIX,
73 : SUBJECT_CAPTURE,
74 : REPLACEMENT_SUBSTRING,
75 : REPLACEMENT_STRING,
76 : EMPTY_REPLACEMENT,
77 : NUMBER_OF_PART_TYPES
78 : };
79 :
80 : struct ReplacementPart {
81 : static inline ReplacementPart SubjectMatch() {
82 : return ReplacementPart(SUBJECT_CAPTURE, 0);
83 : }
84 : static inline ReplacementPart SubjectCapture(int capture_index) {
85 : return ReplacementPart(SUBJECT_CAPTURE, capture_index);
86 : }
87 : static inline ReplacementPart SubjectPrefix() {
88 : return ReplacementPart(SUBJECT_PREFIX, 0);
89 : }
90 : static inline ReplacementPart SubjectSuffix(int subject_length) {
91 : return ReplacementPart(SUBJECT_SUFFIX, subject_length);
92 : }
93 : static inline ReplacementPart ReplacementString() {
94 : return ReplacementPart(REPLACEMENT_STRING, 0);
95 : }
96 : static inline ReplacementPart EmptyReplacement() {
97 : return ReplacementPart(EMPTY_REPLACEMENT, 0);
98 : }
99 : static inline ReplacementPart ReplacementSubString(int from, int to) {
100 : DCHECK_LE(0, from);
101 : DCHECK_GT(to, from);
102 709 : return ReplacementPart(-from, to);
103 : }
104 :
105 : // If tag <= 0 then it is the negation of a start index of a substring of
106 : // the replacement pattern, otherwise it's a value from PartType.
107 : ReplacementPart(int tag, int data) : tag(tag), data(data) {
108 : // Must be non-positive or a PartType value.
109 : DCHECK(tag < NUMBER_OF_PART_TYPES);
110 : }
111 : // Either a value of PartType or a non-positive number that is
112 : // the negation of an index into the replacement string.
113 : int tag;
114 : // The data value's interpretation depends on the value of tag:
115 : // tag == SUBJECT_PREFIX ||
116 : // tag == SUBJECT_SUFFIX: data is unused.
117 : // tag == SUBJECT_CAPTURE: data is the number of the capture.
118 : // tag == REPLACEMENT_SUBSTRING ||
119 : // tag == REPLACEMENT_STRING: data is index into array of substrings
120 : // of the replacement string.
121 : // tag == EMPTY_REPLACEMENT: data is unused.
122 : // tag <= 0: Temporary representation of the substring of the replacement
123 : // string ranging over -tag .. data.
124 : // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
125 : // substring objects.
126 : int data;
127 : };
128 :
129 : template <typename Char>
130 68627 : bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
131 : Vector<Char> characters,
132 : FixedArray* capture_name_map, int capture_count,
133 : int subject_length, Zone* zone) {
134 : // Equivalent to String::GetSubstitution, except that this method converts
135 : // the replacement string into an internal representation that avoids
136 : // repeated parsing when used repeatedly.
137 : DCHECK_IMPLIES(capture_name_map != nullptr,
138 : FLAG_harmony_regexp_named_captures);
139 :
140 68627 : int length = characters.length();
141 : int last = 0;
142 345552840 : for (int i = 0; i < length; i++) {
143 690968446 : Char c = characters[i];
144 345484223 : if (c == '$') {
145 2697 : int next_index = i + 1;
146 2697 : if (next_index == length) { // No next character!
147 : break;
148 : }
149 5374 : Char c2 = characters[next_index];
150 2687 : switch (c2) {
151 : case '$':
152 80 : if (i > last) {
153 : // There is a substring before. Include the first "$".
154 50 : parts->Add(
155 : ReplacementPart::ReplacementSubString(last, next_index),
156 100 : zone);
157 50 : last = next_index + 1; // Continue after the second "$".
158 : } else {
159 : // Let the next substring start with the second "$".
160 : last = next_index;
161 : }
162 : i = next_index;
163 80 : break;
164 : case '`':
165 20 : if (i > last) {
166 20 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
167 : }
168 20 : parts->Add(ReplacementPart::SubjectPrefix(), zone);
169 : i = next_index;
170 20 : last = i + 1;
171 20 : break;
172 : case '\'':
173 20 : if (i > last) {
174 20 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
175 : }
176 20 : parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
177 : i = next_index;
178 20 : last = i + 1;
179 20 : break;
180 : case '&':
181 20 : if (i > last) {
182 20 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
183 : }
184 20 : parts->Add(ReplacementPart::SubjectMatch(), zone);
185 : i = next_index;
186 20 : last = i + 1;
187 20 : break;
188 : case '0':
189 : case '1':
190 : case '2':
191 : case '3':
192 : case '4':
193 : case '5':
194 : case '6':
195 : case '7':
196 : case '8':
197 : case '9': {
198 2465 : int capture_ref = c2 - '0';
199 2465 : if (capture_ref > capture_count) {
200 : i = next_index;
201 : continue;
202 : }
203 1685 : int second_digit_index = next_index + 1;
204 1685 : if (second_digit_index < length) {
205 : // Peek ahead to see if we have two digits.
206 3076 : Char c3 = characters[second_digit_index];
207 1538 : if ('0' <= c3 && c3 <= '9') { // Double digits.
208 1480 : int double_digit_ref = capture_ref * 10 + c3 - '0';
209 1480 : if (double_digit_ref <= capture_count) {
210 : next_index = second_digit_index;
211 : capture_ref = double_digit_ref;
212 : }
213 : }
214 : }
215 1685 : if (capture_ref > 0) {
216 1645 : if (i > last) {
217 69 : parts->Add(ReplacementPart::ReplacementSubString(last, i),
218 138 : zone);
219 : }
220 : DCHECK(capture_ref <= capture_count);
221 1645 : parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
222 1645 : last = next_index + 1;
223 : }
224 : i = next_index;
225 1685 : break;
226 : }
227 : case '<': {
228 72 : if (capture_name_map == nullptr) {
229 : i = next_index;
230 : break;
231 : }
232 :
233 : // Scan until the next '>', and let the enclosed substring be the
234 : // groupName.
235 :
236 72 : const int name_start_index = next_index + 1;
237 : int closing_bracket_index = -1;
238 306 : for (int j = name_start_index; j < length; j++) {
239 576 : if (characters[j] == '>') {
240 : closing_bracket_index = j;
241 : break;
242 : }
243 : }
244 :
245 : // If no closing bracket is found, '$<' is treated as a string
246 : // literal.
247 72 : if (closing_bracket_index == -1) {
248 : i = next_index;
249 : break;
250 : }
251 :
252 : Vector<Char> requested_name =
253 54 : characters.SubVector(name_start_index, closing_bracket_index);
254 :
255 : // Let capture be ? Get(namedCaptures, groupName).
256 :
257 : const int capture_index = LookupNamedCapture(
258 : [=](String* capture_name) {
259 : return capture_name->IsEqualTo(requested_name);
260 135 : },
261 108 : capture_name_map);
262 :
263 : // If capture is undefined or does not exist, replace the text
264 : // through the following '>' with the empty string.
265 : // Otherwise, replace the text through the following '>' with
266 : // ? ToString(capture).
267 :
268 : DCHECK(capture_index == -1 ||
269 : (1 <= capture_index && capture_index <= capture_count));
270 :
271 54 : if (i > last) {
272 0 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
273 : }
274 54 : parts->Add((capture_index == -1)
275 : ? ReplacementPart::EmptyReplacement()
276 : : ReplacementPart::SubjectCapture(capture_index),
277 108 : zone);
278 54 : last = closing_bracket_index + 1;
279 : i = closing_bracket_index;
280 54 : break;
281 : }
282 : default:
283 : i = next_index;
284 : break;
285 : }
286 : }
287 : }
288 68627 : if (length > last) {
289 67455 : if (last == 0) {
290 : // Replacement is simple. Do not use Apply to do the replacement.
291 : return true;
292 : } else {
293 530 : parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
294 : }
295 : }
296 : return false;
297 : }
298 :
299 : ZoneList<ReplacementPart> parts_;
300 : ZoneList<Handle<String> > replacement_substrings_;
301 : Zone* zone_;
302 : };
303 :
304 68627 : bool CompiledReplacement::Compile(Handle<JSRegExp> regexp,
305 : Handle<String> replacement, int capture_count,
306 69336 : int subject_length) {
307 : {
308 : DisallowHeapAllocation no_gc;
309 68627 : String::FlatContent content = replacement->GetFlatContent();
310 : DCHECK(content.IsFlat());
311 :
312 : FixedArray* capture_name_map = nullptr;
313 68627 : if (capture_count > 0) {
314 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
315 : Object* maybe_capture_name_map = regexp->CaptureNameMap();
316 1885 : if (maybe_capture_name_map->IsFixedArray()) {
317 : DCHECK(FLAG_harmony_regexp_named_captures);
318 : capture_name_map = FixedArray::cast(maybe_capture_name_map);
319 : }
320 : }
321 :
322 : bool simple;
323 68627 : if (content.IsOneByte()) {
324 : simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
325 : capture_name_map, capture_count,
326 72428 : subject_length, zone());
327 : } else {
328 : DCHECK(content.IsTwoByte());
329 : simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
330 : capture_name_map, capture_count,
331 1787 : subject_length, zone());
332 : }
333 68627 : if (simple) return true;
334 : }
335 :
336 : Isolate* isolate = replacement->GetIsolate();
337 : // Find substrings of replacement string and create them as String objects.
338 : int substring_index = 0;
339 4170 : for (int i = 0, n = parts_.length(); i < n; i++) {
340 2468 : int tag = parts_[i].tag;
341 2468 : if (tag <= 0) { // A replacement string slice.
342 709 : int from = -tag;
343 709 : int to = parts_[i].data;
344 : replacement_substrings_.Add(
345 709 : isolate->factory()->NewSubString(replacement, from, to), zone());
346 709 : parts_[i].tag = REPLACEMENT_SUBSTRING;
347 709 : parts_[i].data = substring_index;
348 709 : substring_index++;
349 1759 : } else if (tag == REPLACEMENT_STRING) {
350 0 : replacement_substrings_.Add(replacement, zone());
351 0 : parts_[i].data = substring_index;
352 0 : substring_index++;
353 : }
354 : }
355 : return false;
356 : }
357 :
358 :
359 2004 : void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
360 : int match_from, int match_to, int32_t* match) {
361 : DCHECK_LT(0, parts_.length());
362 5260 : for (int i = 0, n = parts_.length(); i < n; i++) {
363 3256 : ReplacementPart part = parts_[i];
364 3256 : switch (part.tag) {
365 : case SUBJECT_PREFIX:
366 50 : if (match_from > 0) builder->AddSubjectSlice(0, match_from);
367 : break;
368 : case SUBJECT_SUFFIX: {
369 : int subject_length = part.data;
370 50 : if (match_to < subject_length) {
371 40 : builder->AddSubjectSlice(match_to, subject_length);
372 : }
373 : break;
374 : }
375 : case SUBJECT_CAPTURE: {
376 : int capture = part.data;
377 1944 : int from = match[capture * 2];
378 1944 : int to = match[capture * 2 + 1];
379 1944 : if (from >= 0 && to > from) {
380 1856 : builder->AddSubjectSlice(from, to);
381 : }
382 : break;
383 : }
384 : case REPLACEMENT_SUBSTRING:
385 : case REPLACEMENT_STRING:
386 2316 : builder->AddString(replacement_substrings_[part.data]);
387 1158 : break;
388 : case EMPTY_REPLACEMENT:
389 : break;
390 : default:
391 0 : UNREACHABLE();
392 : }
393 : }
394 2004 : }
395 :
396 999289 : void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
397 : std::vector<int>* indices, unsigned int limit) {
398 : DCHECK_LT(0, limit);
399 : // Collect indices of pattern in subject using memchr.
400 : // Stop after finding at most limit values.
401 999289 : const uint8_t* subject_start = subject.start();
402 1998578 : const uint8_t* subject_end = subject_start + subject.length();
403 : const uint8_t* pos = subject_start;
404 2693560 : while (limit > 0) {
405 : pos = reinterpret_cast<const uint8_t*>(
406 1694241 : memchr(pos, pattern, subject_end - pos));
407 2693530 : if (pos == nullptr) return;
408 1389964 : indices->push_back(static_cast<int>(pos - subject_start));
409 694982 : pos++;
410 694982 : limit--;
411 : }
412 : }
413 :
414 37753 : void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
415 : std::vector<int>* indices, unsigned int limit) {
416 : DCHECK_LT(0, limit);
417 37753 : const uc16* subject_start = subject.start();
418 75506 : const uc16* subject_end = subject_start + subject.length();
419 1359861 : for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
420 1322108 : if (*pos == pattern) {
421 25972 : indices->push_back(static_cast<int>(pos - subject_start));
422 12986 : limit--;
423 : }
424 : }
425 37753 : }
426 :
427 : template <typename SubjectChar, typename PatternChar>
428 25323 : void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
429 : Vector<const PatternChar> pattern,
430 : std::vector<int>* indices, unsigned int limit) {
431 : DCHECK_LT(0, limit);
432 : // Collect indices of pattern in subject.
433 : // Stop after finding at most limit values.
434 0 : int pattern_length = pattern.length();
435 25323 : int index = 0;
436 0 : StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
437 26744 : while (limit > 0) {
438 53488 : index = search.Search(subject, index);
439 52067 : if (index < 0) return;
440 1421 : indices->push_back(index);
441 1421 : index += pattern_length;
442 1421 : limit--;
443 : }
444 : }
445 :
446 1062365 : void FindStringIndicesDispatch(Isolate* isolate, String* subject,
447 : String* pattern, std::vector<int>* indices,
448 : unsigned int limit) {
449 : {
450 : DisallowHeapAllocation no_gc;
451 1062365 : String::FlatContent subject_content = subject->GetFlatContent();
452 1062365 : String::FlatContent pattern_content = pattern->GetFlatContent();
453 : DCHECK(subject_content.IsFlat());
454 : DCHECK(pattern_content.IsFlat());
455 1062365 : if (subject_content.IsOneByte()) {
456 1024612 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
457 1024612 : if (pattern_content.IsOneByte()) {
458 : Vector<const uint8_t> pattern_vector =
459 : pattern_content.ToOneByteVector();
460 1024612 : if (pattern_vector.length() == 1) {
461 : FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
462 999289 : limit);
463 : } else {
464 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
465 25323 : limit);
466 : }
467 : } else {
468 : FindStringIndices(isolate, subject_vector,
469 0 : pattern_content.ToUC16Vector(), indices, limit);
470 : }
471 : } else {
472 37753 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
473 37753 : if (pattern_content.IsOneByte()) {
474 : Vector<const uint8_t> pattern_vector =
475 : pattern_content.ToOneByteVector();
476 37753 : if (pattern_vector.length() == 1) {
477 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
478 37753 : limit);
479 : } else {
480 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
481 0 : limit);
482 : }
483 : } else {
484 : Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
485 0 : if (pattern_vector.length() == 1) {
486 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
487 0 : limit);
488 : } else {
489 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
490 0 : limit);
491 : }
492 : }
493 : }
494 : }
495 1062365 : }
496 :
497 : namespace {
498 38387 : std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
499 38387 : std::vector<int>* list = isolate->regexp_indices();
500 : list->clear();
501 38387 : return list;
502 : }
503 :
504 41971 : void TruncateRegexpIndicesList(Isolate* isolate) {
505 : // Same size as smallest zone segment, preserving behavior from the
506 : // runtime zone.
507 : static const int kMaxRegexpIndicesListCapacity = 8 * KB;
508 41971 : std::vector<int>* indicies = isolate->regexp_indices();
509 41971 : if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
510 : // Throw away backing storage.
511 : indicies->clear();
512 : indicies->shrink_to_fit();
513 : }
514 41971 : }
515 : } // namespace
516 :
517 : template <typename ResultSeqString>
518 1023978 : MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
519 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
520 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
521 : DCHECK(subject->IsFlat());
522 : DCHECK(replacement->IsFlat());
523 :
524 1023978 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
525 :
526 : DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
527 : String* pattern =
528 : String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
529 : int subject_len = subject->length();
530 : int pattern_len = pattern->length();
531 : int replacement_len = replacement->length();
532 :
533 1023978 : FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff);
534 :
535 2044352 : if (indices->empty()) return *subject;
536 :
537 : // Detect integer overflow.
538 : int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
539 : static_cast<int64_t>(pattern_len)) *
540 : static_cast<int64_t>(indices->size()) +
541 7208 : static_cast<int64_t>(subject_len);
542 : int result_len;
543 3604 : if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
544 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
545 : result_len = kMaxInt; // Provoke exception.
546 : } else {
547 3594 : result_len = static_cast<int>(result_len_64);
548 : }
549 3604 : if (result_len == 0) {
550 10 : return isolate->heap()->empty_string();
551 : }
552 :
553 : int subject_pos = 0;
554 : int result_pos = 0;
555 :
556 : MaybeHandle<SeqString> maybe_res;
557 : if (ResultSeqString::kHasOneByteEncoding) {
558 2806 : maybe_res = isolate->factory()->NewRawOneByteString(result_len);
559 : } else {
560 788 : maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
561 : }
562 : Handle<SeqString> untyped_res;
563 3594 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
564 : Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
565 :
566 14874 : for (int index : *indices) {
567 : // Copy non-matched subject content.
568 7706 : if (subject_pos < index) {
569 13584 : String::WriteToFlat(*subject, result->GetChars() + result_pos,
570 6792 : subject_pos, index);
571 6792 : result_pos += index - subject_pos;
572 : }
573 :
574 : // Replace match.
575 7706 : if (replacement_len > 0) {
576 14216 : String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
577 7108 : replacement_len);
578 7108 : result_pos += replacement_len;
579 : }
580 :
581 7706 : subject_pos = index + pattern_len;
582 : }
583 : // Add remaining subject content at the end.
584 3584 : if (subject_pos < subject_len) {
585 5076 : String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
586 2538 : subject_len);
587 : }
588 :
589 3584 : int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
590 3584 : RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
591 :
592 3584 : TruncateRegexpIndicesList(isolate);
593 :
594 3584 : return *result;
595 : }
596 :
597 68789 : MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
598 68627 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
599 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
600 : DCHECK(subject->IsFlat());
601 : DCHECK(replacement->IsFlat());
602 :
603 68789 : int capture_count = regexp->CaptureCount();
604 : int subject_length = subject->length();
605 :
606 68789 : JSRegExp::Type typeTag = regexp->TypeTag();
607 68789 : if (typeTag == JSRegExp::IRREGEXP) {
608 : // Ensure the RegExp is compiled so we can access the capture-name map.
609 9549 : if (RegExpImpl::IrregexpPrepare(regexp, subject) == -1) {
610 : DCHECK(isolate->has_pending_exception());
611 162 : return isolate->heap()->exception();
612 : }
613 : }
614 :
615 : // CompiledReplacement uses zone allocation.
616 68627 : Zone zone(isolate->allocator(), ZONE_NAME);
617 68627 : CompiledReplacement compiled_replacement(&zone);
618 : const bool simple_replace = compiled_replacement.Compile(
619 68627 : regexp, replacement, capture_count, subject_length);
620 :
621 : // Shortcut for simple non-regexp global replacements
622 68627 : if (typeTag == JSRegExp::ATOM && simple_replace) {
623 117161 : if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
624 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
625 56944 : isolate, subject, regexp, replacement, last_match_info);
626 : } else {
627 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
628 2206 : isolate, subject, regexp, replacement, last_match_info);
629 : }
630 : }
631 :
632 9477 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
633 9477 : if (global_cache.HasException()) return isolate->heap()->exception();
634 :
635 : int32_t* current_match = global_cache.FetchNext();
636 9477 : if (current_match == nullptr) {
637 834 : if (global_cache.HasException()) return isolate->heap()->exception();
638 777 : return *subject;
639 : }
640 :
641 : // Guessing the number of parts that the final result string is built
642 : // from. Global regexps can match any number of times, so we guess
643 : // conservatively.
644 8643 : int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
645 8643 : ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
646 :
647 : // Number of parts added by compiled replacement plus preceding
648 : // string and possibly suffix after last match. It is possible for
649 : // all components to use two elements when encoded as two smis.
650 8643 : const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
651 :
652 : int prev = 0;
653 :
654 12999110 : do {
655 : builder.EnsureCapacity(parts_added_per_loop);
656 :
657 12999110 : int start = current_match[0];
658 12999110 : int end = current_match[1];
659 :
660 12999110 : if (prev < start) {
661 180331 : builder.AddSubjectSlice(prev, start);
662 : }
663 :
664 12999110 : if (simple_replace) {
665 12997106 : builder.AddString(replacement);
666 : } else {
667 2004 : compiled_replacement.Apply(&builder, start, end, current_match);
668 : }
669 : prev = end;
670 :
671 : current_match = global_cache.FetchNext();
672 : } while (current_match != nullptr);
673 :
674 8643 : if (global_cache.HasException()) return isolate->heap()->exception();
675 :
676 8643 : if (prev < subject_length) {
677 : builder.EnsureCapacity(2);
678 7213 : builder.AddSubjectSlice(prev, subject_length);
679 : }
680 :
681 : RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
682 8643 : global_cache.LastSuccessfulMatch());
683 :
684 94537 : RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
685 : }
686 :
687 : template <typename ResultSeqString>
688 1091149 : MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
689 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
690 : Handle<RegExpMatchInfo> last_match_info) {
691 : DCHECK(subject->IsFlat());
692 :
693 : // Shortcut for simple non-regexp global replacements
694 1091149 : if (regexp->TypeTag() == JSRegExp::ATOM) {
695 964828 : Handle<String> empty_string = isolate->factory()->empty_string();
696 964828 : if (subject->IsOneByteRepresentation()) {
697 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
698 934135 : isolate, subject, regexp, empty_string, last_match_info);
699 : } else {
700 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
701 30693 : isolate, subject, regexp, empty_string, last_match_info);
702 : }
703 : }
704 :
705 126321 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
706 126321 : if (global_cache.HasException()) return isolate->heap()->exception();
707 :
708 : int32_t* current_match = global_cache.FetchNext();
709 126321 : if (current_match == nullptr) {
710 125074 : if (global_cache.HasException()) return isolate->heap()->exception();
711 125064 : return *subject;
712 : }
713 :
714 1247 : int start = current_match[0];
715 1247 : int end = current_match[1];
716 1247 : int capture_count = regexp->CaptureCount();
717 : int subject_length = subject->length();
718 :
719 1247 : int new_length = subject_length - (end - start);
720 1247 : if (new_length == 0) return isolate->heap()->empty_string();
721 :
722 : Handle<ResultSeqString> answer;
723 : if (ResultSeqString::kHasOneByteEncoding) {
724 : answer = Handle<ResultSeqString>::cast(
725 1836 : isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
726 : } else {
727 : answer = Handle<ResultSeqString>::cast(
728 494 : isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
729 : }
730 :
731 : int prev = 0;
732 : int position = 0;
733 :
734 1320733 : do {
735 1320733 : start = current_match[0];
736 1320733 : end = current_match[1];
737 1320733 : if (prev < start) {
738 : // Add substring subject[prev;start] to answer string.
739 2637270 : String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
740 1318635 : position += start - prev;
741 : }
742 : prev = end;
743 :
744 : current_match = global_cache.FetchNext();
745 : } while (current_match != nullptr);
746 :
747 1165 : if (global_cache.HasException()) return isolate->heap()->exception();
748 :
749 1165 : RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
750 : global_cache.LastSuccessfulMatch());
751 :
752 1165 : if (prev < subject_length) {
753 : // Add substring subject[prev;length] to answer string.
754 1442 : String::WriteToFlat(*subject, answer->GetChars() + position, prev,
755 721 : subject_length);
756 721 : position += subject_length - prev;
757 : }
758 :
759 1165 : if (position == 0) return isolate->heap()->empty_string();
760 :
761 : // Shorten string and fill
762 : int string_size = ResultSeqString::SizeFor(position);
763 : int allocated_string_size = ResultSeqString::SizeFor(new_length);
764 985 : int delta = allocated_string_size - string_size;
765 :
766 : answer->set_length(position);
767 1704 : if (delta == 0) return *answer;
768 :
769 266 : Address end_of_string = answer->address() + string_size;
770 266 : Heap* heap = isolate->heap();
771 :
772 : // The trimming is performed on a newly allocated object, which is on a
773 : // freshly allocated page or on an already swept page. Hence, the sweeper
774 : // thread can not get confused with the filler creation. No synchronization
775 : // needed.
776 : // TODO(hpayer): We should shrink the large object page if the size
777 : // of the object changed significantly.
778 266 : if (!heap->lo_space()->Contains(*answer)) {
779 256 : heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
780 : }
781 266 : return *answer;
782 : }
783 :
784 : namespace {
785 :
786 1157417 : Object* StringReplaceGlobalRegExpWithStringHelper(
787 : Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
788 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
789 1157417 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
790 :
791 1157417 : subject = String::Flatten(subject);
792 :
793 1157417 : if (replacement->length() == 0) {
794 1091149 : if (subject->HasOnlyOneByteChars()) {
795 : return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
796 1059966 : isolate, subject, regexp, last_match_info);
797 : } else {
798 : return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
799 31183 : isolate, subject, regexp, last_match_info);
800 : }
801 : }
802 :
803 66268 : replacement = String::Flatten(replacement);
804 :
805 : return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
806 66268 : replacement, last_match_info);
807 : }
808 :
809 : } // namespace
810 :
811 2291826 : RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
812 1145913 : HandleScope scope(isolate);
813 : DCHECK_EQ(4, args.length());
814 :
815 2291826 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
816 2291826 : CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
817 2291826 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
818 2291826 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
819 :
820 : return StringReplaceGlobalRegExpWithStringHelper(
821 1145913 : isolate, regexp, subject, replacement, last_match_info);
822 : }
823 :
824 99674 : RUNTIME_FUNCTION(Runtime_StringSplit) {
825 49837 : HandleScope handle_scope(isolate);
826 : DCHECK_EQ(3, args.length());
827 99674 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
828 99674 : CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
829 99674 : CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
830 49837 : CHECK_LT(0, limit);
831 :
832 49837 : int subject_length = subject->length();
833 49837 : int pattern_length = pattern->length();
834 49837 : CHECK_LT(0, pattern_length);
835 :
836 49837 : if (limit == 0xffffffffu) {
837 : FixedArray* last_match_cache_unused;
838 : Handle<Object> cached_answer(
839 : RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
840 : &last_match_cache_unused,
841 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
842 49797 : isolate);
843 49797 : if (*cached_answer != Smi::kZero) {
844 : // The cache FixedArray is a COW-array and can therefore be reused.
845 : Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
846 22900 : Handle<FixedArray>::cast(cached_answer));
847 : return *result;
848 : }
849 : }
850 :
851 : // The limit can be very large (0xffffffffu), but since the pattern
852 : // isn't empty, we can never create more parts than ~half the length
853 : // of the subject.
854 :
855 38387 : subject = String::Flatten(subject);
856 38387 : pattern = String::Flatten(pattern);
857 :
858 115161 : std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
859 :
860 38387 : FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
861 :
862 38387 : if (static_cast<uint32_t>(indices->size()) < limit) {
863 38357 : indices->push_back(subject_length);
864 : }
865 :
866 : // The list indices now contains the end of each part to create.
867 :
868 : // Create JSArray of substrings separated by separator.
869 38387 : int part_count = static_cast<int>(indices->size());
870 :
871 : Handle<JSArray> result =
872 : isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
873 38387 : INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
874 :
875 : DCHECK(result->HasObjectElements());
876 :
877 38387 : Handle<FixedArray> elements(FixedArray::cast(result->elements()));
878 :
879 38387 : if (part_count == 1 && indices->at(0) == subject_length) {
880 25428 : elements->set(0, *subject);
881 : } else {
882 : int part_start = 0;
883 399923 : FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
884 : int part_end = indices->at(i);
885 : Handle<String> substring =
886 : isolate->factory()->NewProperSubString(subject, part_start, part_end);
887 : elements->set(i, *substring);
888 : part_start = part_end + pattern_length;
889 : });
890 : }
891 :
892 38387 : if (limit == 0xffffffffu) {
893 38347 : if (result->HasObjectElements()) {
894 : RegExpResultsCache::Enter(isolate, subject, pattern, elements,
895 : isolate->factory()->empty_fixed_array(),
896 76694 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
897 : }
898 : }
899 :
900 38387 : TruncateRegexpIndicesList(isolate);
901 :
902 49837 : return *result;
903 : }
904 :
905 419632 : RUNTIME_FUNCTION(Runtime_RegExpExec) {
906 209816 : HandleScope scope(isolate);
907 : DCHECK_EQ(4, args.length());
908 419632 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
909 419632 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
910 419632 : CONVERT_INT32_ARG_CHECKED(index, 2);
911 419632 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
912 : // Due to the way the JS calls are constructed this must be less than the
913 : // length of a string, i.e. it is always a Smi. We check anyway for security.
914 209816 : CHECK_LE(0, index);
915 209816 : CHECK_GE(subject->length(), index);
916 209816 : isolate->counters()->regexp_entry_runtime()->Increment();
917 419632 : RETURN_RESULT_OR_FAILURE(
918 209816 : isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
919 : }
920 :
921 23008 : RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
922 11504 : HandleScope scope(isolate);
923 : DCHECK_EQ(3, args.length());
924 23008 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
925 23008 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
926 23008 : CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
927 :
928 : Handle<RegExpMatchInfo> internal_match_info =
929 11504 : isolate->regexp_internal_match_info();
930 :
931 : return StringReplaceGlobalRegExpWithStringHelper(
932 11504 : isolate, regexp, subject, replacement, internal_match_info);
933 : }
934 :
935 : namespace {
936 :
937 3540 : class MatchInfoBackedMatch : public String::Match {
938 : public:
939 3540 : MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
940 : Handle<String> subject,
941 : Handle<RegExpMatchInfo> match_info)
942 7080 : : isolate_(isolate), match_info_(match_info) {
943 3540 : subject_ = String::Flatten(subject);
944 :
945 3540 : if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
946 : Object* o = regexp->CaptureNameMap();
947 3420 : has_named_captures_ = o->IsFixedArray();
948 3420 : if (has_named_captures_) {
949 : DCHECK(FLAG_harmony_regexp_named_captures);
950 72 : capture_name_map_ = handle(FixedArray::cast(o));
951 : }
952 : } else {
953 120 : has_named_captures_ = false;
954 : }
955 3540 : }
956 :
957 20 : Handle<String> GetMatch() override {
958 20 : return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
959 : }
960 :
961 20 : Handle<String> GetPrefix() override {
962 : const int match_start = match_info_->Capture(0);
963 20 : return isolate_->factory()->NewSubString(subject_, 0, match_start);
964 : }
965 :
966 20 : Handle<String> GetSuffix() override {
967 : const int match_end = match_info_->Capture(1);
968 : return isolate_->factory()->NewSubString(subject_, match_end,
969 20 : subject_->length());
970 : }
971 :
972 135 : bool HasNamedCaptures() override { return has_named_captures_; }
973 :
974 3540 : int CaptureCount() override {
975 3540 : return match_info_->NumberOfCaptureRegisters() / 2;
976 : }
977 :
978 7844 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
979 : Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
980 7844 : isolate_, match_info_, i, capture_exists);
981 7807 : return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
982 23495 : : isolate_->factory()->empty_string();
983 : }
984 :
985 54 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
986 : CaptureState* state) override {
987 : DCHECK(has_named_captures_);
988 : const int capture_index = LookupNamedCapture(
989 135 : [=](String* capture_name) { return capture_name->Equals(*name); },
990 162 : *capture_name_map_);
991 :
992 54 : if (capture_index == -1) {
993 27 : *state = INVALID;
994 27 : return name; // Arbitrary string handle.
995 : }
996 :
997 : DCHECK(1 <= capture_index && capture_index <= CaptureCount());
998 :
999 : bool capture_exists;
1000 : Handle<String> capture_value;
1001 54 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1002 : GetCapture(capture_index, &capture_exists),
1003 : String);
1004 :
1005 27 : if (!capture_exists) {
1006 9 : *state = UNMATCHED;
1007 18 : return isolate_->factory()->empty_string();
1008 : } else {
1009 18 : *state = MATCHED;
1010 18 : return capture_value;
1011 : }
1012 : }
1013 :
1014 : private:
1015 : Isolate* isolate_;
1016 : Handle<String> subject_;
1017 : Handle<RegExpMatchInfo> match_info_;
1018 :
1019 : bool has_named_captures_;
1020 : Handle<FixedArray> capture_name_map_;
1021 : };
1022 :
1023 532 : class VectorBackedMatch : public String::Match {
1024 : public:
1025 266 : VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1026 : Handle<String> match, int match_position,
1027 : ZoneVector<Handle<Object>>* captures,
1028 : Handle<Object> groups_obj)
1029 : : isolate_(isolate),
1030 : match_(match),
1031 : match_position_(match_position),
1032 532 : captures_(captures) {
1033 266 : subject_ = String::Flatten(subject);
1034 :
1035 : DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1036 266 : has_named_captures_ = !groups_obj->IsUndefined(isolate);
1037 266 : if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1038 266 : }
1039 :
1040 0 : Handle<String> GetMatch() override { return match_; }
1041 :
1042 0 : Handle<String> GetPrefix() override {
1043 0 : return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1044 : }
1045 :
1046 0 : Handle<String> GetSuffix() override {
1047 0 : const int match_end_position = match_position_ + match_->length();
1048 : return isolate_->factory()->NewSubString(subject_, match_end_position,
1049 0 : subject_->length());
1050 : }
1051 :
1052 216 : bool HasNamedCaptures() override { return has_named_captures_; }
1053 :
1054 532 : int CaptureCount() override { return static_cast<int>(captures_->size()); }
1055 :
1056 81 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1057 162 : Handle<Object> capture_obj = captures_->at(i);
1058 162 : if (capture_obj->IsUndefined(isolate_)) {
1059 0 : *capture_exists = false;
1060 0 : return isolate_->factory()->empty_string();
1061 : }
1062 81 : *capture_exists = true;
1063 81 : return Object::ToString(isolate_, capture_obj);
1064 : }
1065 :
1066 162 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
1067 : CaptureState* state) override {
1068 : DCHECK(has_named_captures_);
1069 :
1070 : Maybe<bool> maybe_capture_exists =
1071 162 : JSReceiver::HasProperty(groups_obj_, name);
1072 162 : if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1073 :
1074 162 : if (!maybe_capture_exists.FromJust()) {
1075 81 : *state = INVALID;
1076 81 : return name; // Arbitrary string handle.
1077 : }
1078 :
1079 : Handle<Object> capture_obj;
1080 162 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1081 : Object::GetProperty(groups_obj_, name), String);
1082 162 : if (capture_obj->IsUndefined(isolate_)) {
1083 27 : *state = UNMATCHED;
1084 54 : return isolate_->factory()->empty_string();
1085 : } else {
1086 54 : *state = MATCHED;
1087 54 : return Object::ToString(isolate_, capture_obj);
1088 : }
1089 : }
1090 :
1091 : private:
1092 : Isolate* isolate_;
1093 : Handle<String> subject_;
1094 : Handle<String> match_;
1095 : const int match_position_;
1096 : ZoneVector<Handle<Object>>* captures_;
1097 :
1098 : bool has_named_captures_;
1099 : Handle<JSReceiver> groups_obj_;
1100 : };
1101 :
1102 : // Create the groups object (see also the RegExp result creation in
1103 : // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1104 72 : Handle<JSObject> ConstructNamedCaptureGroupsObject(
1105 : Isolate* isolate, Handle<FixedArray> capture_map,
1106 : std::function<Object*(int)> f_get_capture) {
1107 : DCHECK(FLAG_harmony_regexp_named_captures);
1108 72 : Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1109 :
1110 72 : const int capture_count = capture_map->length() >> 1;
1111 216 : for (int i = 0; i < capture_count; i++) {
1112 144 : const int name_ix = i * 2;
1113 144 : const int index_ix = i * 2 + 1;
1114 :
1115 : Handle<String> capture_name(String::cast(capture_map->get(name_ix)));
1116 : const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1117 : DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1118 :
1119 144 : Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1120 : DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1121 :
1122 144 : JSObject::AddProperty(groups, capture_name, capture_value, NONE);
1123 : }
1124 :
1125 72 : return groups;
1126 : }
1127 :
1128 : // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1129 : // separate last match info. See comment on that function.
1130 : template <bool has_capture>
1131 93776 : static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1132 : Handle<JSRegExp> regexp,
1133 : Handle<RegExpMatchInfo> last_match_array,
1134 : Handle<JSArray> result_array) {
1135 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1136 : DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1137 : DCHECK(subject->IsFlat());
1138 :
1139 93776 : int capture_count = regexp->CaptureCount();
1140 : int subject_length = subject->length();
1141 :
1142 : static const int kMinLengthToCache = 0x1000;
1143 :
1144 93776 : if (subject_length > kMinLengthToCache) {
1145 : FixedArray* last_match_cache;
1146 : Object* cached_answer = RegExpResultsCache::Lookup(
1147 : isolate->heap(), *subject, regexp->data(), &last_match_cache,
1148 40 : RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1149 40 : if (cached_answer->IsFixedArray()) {
1150 0 : int capture_registers = (capture_count + 1) * 2;
1151 0 : int32_t* last_match = NewArray<int32_t>(capture_registers);
1152 0 : for (int i = 0; i < capture_registers; i++) {
1153 0 : last_match[i] = Smi::ToInt(last_match_cache->get(i));
1154 : }
1155 : Handle<FixedArray> cached_fixed_array =
1156 : Handle<FixedArray>(FixedArray::cast(cached_answer));
1157 : // The cache FixedArray is a COW-array and we need to return a copy.
1158 : Handle<FixedArray> copied_fixed_array =
1159 : isolate->factory()->CopyFixedArrayWithMap(
1160 0 : cached_fixed_array, isolate->factory()->fixed_array_map());
1161 0 : JSArray::SetContent(result_array, copied_fixed_array);
1162 0 : RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1163 : last_match);
1164 : DeleteArray(last_match);
1165 : return *result_array;
1166 : }
1167 : }
1168 :
1169 93776 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1170 93776 : if (global_cache.HasException()) return isolate->heap()->exception();
1171 :
1172 : // Ensured in Runtime_RegExpExecMultiple.
1173 : DCHECK(result_array->HasObjectElements());
1174 : Handle<FixedArray> result_elements(
1175 : FixedArray::cast(result_array->elements()));
1176 93638 : if (result_elements->length() < 16) {
1177 0 : result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1178 : }
1179 :
1180 : FixedArrayBuilder builder(result_elements);
1181 :
1182 : // Position to search from.
1183 : int match_start = -1;
1184 : int match_end = 0;
1185 : bool first = true;
1186 :
1187 : // Two smis before and after the match, for very long strings.
1188 : static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1189 :
1190 : while (true) {
1191 : int32_t* current_match = global_cache.FetchNext();
1192 487443 : if (current_match == nullptr) break;
1193 393805 : match_start = current_match[0];
1194 393805 : builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
1195 393805 : if (match_end < match_start) {
1196 64576 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1197 : match_start);
1198 : }
1199 393805 : match_end = current_match[1];
1200 : {
1201 : // Avoid accumulating new handles inside loop.
1202 : HandleScope temp_scope(isolate);
1203 : Handle<String> match;
1204 393805 : if (!first) {
1205 300227 : match = isolate->factory()->NewProperSubString(subject, match_start,
1206 : match_end);
1207 : } else {
1208 93578 : match =
1209 : isolate->factory()->NewSubString(subject, match_start, match_end);
1210 : first = false;
1211 : }
1212 :
1213 : if (has_capture) {
1214 : // Arguments array to replace function is match, captures, index and
1215 : // subject, i.e., 3 + capture count in total. If the RegExp contains
1216 : // named captures, they are also passed as the last argument.
1217 :
1218 : Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1219 : const bool has_named_captures = maybe_capture_map->IsFixedArray();
1220 : DCHECK_IMPLIES(has_named_captures, FLAG_harmony_regexp_named_captures);
1221 :
1222 : const int argc =
1223 191376 : has_named_captures ? 4 + capture_count : 3 + capture_count;
1224 :
1225 191376 : Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1226 : int cursor = 0;
1227 :
1228 191376 : elements->set(cursor++, *match);
1229 647248 : for (int i = 1; i <= capture_count; i++) {
1230 455872 : int start = current_match[i * 2];
1231 455872 : if (start >= 0) {
1232 455776 : int end = current_match[i * 2 + 1];
1233 : DCHECK(start <= end);
1234 : Handle<String> substring =
1235 455776 : isolate->factory()->NewSubString(subject, start, end);
1236 911552 : elements->set(cursor++, *substring);
1237 : } else {
1238 : DCHECK_GT(0, current_match[i * 2 + 1]);
1239 192 : elements->set(cursor++, isolate->heap()->undefined_value());
1240 : }
1241 : }
1242 :
1243 191376 : elements->set(cursor++, Smi::FromInt(match_start));
1244 191376 : elements->set(cursor++, *subject);
1245 :
1246 191376 : if (has_named_captures) {
1247 : Handle<FixedArray> capture_map =
1248 54 : Handle<FixedArray>::cast(maybe_capture_map);
1249 : Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1250 108 : isolate, capture_map, [=](int ix) { return elements->get(ix); });
1251 54 : elements->set(cursor++, *groups);
1252 : }
1253 :
1254 : DCHECK_EQ(cursor, argc);
1255 191376 : builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1256 : } else {
1257 202429 : builder.Add(*match);
1258 : }
1259 : }
1260 : }
1261 :
1262 93638 : if (global_cache.HasException()) return isolate->heap()->exception();
1263 :
1264 93592 : if (match_start >= 0) {
1265 : // Finished matching, with at least one match.
1266 93578 : if (match_end < subject_length) {
1267 439 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1268 : subject_length);
1269 : }
1270 :
1271 93578 : RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1272 : global_cache.LastSuccessfulMatch());
1273 :
1274 93578 : if (subject_length > kMinLengthToCache) {
1275 : // Store the last successful match into the array for caching.
1276 : // TODO(yangguo): do not expose last match to JS and simplify caching.
1277 40 : int capture_registers = (capture_count + 1) * 2;
1278 : Handle<FixedArray> last_match_cache =
1279 40 : isolate->factory()->NewFixedArray(capture_registers);
1280 : int32_t* last_match = global_cache.LastSuccessfulMatch();
1281 180 : for (int i = 0; i < capture_registers; i++) {
1282 140 : last_match_cache->set(i, Smi::FromInt(last_match[i]));
1283 : }
1284 : Handle<FixedArray> result_fixed_array = builder.array();
1285 80 : result_fixed_array->Shrink(builder.length());
1286 : // Cache the result and copy the FixedArray into a COW array.
1287 : Handle<FixedArray> copied_fixed_array =
1288 : isolate->factory()->CopyFixedArrayWithMap(
1289 40 : result_fixed_array, isolate->factory()->fixed_array_map());
1290 40 : RegExpResultsCache::Enter(
1291 : isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1292 : last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1293 : }
1294 187156 : return *builder.ToJSArray(result_array);
1295 : } else {
1296 14 : return isolate->heap()->null_value(); // No matches at all.
1297 : }
1298 : }
1299 :
1300 : // Legacy implementation of RegExp.prototype[Symbol.replace] which
1301 : // doesn't properly call the underlying exec method.
1302 6070 : MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1303 : Handle<JSRegExp> regexp,
1304 : Handle<String> string,
1305 : Handle<Object> replace_obj) {
1306 : // Functional fast-paths are dispatched directly by replace builtin.
1307 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1308 : DCHECK(!replace_obj->IsCallable());
1309 :
1310 : Factory* factory = isolate->factory();
1311 :
1312 : const int flags = regexp->GetFlags();
1313 6070 : const bool global = (flags & JSRegExp::kGlobal) != 0;
1314 6070 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1315 :
1316 : Handle<String> replace;
1317 12140 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1318 : Object::ToString(isolate, replace_obj), String);
1319 6070 : replace = String::Flatten(replace);
1320 :
1321 6070 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1322 :
1323 6070 : if (!global) {
1324 : // Non-global regexp search, string replace.
1325 :
1326 : uint32_t last_index = 0;
1327 3549 : if (sticky) {
1328 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1329 0 : ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1330 : Object::ToLength(isolate, last_index_obj),
1331 : String);
1332 0 : last_index = PositiveNumberToUint32(*last_index_obj);
1333 :
1334 0 : if (static_cast<int>(last_index) > string->length()) last_index = 0;
1335 : }
1336 :
1337 : Handle<Object> match_indices_obj;
1338 7098 : ASSIGN_RETURN_ON_EXCEPTION(
1339 : isolate, match_indices_obj,
1340 : RegExpImpl::Exec(regexp, string, last_index, last_match_info), String);
1341 :
1342 3549 : if (match_indices_obj->IsNull(isolate)) {
1343 9 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1344 9 : return string;
1345 : }
1346 :
1347 : auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1348 :
1349 : const int start_index = match_indices->Capture(0);
1350 : const int end_index = match_indices->Capture(1);
1351 :
1352 3540 : if (sticky)
1353 0 : regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1354 :
1355 3540 : IncrementalStringBuilder builder(isolate);
1356 3540 : builder.AppendString(factory->NewSubString(string, 0, start_index));
1357 :
1358 3540 : if (replace->length() > 0) {
1359 3540 : MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1360 : Handle<String> replacement;
1361 7080 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1362 : String::GetSubstitution(isolate, &m, replace),
1363 : String);
1364 3540 : builder.AppendString(replacement);
1365 : }
1366 :
1367 : builder.AppendString(
1368 3540 : factory->NewSubString(string, end_index, string->length()));
1369 3540 : return builder.Finish();
1370 : } else {
1371 : // Global regexp search, string replace.
1372 : DCHECK(global);
1373 5042 : RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1374 : String);
1375 :
1376 2521 : if (replace->length() == 0) {
1377 0 : if (string->HasOnlyOneByteChars()) {
1378 : Object* result =
1379 : StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1380 0 : isolate, string, regexp, last_match_info);
1381 0 : return handle(String::cast(result), isolate);
1382 : } else {
1383 : Object* result =
1384 : StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1385 0 : isolate, string, regexp, last_match_info);
1386 0 : return handle(String::cast(result), isolate);
1387 : }
1388 : }
1389 :
1390 : Object* result = StringReplaceGlobalRegExpWithString(
1391 2521 : isolate, string, regexp, replace, last_match_info);
1392 2521 : if (result->IsString()) {
1393 2521 : return handle(String::cast(result), isolate);
1394 : } else {
1395 0 : return MaybeHandle<String>();
1396 : }
1397 : }
1398 :
1399 : UNREACHABLE();
1400 : }
1401 :
1402 : } // namespace
1403 :
1404 : // This is only called for StringReplaceGlobalRegExpWithFunction.
1405 187552 : RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1406 93776 : HandleScope handles(isolate);
1407 : DCHECK_EQ(4, args.length());
1408 :
1409 187552 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1410 187552 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1411 187552 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1412 187552 : CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1413 93776 : CHECK(result_array->HasObjectElements());
1414 :
1415 93776 : subject = String::Flatten(subject);
1416 93776 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1417 :
1418 93776 : if (regexp->CaptureCount() == 0) {
1419 : return SearchRegExpMultiple<false>(isolate, subject, regexp,
1420 90291 : last_match_info, result_array);
1421 : } else {
1422 : return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1423 3485 : result_array);
1424 93776 : }
1425 : }
1426 :
1427 21782 : RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1428 10891 : HandleScope scope(isolate);
1429 : DCHECK_EQ(3, args.length());
1430 21782 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1431 21782 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1432 21782 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1433 :
1434 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1435 : DCHECK(replace_obj->map()->is_callable());
1436 :
1437 10891 : Factory* factory = isolate->factory();
1438 10891 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1439 :
1440 10891 : const int flags = regexp->GetFlags();
1441 : DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1442 :
1443 : // TODO(jgruber): This should be an easy port to CSA with massive payback.
1444 :
1445 10891 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1446 : uint32_t last_index = 0;
1447 10891 : if (sticky) {
1448 0 : Handle<Object> last_index_obj(regexp->last_index(), isolate);
1449 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1450 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1451 0 : last_index = PositiveNumberToUint32(*last_index_obj);
1452 :
1453 0 : if (static_cast<int>(last_index) > subject->length()) last_index = 0;
1454 : }
1455 :
1456 : Handle<Object> match_indices_obj;
1457 21782 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1458 : isolate, match_indices_obj,
1459 : RegExpImpl::Exec(regexp, subject, last_index, last_match_info));
1460 :
1461 10881 : if (match_indices_obj->IsNull(isolate)) {
1462 10588 : if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1463 : return *subject;
1464 : }
1465 :
1466 : Handle<RegExpMatchInfo> match_indices =
1467 293 : Handle<RegExpMatchInfo>::cast(match_indices_obj);
1468 :
1469 293 : const int index = match_indices->Capture(0);
1470 293 : const int end_of_match = match_indices->Capture(1);
1471 :
1472 293 : if (sticky)
1473 0 : regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1474 :
1475 293 : IncrementalStringBuilder builder(isolate);
1476 293 : builder.AppendString(factory->NewSubString(subject, 0, index));
1477 :
1478 : // Compute the parameter list consisting of the match, captures, index,
1479 : // and subject for the replace function invocation. If the RegExp contains
1480 : // named captures, they are also passed as the last argument.
1481 :
1482 : // The number of captures plus one for the match.
1483 293 : const int m = match_indices->NumberOfCaptureRegisters() / 2;
1484 :
1485 : bool has_named_captures = false;
1486 : Handle<FixedArray> capture_map;
1487 293 : if (m > 1) {
1488 : // The existence of capture groups implies IRREGEXP kind.
1489 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1490 :
1491 113 : Object* maybe_capture_map = regexp->CaptureNameMap();
1492 113 : if (maybe_capture_map->IsFixedArray()) {
1493 : has_named_captures = true;
1494 18 : capture_map = handle(FixedArray::cast(maybe_capture_map));
1495 : }
1496 : }
1497 :
1498 : DCHECK_IMPLIES(has_named_captures, FLAG_harmony_regexp_named_captures);
1499 293 : const int argc = has_named_captures ? m + 3 : m + 2;
1500 586 : ScopedVector<Handle<Object>> argv(argc);
1501 :
1502 : int cursor = 0;
1503 452 : for (int j = 0; j < m; j++) {
1504 : bool ok;
1505 : Handle<String> capture =
1506 452 : RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1507 452 : if (ok) {
1508 377 : argv[cursor++] = capture;
1509 : } else {
1510 75 : argv[cursor++] = factory->undefined_value();
1511 : }
1512 : }
1513 :
1514 586 : argv[cursor++] = handle(Smi::FromInt(index), isolate);
1515 293 : argv[cursor++] = subject;
1516 :
1517 293 : if (has_named_captures) {
1518 18 : argv[cursor++] = ConstructNamedCaptureGroupsObject(
1519 54 : isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1520 : }
1521 :
1522 : DCHECK_EQ(cursor, argc);
1523 :
1524 : Handle<Object> replacement_obj;
1525 879 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1526 : isolate, replacement_obj,
1527 : Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1528 : argv.start()));
1529 :
1530 : Handle<String> replacement;
1531 586 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1532 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1533 :
1534 293 : builder.AppendString(replacement);
1535 : builder.AppendString(
1536 293 : factory->NewSubString(subject, end_of_match, subject->length()));
1537 :
1538 11477 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1539 : }
1540 :
1541 : namespace {
1542 :
1543 80 : MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1544 : Handle<Object> object,
1545 : uint32_t* out) {
1546 80 : if (object->IsUndefined(isolate)) {
1547 40 : *out = kMaxUInt32;
1548 40 : return object;
1549 : }
1550 :
1551 : Handle<Object> number;
1552 80 : ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
1553 40 : *out = NumberToUint32(*number);
1554 40 : return object;
1555 : }
1556 :
1557 60 : Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1558 : Handle<FixedArray> elems,
1559 : int num_elems) {
1560 60 : elems->Shrink(num_elems);
1561 60 : return isolate->factory()->NewJSArrayWithElements(elems);
1562 : }
1563 :
1564 : } // namespace
1565 :
1566 : // Slow path for:
1567 : // ES#sec-regexp.prototype-@@replace
1568 : // RegExp.prototype [ @@split ] ( string, limit )
1569 160 : RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1570 80 : HandleScope scope(isolate);
1571 : DCHECK_EQ(3, args.length());
1572 :
1573 : DCHECK(args[1]->IsString());
1574 :
1575 160 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1576 160 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1577 80 : CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1578 :
1579 80 : Factory* factory = isolate->factory();
1580 :
1581 80 : Handle<JSFunction> regexp_fun = isolate->regexp_function();
1582 : Handle<Object> ctor;
1583 160 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1584 : isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1585 :
1586 : Handle<Object> flags_obj;
1587 240 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1588 : isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
1589 :
1590 : Handle<String> flags;
1591 160 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1592 : Object::ToString(isolate, flags_obj));
1593 :
1594 80 : Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1595 80 : const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1596 :
1597 80 : Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1598 80 : const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1599 :
1600 : Handle<String> new_flags = flags;
1601 80 : if (!sticky) {
1602 80 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1603 : factory->NewConsString(flags, y_str));
1604 : }
1605 :
1606 : Handle<JSReceiver> splitter;
1607 : {
1608 : const int argc = 2;
1609 :
1610 80 : ScopedVector<Handle<Object>> argv(argc);
1611 80 : argv[0] = recv;
1612 80 : argv[1] = new_flags;
1613 :
1614 : Handle<Object> splitter_obj;
1615 160 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1616 : isolate, splitter_obj,
1617 : Execution::New(isolate, ctor, argc, argv.start()));
1618 :
1619 80 : splitter = Handle<JSReceiver>::cast(splitter_obj);
1620 : }
1621 :
1622 : uint32_t limit;
1623 80 : RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1624 :
1625 80 : const uint32_t length = string->length();
1626 :
1627 90 : if (limit == 0) return *factory->NewJSArray(0);
1628 :
1629 70 : if (length == 0) {
1630 : Handle<Object> result;
1631 30 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1632 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1633 : factory->undefined_value()));
1634 :
1635 10 : if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1636 :
1637 10 : Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1638 10 : elems->set(0, *string);
1639 20 : return *factory->NewJSArrayWithElements(elems);
1640 : }
1641 :
1642 : static const int kInitialArraySize = 8;
1643 60 : Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1644 : int num_elems = 0;
1645 :
1646 : uint32_t string_index = 0;
1647 : uint32_t prev_string_index = 0;
1648 350 : while (string_index < length) {
1649 290 : RETURN_FAILURE_ON_EXCEPTION(
1650 : isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1651 :
1652 : Handle<Object> result;
1653 870 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1654 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1655 : factory->undefined_value()));
1656 :
1657 290 : if (result->IsNull(isolate)) {
1658 : string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1659 170 : string_index, unicode);
1660 : continue;
1661 : }
1662 :
1663 : Handle<Object> last_index_obj;
1664 240 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1665 : isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1666 :
1667 240 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1668 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1669 :
1670 : const uint32_t end =
1671 120 : std::min(PositiveNumberToUint32(*last_index_obj), length);
1672 120 : if (end == prev_string_index) {
1673 : string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1674 0 : string_index, unicode);
1675 : continue;
1676 : }
1677 :
1678 : {
1679 : Handle<String> substr =
1680 120 : factory->NewSubString(string, prev_string_index, string_index);
1681 120 : elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1682 120 : if (static_cast<uint32_t>(num_elems) == limit) {
1683 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1684 : }
1685 : }
1686 :
1687 : prev_string_index = end;
1688 :
1689 : Handle<Object> num_captures_obj;
1690 360 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1691 : isolate, num_captures_obj,
1692 : Object::GetProperty(result, isolate->factory()->length_string()));
1693 :
1694 240 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1695 : isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1696 120 : const int num_captures = PositiveNumberToUint32(*num_captures_obj);
1697 :
1698 120 : for (int i = 1; i < num_captures; i++) {
1699 : Handle<Object> capture;
1700 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1701 : isolate, capture, Object::GetElement(isolate, result, i));
1702 0 : elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
1703 0 : if (static_cast<uint32_t>(num_elems) == limit) {
1704 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1705 : }
1706 : }
1707 :
1708 : string_index = prev_string_index;
1709 : }
1710 :
1711 : {
1712 : Handle<String> substr =
1713 60 : factory->NewSubString(string, prev_string_index, length);
1714 60 : elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1715 : }
1716 :
1717 120 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1718 : }
1719 :
1720 : // Slow path for:
1721 : // ES#sec-regexp.prototype-@@replace
1722 : // RegExp.prototype [ @@replace ] ( string, replaceValue )
1723 13253 : RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1724 6441 : HandleScope scope(isolate);
1725 : DCHECK_EQ(3, args.length());
1726 :
1727 12882 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1728 12882 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1729 6441 : Handle<Object> replace_obj = args.at(2);
1730 :
1731 6441 : Factory* factory = isolate->factory();
1732 :
1733 6441 : string = String::Flatten(string);
1734 :
1735 : // Fast-path for unmodified JSRegExps.
1736 6441 : if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1737 12140 : RETURN_RESULT_OR_FAILURE(
1738 : isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1739 : replace_obj));
1740 : }
1741 :
1742 371 : const uint32_t length = string->length();
1743 : const bool functional_replace = replace_obj->IsCallable();
1744 :
1745 : Handle<String> replace;
1746 371 : if (!functional_replace) {
1747 652 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1748 : Object::ToString(isolate, replace_obj));
1749 : }
1750 :
1751 : Handle<Object> global_obj;
1752 1113 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1753 : isolate, global_obj,
1754 : JSReceiver::GetProperty(recv, factory->global_string()));
1755 371 : const bool global = global_obj->BooleanValue();
1756 :
1757 : bool unicode = false;
1758 371 : if (global) {
1759 : Handle<Object> unicode_obj;
1760 633 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1761 : isolate, unicode_obj,
1762 : JSReceiver::GetProperty(recv, factory->unicode_string()));
1763 211 : unicode = unicode_obj->BooleanValue();
1764 :
1765 211 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1766 : RegExpUtils::SetLastIndex(isolate, recv, 0));
1767 : }
1768 :
1769 742 : Zone zone(isolate->allocator(), ZONE_NAME);
1770 742 : ZoneVector<Handle<Object>> results(&zone);
1771 :
1772 : while (true) {
1773 : Handle<Object> result;
1774 1864 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1775 : isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1776 : factory->undefined_value()));
1777 :
1778 608 : if (result->IsNull(isolate)) break;
1779 :
1780 347 : results.push_back(result);
1781 347 : if (!global) break;
1782 :
1783 : Handle<Object> match_obj;
1784 494 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1785 : Object::GetElement(isolate, result, 0));
1786 :
1787 : Handle<String> match;
1788 494 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1789 : Object::ToString(isolate, match_obj));
1790 :
1791 247 : if (match->length() == 0) {
1792 0 : RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1793 : isolate, recv, string, unicode));
1794 : }
1795 : }
1796 :
1797 : // TODO(jgruber): Look into ReplacementStringBuilder instead.
1798 361 : IncrementalStringBuilder builder(isolate);
1799 : uint32_t next_source_position = 0;
1800 :
1801 1416 : for (const auto& result : results) {
1802 : Handle<Object> captures_length_obj;
1803 1041 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1804 : isolate, captures_length_obj,
1805 : Object::GetProperty(result, factory->length_string()));
1806 :
1807 694 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1808 : isolate, captures_length_obj,
1809 : Object::ToLength(isolate, captures_length_obj));
1810 347 : const int captures_length = PositiveNumberToUint32(*captures_length_obj);
1811 :
1812 : Handle<Object> match_obj;
1813 694 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1814 : Object::GetElement(isolate, result, 0));
1815 :
1816 : Handle<String> match;
1817 694 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1818 : Object::ToString(isolate, match_obj));
1819 :
1820 347 : const int match_length = match->length();
1821 :
1822 : Handle<Object> position_obj;
1823 1041 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1824 : isolate, position_obj,
1825 : Object::GetProperty(result, factory->index_string()));
1826 :
1827 694 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1828 : isolate, position_obj, Object::ToInteger(isolate, position_obj));
1829 : const uint32_t position =
1830 347 : std::min(PositiveNumberToUint32(*position_obj), length);
1831 :
1832 : // Do not reserve capacity since captures_length is user-controlled.
1833 347 : ZoneVector<Handle<Object>> captures(&zone);
1834 :
1835 2605 : for (int n = 0; n < captures_length; n++) {
1836 : Handle<Object> capture;
1837 2258 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1838 : isolate, capture, Object::GetElement(isolate, result, n));
1839 :
1840 1129 : if (!capture->IsUndefined(isolate)) {
1841 1736 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1842 : Object::ToString(isolate, capture));
1843 : }
1844 1129 : captures.push_back(capture);
1845 : }
1846 :
1847 347 : Handle<Object> groups_obj = isolate->factory()->undefined_value();
1848 347 : if (FLAG_harmony_regexp_named_captures) {
1849 1041 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1850 : isolate, groups_obj,
1851 : Object::GetProperty(result, factory->groups_string()));
1852 : }
1853 :
1854 347 : const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1855 : DCHECK_IMPLIES(has_named_captures, FLAG_harmony_regexp_named_captures);
1856 :
1857 : Handle<String> replacement;
1858 347 : if (functional_replace) {
1859 : const int argc =
1860 81 : has_named_captures ? captures_length + 3 : captures_length + 2;
1861 81 : ScopedVector<Handle<Object>> argv(argc);
1862 :
1863 : int cursor = 0;
1864 225 : for (int j = 0; j < captures_length; j++) {
1865 225 : argv[cursor++] = captures[j];
1866 : }
1867 :
1868 162 : argv[cursor++] = handle(Smi::FromInt(position), isolate);
1869 81 : argv[cursor++] = string;
1870 81 : if (has_named_captures) argv[cursor++] = groups_obj;
1871 :
1872 : DCHECK_EQ(cursor, argc);
1873 :
1874 : Handle<Object> replacement_obj;
1875 243 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1876 : isolate, replacement_obj,
1877 : Execution::Call(isolate, replace_obj, factory->undefined_value(),
1878 : argc, argv.start()));
1879 :
1880 162 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1881 81 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1882 : } else {
1883 : DCHECK(!functional_replace);
1884 266 : if (!groups_obj->IsUndefined(isolate)) {
1885 432 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1886 : isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
1887 : }
1888 : VectorBackedMatch m(isolate, string, match, position, &captures,
1889 266 : groups_obj);
1890 532 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1891 266 : isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1892 : }
1893 :
1894 347 : if (position >= next_source_position) {
1895 : builder.AppendString(
1896 347 : factory->NewSubString(string, next_source_position, position));
1897 347 : builder.AppendString(replacement);
1898 :
1899 347 : next_source_position = position + match_length;
1900 : }
1901 347 : }
1902 :
1903 361 : if (next_source_position < length) {
1904 : builder.AppendString(
1905 232 : factory->NewSubString(string, next_source_position, length));
1906 : }
1907 :
1908 7163 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1909 : }
1910 :
1911 0 : RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1912 : SealHandleScope shs(isolate);
1913 : DCHECK_EQ(0, args.length());
1914 0 : Object* exception = isolate->pending_exception();
1915 0 : isolate->clear_pending_exception();
1916 0 : return isolate->ReThrow(exception);
1917 : }
1918 :
1919 837120 : RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1920 418560 : HandleScope scope(isolate);
1921 : DCHECK_EQ(3, args.length());
1922 : // TODO(pwong): To follow the spec more closely and simplify calling code,
1923 : // this could handle the canonicalization of pattern and flags. See
1924 : // https://tc39.github.io/ecma262/#sec-regexpinitialize
1925 837120 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1926 837120 : CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1927 837120 : CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1928 :
1929 418560 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1930 : JSRegExp::Initialize(regexp, source, flags));
1931 :
1932 418560 : return *regexp;
1933 : }
1934 :
1935 7384 : RUNTIME_FUNCTION(Runtime_IsRegExp) {
1936 : SealHandleScope shs(isolate);
1937 : DCHECK_EQ(1, args.length());
1938 3692 : CONVERT_ARG_CHECKED(Object, obj, 0);
1939 3692 : return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1940 : }
1941 :
1942 : } // namespace internal
1943 : } // namespace v8
|