Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/runtime/runtime-utils.h"
6 :
7 : #include <functional>
8 :
9 : #include "src/arguments.h"
10 : #include "src/conversions-inl.h"
11 : #include "src/isolate-inl.h"
12 : #include "src/messages.h"
13 : #include "src/regexp/jsregexp-inl.h"
14 : #include "src/regexp/jsregexp.h"
15 : #include "src/regexp/regexp-utils.h"
16 : #include "src/string-builder.h"
17 : #include "src/string-search.h"
18 :
19 : namespace v8 {
20 : namespace internal {
21 :
22 : namespace {
23 :
24 : // Looks up the capture of the given name. Returns the (1-based) numbered
25 : // capture index or -1 on failure.
26 168 : int LookupNamedCapture(std::function<bool(String*)> name_matches,
27 : FixedArray* capture_name_map) {
28 : // TODO(jgruber): Sort capture_name_map and do binary search via
29 : // internalized strings.
30 :
31 : int maybe_capture_index = -1;
32 168 : const int named_capture_count = capture_name_map->length() >> 1;
33 504 : for (int j = 0; j < named_capture_count; j++) {
34 : // The format of {capture_name_map} is documented at
35 : // JSRegExp::kIrregexpCaptureNameMapIndex.
36 420 : const int name_ix = j * 2;
37 420 : const int index_ix = j * 2 + 1;
38 :
39 : String* capture_name = String::cast(capture_name_map->get(name_ix));
40 420 : if (!name_matches(capture_name)) continue;
41 :
42 : maybe_capture_index = Smi::cast(capture_name_map->get(index_ix))->value();
43 84 : break;
44 : }
45 :
46 168 : return maybe_capture_index;
47 : }
48 :
49 : } // namespace
50 :
// Pre-parsed form of a replacement string. Compile() turns the replacement
// into a list of parts (literal pieces of the replacement, the text before or
// after the match, and capture references); Apply() then emits those parts
// into a ReplacementStringBuilder for a single match.
51 : class CompiledReplacement {
52 : public:
// The numeric arguments to the two lists are presumably initial capacities
// (TODO confirm against ZoneList); all list storage comes from |zone|.
53 83981 : explicit CompiledReplacement(Zone* zone)
54 83981 : : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
55 :
56 : // Return whether the replacement is simple, i.e. contains nothing that had
57 : // to be split into parts. Can also fail and return Nothing if the given
58 : // replacement string is invalid (and requires throwing a SyntaxError).
59 : Maybe<bool> Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
60 : int capture_count, int subject_length);
61 :
62 : // Use Apply only if Compile returned false. |match| is indexed as
// match[2 * n] / match[2 * n + 1] for the start / end of capture n.
63 : void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
64 : int32_t* match);
65 :
66 : // Number of distinct parts of the replacement pattern.
67 21080 : int parts() { return parts_.length(); }
68 :
69 : Zone* zone() const { return zone_; }
70 :
71 : private:
// Positive tag values of a ReplacementPart. Values start at 1 because
// non-positive tags are reserved for the temporary substring encoding.
72 : enum PartType {
73 : SUBJECT_PREFIX = 1,
74 : SUBJECT_SUFFIX,
75 : SUBJECT_CAPTURE,
76 : REPLACEMENT_SUBSTRING,
77 : REPLACEMENT_STRING,
78 : NUMBER_OF_PART_TYPES
79 : };
80 :
// One element of the compiled replacement; see the comments on |tag| and
// |data| below for the encoding.
81 : struct ReplacementPart {
// The whole match is represented as capture 0.
82 : static inline ReplacementPart SubjectMatch() {
83 : return ReplacementPart(SUBJECT_CAPTURE, 0);
84 : }
85 : static inline ReplacementPart SubjectCapture(int capture_index) {
86 : return ReplacementPart(SUBJECT_CAPTURE, capture_index);
87 : }
88 : static inline ReplacementPart SubjectPrefix() {
89 : return ReplacementPart(SUBJECT_PREFIX, 0);
90 : }
// Stores the subject length in |data|; Apply() reads it back as the end of
// the suffix slice.
91 : static inline ReplacementPart SubjectSuffix(int subject_length) {
92 : return ReplacementPart(SUBJECT_SUFFIX, subject_length);
93 : }
94 : static inline ReplacementPart ReplacementString() {
95 : return ReplacementPart(REPLACEMENT_STRING, 0);
96 : }
// Temporary encoding of the replacement slice [from, to): the start index is
// negated into |tag| (so from == 0 yields tag == 0) and |to| goes into |data|.
97 : static inline ReplacementPart ReplacementSubString(int from, int to) {
98 : DCHECK(from >= 0);
99 : DCHECK(to > from);
100 1050 : return ReplacementPart(-from, to);
101 : }
102 :
103 : // If tag <= 0 then it is the negation of a start index of a substring of
104 : // the replacement pattern, otherwise it's a value from PartType.
105 : ReplacementPart(int tag, int data) : tag(tag), data(data) {
106 : // Must be non-positive or a PartType value.
107 : DCHECK(tag < NUMBER_OF_PART_TYPES);
108 : }
109 : // Either a value of PartType or a non-positive number that is
110 : // the negation of an index into the replacement string.
111 : int tag;
112 : // The data value's interpretation depends on the value of tag:
113 : // tag == SUBJECT_PREFIX: data is unused.
114 : // tag == SUBJECT_SUFFIX: data is the length of the subject string.
115 : // tag == SUBJECT_CAPTURE: data is the number of the capture.
116 : // tag == REPLACEMENT_SUBSTRING ||
117 : // tag == REPLACEMENT_STRING: data is index into array of substrings
118 : // of the replacement string.
119 : // tag <= 0: Temporary representation of the substring of the replacement
120 : // string ranging over -tag .. data.
121 : // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
122 : // substring objects.
123 : int data;
124 : };
125 :
// Scans |characters| for '$' substitutions and appends the resulting parts to
// |parts|. Throughout the loop, |last| is the start of the literal text that
// has not yet been emitted as a part and |i| is the scan position.
// Returns Just(true) if no part boundary was ever produced (the replacement
// can be used verbatim), Just(false) if parts were produced, and Nothing for
// a "$<" reference that is unterminated or names an unknown group.
126 : template <typename Char>
127 83981 : Maybe<bool> ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
128 : Vector<Char> characters,
129 : FixedArray* capture_name_map,
130 : int capture_count, int subject_length,
131 : Zone* zone) {
132 : // Equivalent to String::GetSubstitution, except that this method converts
133 : // the replacement string into an internal representation that avoids
134 : // repeated parsing when used repeatedly.
135 : DCHECK_IMPLIES(capture_name_map != nullptr,
136 : FLAG_harmony_regexp_named_captures);
137 :
138 83981 : int length = characters.length();
139 : int last = 0;
140 141821205 : for (int i = 0; i < length; i++) {
141 141737295 : Char c = characters[i];
142 141737295 : if (c == '$') {
143 4024 : int next_index = i + 1;
144 4024 : if (next_index == length) { // No next character!
145 : break;
146 : }
147 4009 : Char c2 = characters[next_index];
148 4009 : switch (c2) {
149 : case '$':
// "$$" stands for a single literal '$'.
150 120 : if (i > last) {
151 : // There is a substring before. Include the first "$".
152 : parts->Add(
153 : ReplacementPart::ReplacementSubString(last, next_index),
154 150 : zone);
155 75 : last = next_index + 1; // Continue after the second "$".
156 : } else {
157 : // Let the next substring start with the second "$".
158 : last = next_index;
159 : }
160 : i = next_index;
161 120 : break;
162 : case '`':
// "$`": the part of the subject before the match. Flush pending literal text
// first.
163 30 : if (i > last) {
164 60 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
165 : }
166 60 : parts->Add(ReplacementPart::SubjectPrefix(), zone);
167 : i = next_index;
168 30 : last = i + 1;
169 30 : break;
170 : case '\'':
// "$'": the part of the subject after the match.
171 30 : if (i > last) {
172 60 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
173 : }
174 60 : parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
175 : i = next_index;
176 30 : last = i + 1;
177 30 : break;
178 : case '&':
// "$&": the matched text itself.
179 30 : if (i > last) {
180 60 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
181 : }
182 60 : parts->Add(ReplacementPart::SubjectMatch(), zone);
183 : i = next_index;
184 30 : last = i + 1;
185 30 : break;
186 : case '0':
187 : case '1':
188 : case '2':
189 : case '3':
190 : case '4':
191 : case '5':
192 : case '6':
193 : case '7':
194 : case '8':
195 : case '9': {
// "$n" / "$nn": numbered capture reference.
196 3686 : int capture_ref = c2 - '0';
197 3686 : if (capture_ref > capture_count) {
// Out-of-range single digit: skip it and leave |last| untouched, so the text
// stays part of the surrounding literal.
198 : i = next_index;
199 : continue;
200 : }
201 2516 : int second_digit_index = next_index + 1;
202 2516 : if (second_digit_index < length) {
203 : // Peek ahead to see if we have two digits.
204 2308 : Char c3 = characters[second_digit_index];
205 2308 : if ('0' <= c3 && c3 <= '9') { // Double digits.
206 2220 : int double_digit_ref = capture_ref * 10 + c3 - '0';
// Only take the second digit if the two-digit number is a valid capture.
207 2220 : if (double_digit_ref <= capture_count) {
208 : next_index = second_digit_index;
209 : capture_ref = double_digit_ref;
210 : }
211 : }
212 : }
// A reference that resolved to 0 (e.g. "$0") emits no part and stays literal.
213 2516 : if (capture_ref > 0) {
214 2456 : if (i > last) {
215 : parts->Add(ReplacementPart::ReplacementSubString(last, i),
216 180 : zone);
217 : }
218 : DCHECK(capture_ref <= capture_count);
219 4912 : parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
220 2456 : last = next_index + 1;
221 : }
222 : i = next_index;
223 2516 : break;
224 : }
225 : case '<': {
// "$<name>": named capture reference. Without a capture-name map the text is
// left as a literal.
226 98 : if (capture_name_map == nullptr) {
227 : i = next_index;
228 : break;
229 : }
230 :
231 : // Scan until the next '>', throwing a SyntaxError exception if one
232 : // is not found, and let the enclosed substring be groupName.
233 :
234 98 : const int name_start_index = next_index + 1;
235 : int closing_bracket_index = -1;
236 392 : for (int j = name_start_index; j < length; j++) {
237 378 : if (characters[j] == '>') {
238 : closing_bracket_index = j;
239 : break;
240 : }
241 : }
242 :
243 : // Throw a SyntaxError for invalid replacement strings.
244 98 : if (closing_bracket_index == -1) return Nothing<bool>();
245 :
246 : Vector<Char> requested_name =
247 : characters.SubVector(name_start_index, closing_bracket_index);
248 :
249 : // Let capture be ? Get(namedCaptures, groupName).
250 :
251 : const int capture_index = LookupNamedCapture(
252 : [=](String* capture_name) {
253 : return capture_name->IsEqualTo(requested_name);
254 210 : },
255 168 : capture_name_map);
256 :
257 : // If ? HasProperty(_namedCaptures_, _groupName_) is *false*, throw
258 : // a *SyntaxError* exception.
259 84 : if (capture_index == -1) return Nothing<bool>();
260 :
261 : // If capture is undefined, replace the text through the following
262 : // '>' with the empty string.
263 : // Otherwise, replace the text through the following '>' with
264 : // ? ToString(capture).
265 :
266 : DCHECK(1 <= capture_index && capture_index <= capture_count);
267 :
268 42 : if (i > last) {
269 0 : parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
270 : }
271 84 : parts->Add(ReplacementPart::SubjectCapture(capture_index), zone);
272 42 : last = closing_bracket_index + 1;
273 : i = closing_bracket_index;
274 42 : break;
275 : }
276 : default:
// Any other character after '$': not a substitution, keep both as literal.
277 : i = next_index;
278 : break;
279 : }
280 : }
281 : }
282 83925 : if (length > last) {
283 82219 : if (last == 0) {
284 : // Replacement is simple. Do not use Apply to do the replacement.
285 : return Just(true);
286 : } else {
// Trailing literal text after the last substitution.
287 1590 : parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
288 : }
289 : }
290 : return Just(false);
291 : }
292 :
// Parts in the order in which Apply() emits them.
293 : ZoneList<ReplacementPart> parts_;
// String objects for the literal pieces; indexed by ReplacementPart::data of
// REPLACEMENT_{SUB,}STRING parts.
294 : ZoneList<Handle<String> > replacement_substrings_;
295 : Zone* zone_;
296 : };
297 :
298 83981 : Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
299 : Handle<String> replacement,
300 : int capture_count,
301 85031 : int subject_length) {
302 : {
303 : DisallowHeapAllocation no_gc;
304 83981 : String::FlatContent content = replacement->GetFlatContent();
305 : DCHECK(content.IsFlat());
306 :
307 : FixedArray* capture_name_map = nullptr;
308 83981 : if (capture_count > 0) {
309 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
310 : Object* maybe_capture_name_map = regexp->CaptureNameMap();
311 2828 : if (maybe_capture_name_map->IsFixedArray()) {
312 : DCHECK(FLAG_harmony_regexp_named_captures);
313 : capture_name_map = FixedArray::cast(maybe_capture_name_map);
314 : }
315 : }
316 :
317 : Maybe<bool> simple = Nothing<bool>();
318 83981 : if (content.IsOneByte()) {
319 : simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
320 : capture_name_map, capture_count,
321 81203 : subject_length, zone());
322 : } else {
323 : DCHECK(content.IsTwoByte());
324 : simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
325 : capture_name_map, capture_count,
326 2778 : subject_length, zone());
327 : }
328 167906 : if (simple.IsNothing() || simple.FromJust()) return simple;
329 : }
330 :
331 : Isolate* isolate = replacement->GetIsolate();
332 : // Find substrings of replacement string and create them as String objects.
333 : int substring_index = 0;
334 7189 : for (int i = 0, n = parts_.length(); i < n; i++) {
335 3638 : int tag = parts_[i].tag;
336 3638 : if (tag <= 0) { // A replacement string slice.
337 1050 : int from = -tag;
338 1050 : int to = parts_[i].data;
339 : replacement_substrings_.Add(
340 2100 : isolate->factory()->NewSubString(replacement, from, to), zone());
341 1050 : parts_[i].tag = REPLACEMENT_SUBSTRING;
342 1050 : parts_[i].data = substring_index;
343 1050 : substring_index++;
344 2588 : } else if (tag == REPLACEMENT_STRING) {
345 : replacement_substrings_.Add(replacement, zone());
346 0 : parts_[i].data = substring_index;
347 0 : substring_index++;
348 : }
349 : }
350 : return Just(false);
351 : }
352 :
353 :
354 2902 : void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
355 : int match_from, int match_to, int32_t* match) {
356 : DCHECK_LT(0, parts_.length());
357 7658 : for (int i = 0, n = parts_.length(); i < n; i++) {
358 4756 : ReplacementPart part = parts_[i];
359 4756 : switch (part.tag) {
360 : case SUBJECT_PREFIX:
361 75 : if (match_from > 0) builder->AddSubjectSlice(0, match_from);
362 : break;
363 : case SUBJECT_SUFFIX: {
364 : int subject_length = part.data;
365 75 : if (match_to < subject_length) {
366 60 : builder->AddSubjectSlice(match_to, subject_length);
367 : }
368 : break;
369 : }
370 : case SUBJECT_CAPTURE: {
371 : int capture = part.data;
372 2896 : int from = match[capture * 2];
373 2896 : int to = match[capture * 2 + 1];
374 2896 : if (from >= 0 && to > from) {
375 2763 : builder->AddSubjectSlice(from, to);
376 : }
377 : break;
378 : }
379 : case REPLACEMENT_SUBSTRING:
380 : case REPLACEMENT_STRING:
381 3420 : builder->AddString(replacement_substrings_[part.data]);
382 1710 : break;
383 : default:
384 0 : UNREACHABLE();
385 : }
386 : }
387 2902 : }
388 :
389 1530299 : void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
390 : List<int>* indices, unsigned int limit) {
391 : DCHECK(limit > 0);
392 : // Collect indices of pattern in subject using memchr.
393 : // Stop after finding at most limit values.
394 1530299 : const uint8_t* subject_start = subject.start();
395 1530299 : const uint8_t* subject_end = subject_start + subject.length();
396 : const uint8_t* pos = subject_start;
397 4508443 : while (limit > 0) {
398 : pos = reinterpret_cast<const uint8_t*>(
399 2978099 : memchr(pos, pattern, subject_end - pos));
400 4508398 : if (pos == NULL) return;
401 1447845 : indices->Add(static_cast<int>(pos - subject_start));
402 1447845 : pos++;
403 1447845 : limit--;
404 : }
405 : }
406 :
407 58860 : void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
408 : List<int>* indices, unsigned int limit) {
409 : DCHECK(limit > 0);
410 58860 : const uc16* subject_start = subject.start();
411 58860 : const uc16* subject_end = subject_start + subject.length();
412 2072584 : for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
413 2013724 : if (*pos == pattern) {
414 18501 : indices->Add(static_cast<int>(pos - subject_start));
415 18501 : limit--;
416 : }
417 : }
418 58860 : }
419 :
420 : template <typename SubjectChar, typename PatternChar>
421 47013 : void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
422 : Vector<const PatternChar> pattern, List<int>* indices,
423 : unsigned int limit) {
424 : DCHECK(limit > 0);
425 : // Collect indices of pattern in subject.
426 : // Stop after finding at most limit values.
427 0 : int pattern_length = pattern.length();
428 47013 : int index = 0;
429 0 : StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
430 52899 : while (limit > 0) {
431 105798 : index = search.Search(subject, index);
432 99912 : if (index < 0) return;
433 5886 : indices->Add(index);
434 5886 : index += pattern_length;
435 5886 : limit--;
436 : }
437 : }
438 :
439 1636172 : void FindStringIndicesDispatch(Isolate* isolate, String* subject,
440 : String* pattern, List<int>* indices,
441 : unsigned int limit) {
442 : {
443 : DisallowHeapAllocation no_gc;
444 1636172 : String::FlatContent subject_content = subject->GetFlatContent();
445 1636172 : String::FlatContent pattern_content = pattern->GetFlatContent();
446 : DCHECK(subject_content.IsFlat());
447 : DCHECK(pattern_content.IsFlat());
448 1636172 : if (subject_content.IsOneByte()) {
449 1577208 : Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
450 1577208 : if (pattern_content.IsOneByte()) {
451 : Vector<const uint8_t> pattern_vector =
452 1577208 : pattern_content.ToOneByteVector();
453 1577208 : if (pattern_vector.length() == 1) {
454 : FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
455 1530299 : limit);
456 : } else {
457 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
458 46909 : limit);
459 : }
460 : } else {
461 : FindStringIndices(isolate, subject_vector,
462 0 : pattern_content.ToUC16Vector(), indices, limit);
463 : }
464 : } else {
465 58964 : Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
466 58964 : if (pattern_content.IsOneByte()) {
467 : Vector<const uint8_t> pattern_vector =
468 58964 : pattern_content.ToOneByteVector();
469 58964 : if (pattern_vector.length() == 1) {
470 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
471 58860 : limit);
472 : } else {
473 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
474 104 : limit);
475 : }
476 : } else {
477 0 : Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
478 0 : if (pattern_vector.length() == 1) {
479 : FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
480 0 : limit);
481 : } else {
482 : FindStringIndices(isolate, subject_vector, pattern_vector, indices,
483 0 : limit);
484 : }
485 : }
486 : }
487 : }
488 1636172 : }
489 :
490 : namespace {
491 55704 : List<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
492 55704 : List<int>* list = isolate->regexp_indices();
493 : list->Rewind(0);
494 55704 : return list;
495 : }
496 :
497 55704 : void TruncateRegexpIndicesList(Isolate* isolate) {
498 : // Same size as smallest zone segment, preserving behavior from the
499 : // runtime zone.
500 : static const int kMaxRegexpIndicesListCapacity = 8 * KB;
501 64929 : if (isolate->regexp_indices()->capacity() > kMaxRegexpIndicesListCapacity) {
502 : isolate->regexp_indices()->Clear(); // Throw away backing storage
503 : }
504 55704 : }
505 : } // namespace
506 :
507 : template <typename ResultSeqString>
508 1580468 : MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
509 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
510 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
511 : DCHECK(subject->IsFlat());
512 : DCHECK(replacement->IsFlat());
513 :
514 3160936 : List<int>* indices = GetRewoundRegexpIndicesList(isolate);
515 :
516 : DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
517 : String* pattern =
518 : String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
519 : int subject_len = subject->length();
520 : int pattern_len = pattern->length();
521 : int replacement_len = replacement->length();
522 :
523 1580468 : FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff);
524 :
525 : int matches = indices->length();
526 3151666 : if (matches == 0) return *subject;
527 :
528 : // Detect integer overflow.
529 : int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
530 : static_cast<int64_t>(pattern_len)) *
531 : static_cast<int64_t>(matches) +
532 9270 : static_cast<int64_t>(subject_len);
533 : int result_len;
534 9270 : if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
535 : STATIC_ASSERT(String::kMaxLength < kMaxInt);
536 : result_len = kMaxInt; // Provoke exception.
537 : } else {
538 9255 : result_len = static_cast<int>(result_len_64);
539 : }
540 9270 : if (result_len == 0) {
541 30 : return isolate->heap()->empty_string();
542 : }
543 :
544 : int subject_pos = 0;
545 : int result_pos = 0;
546 :
547 : MaybeHandle<SeqString> maybe_res;
548 : if (ResultSeqString::kHasOneByteEncoding) {
549 15852 : maybe_res = isolate->factory()->NewRawOneByteString(result_len);
550 : } else {
551 2628 : maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
552 : }
553 : Handle<SeqString> untyped_res;
554 9240 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
555 : Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
556 :
557 14796 : for (int i = 0; i < matches; i++) {
558 : // Copy non-matched subject content.
559 14796 : if (subject_pos < indices->at(i)) {
560 26912 : String::WriteToFlat(*subject, result->GetChars() + result_pos,
561 13456 : subject_pos, indices->at(i));
562 13456 : result_pos += indices->at(i) - subject_pos;
563 : }
564 :
565 : // Replace match.
566 14796 : if (replacement_len > 0) {
567 27288 : String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
568 13644 : replacement_len);
569 13644 : result_pos += replacement_len;
570 : }
571 :
572 14796 : subject_pos = indices->at(i) + pattern_len;
573 : }
574 : // Add remaining subject content at the end.
575 9225 : if (subject_pos < subject_len) {
576 15084 : String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
577 7542 : subject_len);
578 : }
579 :
580 9225 : int32_t match_indices[] = {indices->at(matches - 1),
581 9225 : indices->at(matches - 1) + pattern_len};
582 9225 : RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
583 :
584 : TruncateRegexpIndicesList(isolate);
585 :
586 9225 : return *result;
587 : }
588 :
589 84109 : MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
590 83981 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
591 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
592 : DCHECK(subject->IsFlat());
593 : DCHECK(replacement->IsFlat());
594 :
595 84109 : int capture_count = regexp->CaptureCount();
596 : int subject_length = subject->length();
597 :
598 84109 : JSRegExp::Type typeTag = regexp->TypeTag();
599 84109 : if (typeTag == JSRegExp::IRREGEXP) {
600 : // Ensure the RegExp is compiled so we can access the capture-name map.
601 11835 : if (RegExpImpl::IrregexpPrepare(regexp, subject) == -1) {
602 : DCHECK(isolate->has_pending_exception());
603 128 : return isolate->heap()->exception();
604 : }
605 : }
606 :
607 : // CompiledReplacement uses zone allocation.
608 83981 : Zone zone(isolate->allocator(), ZONE_NAME);
609 83981 : CompiledReplacement compiled_replacement(&zone);
610 : Maybe<bool> maybe_simple_replace = compiled_replacement.Compile(
611 83981 : regexp, replacement, capture_count, subject_length);
612 83981 : if (maybe_simple_replace.IsNothing()) {
613 112 : THROW_NEW_ERROR_RETURN_FAILURE(
614 : isolate, NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
615 : replacement));
616 : }
617 :
618 : const bool simple_replace = maybe_simple_replace.FromJust();
619 :
620 : // Shortcut for simple non-regexp global replacements
621 83925 : if (typeTag == JSRegExp::ATOM && simple_replace) {
622 142418 : if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
623 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
624 68621 : isolate, subject, regexp, replacement, last_match_info);
625 : } else {
626 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
627 3518 : isolate, subject, regexp, replacement, last_match_info);
628 : }
629 : }
630 :
631 11786 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
632 11786 : if (global_cache.HasException()) return isolate->heap()->exception();
633 :
634 : int32_t* current_match = global_cache.FetchNext();
635 11786 : if (current_match == NULL) {
636 1246 : if (global_cache.HasException()) return isolate->heap()->exception();
637 1152 : return *subject;
638 : }
639 :
640 : // Guessing the number of parts that the final result string is built
641 : // from. Global regexps can match any number of times, so we guess
642 : // conservatively.
643 10540 : int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
644 10540 : ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
645 :
646 : // Number of parts added by compiled replacement plus preceeding
647 : // string and possibly suffix after last match. It is possible for
648 : // all components to use two elements when encoded as two smis.
649 10540 : const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
650 :
651 : int prev = 0;
652 :
653 19580296 : do {
654 : builder.EnsureCapacity(parts_added_per_loop);
655 :
656 19580296 : int start = current_match[0];
657 19580296 : int end = current_match[1];
658 :
659 19580296 : if (prev < start) {
660 349660 : builder.AddSubjectSlice(prev, start);
661 : }
662 :
663 19580296 : if (simple_replace) {
664 19577394 : builder.AddString(replacement);
665 : } else {
666 2902 : compiled_replacement.Apply(&builder, start, end, current_match);
667 : }
668 : prev = end;
669 :
670 : current_match = global_cache.FetchNext();
671 : } while (current_match != NULL);
672 :
673 10540 : if (global_cache.HasException()) return isolate->heap()->exception();
674 :
675 10540 : if (prev < subject_length) {
676 : builder.EnsureCapacity(2);
677 8455 : builder.AddSubjectSlice(prev, subject_length);
678 : }
679 :
680 : RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
681 10540 : global_cache.LastSuccessfulMatch());
682 :
683 115572 : RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
684 : }
685 :
// Global replace of all matches of |regexp| in |subject| by the empty string,
// i.e. removes every match. The unmatched pieces are copied directly into a
// sequential string of type ResultSeqString.
//
// Returns the exception sentinel if matching fails, |subject| itself if
// nothing matched, the empty string if nothing is left, and the shortened
// result string otherwise.
686 : template <typename ResultSeqString>
687 1716142 : MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
688 : Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
689 : Handle<RegExpMatchInfo> last_match_info) {
690 : DCHECK(subject->IsFlat());
691 :
692 : // Shortcut for simple non-regexp global replacements: delegate to the atom
// replacement routine with the empty string as the replacement.
693 1716142 : if (regexp->TypeTag() == JSRegExp::ATOM) {
694 1508329 : Handle<String> empty_string = isolate->factory()->empty_string();
695 1508329 : if (subject->IsOneByteRepresentation()) {
696 : return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
697 1459656 : isolate, subject, regexp, empty_string, last_match_info);
698 : } else {
699 : return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
700 48673 : isolate, subject, regexp, empty_string, last_match_info);
701 : }
702 : }
703 :
704 207813 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
705 207813 : if (global_cache.HasException()) return isolate->heap()->exception();
706 :
707 : int32_t* current_match = global_cache.FetchNext();
708 207813 : if (current_match == NULL) {
// No match at all: either matching failed or the subject is returned as-is.
709 206456 : if (global_cache.HasException()) return isolate->heap()->exception();
710 206441 : return *subject;
711 : }
712 :
713 1357 : int start = current_match[0];
714 1357 : int end = current_match[1];
715 1357 : int capture_count = regexp->CaptureCount();
716 : int subject_length = subject->length();
717 :
// Only the first match is subtracted here, so |new_length| is an upper bound
// on the final length; the string is shrunk to its real size further down.
718 1357 : int new_length = subject_length - (end - start);
719 1357 : if (new_length == 0) return isolate->heap()->empty_string();
720 :
721 : Handle<ResultSeqString> answer;
722 : if (ResultSeqString::kHasOneByteEncoding) {
723 : answer = Handle<ResultSeqString>::cast(
724 1726 : isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
725 : } else {
726 : answer = Handle<ResultSeqString>::cast(
727 764 : isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
728 : }
729 :
// |prev| is the end of the previous match; |position| is the write offset
// into |answer|.
730 : int prev = 0;
731 : int position = 0;
732 :
733 1979897 : do {
734 1979897 : start = current_match[0];
735 1979897 : end = current_match[1];
736 1979897 : if (prev < start) {
737 : // Add substring subject[prev;start] to answer string.
738 3953398 : String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
739 1976699 : position += start - prev;
740 : }
741 : prev = end;
742 :
743 : current_match = global_cache.FetchNext();
744 : } while (current_match != NULL);
745 :
746 1245 : if (global_cache.HasException()) return isolate->heap()->exception();
747 :
748 1245 : RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
749 : global_cache.LastSuccessfulMatch());
750 :
751 1245 : if (prev < subject_length) {
752 : // Add substring subject[prev;length] to answer string.
753 1448 : String::WriteToFlat(*subject, answer->GetChars() + position, prev,
754 724 : subject_length);
755 724 : position += subject_length - prev;
756 : }
757 :
758 1245 : if (position == 0) return isolate->heap()->empty_string();
759 :
760 : // Shorten string and fill. |delta| is the difference between the object
// size that was allocated and the object size actually needed.
761 : int string_size = ResultSeqString::SizeFor(position);
762 : int allocated_string_size = ResultSeqString::SizeFor(new_length);
763 965 : int delta = allocated_string_size - string_size;
764 :
765 : answer->set_length(position);
// Equal object sizes: nothing to give back.
766 1597 : if (delta == 0) return *answer;
767 :
768 333 : Address end_of_string = answer->address() + string_size;
769 333 : Heap* heap = isolate->heap();
770 :
771 : // The trimming is performed on a newly allocated object, which is on a
772 : // freshly allocated page or on an already swept page. Hence, the sweeper
773 : // thread can not get confused with the filler creation. No synchronization
774 : // needed.
775 : // TODO(hpayer): We should shrink the large object page if the size
776 : // of the object changed significantly.
// No filler is written for objects in large object space.
777 333 : if (!heap->lo_space()->Contains(*answer)) {
778 318 : heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
779 : }
780 666 : heap->AdjustLiveBytes(*answer, -delta);
781 333 : return *answer;
782 : }
783 :
784 : namespace {
785 :
786 1796479 : Object* StringReplaceGlobalRegExpWithStringHelper(
787 : Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
788 : Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
789 1796479 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
790 :
791 1796479 : subject = String::Flatten(subject);
792 :
793 1796479 : if (replacement->length() == 0) {
794 1716142 : if (subject->HasOnlyOneByteChars()) {
795 : return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
796 1666723 : isolate, subject, regexp, last_match_info);
797 : } else {
798 : return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
799 49419 : isolate, subject, regexp, last_match_info);
800 : }
801 : }
802 :
803 80337 : replacement = String::Flatten(replacement);
804 :
805 : return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
806 80337 : replacement, last_match_info);
807 : }
808 :
809 : } // namespace
810 :
// Runtime entry point for global regexp replacement with a string.
// Arguments: 0 = subject (String), 1 = regexp (JSRegExp),
// 2 = replacement (String), 3 = last match info (RegExpMatchInfo).
// Forwards to StringReplaceGlobalRegExpWithStringHelper and returns its result.
811 3558044 : RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
812 1779022 : HandleScope scope(isolate);
813 : DCHECK_EQ(4, args.length());
814 :
// Note that the conversions are not listed in argument-index order.
815 3558044 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
816 3558044 : CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
817 3558044 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
818 3558044 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
819 :
820 : return StringReplaceGlobalRegExpWithStringHelper(
821 1779022 : isolate, regexp, subject, replacement, last_match_info);
822 : }
823 :
824 145106 : RUNTIME_FUNCTION(Runtime_StringSplit) {
825 72553 : HandleScope handle_scope(isolate);
826 : DCHECK_EQ(3, args.length());
827 145106 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
828 145106 : CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
829 145106 : CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
830 72553 : CHECK(limit > 0);
831 :
832 72553 : int subject_length = subject->length();
833 72553 : int pattern_length = pattern->length();
834 72553 : CHECK(pattern_length > 0);
835 :
836 72553 : if (limit == 0xffffffffu) {
837 : FixedArray* last_match_cache_unused;
838 : Handle<Object> cached_answer(
839 : RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
840 : &last_match_cache_unused,
841 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
842 19227 : isolate);
843 19227 : if (*cached_answer != Smi::kZero) {
844 : // The cache FixedArray is a COW-array and can therefore be reused.
845 : Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
846 33698 : Handle<FixedArray>::cast(cached_answer));
847 : return *result;
848 : }
849 : }
850 :
851 : // The limit can be very large (0xffffffffu), but since the pattern
852 : // isn't empty, we can never create more parts than ~half the length
853 : // of the subject.
854 :
855 55704 : subject = String::Flatten(subject);
856 55704 : pattern = String::Flatten(pattern);
857 :
858 167112 : List<int>* indices = GetRewoundRegexpIndicesList(isolate);
859 :
860 55704 : FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
861 :
862 55704 : if (static_cast<uint32_t>(indices->length()) < limit) {
863 55659 : indices->Add(subject_length);
864 : }
865 :
866 : // The list indices now contains the end of each part to create.
867 :
868 : // Create JSArray of substrings separated by separator.
869 : int part_count = indices->length();
870 :
871 : Handle<JSArray> result =
872 : isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
873 55704 : INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
874 :
875 : DCHECK(result->HasFastObjectElements());
876 :
877 55704 : Handle<FixedArray> elements(FixedArray::cast(result->elements()));
878 :
879 55704 : if (part_count == 1 && indices->at(0) == subject_length) {
880 38804 : elements->set(0, *subject);
881 : } else {
882 : int part_start = 0;
883 508165 : FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
884 : int part_end = indices->at(i);
885 : Handle<String> substring =
886 : isolate->factory()->NewProperSubString(subject, part_start, part_end);
887 : elements->set(i, *substring);
888 : part_start = part_end + pattern_length;
889 : });
890 : }
891 :
892 55704 : if (limit == 0xffffffffu) {
893 2378 : if (result->HasFastObjectElements()) {
894 : RegExpResultsCache::Enter(isolate, subject, pattern, elements,
895 : isolate->factory()->empty_fixed_array(),
896 4756 : RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
897 : }
898 : }
899 :
900 55704 : TruncateRegexpIndicesList(isolate);
901 :
902 72553 : return *result;
903 : }
904 :
905 : // ES##sec-regexpcreate
906 : // RegExpCreate ( P, F )
907 3256 : RUNTIME_FUNCTION(Runtime_RegExpCreate) {
908 1628 : HandleScope scope(isolate);
909 : DCHECK_EQ(1, args.length());
910 1628 : CONVERT_ARG_HANDLE_CHECKED(Object, source_object, 0);
911 :
912 : Handle<String> source;
913 1628 : if (source_object->IsUndefined(isolate)) {
914 30 : source = isolate->factory()->empty_string();
915 : } else {
916 3196 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
917 : isolate, source, Object::ToString(isolate, source_object));
918 : }
919 :
920 4884 : Handle<Map> map(isolate->regexp_function()->initial_map());
921 : Handle<JSRegExp> regexp =
922 1628 : Handle<JSRegExp>::cast(isolate->factory()->NewJSObjectFromMap(map));
923 :
924 1628 : JSRegExp::Flags flags = JSRegExp::kNone;
925 :
926 1628 : RETURN_FAILURE_ON_EXCEPTION(isolate,
927 : JSRegExp::Initialize(regexp, source, flags));
928 :
929 1628 : return *regexp;
930 : }
931 :
932 1774050 : RUNTIME_FUNCTION(Runtime_RegExpExec) {
933 591350 : HandleScope scope(isolate);
934 : DCHECK_EQ(4, args.length());
935 1182700 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
936 1182700 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
937 1182700 : CONVERT_INT32_ARG_CHECKED(index, 2);
938 1182700 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
939 : // Due to the way the JS calls are constructed this must be less than the
940 : // length of a string, i.e. it is always a Smi. We check anyway for security.
941 591350 : CHECK(index >= 0);
942 591350 : CHECK(index <= subject->length());
943 591350 : isolate->counters()->regexp_entry_runtime()->Increment();
944 1182700 : RETURN_RESULT_OR_FAILURE(
945 591350 : isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
946 : }
947 :
948 34914 : RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
949 17457 : HandleScope scope(isolate);
950 : DCHECK_EQ(3, args.length());
951 34914 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
952 34914 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
953 34914 : CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
954 :
955 : Handle<RegExpMatchInfo> internal_match_info =
956 17457 : isolate->regexp_internal_match_info();
957 :
958 : return StringReplaceGlobalRegExpWithStringHelper(
959 17457 : isolate, regexp, subject, replacement, internal_match_info);
960 : }
961 :
962 : namespace {
963 :
964 4976 : class MatchInfoBackedMatch : public String::Match {
965 : public:
966 5032 : MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
967 : Handle<String> subject,
968 : Handle<RegExpMatchInfo> match_info)
969 10064 : : isolate_(isolate), match_info_(match_info) {
970 5032 : subject_ = String::Flatten(subject);
971 :
972 5032 : if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
973 : Object* o = regexp->CaptureNameMap();
974 4852 : has_named_captures_ = o->IsFixedArray();
975 4852 : if (has_named_captures_) {
976 : DCHECK(FLAG_harmony_regexp_named_captures);
977 98 : capture_name_map_ = handle(FixedArray::cast(o));
978 : }
979 : } else {
980 180 : has_named_captures_ = false;
981 : }
982 5032 : }
983 :
984 30 : Handle<String> GetMatch() override {
985 30 : return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
986 : }
987 :
988 30 : Handle<String> GetPrefix() override {
989 : const int match_start = match_info_->Capture(0);
990 30 : return isolate_->factory()->NewSubString(subject_, 0, match_start);
991 : }
992 :
993 30 : Handle<String> GetSuffix() override {
994 : const int match_end = match_info_->Capture(1);
995 : return isolate_->factory()->NewSubString(subject_, match_end,
996 30 : subject_->length());
997 : }
998 :
999 182 : bool HasNamedCaptures() override { return has_named_captures_; }
1000 :
1001 5032 : int CaptureCount() override {
1002 5032 : return match_info_->NumberOfCaptureRegisters() / 2;
1003 : }
1004 :
1005 11486 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1006 : Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1007 11486 : isolate_, match_info_, i, capture_exists);
1008 : return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1009 22972 : : isolate_->factory()->empty_string();
1010 : }
1011 :
1012 84 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
1013 : CaptureState* state) override {
1014 : DCHECK(has_named_captures_);
1015 : const int capture_index = LookupNamedCapture(
1016 210 : [=](String* capture_name) { return capture_name->Equals(*name); },
1017 252 : *capture_name_map_);
1018 :
1019 84 : if (capture_index == -1) {
1020 42 : *state = INVALID;
1021 : return name; // Arbitrary string handle.
1022 : }
1023 :
1024 : DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1025 :
1026 : bool capture_exists;
1027 : Handle<String> capture_value;
1028 84 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1029 : GetCapture(capture_index, &capture_exists),
1030 : String);
1031 :
1032 42 : if (!capture_exists) {
1033 14 : *state = UNMATCHED;
1034 14 : return isolate_->factory()->empty_string();
1035 : } else {
1036 28 : *state = MATCHED;
1037 : return capture_value;
1038 : }
1039 : }
1040 :
1041 : private:
1042 : Isolate* isolate_;
1043 : Handle<String> subject_;
1044 : Handle<RegExpMatchInfo> match_info_;
1045 :
1046 : bool has_named_captures_;
1047 : Handle<FixedArray> capture_name_map_;
1048 : };
1049 :
1050 626 : class VectorBackedMatch : public String::Match {
1051 : public:
1052 313 : VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1053 : Handle<String> match, int match_position,
1054 : ZoneVector<Handle<Object>>* captures,
1055 : Handle<Object> groups_obj)
1056 : : isolate_(isolate),
1057 : match_(match),
1058 : match_position_(match_position),
1059 626 : captures_(captures) {
1060 313 : subject_ = String::Flatten(subject);
1061 :
1062 : DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1063 313 : has_named_captures_ = !groups_obj->IsUndefined(isolate);
1064 313 : if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1065 313 : }
1066 :
1067 0 : Handle<String> GetMatch() override { return match_; }
1068 :
1069 0 : Handle<String> GetPrefix() override {
1070 0 : return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1071 : }
1072 :
1073 0 : Handle<String> GetSuffix() override {
1074 0 : const int match_end_position = match_position_ + match_->length();
1075 : return isolate_->factory()->NewSubString(subject_, match_end_position,
1076 0 : subject_->length());
1077 : }
1078 :
1079 238 : bool HasNamedCaptures() override { return has_named_captures_; }
1080 :
1081 626 : int CaptureCount() override { return static_cast<int>(captures_->size()); }
1082 :
1083 84 : MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1084 168 : Handle<Object> capture_obj = captures_->at(i);
1085 168 : if (capture_obj->IsUndefined(isolate_)) {
1086 0 : *capture_exists = false;
1087 0 : return isolate_->factory()->empty_string();
1088 : }
1089 84 : *capture_exists = true;
1090 84 : return Object::ToString(isolate_, capture_obj);
1091 : }
1092 :
1093 210 : MaybeHandle<String> GetNamedCapture(Handle<String> name,
1094 : CaptureState* state) override {
1095 : DCHECK(has_named_captures_);
1096 :
1097 : Maybe<bool> maybe_capture_exists =
1098 210 : JSReceiver::HasProperty(groups_obj_, name);
1099 210 : if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1100 :
1101 210 : if (!maybe_capture_exists.FromJust()) {
1102 84 : *state = INVALID;
1103 : return name; // Arbitrary string handle.
1104 : }
1105 :
1106 : Handle<Object> capture_obj;
1107 252 : ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1108 : Object::GetProperty(groups_obj_, name), String);
1109 252 : if (capture_obj->IsUndefined(isolate_)) {
1110 42 : *state = UNMATCHED;
1111 42 : return isolate_->factory()->empty_string();
1112 : } else {
1113 84 : *state = MATCHED;
1114 84 : return Object::ToString(isolate_, capture_obj);
1115 : }
1116 : }
1117 :
1118 : private:
1119 : Isolate* isolate_;
1120 : Handle<String> subject_;
1121 : Handle<String> match_;
1122 : const int match_position_;
1123 : ZoneVector<Handle<Object>>* captures_;
1124 :
1125 : bool has_named_captures_;
1126 : Handle<JSReceiver> groups_obj_;
1127 : };
1128 :
1129 : // Create the groups object (see also the RegExp result creation in
1130 : // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1131 112 : Handle<JSObject> ConstructNamedCaptureGroupsObject(
1132 : Isolate* isolate, Handle<FixedArray> capture_map,
1133 : std::function<Object*(int)> f_get_capture) {
1134 : DCHECK(FLAG_harmony_regexp_named_captures);
1135 112 : Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1136 :
1137 112 : const int capture_count = capture_map->length() >> 1;
1138 336 : for (int i = 0; i < capture_count; i++) {
1139 224 : const int name_ix = i * 2;
1140 224 : const int index_ix = i * 2 + 1;
1141 :
1142 : Handle<String> capture_name(String::cast(capture_map->get(name_ix)));
1143 : const int capture_ix = Smi::cast(capture_map->get(index_ix))->value();
1144 : DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1145 :
1146 224 : Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1147 : DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1148 :
1149 224 : JSObject::AddProperty(groups, capture_name, capture_value, NONE);
1150 : }
1151 :
1152 112 : return groups;
1153 : }
1154 :
1155 : // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1156 : // separate last match info. See comment on that function.
1157 : template <bool has_capture>
1158 141405 : static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1159 : Handle<JSRegExp> regexp,
1160 : Handle<RegExpMatchInfo> last_match_array,
1161 : Handle<JSArray> result_array) {
1162 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1163 : DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1164 : DCHECK(subject->IsFlat());
1165 :
1166 141405 : int capture_count = regexp->CaptureCount();
1167 : int subject_length = subject->length();
1168 :
1169 : static const int kMinLengthToCache = 0x1000;
1170 :
1171 141405 : if (subject_length > kMinLengthToCache) {
1172 : FixedArray* last_match_cache;
1173 : Object* cached_answer = RegExpResultsCache::Lookup(
1174 : isolate->heap(), *subject, regexp->data(), &last_match_cache,
1175 60 : RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1176 60 : if (cached_answer->IsFixedArray()) {
1177 0 : int capture_registers = (capture_count + 1) * 2;
1178 0 : int32_t* last_match = NewArray<int32_t>(capture_registers);
1179 0 : for (int i = 0; i < capture_registers; i++) {
1180 0 : last_match[i] = Smi::cast(last_match_cache->get(i))->value();
1181 : }
1182 : Handle<FixedArray> cached_fixed_array =
1183 : Handle<FixedArray>(FixedArray::cast(cached_answer));
1184 : // The cache FixedArray is a COW-array and we need to return a copy.
1185 : Handle<FixedArray> copied_fixed_array =
1186 : isolate->factory()->CopyFixedArrayWithMap(
1187 0 : cached_fixed_array, isolate->factory()->fixed_array_map());
1188 0 : JSArray::SetContent(result_array, copied_fixed_array);
1189 0 : RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1190 : last_match);
1191 : DeleteArray(last_match);
1192 : return *result_array;
1193 : }
1194 : }
1195 :
1196 141405 : RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1197 141405 : if (global_cache.HasException()) return isolate->heap()->exception();
1198 :
1199 : // Ensured in Runtime_RegExpExecMultiple.
1200 : DCHECK(result_array->HasFastObjectElements());
1201 : Handle<FixedArray> result_elements(
1202 : FixedArray::cast(result_array->elements()));
1203 141288 : if (result_elements->length() < 16) {
1204 0 : result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1205 : }
1206 :
1207 : FixedArrayBuilder builder(result_elements);
1208 :
1209 : // Position to search from.
1210 : int match_start = -1;
1211 : int match_end = 0;
1212 : bool first = true;
1213 :
1214 : // Two smis before and after the match, for very long strings.
1215 : static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1216 :
1217 : while (true) {
1218 : int32_t* current_match = global_cache.FetchNext();
1219 731799 : if (current_match == NULL) break;
1220 590511 : match_start = current_match[0];
1221 590511 : builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
1222 590511 : if (match_end < match_start) {
1223 95834 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1224 : match_start);
1225 : }
1226 590511 : match_end = current_match[1];
1227 : {
1228 : // Avoid accumulating new handles inside loop.
1229 : HandleScope temp_scope(isolate);
1230 : Handle<String> match;
1231 590511 : if (!first) {
1232 449321 : match = isolate->factory()->NewProperSubString(subject, match_start,
1233 : match_end);
1234 : } else {
1235 141190 : match =
1236 : isolate->factory()->NewSubString(subject, match_start, match_end);
1237 : first = false;
1238 : }
1239 :
1240 : if (has_capture) {
1241 : // Arguments array to replace function is match, captures, index and
1242 : // subject, i.e., 3 + capture count in total. If the RegExp contains
1243 : // named captures, they are also passed as the last argument.
1244 :
1245 : Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1246 : const bool has_named_captures = maybe_capture_map->IsFixedArray();
1247 : DCHECK_IMPLIES(has_named_captures, FLAG_harmony_regexp_named_captures);
1248 :
1249 : const int argc =
1250 282419 : has_named_captures ? 4 + capture_count : 3 + capture_count;
1251 :
1252 282419 : Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1253 : int cursor = 0;
1254 :
1255 282419 : elements->set(cursor++, *match);
1256 829569 : for (int i = 1; i <= capture_count; i++) {
1257 547150 : int start = current_match[i * 2];
1258 547150 : if (start >= 0) {
1259 547019 : int end = current_match[i * 2 + 1];
1260 : DCHECK(start <= end);
1261 : Handle<String> substring =
1262 547019 : isolate->factory()->NewSubString(subject, start, end);
1263 1094038 : elements->set(cursor++, *substring);
1264 : } else {
1265 : DCHECK(current_match[i * 2 + 1] < 0);
1266 262 : elements->set(cursor++, isolate->heap()->undefined_value());
1267 : }
1268 : }
1269 :
1270 282419 : elements->set(cursor++, Smi::FromInt(match_start));
1271 282419 : elements->set(cursor++, *subject);
1272 :
1273 282419 : if (has_named_captures) {
1274 : Handle<FixedArray> capture_map =
1275 84 : Handle<FixedArray>::cast(maybe_capture_map);
1276 : Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1277 168 : isolate, capture_map, [=](int ix) { return elements->get(ix); });
1278 84 : elements->set(cursor++, *groups);
1279 : }
1280 :
1281 : DCHECK_EQ(cursor, argc);
1282 282419 : builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1283 : } else {
1284 308092 : builder.Add(*match);
1285 : }
1286 : }
1287 : }
1288 :
1289 141288 : if (global_cache.HasException()) return isolate->heap()->exception();
1290 :
1291 141209 : if (match_start >= 0) {
1292 : // Finished matching, with at least one match.
1293 141190 : if (match_end < subject_length) {
1294 583 : ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1295 : subject_length);
1296 : }
1297 :
1298 141190 : RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
1299 : global_cache.LastSuccessfulMatch());
1300 :
1301 141190 : if (subject_length > kMinLengthToCache) {
1302 : // Store the last successful match into the array for caching.
1303 : // TODO(yangguo): do not expose last match to JS and simplify caching.
1304 60 : int capture_registers = (capture_count + 1) * 2;
1305 : Handle<FixedArray> last_match_cache =
1306 60 : isolate->factory()->NewFixedArray(capture_registers);
1307 : int32_t* last_match = global_cache.LastSuccessfulMatch();
1308 270 : for (int i = 0; i < capture_registers; i++) {
1309 210 : last_match_cache->set(i, Smi::FromInt(last_match[i]));
1310 : }
1311 : Handle<FixedArray> result_fixed_array = builder.array();
1312 120 : result_fixed_array->Shrink(builder.length());
1313 : // Cache the result and copy the FixedArray into a COW array.
1314 : Handle<FixedArray> copied_fixed_array =
1315 : isolate->factory()->CopyFixedArrayWithMap(
1316 60 : result_fixed_array, isolate->factory()->fixed_array_map());
1317 60 : RegExpResultsCache::Enter(
1318 : isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1319 : last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1320 : }
1321 282380 : return *builder.ToJSArray(result_array);
1322 : } else {
1323 19 : return isolate->heap()->null_value(); // No matches at all.
1324 : }
1325 : }
1326 :
1327 : // Legacy implementation of RegExp.prototype[Symbol.replace] which
1328 : // doesn't properly call the underlying exec method.
1329 8818 : MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1330 : Handle<JSRegExp> regexp,
1331 : Handle<String> string,
1332 : Handle<Object> replace_obj) {
1333 : // Functional fast-paths are dispatched directly by replace builtin.
1334 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1335 : DCHECK(!replace_obj->IsCallable());
1336 :
1337 : Factory* factory = isolate->factory();
1338 :
1339 : const int flags = regexp->GetFlags();
1340 8818 : const bool global = (flags & JSRegExp::kGlobal) != 0;
1341 8818 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1342 :
1343 : Handle<String> replace;
1344 17636 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1345 : Object::ToString(isolate, replace_obj), String);
1346 8818 : replace = String::Flatten(replace);
1347 :
1348 8818 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1349 :
1350 8818 : if (!global) {
1351 : // Non-global regexp search, string replace.
1352 :
1353 : uint32_t last_index = 0;
1354 5046 : if (sticky) {
1355 : Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
1356 0 : ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1357 : Object::ToLength(isolate, last_index_obj),
1358 : String);
1359 0 : last_index = PositiveNumberToUint32(*last_index_obj);
1360 :
1361 0 : if (static_cast<int>(last_index) > string->length()) last_index = 0;
1362 : }
1363 :
1364 : Handle<Object> match_indices_obj;
1365 10092 : ASSIGN_RETURN_ON_EXCEPTION(
1366 : isolate, match_indices_obj,
1367 : RegExpImpl::Exec(regexp, string, last_index, last_match_info), String);
1368 :
1369 5046 : if (match_indices_obj->IsNull(isolate)) {
1370 14 : if (sticky) regexp->SetLastIndex(0);
1371 : return string;
1372 : }
1373 :
1374 : auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1375 :
1376 : const int start_index = match_indices->Capture(0);
1377 : const int end_index = match_indices->Capture(1);
1378 :
1379 5032 : if (sticky) regexp->SetLastIndex(end_index);
1380 :
1381 5032 : IncrementalStringBuilder builder(isolate);
1382 5032 : builder.AppendString(factory->NewSubString(string, 0, start_index));
1383 :
1384 5032 : if (replace->length() > 0) {
1385 5032 : MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1386 : Handle<String> replacement;
1387 10064 : ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1388 : String::GetSubstitution(isolate, &m, replace),
1389 : String);
1390 4976 : builder.AppendString(replacement);
1391 : }
1392 :
1393 : builder.AppendString(
1394 4976 : factory->NewSubString(string, end_index, string->length()));
1395 4976 : return builder.Finish();
1396 : } else {
1397 : // Global regexp search, string replace.
1398 : DCHECK(global);
1399 7544 : RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1400 : String);
1401 :
1402 3772 : if (replace->length() == 0) {
1403 0 : if (string->HasOnlyOneByteChars()) {
1404 : Object* result =
1405 : StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1406 0 : isolate, string, regexp, last_match_info);
1407 : return handle(String::cast(result), isolate);
1408 : } else {
1409 : Object* result =
1410 : StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1411 0 : isolate, string, regexp, last_match_info);
1412 : return handle(String::cast(result), isolate);
1413 : }
1414 : }
1415 :
1416 : Object* result = StringReplaceGlobalRegExpWithString(
1417 3772 : isolate, string, regexp, replace, last_match_info);
1418 3772 : if (result->IsString()) {
1419 : return handle(String::cast(result), isolate);
1420 : } else {
1421 : return MaybeHandle<String>();
1422 : }
1423 : }
1424 :
1425 : UNREACHABLE();
1426 : return MaybeHandle<String>();
1427 : }
1428 :
1429 : } // namespace
1430 :
1431 : // This is only called for StringReplaceGlobalRegExpWithFunction.
1432 282810 : RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1433 141405 : HandleScope handles(isolate);
1434 : DCHECK_EQ(4, args.length());
1435 :
1436 282810 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1437 282810 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1438 282810 : CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1439 282810 : CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1440 141405 : CHECK(result_array->HasFastObjectElements());
1441 :
1442 141405 : subject = String::Flatten(subject);
1443 141405 : CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1444 :
1445 141405 : if (regexp->CaptureCount() == 0) {
1446 : return SearchRegExpMultiple<false>(isolate, subject, regexp,
1447 140395 : last_match_info, result_array);
1448 : } else {
1449 : return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1450 1010 : result_array);
1451 141405 : }
1452 : }
1453 :
1454 29414 : RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1455 14707 : HandleScope scope(isolate);
1456 : DCHECK_EQ(3, args.length());
1457 29414 : CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1458 29414 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1459 29414 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1460 :
1461 : DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1462 : DCHECK(replace_obj->map()->is_callable());
1463 :
1464 14707 : Factory* factory = isolate->factory();
1465 14707 : Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1466 :
1467 14707 : const int flags = regexp->GetFlags();
1468 : DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1469 :
1470 : // TODO(jgruber): This should be an easy port to CSA with massive payback.
1471 :
1472 14707 : const bool sticky = (flags & JSRegExp::kSticky) != 0;
1473 : uint32_t last_index = 0;
1474 14707 : if (sticky) {
1475 0 : Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
1476 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1477 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1478 0 : last_index = PositiveNumberToUint32(*last_index_obj);
1479 :
1480 0 : if (static_cast<int>(last_index) > subject->length()) last_index = 0;
1481 : }
1482 :
1483 : Handle<Object> match_indices_obj;
1484 29414 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1485 : isolate, match_indices_obj,
1486 : RegExpImpl::Exec(regexp, subject, last_index, last_match_info));
1487 :
1488 14692 : if (match_indices_obj->IsNull(isolate)) {
1489 14247 : if (sticky) regexp->SetLastIndex(0);
1490 : return *subject;
1491 : }
1492 :
1493 : Handle<RegExpMatchInfo> match_indices =
1494 445 : Handle<RegExpMatchInfo>::cast(match_indices_obj);
1495 :
1496 445 : const int index = match_indices->Capture(0);
1497 445 : const int end_of_match = match_indices->Capture(1);
1498 :
1499 445 : if (sticky) regexp->SetLastIndex(end_of_match);
1500 :
1501 445 : IncrementalStringBuilder builder(isolate);
1502 445 : builder.AppendString(factory->NewSubString(subject, 0, index));
1503 :
1504 : // Compute the parameter list consisting of the match, captures, index,
1505 : // and subject for the replace function invocation. If the RegExp contains
1506 : // named captures, they are also passed as the last argument.
1507 :
1508 : // The number of captures plus one for the match.
1509 445 : const int m = match_indices->NumberOfCaptureRegisters() / 2;
1510 :
1511 : bool has_named_captures = false;
1512 : Handle<FixedArray> capture_map;
1513 445 : if (m > 1) {
1514 : // The existence of capture groups implies IRREGEXP kind.
1515 : DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1516 :
1517 173 : Object* maybe_capture_map = regexp->CaptureNameMap();
1518 173 : if (maybe_capture_map->IsFixedArray()) {
1519 : has_named_captures = true;
1520 28 : capture_map = handle(FixedArray::cast(maybe_capture_map));
1521 : }
1522 : }
1523 :
1524 : DCHECK_IMPLIES(has_named_captures, FLAG_harmony_regexp_named_captures);
1525 445 : const int argc = has_named_captures ? m + 3 : m + 2;
1526 890 : ScopedVector<Handle<Object>> argv(argc);
1527 :
1528 : int cursor = 0;
1529 689 : for (int j = 0; j < m; j++) {
1530 : bool ok;
1531 : Handle<String> capture =
1532 689 : RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1533 689 : if (ok) {
1534 574 : argv[cursor++] = capture;
1535 : } else {
1536 115 : argv[cursor++] = factory->undefined_value();
1537 : }
1538 : }
1539 :
1540 890 : argv[cursor++] = handle(Smi::FromInt(index), isolate);
1541 445 : argv[cursor++] = subject;
1542 :
1543 445 : if (has_named_captures) {
1544 28 : argv[cursor++] = ConstructNamedCaptureGroupsObject(
1545 84 : isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1546 : }
1547 :
1548 : DCHECK_EQ(cursor, argc);
1549 :
1550 : Handle<Object> replacement_obj;
1551 1335 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1552 : isolate, replacement_obj,
1553 : Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1554 : argv.start()));
1555 :
1556 : Handle<String> replacement;
1557 890 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1558 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1559 :
1560 445 : builder.AppendString(replacement);
1561 : builder.AppendString(
1562 445 : factory->NewSubString(subject, end_of_match, subject->length()));
1563 :
1564 15597 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1565 : }
1566 :
1567 : namespace {
1568 :
1569 60 : MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1570 : Handle<Object> object,
1571 : uint32_t* out) {
1572 60 : if (object->IsUndefined(isolate)) {
1573 45 : *out = kMaxUInt32;
1574 : return object;
1575 : }
1576 :
1577 : Handle<Object> number;
1578 30 : ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
1579 15 : *out = NumberToUint32(*number);
1580 : return object;
1581 : }
1582 :
1583 45 : Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1584 : Handle<FixedArray> elems,
1585 : int num_elems) {
1586 45 : elems->Shrink(num_elems);
1587 45 : return isolate->factory()->NewJSArrayWithElements(elems);
1588 : }
1589 :
1590 : } // namespace
1591 :
1592 : // Slow path for:
1593 : // ES#sec-regexp.prototype-@@split
1594 : // RegExp.prototype [ @@split ] ( string, limit )
1595 120 : RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1596 60 : HandleScope scope(isolate);
1597 : DCHECK_EQ(3, args.length());
1598 :
1599 : DCHECK(args[1]->IsString());
1600 :
1601 120 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1602 120 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1603 60 : CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1604 :
1605 60 : Factory* factory = isolate->factory();
1606 :
1607 60 : Handle<JSFunction> regexp_fun = isolate->regexp_function();
1608 : Handle<Object> ctor;
1609 120 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1610 : isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1611 :
1612 : Handle<Object> flags_obj;
1613 180 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1614 : isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
1615 :
1616 : Handle<String> flags;
1617 120 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1618 : Object::ToString(isolate, flags_obj));
1619 :
1620 60 : Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1621 60 : const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1622 :
1623 60 : Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1624 60 : const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1625 :
1626 : Handle<String> new_flags = flags;
1627 60 : if (!sticky) {
1628 120 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1629 : factory->NewConsString(flags, y_str));
1630 : }
1631 :
1632 : Handle<JSReceiver> splitter;
1633 : {
1634 : const int argc = 2;
1635 :
1636 60 : ScopedVector<Handle<Object>> argv(argc);
1637 60 : argv[0] = recv;
1638 60 : argv[1] = new_flags;
1639 :
1640 60 : Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor);
1641 : Handle<Object> splitter_obj;
1642 120 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1643 : isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start()));
1644 :
1645 60 : splitter = Handle<JSReceiver>::cast(splitter_obj);
1646 : }
1647 :
1648 : uint32_t limit;
1649 60 : RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1650 :
1651 60 : const uint32_t length = string->length();
1652 :
1653 60 : if (limit == 0) return *factory->NewJSArray(0);
1654 :
1655 60 : if (length == 0) {
1656 : Handle<Object> result;
1657 45 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1658 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1659 : factory->undefined_value()));
1660 :
1661 15 : if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1662 :
1663 15 : Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1664 15 : elems->set(0, *string);
1665 30 : return *factory->NewJSArrayWithElements(elems);
1666 : }
1667 :
1668 : static const int kInitialArraySize = 8;
1669 45 : Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1670 : int num_elems = 0;
1671 :
1672 : uint32_t string_index = 0;
1673 : uint32_t prev_string_index = 0;
1674 225 : while (string_index < length) {
1675 180 : RETURN_FAILURE_ON_EXCEPTION(
1676 : isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1677 :
1678 : Handle<Object> result;
1679 540 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1680 : isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1681 : factory->undefined_value()));
1682 :
1683 180 : if (result->IsNull(isolate)) {
1684 : string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1685 75 : string_index, unicode);
1686 : continue;
1687 : }
1688 :
1689 : Handle<Object> last_index_obj;
1690 210 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1691 : isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1692 :
1693 210 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1694 : isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1695 :
1696 : const uint32_t end =
1697 105 : std::min(PositiveNumberToUint32(*last_index_obj), length);
1698 105 : if (end == prev_string_index) {
1699 : string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
1700 0 : string_index, unicode);
1701 : continue;
1702 : }
1703 :
1704 : {
1705 : Handle<String> substr =
1706 105 : factory->NewSubString(string, prev_string_index, string_index);
1707 105 : elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1708 105 : if (static_cast<uint32_t>(num_elems) == limit) {
1709 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1710 : }
1711 : }
1712 :
1713 : prev_string_index = end;
1714 :
1715 : Handle<Object> num_captures_obj;
1716 315 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1717 : isolate, num_captures_obj,
1718 : Object::GetProperty(result, isolate->factory()->length_string()));
1719 :
1720 210 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1721 : isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1722 105 : const int num_captures = PositiveNumberToUint32(*num_captures_obj);
1723 :
1724 105 : for (int i = 1; i < num_captures; i++) {
1725 : Handle<Object> capture;
1726 0 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1727 : isolate, capture, Object::GetElement(isolate, result, i));
1728 0 : elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
1729 0 : if (static_cast<uint32_t>(num_elems) == limit) {
1730 0 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1731 : }
1732 : }
1733 :
1734 : string_index = prev_string_index;
1735 : }
1736 :
1737 : {
1738 : Handle<String> substr =
1739 45 : factory->NewSubString(string, prev_string_index, length);
1740 45 : elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1741 : }
1742 :
1743 90 : return *NewJSArrayWithElements(isolate, elems, num_elems);
1744 : }
1745 :
1746 : // Slow path for:
1747 : // ES#sec-regexp.prototype-@@replace
1748 : // RegExp.prototype [ @@replace ] ( string, replaceValue )
1749 19265 : RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1750 9361 : HandleScope scope(isolate);
1751 : DCHECK_EQ(3, args.length());
1752 :
1753 18722 : CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1754 18722 : CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1755 9361 : Handle<Object> replace_obj = args.at(2);
1756 :
1757 9361 : Factory* factory = isolate->factory();
1758 :
1759 9361 : string = String::Flatten(string);
1760 :
1761 : // Fast-path for unmodified JSRegExps.
1762 9361 : if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1763 17636 : RETURN_RESULT_OR_FAILURE(
1764 : isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1765 : replace_obj));
1766 : }
1767 :
1768 543 : const uint32_t length = string->length();
1769 : const bool functional_replace = replace_obj->IsCallable();
1770 :
1771 : Handle<String> replace;
1772 543 : if (!functional_replace) {
1773 946 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1774 : Object::ToString(isolate, replace_obj));
1775 : }
1776 :
1777 : Handle<Object> global_obj;
1778 1629 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1779 : isolate, global_obj,
1780 : JSReceiver::GetProperty(recv, factory->global_string()));
1781 543 : const bool global = global_obj->BooleanValue();
1782 :
1783 : bool unicode = false;
1784 543 : if (global) {
1785 : Handle<Object> unicode_obj;
1786 936 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1787 : isolate, unicode_obj,
1788 : JSReceiver::GetProperty(recv, factory->unicode_string()));
1789 312 : unicode = unicode_obj->BooleanValue();
1790 :
1791 312 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1792 : RegExpUtils::SetLastIndex(isolate, recv, 0));
1793 : }
1794 :
1795 1086 : Zone zone(isolate->allocator(), ZONE_NAME);
1796 1086 : ZoneVector<Handle<Object>> results(&zone);
1797 :
1798 : while (true) {
1799 : Handle<Object> result;
1800 2706 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1801 : isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1802 : factory->undefined_value()));
1803 :
1804 882 : if (result->IsNull(isolate)) break;
1805 :
1806 495 : results.push_back(result);
1807 495 : if (!global) break;
1808 :
1809 : Handle<Object> match_obj;
1810 708 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1811 : Object::GetElement(isolate, result, 0));
1812 :
1813 : Handle<String> match;
1814 708 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1815 : Object::ToString(isolate, match_obj));
1816 :
1817 354 : if (match->length() == 0) {
1818 0 : RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1819 : isolate, recv, string, unicode));
1820 : }
1821 : }
1822 :
1823 : // TODO(jgruber): Look into ReplacementStringBuilder instead.
1824 528 : IncrementalStringBuilder builder(isolate);
1825 : uint32_t next_source_position = 0;
1826 :
1827 1710 : for (const auto& result : results) {
1828 : Handle<Object> captures_length_obj;
1829 1429 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1830 : isolate, captures_length_obj,
1831 : Object::GetProperty(result, factory->length_string()));
1832 :
1833 878 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1834 : isolate, captures_length_obj,
1835 : Object::ToLength(isolate, captures_length_obj));
1836 439 : const int captures_length = PositiveNumberToUint32(*captures_length_obj);
1837 :
1838 : Handle<Object> match_obj;
1839 878 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1840 : Object::GetElement(isolate, result, 0));
1841 :
1842 : Handle<String> match;
1843 878 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1844 : Object::ToString(isolate, match_obj));
1845 :
1846 439 : const int match_length = match->length();
1847 :
1848 : Handle<Object> position_obj;
1849 1317 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1850 : isolate, position_obj,
1851 : Object::GetProperty(result, factory->index_string()));
1852 :
1853 878 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1854 : isolate, position_obj, Object::ToInteger(isolate, position_obj));
1855 : const uint32_t position =
1856 439 : std::min(PositiveNumberToUint32(*position_obj), length);
1857 :
1858 : // Do not reserve capacity since captures_length is user-controlled.
1859 439 : ZoneVector<Handle<Object>> captures(&zone);
1860 :
1861 3163 : for (int n = 0; n < captures_length; n++) {
1862 : Handle<Object> capture;
1863 2724 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1864 : isolate, capture, Object::GetElement(isolate, result, n));
1865 :
1866 1362 : if (!capture->IsUndefined(isolate)) {
1867 2108 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1868 : Object::ToString(isolate, capture));
1869 : }
1870 1362 : captures.push_back(capture);
1871 : }
1872 :
1873 439 : Handle<Object> groups_obj = isolate->factory()->undefined_value();
1874 439 : if (FLAG_harmony_regexp_named_captures) {
1875 1050 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1876 : isolate, groups_obj,
1877 : Object::GetProperty(result, factory->groups_string()));
1878 : }
1879 :
1880 439 : const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1881 : DCHECK_IMPLIES(has_named_captures, FLAG_harmony_regexp_named_captures);
1882 :
1883 : Handle<String> replacement;
1884 439 : if (functional_replace) {
1885 : const int argc =
1886 126 : has_named_captures ? captures_length + 3 : captures_length + 2;
1887 126 : ScopedVector<Handle<Object>> argv(argc);
1888 :
1889 : int cursor = 0;
1890 350 : for (int j = 0; j < captures_length; j++) {
1891 350 : argv[cursor++] = captures[j];
1892 : }
1893 :
1894 252 : argv[cursor++] = handle(Smi::FromInt(position), isolate);
1895 126 : argv[cursor++] = string;
1896 126 : if (has_named_captures) argv[cursor++] = groups_obj;
1897 :
1898 : DCHECK_EQ(cursor, argc);
1899 :
1900 : Handle<Object> replacement_obj;
1901 378 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1902 : isolate, replacement_obj,
1903 : Execution::Call(isolate, replace_obj, factory->undefined_value(),
1904 : argc, argv.start()));
1905 :
1906 252 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1907 126 : isolate, replacement, Object::ToString(isolate, replacement_obj));
1908 : } else {
1909 : DCHECK(!functional_replace);
1910 313 : if (!groups_obj->IsUndefined(isolate)) {
1911 588 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1912 : isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
1913 : }
1914 : VectorBackedMatch m(isolate, string, match, position, &captures,
1915 313 : groups_obj);
1916 626 : ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1917 201 : isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1918 : }
1919 :
1920 327 : if (position >= next_source_position) {
1921 : builder.AppendString(
1922 327 : factory->NewSubString(string, next_source_position, position));
1923 327 : builder.AppendString(replacement);
1924 :
1925 327 : next_source_position = position + match_length;
1926 : }
1927 327 : }
1928 :
1929 416 : if (next_source_position < length) {
1930 : builder.AppendString(
1931 287 : factory->NewSubString(string, next_source_position, length));
1932 : }
1933 :
1934 10193 : RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1935 : }
1936 :
1937 0 : RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1938 : SealHandleScope shs(isolate);
1939 : DCHECK_EQ(0, args.length());
1940 0 : Object* exception = isolate->pending_exception();
1941 0 : isolate->clear_pending_exception();
1942 0 : return isolate->ReThrow(exception);
1943 : }
1944 :
1945 1186418 : RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1946 593209 : HandleScope scope(isolate);
1947 : DCHECK_EQ(3, args.length());
1948 1186418 : CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1949 1186418 : CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1950 1186418 : CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1951 :
1952 593209 : RETURN_FAILURE_ON_EXCEPTION(isolate,
1953 : JSRegExp::Initialize(regexp, source, flags));
1954 :
1955 593209 : return *regexp;
1956 : }
1957 :
1958 1676164 : RUNTIME_FUNCTION(Runtime_IsRegExp) {
1959 : SealHandleScope shs(isolate);
1960 : DCHECK_EQ(1, args.length());
1961 838082 : CONVERT_ARG_CHECKED(Object, obj, 0);
1962 838082 : return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1963 : }
1964 :
1965 : } // namespace internal
1966 : } // namespace v8
|